git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "libavutil/stereo3d.h"
  33
  34 #include "bytestream.h"
  35 #include "cabac_functions.h"
  36 #include "dsputil.h"
  37 #include "golomb.h"
  38 #include "hevc.h"
  39
  40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  41 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  42 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  43
  44 static const uint8_t scan_1x1[1] = { 0 };
  45
  46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  47
  48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  49
  50 static const uint8_t horiz_scan4x4_x[16] = {
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55 };
  56
  57 static const uint8_t horiz_scan4x4_y[16] = {
  58     0, 0, 0, 0,
  59     1, 1, 1, 1,
  60     2, 2, 2, 2,
  61     3, 3, 3, 3,
  62 };
  63
  64 static const uint8_t horiz_scan8x8_inv[8][8] = {
  65     {  0,  1,  2,  3, 16, 17, 18, 19, },
  66     {  4,  5,  6,  7, 20, 21, 22, 23, },
  67     {  8,  9, 10, 11, 24, 25, 26, 27, },
  68     { 12, 13, 14, 15, 28, 29, 30, 31, },
  69     { 32, 33, 34, 35, 48, 49, 50, 51, },
  70     { 36, 37, 38, 39, 52, 53, 54, 55, },
  71     { 40, 41, 42, 43, 56, 57, 58, 59, },
  72     { 44, 45, 46, 47, 60, 61, 62, 63, },
  73 };
  74
  75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  76
  77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  78
  79 static const uint8_t diag_scan2x2_inv[2][2] = {
  80     { 0, 2, },
  81     { 1, 3, },
  82 };
  83
  84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  85     0, 0, 1, 0,
  86     1, 2, 0, 1,
  87     2, 3, 1, 2,
  88     3, 2, 3, 3,
  89 };
  90
  91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  92     0, 1, 0, 2,
  93     1, 0, 3, 2,
  94     1, 0, 3, 2,
  95     1, 3, 2, 3,
  96 };
  97
  98 static const uint8_t diag_scan4x4_inv[4][4] = {
  99     { 0,  2,  5,  9, },
 100     { 1,  4,  8, 12, },
 101     { 3,  7, 11, 14, },
 102     { 6, 10, 13, 15, },
 103 };
 104
 105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 106     0, 0, 1, 0,
 107     1, 2, 0, 1,
 108     2, 3, 0, 1,
 109     2, 3, 4, 0,
 110     1, 2, 3, 4,
 111     5, 0, 1, 2,
 112     3, 4, 5, 6,
 113     0, 1, 2, 3,
 114     4, 5, 6, 7,
 115     1, 2, 3, 4,
 116     5, 6, 7, 2,
 117     3, 4, 5, 6,
 118     7, 3, 4, 5,
 119     6, 7, 4, 5,
 120     6, 7, 5, 6,
 121     7, 6, 7, 7,
 122 };
 123
 124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 125     0, 1, 0, 2,
 126     1, 0, 3, 2,
 127     1, 0, 4, 3,
 128     2, 1, 0, 5,
 129     4, 3, 2, 1,
 130     0, 6, 5, 4,
 131     3, 2, 1, 0,
 132     7, 6, 5, 4,
 133     3, 2, 1, 0,
 134     7, 6, 5, 4,
 135     3, 2, 1, 7,
 136     6, 5, 4, 3,
 137     2, 7, 6, 5,
 138     4, 3, 7, 6,
 139     5, 4, 7, 6,
 140     5, 7, 6, 7,
 141 };
 142
 143 static const uint8_t diag_scan8x8_inv[8][8] = {
 144     {  0,  2,  5,  9, 14, 20, 27, 35, },
 145     {  1,  4,  8, 13, 19, 26, 34, 42, },
 146     {  3,  7, 12, 18, 25, 33, 41, 48, },
 147     {  6, 11, 17, 24, 32, 40, 47, 53, },
 148     { 10, 16, 23, 31, 39, 46, 52, 57, },
 149     { 15, 22, 30, 38, 45, 51, 56, 60, },
 150     { 21, 29, 37, 44, 50, 55, 59, 62, },
 151     { 28, 36, 43, 49, 54, 58, 61, 63, },
 152 };
 153
 154 /**
  155  * NOTE: Each function hls_foo corresponds to the function foo in the
 156  * specification (HLS stands for High Level Syntax).
 157  */
 158
 159 /**
 160  * Section 5.7
 161  */
 162
 163 /* free everything allocated  by pic_arrays_init() */
 164 static void pic_arrays_free(HEVCContext *s)
 165 {
 166     av_freep(&s->sao);
 167     av_freep(&s->deblock);
 168     av_freep(&s->split_cu_flag);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size         = width * height;
 195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 196                            ((height >> log2_min_cb_size) + 1);
 197     int ctb_count        = sps->ctb_width * sps->ctb_height;
 198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 199
 200     s->bs_width  = width  >> 3;
 201     s->bs_height = height >> 3;
 202
 203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 205     s->split_cu_flag = av_malloc(pic_size);
 206     if (!s->sao || !s->deblock || !s->split_cu_flag)
 207         goto fail;
 208
 209     s->skip_flag    = av_malloc(pic_size_in_ctb);
 210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 211     if (!s->skip_flag || !s->tab_ct_depth)
 212         goto fail;
 213
 214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 215     s->tab_ipm  = av_malloc(min_pu_size);
 216     s->is_pcm   = av_malloc(min_pu_size);
 217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 218         goto fail;
 219
 220     s->filter_slice_edges = av_malloc(ctb_count);
 221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->tab_slice_address));
 223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 224                                       sizeof(*s->qp_y_tab));
 225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 226         goto fail;
 227
 228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 230     if (!s->horizontal_bs || !s->vertical_bs)
 231         goto fail;
 232
 233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 234                                           av_buffer_alloc);
 235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 236                                           av_buffer_allocz);
 237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 238         goto fail;
 239
 240     return 0;
 241
 242 fail:
 243     pic_arrays_free(s);
 244     return AVERROR(ENOMEM);
 245 }
 246
 247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 248 {
 249     int i = 0;
 250     int j = 0;
 251     uint8_t luma_weight_l0_flag[16];
 252     uint8_t chroma_weight_l0_flag[16];
 253     uint8_t luma_weight_l1_flag[16];
 254     uint8_t chroma_weight_l1_flag[16];
 255
 256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 257     if (s->sps->chroma_format_idc != 0) {
 258         int delta = get_se_golomb(gb);
 259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 260     }
 261
 262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 263         luma_weight_l0_flag[i] = get_bits1(gb);
 264         if (!luma_weight_l0_flag[i]) {
 265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 266             s->sh.luma_offset_l0[i] = 0;
 267         }
 268     }
 269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 271             chroma_weight_l0_flag[i] = get_bits1(gb);
 272     } else {
 273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 274             chroma_weight_l0_flag[i] = 0;
 275     }
 276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 277         if (luma_weight_l0_flag[i]) {
 278             int delta_luma_weight_l0 = get_se_golomb(gb);
 279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 281         }
 282         if (chroma_weight_l0_flag[i]) {
 283             for (j = 0; j < 2; j++) {
 284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 289             }
 290         } else {
 291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][0] = 0;
 293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 294             s->sh.chroma_offset_l0[i][1] = 0;
 295         }
 296     }
 297     if (s->sh.slice_type == B_SLICE) {
 298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 299             luma_weight_l1_flag[i] = get_bits1(gb);
 300             if (!luma_weight_l1_flag[i]) {
 301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 302                 s->sh.luma_offset_l1[i] = 0;
 303             }
 304         }
 305         if (s->sps->chroma_format_idc != 0) {
 306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 307                 chroma_weight_l1_flag[i] = get_bits1(gb);
 308         } else {
 309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 310                 chroma_weight_l1_flag[i] = 0;
 311         }
 312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 313             if (luma_weight_l1_flag[i]) {
 314                 int delta_luma_weight_l1 = get_se_golomb(gb);
 315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 317             }
 318             if (chroma_weight_l1_flag[i]) {
 319                 for (j = 0; j < 2; j++) {
 320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 325                 }
 326             } else {
 327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][0] = 0;
 329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 330                 s->sh.chroma_offset_l1[i][1] = 0;
 331             }
 332         }
 333     }
 334 }
 335
 336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 337 {
 338     const HEVCSPS *sps = s->sps;
 339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 340     int prev_delta_msb = 0;
 341     unsigned int nb_sps = 0, nb_sh;
 342     int i;
 343
 344     rps->nb_refs = 0;
 345     if (!sps->long_term_ref_pics_present_flag)
 346         return 0;
 347
 348     if (sps->num_long_term_ref_pics_sps > 0)
 349         nb_sps = get_ue_golomb_long(gb);
 350     nb_sh = get_ue_golomb_long(gb);
 351
 352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 353         return AVERROR_INVALIDDATA;
 354
 355     rps->nb_refs = nb_sh + nb_sps;
 356
 357     for (i = 0; i < rps->nb_refs; i++) {
 358         uint8_t delta_poc_msb_present;
 359
 360         if (i < nb_sps) {
 361             uint8_t lt_idx_sps = 0;
 362
 363             if (sps->num_long_term_ref_pics_sps > 1)
 364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 365
 366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 368         } else {
 369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 370             rps->used[i] = get_bits1(gb);
 371         }
 372
 373         delta_poc_msb_present = get_bits1(gb);
 374         if (delta_poc_msb_present) {
 375             int delta = get_ue_golomb_long(gb);
 376
 377             if (i && i != nb_sps)
 378                 delta += prev_delta_msb;
 379
 380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 381             prev_delta_msb = delta;
 382         }
 383     }
 384
 385     return 0;
 386 }
 387
 388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 389 {
 390     int ret;
 391     int num = 0, den = 0;
 392
 393     pic_arrays_free(s);
 394     ret = pic_arrays_init(s, sps);
 395     if (ret < 0)
 396         goto fail;
 397
 398     s->avctx->coded_width         = sps->width;
 399     s->avctx->coded_height        = sps->height;
 400     s->avctx->width               = sps->output_width;
 401     s->avctx->height              = sps->output_height;
 402     s->avctx->pix_fmt             = sps->pix_fmt;
 403     s->avctx->sample_aspect_ratio = sps->vui.sar;
 404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 405
 406     if (sps->vui.video_signal_type_present_flag)
 407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 408                                                                : AVCOL_RANGE_MPEG;
 409     else
 410         s->avctx->color_range = AVCOL_RANGE_MPEG;
 411
 412     if (sps->vui.colour_description_present_flag) {
 413         s->avctx->color_primaries = sps->vui.colour_primaries;
 414         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 415         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 416     } else {
 417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 418         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 419         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 420     }
 421
 422     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 424     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 425
 426     if (sps->sao_enabled) {
 427         av_frame_unref(s->tmp_frame);
 428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 429         if (ret < 0)
 430             goto fail;
 431         s->frame = s->tmp_frame;
 432     }
 433
 434     s->sps = sps;
 435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 436
 437     if (s->vps->vps_timing_info_present_flag) {
 438         num = s->vps->vps_num_units_in_tick;
 439         den = s->vps->vps_time_scale;
 440     } else if (sps->vui.vui_timing_info_present_flag) {
 441         num = sps->vui.vui_num_units_in_tick;
 442         den = sps->vui.vui_time_scale;
 443     }
 444
 445     if (num != 0 && den != 0)
 446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 447                   num, den, 1 << 30);
 448
 449     return 0;
 450
 451 fail:
 452     pic_arrays_free(s);
 453     s->sps = NULL;
 454     return ret;
 455 }
 456
 457 static int hls_slice_header(HEVCContext *s)
 458 {
         /*
          * Parse one slice segment header from s->HEVClc.gb into s->sh.
          *
          * Along the way this selects the PPS named by the header, switches
          * to its SPS through set_sps() when that differs from the current
          * one, updates s->poc, s->pocTid0, s->slice_idx and the
          * sequence counter, and derives the slice QP.
          *
          * Returns 0 on success.  Returns AVERROR_INVALIDDATA for values
          * that fail the checks below, or the error code of set_sps() /
          * ff_hevc_decode_short_term_rps() when those fail.
          */
 459     GetBitContext *gb = &s->HEVClc.gb;
 460     SliceHeader *sh   = &s->sh;
 461     int i, ret;
 462
 463     // Coded parameters
 464     sh->first_slice_in_pic_flag = get_bits1(gb);
         /* first slice of an IDR/BLA picture: bump the 8-bit sequence
          * counter and reset max_ra; IDR additionally clears all refs */
 465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 466         s->seq_decode = (s->seq_decode + 1) & 0xff;
 467         s->max_ra     = INT_MAX;
 468         if (IS_IDR(s))
 469             ff_hevc_clear_refs(s);
 470     }
         /* NOTE(review): 16..23 is presumably the IRAP NAL unit type range
          * -- confirm against the NAL unit type enum */
 471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 473
 474     sh->pps_id = get_ue_golomb_long(gb);
 475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 477         return AVERROR_INVALIDDATA;
 478     }
         /* all slices of one picture must use the same PPS */
 479     if (!sh->first_slice_in_pic_flag &&
 480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 482         return AVERROR_INVALIDDATA;
 483     }
 484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 485
         /* the PPS refers to a different SPS than the current one: drop the
          * references, let set_sps() rebuild the per-picture state and
          * start a new sequence */
 486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 488
 489         ff_hevc_clear_refs(s);
 490         ret = set_sps(s, s->sps);
 491         if (ret < 0)
 492             return ret;
 493
 494         s->seq_decode = (s->seq_decode + 1) & 0xff;
 495         s->max_ra     = INT_MAX;
 496     }
 497
 498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 499     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 500
 501     sh->dependent_slice_segment_flag = 0;
 502     if (!sh->first_slice_in_pic_flag) {
 503         int slice_address_length;
 504
 505         if (s->pps->dependent_slice_segments_enabled_flag)
 506             sh->dependent_slice_segment_flag = get_bits1(gb);
 507
             /* the address is coded with just enough bits to number every
              * CTB of the picture */
 508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 509                                             s->sps->ctb_height);
 510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 512             av_log(s->avctx, AV_LOG_ERROR,
 513                    "Invalid slice segment address: %u.\n",
 514                    sh->slice_segment_addr);
 515             return AVERROR_INVALIDDATA;
 516         }
 517
             /* only an independent segment starts a new slice */
 518         if (!sh->dependent_slice_segment_flag) {
 519             sh->slice_addr = sh->slice_segment_addr;
 520             s->slice_idx++;
 521         }
 522     } else {
 523         sh->slice_segment_addr = sh->slice_addr = 0;
 524         s->slice_idx           = 0;
 525         s->slice_initialized   = 0;
 526     }
 527
         /* The fields in this branch are only coded for independent slice
          * segments; for a dependent segment s->sh keeps the values parsed
          * earlier, which is why slice_initialized must already be set. */
 528     if (!sh->dependent_slice_segment_flag) {
 529         s->slice_initialized = 0;
 530
 531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 533
 534         sh->slice_type = get_ue_golomb_long(gb);
 535         if (!(sh->slice_type == I_SLICE ||
 536               sh->slice_type == P_SLICE ||
 537               sh->slice_type == B_SLICE)) {
 538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 539                    sh->slice_type);
 540             return AVERROR_INVALIDDATA;
 541         }
 542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 544             return AVERROR_INVALIDDATA;
 545         }
 546
 547         if (s->pps->output_flag_present_flag)
 548             sh->pic_output_flag = get_bits1(gb);
 549
 550         if (s->sps->separate_colour_plane_flag)
 551             sh->colour_plane_id = get_bits(gb, 2);
 552
             /* IDR pictures carry neither a POC nor reference picture sets:
              * for them the POC is 0 and short_term_rps is NULL */
 553         if (!IS_IDR(s)) {
 554             int short_term_ref_pic_set_sps_flag, poc;
 555
 556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
                 /* a later slice disagreeing with the POC of the picture is
                  * overridden unless AV_EF_EXPLODE asks for a hard error */
 558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 559                 av_log(s->avctx, AV_LOG_WARNING,
 560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 562                     return AVERROR_INVALIDDATA;
 563                 poc = s->poc;
 564             }
 565             s->poc = poc;
 566
                 /* the short-term RPS is either coded in the slice header or
                  * picked by index from the sets stored in the SPS */
 567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 568             if (!short_term_ref_pic_set_sps_flag) {
 569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 570                 if (ret < 0)
 571                     return ret;
 572
 573                 sh->short_term_rps = &sh->slice_rps;
 574             } else {
 575                 int numbits, rps_idx;
 576
 577                 if (!s->sps->nb_st_rps) {
 578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 579                     return AVERROR_INVALIDDATA;
 580                 }
 581
 582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 585             }
 586
                 /* a broken long-term RPS is only fatal with AV_EF_EXPLODE */
 587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 588             if (ret < 0) {
 589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 591                     return AVERROR_INVALIDDATA;
 592             }
 593
 594             if (s->sps->sps_temporal_mvp_enabled_flag)
 595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 596             else
 597                 sh->slice_temporal_mvp_enabled_flag = 0;
 598         } else {
 599             s->sh.short_term_rps = NULL;
 600             s->poc               = 0;
 601         }
 602
 603         /* 8.3.1 */
             /* remember the POC of pictures with temporal id 0 whose NAL
              * type is none of the types listed here */
 604         if (s->temporal_id == 0 &&
 605             s->nal_unit_type != NAL_TRAIL_N &&
 606             s->nal_unit_type != NAL_TSA_N   &&
 607             s->nal_unit_type != NAL_STSA_N  &&
 608             s->nal_unit_type != NAL_RADL_N  &&
 609             s->nal_unit_type != NAL_RADL_R  &&
 610             s->nal_unit_type != NAL_RASL_N  &&
 611             s->nal_unit_type != NAL_RASL_R)
 612             s->pocTid0 = s->poc;
 613
             /* two coded SAO flags: index 0 has its own, indices 1 and 2
              * share the second one */
 614         if (s->sps->sao_enabled) {
 615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 616             sh->slice_sample_adaptive_offset_flag[1] =
 617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 618         } else {
 619             sh->slice_sample_adaptive_offset_flag[0] = 0;
 620             sh->slice_sample_adaptive_offset_flag[1] = 0;
 621             sh->slice_sample_adaptive_offset_flag[2] = 0;
 622         }
 623
 624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 626             int nb_refs;
 627
                 /* reference counts default to the PPS values and may be
                  * overridden per slice */
 628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 629             if (sh->slice_type == B_SLICE)
 630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 631
 632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 634                 if (sh->slice_type == B_SLICE)
 635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 636             }
 637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 639                        sh->nb_refs[L0], sh->nb_refs[L1]);
 640                 return AVERROR_INVALIDDATA;
 641             }
 642
 643             sh->rpl_modification_flag[0] = 0;
 644             sh->rpl_modification_flag[1] = 0;
 645             nb_refs = ff_hevc_frame_nb_refs(s);
 646             if (!nb_refs) {
 647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 648                 return AVERROR_INVALIDDATA;
 649             }
 650
                 /* optional list reordering: each entry is an index coded
                  * with ceil(log2(nb_refs)) bits */
 651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 652                 sh->rpl_modification_flag[0] = get_bits1(gb);
 653                 if (sh->rpl_modification_flag[0]) {
 654                     for (i = 0; i < sh->nb_refs[L0]; i++)
 655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 656                 }
 657
 658                 if (sh->slice_type == B_SLICE) {
 659                     sh->rpl_modification_flag[1] = get_bits1(gb);
 660                     if (sh->rpl_modification_flag[1] == 1)
 661                         for (i = 0; i < sh->nb_refs[L1]; i++)
 662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 663                 }
 664             }
 665
 666             if (sh->slice_type == B_SLICE)
 667                 sh->mvd_l1_zero_flag = get_bits1(gb);
 668
 669             if (s->pps->cabac_init_present_flag)
 670                 sh->cabac_init_flag = get_bits1(gb);
 671             else
 672                 sh->cabac_init_flag = 0;
 673
 674             sh->collocated_ref_idx = 0;
 675             if (sh->slice_temporal_mvp_enabled_flag) {
                     /* the coded bit is inverted: a set bit selects L0 */
 676                 sh->collocated_list = L0;
 677                 if (sh->slice_type == B_SLICE)
 678                     sh->collocated_list = !get_bits1(gb);
 679
 680                 if (sh->nb_refs[sh->collocated_list] > 1) {
 681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 683                         av_log(s->avctx, AV_LOG_ERROR,
 684                                "Invalid collocated_ref_idx: %d.\n",
 685                                sh->collocated_ref_idx);
 686                         return AVERROR_INVALIDDATA;
 687                     }
 688                 }
 689             }
 690
 691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 693                 pred_weight_table(s, gb);
 694             }
 695
                 /* coded as 5 minus the count; only 1..5 is accepted */
 696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 698                 av_log(s->avctx, AV_LOG_ERROR,
 699                        "Invalid number of merging MVP candidates: %d.\n",
 700                        sh->max_num_merge_cand);
 701                 return AVERROR_INVALIDDATA;
 702             }
 703         }
 704
 705         sh->slice_qp_delta = get_se_golomb(gb);
 706
 707         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 708             sh->slice_cb_qp_offset = get_se_golomb(gb);
 709             sh->slice_cr_qp_offset = get_se_golomb(gb);
 710         } else {
 711             sh->slice_cb_qp_offset = 0;
 712             sh->slice_cr_qp_offset = 0;
 713         }
 714
             /* deblocking parameters: slice-level override if present,
              * else the PPS values, else filter enabled with zero offsets */
 715         if (s->pps->deblocking_filter_control_present_flag) {
 716             int deblocking_filter_override_flag = 0;
 717
 718             if (s->pps->deblocking_filter_override_enabled_flag)
 719                 deblocking_filter_override_flag = get_bits1(gb);
 720
 721             if (deblocking_filter_override_flag) {
 722                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 723                 if (!sh->disable_deblocking_filter_flag) {
 724                     sh->beta_offset = get_se_golomb(gb) * 2;
 725                     sh->tc_offset   = get_se_golomb(gb) * 2;
 726                 }
 727             } else {
 728                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 729                 sh->beta_offset                    = s->pps->beta_offset;
 730                 sh->tc_offset                      = s->pps->tc_offset;
 731             }
 732         } else {
 733             sh->disable_deblocking_filter_flag = 0;
 734             sh->beta_offset                    = 0;
 735             sh->tc_offset                      = 0;
 736         }
 737
             /* the slice-level flag is only coded when the PPS allows
              * filtering across slices and some in-loop filter is active;
              * otherwise the PPS value is inherited */
 738         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 739             (sh->slice_sample_adaptive_offset_flag[0] ||
 740              sh->slice_sample_adaptive_offset_flag[1] ||
 741              !sh->disable_deblocking_filter_flag)) {
 742             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 743         } else {
 744             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 745         }
 746     } else if (!s->slice_initialized) {
 747         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 748         return AVERROR_INVALIDDATA;
 749     }
 750
         /* entry point offsets are skipped over; only their count is kept */
 751     sh->num_entry_point_offsets = 0;
 752     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 753         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 754         if (sh->num_entry_point_offsets > 0) {
 755             int offset_len = get_ue_golomb_long(gb) + 1;
 756
                 /* NOTE(review): neither the count nor offset_len is checked
                  * against the data left in the buffer -- consider
                  * validating both */
 757             for (i = 0; i < sh->num_entry_point_offsets; i++)
 758                 skip_bits(gb, offset_len);
 759         }
 760     }
 761
 762     if (s->pps->slice_header_extension_present_flag) {
 763         unsigned int length = get_ue_golomb_long(gb);
             /* NOTE(review): i is int while length is an unchecked unsigned
              * value from the bitstream -- consider bounding length by the
              * remaining bits */
 764         for (i = 0; i < length; i++)
 765             skip_bits(gb, 8);  // slice_header_extension_data_byte
 766     }
 767
 768     // Inferred parameters
 769     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 770     if (sh->slice_qp > 51 ||
 771         sh->slice_qp < -s->sps->qp_bd_offset) {
 772         av_log(s->avctx, AV_LOG_ERROR,
 773                "The slice_qp %d is outside the valid range "
 774                "[%d, 51].\n",
 775                sh->slice_qp,
 776                -s->sps->qp_bd_offset);
 777         return AVERROR_INVALIDDATA;
 778     }
 779
 780     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 781
         /* a dependent slice segment cannot sit at CTB address 0 */
 782     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 783         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 784         return AVERROR_INVALIDDATA;
 785     }
 786
 787     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 788
         /* without per-CU QP deltas the luma QP is set here from slice_qp */
 789     if (!s->pps->cu_qp_delta_enabled_flag)
 790         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 791                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 792
 793     s->slice_initialized = 1;
 794
 795     return 0;
 796 }
 797
 798 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 799
 800 #define SET_SAO(elem, value)                            \
 801 do {                                                    \
 802     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 803         sao->elem = value;                              \
 804     else if (sao_merge_left_flag)                       \
 805         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 806     else if (sao_merge_up_flag)                         \
 807         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 808     else                                                \
 809         sao->elem = 0;                                  \
 810 } while (0)
 811
 812 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 813 {
 814     HEVCLocalContext *lc    = &s->HEVClc;
 815     int sao_merge_left_flag = 0;
 816     int sao_merge_up_flag   = 0;
 817     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 818     SAOParams *sao          = &CTB(s->sao, rx, ry);
 819     int c_idx, i;
 820
 821     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 822         s->sh.slice_sample_adaptive_offset_flag[1]) {
 823         if (rx > 0) {
 824             if (lc->ctb_left_flag)
 825                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 826         }
 827         if (ry > 0 && !sao_merge_left_flag) {
 828             if (lc->ctb_up_flag)
 829                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 830         }
 831     }
 832
 833     for (c_idx = 0; c_idx < 3; c_idx++) {
 834         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 835             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 836             continue;
 837         }
 838
 839         if (c_idx == 2) {
 840             sao->type_idx[2] = sao->type_idx[1];
 841             sao->eo_class[2] = sao->eo_class[1];
 842         } else {
 843             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 844         }
 845
 846         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 847             continue;
 848
 849         for (i = 0; i < 4; i++)
 850             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 851
 852         if (sao->type_idx[c_idx] == SAO_BAND) {
 853             for (i = 0; i < 4; i++) {
 854                 if (sao->offset_abs[c_idx][i]) {
 855                     SET_SAO(offset_sign[c_idx][i],
 856                             ff_hevc_sao_offset_sign_decode(s));
 857                 } else {
 858                     sao->offset_sign[c_idx][i] = 0;
 859                 }
 860             }
 861             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 862         } else if (c_idx != 2) {
 863             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 864         }
 865
 866         // Inferred parameters
 867         sao->offset_val[c_idx][0] = 0;
 868         for (i = 0; i < 4; i++) {
 869             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 870             if (sao->type_idx[c_idx] == SAO_EDGE) {
 871                 if (i > 1)
 872                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 873             } else if (sao->offset_sign[c_idx][i]) {
 874                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 875             }
 876         }
 877     }
 878 }
 879
 880 #undef SET_SAO
 881 #undef CTB
 882
 883 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 884                                 int log2_trafo_size, enum ScanType scan_idx,
 885                                 int c_idx)
 886 {
 887 #define GET_COORD(offset, n)                                    \
 888     do {                                                        \
 889         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 890         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 891     } while (0)
 892     HEVCLocalContext *lc    = &s->HEVClc;
 893     int transform_skip_flag = 0;
 894
 895     int last_significant_coeff_x, last_significant_coeff_y;
 896     int last_scan_pos;
 897     int n_end;
 898     int num_coeff    = 0;
 899     int greater1_ctx = 1;
 900
 901     int num_last_subset;
 902     int x_cg_last_sig, y_cg_last_sig;
 903
 904     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 905
 906     ptrdiff_t stride = s->frame->linesize[c_idx];
 907     int hshift       = s->sps->hshift[c_idx];
 908     int vshift       = s->sps->vshift[c_idx];
 909     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 910                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 911     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 912     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 913
 914     int trafo_size = 1 << log2_trafo_size;
 915     int i, qp, shift, add, scale, scale_m;
 916     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 917     const uint8_t *scale_matrix;
 918     uint8_t dc_scale;
 919
 920     // Derive QP for dequant
 921     if (!lc->cu.cu_transquant_bypass_flag) {
 922         static const int qp_c[] = {
 923             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 924         };
 925
 926         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 927             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 928             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 929             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 930         };
 931
 932         static const uint8_t div6[51 + 2 * 6 + 1] = {
 933             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 934             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 935             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 936         };
 937         int qp_y = lc->qp_y;
 938
 939         if (c_idx == 0) {
 940             qp = qp_y + s->sps->qp_bd_offset;
 941         } else {
 942             int qp_i, offset;
 943
 944             if (c_idx == 1)
 945                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 946             else
 947                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 948
 949             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 950             if (qp_i < 30)
 951                 qp = qp_i;
 952             else if (qp_i > 43)
 953                 qp = qp_i - 6;
 954             else
 955                 qp = qp_c[qp_i - 30];
 956
 957             qp += s->sps->qp_bd_offset;
 958         }
 959
 960         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 961         add      = 1 << (shift - 1);
 962         scale    = level_scale[rem6[qp]] << (div6[qp]);
 963         scale_m  = 16; // default when no custom scaling lists.
 964         dc_scale = 16;
 965
 966         if (s->sps->scaling_list_enable_flag) {
 967             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 968                                     &s->pps->scaling_list : &s->sps->scaling_list;
 969             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 970
 971             if (log2_trafo_size != 5)
 972                 matrix_id = 3 * matrix_id + c_idx;
 973
 974             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 975             if (log2_trafo_size >= 4)
 976                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 977         }
 978     }
 979
 980     if (s->pps->transform_skip_enabled_flag &&
 981         !lc->cu.cu_transquant_bypass_flag   &&
 982         log2_trafo_size == 2) {
 983         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 984     }
 985
 986     last_significant_coeff_x =
 987         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 988     last_significant_coeff_y =
 989         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 990
 991     if (last_significant_coeff_x > 3) {
 992         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 993         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 994                                    (2 + (last_significant_coeff_x & 1)) +
 995                                    suffix;
 996     }
 997
 998     if (last_significant_coeff_y > 3) {
 999         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1000         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1001                                    (2 + (last_significant_coeff_y & 1)) +
1002                                    suffix;
1003     }
1004
1005     if (scan_idx == SCAN_VERT)
1006         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1007
1008     x_cg_last_sig = last_significant_coeff_x >> 2;
1009     y_cg_last_sig = last_significant_coeff_y >> 2;
1010
1011     switch (scan_idx) {
1012     case SCAN_DIAG: {
1013         int last_x_c = last_significant_coeff_x & 3;
1014         int last_y_c = last_significant_coeff_y & 3;
1015
1016         scan_x_off = ff_hevc_diag_scan4x4_x;
1017         scan_y_off = ff_hevc_diag_scan4x4_y;
1018         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1019         if (trafo_size == 4) {
1020             scan_x_cg = scan_1x1;
1021             scan_y_cg = scan_1x1;
1022         } else if (trafo_size == 8) {
1023             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1024             scan_x_cg  = diag_scan2x2_x;
1025             scan_y_cg  = diag_scan2x2_y;
1026         } else if (trafo_size == 16) {
1027             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1028             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1029             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1030         } else { // trafo_size == 32
1031             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1032             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1033             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1034         }
1035         break;
1036     }
1037     case SCAN_HORIZ:
1038         scan_x_cg  = horiz_scan2x2_x;
1039         scan_y_cg  = horiz_scan2x2_y;
1040         scan_x_off = horiz_scan4x4_x;
1041         scan_y_off = horiz_scan4x4_y;
1042         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1043         break;
1044     default: //SCAN_VERT
1045         scan_x_cg  = horiz_scan2x2_y;
1046         scan_y_cg  = horiz_scan2x2_x;
1047         scan_x_off = horiz_scan4x4_y;
1048         scan_y_off = horiz_scan4x4_x;
1049         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1050         break;
1051     }
1052     num_coeff++;
1053     num_last_subset = (num_coeff - 1) >> 4;
1054
1055     for (i = num_last_subset; i >= 0; i--) {
1056         int n, m;
1057         int x_cg, y_cg, x_c, y_c;
1058         int implicit_non_zero_coeff = 0;
1059         int64_t trans_coeff_level;
1060         int prev_sig = 0;
1061         int offset   = i << 4;
1062
1063         uint8_t significant_coeff_flag_idx[16];
1064         uint8_t nb_significant_coeff_flag = 0;
1065
1066         x_cg = scan_x_cg[i];
1067         y_cg = scan_y_cg[i];
1068
1069         if (i < num_last_subset && i > 0) {
1070             int ctx_cg = 0;
1071             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1072                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1073             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1074                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1075
1076             significant_coeff_group_flag[x_cg][y_cg] =
1077                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1078             implicit_non_zero_coeff = 1;
1079         } else {
1080             significant_coeff_group_flag[x_cg][y_cg] =
1081                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1082                  (x_cg == 0 && y_cg == 0));
1083         }
1084
1085         last_scan_pos = num_coeff - offset - 1;
1086
1087         if (i == num_last_subset) {
1088             n_end                         = last_scan_pos - 1;
1089             significant_coeff_flag_idx[0] = last_scan_pos;
1090             nb_significant_coeff_flag     = 1;
1091         } else {
1092             n_end = 15;
1093         }
1094
1095         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1096             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1097         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1098             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1099
1100         for (n = n_end; n >= 0; n--) {
1101             GET_COORD(offset, n);
1102
1103             if (significant_coeff_group_flag[x_cg][y_cg] &&
1104                 (n > 0 || implicit_non_zero_coeff == 0)) {
1105                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1106                                                           log2_trafo_size,
1107                                                           scan_idx,
1108                                                           prev_sig) == 1) {
1109                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1110                     nb_significant_coeff_flag++;
1111                     implicit_non_zero_coeff = 0;
1112                 }
1113             } else {
1114                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1115                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1116                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1117                     nb_significant_coeff_flag++;
1118                 }
1119             }
1120         }
1121
1122         n_end = nb_significant_coeff_flag;
1123
1124         if (n_end) {
1125             int first_nz_pos_in_cg = 16;
1126             int last_nz_pos_in_cg = -1;
1127             int c_rice_param = 0;
1128             int first_greater1_coeff_idx = -1;
1129             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1130             uint16_t coeff_sign_flag;
1131             int sum_abs = 0;
1132             int sign_hidden = 0;
1133
1134             // initialize first elem of coeff_bas_level_greater1_flag
1135             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1136
1137             if (!(i == num_last_subset) && greater1_ctx == 0)
1138                 ctx_set++;
1139             greater1_ctx      = 1;
1140             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1141
1142             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1143                 int n_idx = significant_coeff_flag_idx[m];
1144                 int inc   = (ctx_set << 2) + greater1_ctx;
1145                 coeff_abs_level_greater1_flag[n_idx] =
1146                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1147                 if (coeff_abs_level_greater1_flag[n_idx]) {
1148                     greater1_ctx = 0;
1149                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1150                     greater1_ctx++;
1151                 }
1152
1153                 if (coeff_abs_level_greater1_flag[n_idx] &&
1154                     first_greater1_coeff_idx == -1)
1155                     first_greater1_coeff_idx = n_idx;
1156             }
1157             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1158             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1159                                  !lc->cu.cu_transquant_bypass_flag;
1160
1161             if (first_greater1_coeff_idx != -1) {
1162                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1163             }
1164             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1165                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1166             } else {
1167                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1168             }
1169
1170             for (m = 0; m < n_end; m++) {
1171                 n = significant_coeff_flag_idx[m];
1172                 GET_COORD(offset, n);
1173                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1174                 if (trans_coeff_level == ((m < 8) ?
1175                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1176                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1177
1178                     trans_coeff_level += last_coeff_abs_level_remaining;
1179                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1180                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1181                 }
1182                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1183                     sum_abs += trans_coeff_level;
1184                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1185                         trans_coeff_level = -trans_coeff_level;
1186                 }
1187                 if (coeff_sign_flag >> 15)
1188                     trans_coeff_level = -trans_coeff_level;
1189                 coeff_sign_flag <<= 1;
1190                 if (!lc->cu.cu_transquant_bypass_flag) {
1191                     if (s->sps->scaling_list_enable_flag) {
1192                         if (y_c || x_c || log2_trafo_size < 4) {
1193                             int pos;
1194                             switch (log2_trafo_size) {
1195                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1196                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1197                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1198                             default: pos = (y_c        << 2) +  x_c;
1199                             }
1200                             scale_m = scale_matrix[pos];
1201                         } else {
1202                             scale_m = dc_scale;
1203                         }
1204                     }
1205                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1206                     if(trans_coeff_level < 0) {
1207                         if((~trans_coeff_level) & 0xFffffffffff8000)
1208                             trans_coeff_level = -32768;
1209                     } else {
1210                         if (trans_coeff_level & 0xffffffffffff8000)
1211                             trans_coeff_level = 32767;
1212                     }
1213                 }
1214                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1215             }
1216         }
1217     }
1218
1219     if (lc->cu.cu_transquant_bypass_flag) {
1220         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1221     } else {
1222         if (transform_skip_flag)
1223             s->hevcdsp.transform_skip(dst, coeffs, stride);
1224         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1225                  log2_trafo_size == 2)
1226             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1227         else
1228             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1229     }
1230 }
1231
1232 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1233                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1234                               int log2_cb_size, int log2_trafo_size,
1235                               int trafo_depth, int blk_idx)
1236 {
1237     HEVCLocalContext *lc = &s->HEVClc;
1238
1239     if (lc->cu.pred_mode == MODE_INTRA) {
1240         int trafo_size = 1 << log2_trafo_size;
1241         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1242
1243         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1244         if (log2_trafo_size > 2) {
1245             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1246             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1247             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1248             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1249         } else if (blk_idx == 3) {
1250             trafo_size = trafo_size << s->sps->hshift[1];
1251             ff_hevc_set_neighbour_available(s, xBase, yBase,
1252                                             trafo_size, trafo_size);
1253             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1254             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1255         }
1256     }
1257
1258     if (lc->tt.cbf_luma ||
1259         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1260         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1261         int scan_idx   = SCAN_DIAG;
1262         int scan_idx_c = SCAN_DIAG;
1263
1264         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1265             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1266             if (lc->tu.cu_qp_delta != 0)
1267                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1268                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1269             lc->tu.is_cu_qp_delta_coded = 1;
1270
1271             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1272                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1273                 av_log(s->avctx, AV_LOG_ERROR,
1274                        "The cu_qp_delta %d is outside the valid range "
1275                        "[%d, %d].\n",
1276                        lc->tu.cu_qp_delta,
1277                        -(26 + s->sps->qp_bd_offset / 2),
1278                         (25 + s->sps->qp_bd_offset / 2));
1279                 return AVERROR_INVALIDDATA;
1280             }
1281
1282             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1283         }
1284
1285         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1286             if (lc->tu.cur_intra_pred_mode >= 6 &&
1287                 lc->tu.cur_intra_pred_mode <= 14) {
1288                 scan_idx = SCAN_VERT;
1289             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1290                        lc->tu.cur_intra_pred_mode <= 30) {
1291                 scan_idx = SCAN_HORIZ;
1292             }
1293
1294             if (lc->pu.intra_pred_mode_c >=  6 &&
1295                 lc->pu.intra_pred_mode_c <= 14) {
1296                 scan_idx_c = SCAN_VERT;
1297             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1298                        lc->pu.intra_pred_mode_c <= 30) {
1299                 scan_idx_c = SCAN_HORIZ;
1300             }
1301         }
1302
1303         if (lc->tt.cbf_luma)
1304             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1305         if (log2_trafo_size > 2) {
1306             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1307                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1308             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1310         } else if (blk_idx == 3) {
1311             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1312                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1313             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1315         }
1316     }
1317     return 0;
1318 }
1319
1320 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1321 {
1322     int cb_size          = 1 << log2_cb_size;
1323     int log2_min_pu_size = s->sps->log2_min_pu_size;
1324
1325     int min_pu_width     = s->sps->min_pu_width;
1326     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1327     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1328     int i, j;
1329
1330     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1331         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1332             s->is_pcm[i + j * min_pu_width] = 2;
1333 }
1334
1335 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1336                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1337                               int log2_cb_size, int log2_trafo_size,
1338                               int trafo_depth, int blk_idx)
1339 {
1340     HEVCLocalContext *lc = &s->HEVClc;
1341     uint8_t split_transform_flag;
1342     int ret;
1343
1344     if (trafo_depth > 0 && log2_trafo_size == 2) {
1345         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1346             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1347         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1348             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1349     } else {
1350         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1351         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1352     }
1353
1354     if (lc->cu.intra_split_flag) {
1355         if (trafo_depth == 1)
1356             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1357     } else {
1358         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1359     }
1360
1361     lc->tt.cbf_luma = 1;
1362
1363     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1364                               lc->cu.pred_mode == MODE_INTER &&
1365                               lc->cu.part_mode != PART_2Nx2N &&
1366                               trafo_depth == 0;
1367
1368     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1369         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1370         trafo_depth     < lc->cu.max_trafo_depth       &&
1371         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1372         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1373     } else {
1374         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1375                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1376                                lc->tt.inter_split_flag;
1377     }
1378
1379     if (log2_trafo_size > 2) {
1380         if (trafo_depth == 0 ||
1381             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1382             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1383                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1384         }
1385
1386         if (trafo_depth == 0 ||
1387             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1388             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1389                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1390         }
1391     }
1392
1393     if (split_transform_flag) {
1394         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1395         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1396
1397         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1398                                  log2_cb_size, log2_trafo_size - 1,
1399                                  trafo_depth + 1, 0);
1400         if (ret < 0)
1401             return ret;
1402         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1403                                  log2_cb_size, log2_trafo_size - 1,
1404                                  trafo_depth + 1, 1);
1405         if (ret < 0)
1406             return ret;
1407         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1408                                  log2_cb_size, log2_trafo_size - 1,
1409                                  trafo_depth + 1, 2);
1410         if (ret < 0)
1411             return ret;
1412         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1413                                  log2_cb_size, log2_trafo_size - 1,
1414                                  trafo_depth + 1, 3);
1415         if (ret < 0)
1416             return ret;
1417     } else {
1418         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1419         int log2_min_tu_size = s->sps->log2_min_tb_size;
1420         int min_tu_width     = s->sps->min_tb_width;
1421
1422         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1423             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1424             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1425             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1426         }
1427
1428         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1429                                  log2_cb_size, log2_trafo_size, trafo_depth,
1430                                  blk_idx);
1431         if (ret < 0)
1432             return ret;
1433         // TODO: store cbf_luma somewhere else
1434         if (lc->tt.cbf_luma) {
1435             int i, j;
1436             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1437                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1438                     int x_tu = (x0 + j) >> log2_min_tu_size;
1439                     int y_tu = (y0 + i) >> log2_min_tu_size;
1440                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1441                 }
1442         }
1443         if (!s->sh.disable_deblocking_filter_flag) {
1444             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1445                                                   lc->slice_or_tiles_up_boundary,
1446                                                   lc->slice_or_tiles_left_boundary);
1447             if (s->pps->transquant_bypass_enable_flag &&
1448                 lc->cu.cu_transquant_bypass_flag)
1449                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1450         }
1451     }
1452     return 0;
1453 }
1454
1455 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1456 {
1457     //TODO: non-4:2:0 support
1458     HEVCLocalContext *lc = &s->HEVClc;
1459     GetBitContext gb;
1460     int cb_size   = 1 << log2_cb_size;
1461     int stride0   = s->frame->linesize[0];
1462     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1463     int   stride1 = s->frame->linesize[1];
1464     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1465     int   stride2 = s->frame->linesize[2];
1466     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1467
1468     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1469     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1470     int ret;
1471
1472     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1473                                           lc->slice_or_tiles_up_boundary,
1474                                           lc->slice_or_tiles_left_boundary);
1475
1476     ret = init_get_bits(&gb, pcm, length);
1477     if (ret < 0)
1478         return ret;
1479
1480     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1481     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1482     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1483     return 0;
1484 }
1485
1486 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1487 {
1488     HEVCLocalContext *lc = &s->HEVClc;
1489     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1490     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1491
1492     if (x)
1493         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1494     if (y)
1495         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1496
1497     switch (x) {
1498     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1499     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1500     case 0: lc->pu.mvd.x = 0;                               break;
1501     }
1502
1503     switch (y) {
1504     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1505     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1506     case 0: lc->pu.mvd.y = 0;                               break;
1507     }
1508 }
1509
1510 /**
1511  * 8.5.3.2.2.1 Luma sample interpolation process
1512  *
1513  * @param s HEVC decoding context
1514  * @param dst target buffer for block data at block position
1515  * @param dststride stride of the dst buffer
1516  * @param ref reference picture buffer at origin (0, 0)
1517  * @param mv motion vector (relative to block position) to get pixel data from
1518  * @param x_off horizontal position of block from origin (0, 0)
1519  * @param y_off vertical position of block from origin (0, 0)
1520  * @param block_w width of block
1521  * @param block_h height of block
1522  */
1523 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1524                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1525                     int block_w, int block_h)
1526 {
1527     HEVCLocalContext *lc = &s->HEVClc;
1528     uint8_t *src         = ref->data[0];
1529     ptrdiff_t srcstride  = ref->linesize[0];
1530     int pic_width        = s->sps->width;
1531     int pic_height       = s->sps->height;
1532
1533     int mx         = mv->x & 3;
1534     int my         = mv->y & 3;
1535     int extra_left = ff_hevc_qpel_extra_before[mx];
1536     int extra_top  = ff_hevc_qpel_extra_before[my];
1537
1538     x_off += mv->x >> 2;
1539     y_off += mv->y >> 2;
1540     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1541
1542     if (x_off < extra_left || y_off < extra_top ||
1543         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1544         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1545         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1546         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1547         int buf_offset = extra_top *
1548                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1549
1550         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1551                                  edge_emu_stride, srcstride,
1552                                  block_w + ff_hevc_qpel_extra[mx],
1553                                  block_h + ff_hevc_qpel_extra[my],
1554                                  x_off - extra_left, y_off - extra_top,
1555                                  pic_width, pic_height);
1556         src = lc->edge_emu_buffer + buf_offset;
1557         srcstride = edge_emu_stride;
1558     }
1559     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1560                                      block_h, lc->mc_buffer);
1561 }
1562
1563 /**
1564  * 8.5.3.2.2.2 Chroma sample interpolation process
1565  *
1566  * @param s HEVC decoding context
1567  * @param dst1 target buffer for block data at block position (U plane)
1568  * @param dst2 target buffer for block data at block position (V plane)
1569  * @param dststride stride of the dst1 and dst2 buffers
1570  * @param ref reference picture buffer at origin (0, 0)
1571  * @param mv motion vector (relative to block position) to get pixel data from
1572  * @param x_off horizontal position of block from origin (0, 0)
1573  * @param y_off vertical position of block from origin (0, 0)
1574  * @param block_w width of block
1575  * @param block_h height of block
1576  */
1577 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1578                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1579                       int x_off, int y_off, int block_w, int block_h)
1580 {
1581     HEVCLocalContext *lc = &s->HEVClc;
1582     uint8_t *src1        = ref->data[1];
1583     uint8_t *src2        = ref->data[2];
1584     ptrdiff_t src1stride = ref->linesize[1];
1585     ptrdiff_t src2stride = ref->linesize[2];
1586     int pic_width        = s->sps->width >> 1;
1587     int pic_height       = s->sps->height >> 1;
1588
1589     int mx = mv->x & 7;
1590     int my = mv->y & 7;
1591
1592     x_off += mv->x >> 3;
1593     y_off += mv->y >> 3;
1594     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1595     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1596
1597     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1598         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1599         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1600         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1601         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1602         int buf_offset1 = EPEL_EXTRA_BEFORE *
1603                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1604         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1605         int buf_offset2 = EPEL_EXTRA_BEFORE *
1606                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1607
1608         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1609                                  edge_emu_stride, src1stride,
1610                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1611                                  x_off - EPEL_EXTRA_BEFORE,
1612                                  y_off - EPEL_EXTRA_BEFORE,
1613                                  pic_width, pic_height);
1614
1615         src1 = lc->edge_emu_buffer + buf_offset1;
1616         src1stride = edge_emu_stride;
1617         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1618                                              block_w, block_h, mx, my, lc->mc_buffer);
1619
1620         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1621                                  edge_emu_stride, src2stride,
1622                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1623                                  x_off - EPEL_EXTRA_BEFORE,
1624                                  y_off - EPEL_EXTRA_BEFORE,
1625                                  pic_width, pic_height);
1626         src2 = lc->edge_emu_buffer + buf_offset2;
1627         src2stride = edge_emu_stride;
1628
1629         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1630                                              block_w, block_h, mx, my,
1631                                              lc->mc_buffer);
1632     } else {
1633         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1634                                              block_w, block_h, mx, my,
1635                                              lc->mc_buffer);
1636         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1637                                              block_w, block_h, mx, my,
1638                                              lc->mc_buffer);
1639     }
1640 }
1641
1642 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1643                                 const Mv *mv, int y0, int height)
1644 {
1645     int y = (mv->y >> 2) + y0 + height + 9;
1646     ff_thread_await_progress(&ref->tf, y, 0);
1647 }
1648
1649 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1650                                 int nPbW, int nPbH,
1651                                 int log2_cb_size, int partIdx)
1652 {
1653 #define POS(c_idx, x, y)                                                              \
1654     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1655                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1656     HEVCLocalContext *lc = &s->HEVClc;
1657     int merge_idx = 0;
1658     struct MvField current_mv = {{{ 0 }}};
1659
1660     int min_pu_width = s->sps->min_pu_width;
1661
1662     MvField *tab_mvf = s->ref->tab_mvf;
1663     RefPicList  *refPicList = s->ref->refPicList;
1664     HEVCFrame *ref0, *ref1;
1665
1666     int tmpstride = MAX_PB_SIZE;
1667
1668     uint8_t *dst0 = POS(0, x0, y0);
1669     uint8_t *dst1 = POS(1, x0, y0);
1670     uint8_t *dst2 = POS(2, x0, y0);
1671     int log2_min_cb_size = s->sps->log2_min_cb_size;
1672     int min_cb_width     = s->sps->min_cb_width;
1673     int x_cb             = x0 >> log2_min_cb_size;
1674     int y_cb             = y0 >> log2_min_cb_size;
1675     int ref_idx[2];
1676     int mvp_flag[2];
1677     int x_pu, y_pu;
1678     int i, j;
1679
1680     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1681         if (s->sh.max_num_merge_cand > 1)
1682             merge_idx = ff_hevc_merge_idx_decode(s);
1683         else
1684             merge_idx = 0;
1685
1686         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1687                                    1 << log2_cb_size,
1688                                    1 << log2_cb_size,
1689                                    log2_cb_size, partIdx,
1690                                    merge_idx, &current_mv);
1691         x_pu = x0 >> s->sps->log2_min_pu_size;
1692         y_pu = y0 >> s->sps->log2_min_pu_size;
1693
1694         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1695             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1696                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1697     } else { /* MODE_INTER */
1698         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1699         if (lc->pu.merge_flag) {
1700             if (s->sh.max_num_merge_cand > 1)
1701                 merge_idx = ff_hevc_merge_idx_decode(s);
1702             else
1703                 merge_idx = 0;
1704
1705             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1706                                        partIdx, merge_idx, &current_mv);
1707             x_pu = x0 >> s->sps->log2_min_pu_size;
1708             y_pu = y0 >> s->sps->log2_min_pu_size;
1709
1710             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1711                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1712                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1713         } else {
1714             enum InterPredIdc inter_pred_idc = PRED_L0;
1715             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1716             if (s->sh.slice_type == B_SLICE)
1717                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1718
1719             if (inter_pred_idc != PRED_L1) {
1720                 if (s->sh.nb_refs[L0]) {
1721                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1722                     current_mv.ref_idx[0] = ref_idx[0];
1723                 }
1724                 current_mv.pred_flag[0] = 1;
1725                 hls_mvd_coding(s, x0, y0, 0);
1726                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1727                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1728                                          partIdx, merge_idx, &current_mv,
1729                                          mvp_flag[0], 0);
1730                 current_mv.mv[0].x += lc->pu.mvd.x;
1731                 current_mv.mv[0].y += lc->pu.mvd.y;
1732             }
1733
1734             if (inter_pred_idc != PRED_L0) {
1735                 if (s->sh.nb_refs[L1]) {
1736                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1737                     current_mv.ref_idx[1] = ref_idx[1];
1738                 }
1739
1740                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1741                     lc->pu.mvd.x = 0;
1742                     lc->pu.mvd.y = 0;
1743                 } else {
1744                     hls_mvd_coding(s, x0, y0, 1);
1745                 }
1746
1747                 current_mv.pred_flag[1] = 1;
1748                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1749                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1750                                          partIdx, merge_idx, &current_mv,
1751                                          mvp_flag[1], 1);
1752                 current_mv.mv[1].x += lc->pu.mvd.x;
1753                 current_mv.mv[1].y += lc->pu.mvd.y;
1754             }
1755
1756             x_pu = x0 >> s->sps->log2_min_pu_size;
1757             y_pu = y0 >> s->sps->log2_min_pu_size;
1758
1759             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1760                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1761                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1762         }
1763     }
1764
1765     if (current_mv.pred_flag[0]) {
1766         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1767         if (!ref0)
1768             return;
1769         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1770     }
1771     if (current_mv.pred_flag[1]) {
1772         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1773         if (!ref1)
1774             return;
1775         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1776     }
1777
1778     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1779         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1780         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1781
1782         luma_mc(s, tmp, tmpstride, ref0->frame,
1783                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1784
1785         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1786             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1787             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1788                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1789                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1790                                      dst0, s->frame->linesize[0], tmp,
1791                                      tmpstride, nPbW, nPbH);
1792         } else {
1793             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1794         }
1795         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1796                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1797
1798         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1799             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1800             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1801                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1802                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1803                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1804                                      nPbW / 2, nPbH / 2);
1805             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1806                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1807                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1808                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1809                                      nPbW / 2, nPbH / 2);
1810         } else {
1811             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1812             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1813         }
1814     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1815         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1816         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1817
1818         if (!ref1)
1819             return;
1820
1821         luma_mc(s, tmp, tmpstride, ref1->frame,
1822                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1823
1824         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1825             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1826             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1827                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1828                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1829                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1830                                       nPbW, nPbH);
1831         } else {
1832             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1833         }
1834
1835         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1836                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1837
1838         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1839             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1840             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1841                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1842                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1843                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1844             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1845                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1846                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1847                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1848         } else {
1849             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1850             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1851         }
1852     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1853         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1854         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1855         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1856         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1857         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1858         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1859
1860         if (!ref0 || !ref1)
1861             return;
1862
1863         luma_mc(s, tmp, tmpstride, ref0->frame,
1864                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1865         luma_mc(s, tmp2, tmpstride, ref1->frame,
1866                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1867
1868         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1869             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1870             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1871                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1872                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1873                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1874                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1875                                          dst0, s->frame->linesize[0],
1876                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1877         } else {
1878             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1879                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1880         }
1881
1882         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1883                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1884         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1885                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1886
1887         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1888             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1889             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1890                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1891                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1892                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1893                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1894                                          dst1, s->frame->linesize[1], tmp, tmp3,
1895                                          tmpstride, nPbW / 2, nPbH / 2);
1896             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1897                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1898                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1899                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1900                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1901                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1902                                          tmpstride, nPbW / 2, nPbH / 2);
1903         } else {
1904             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1905             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1906         }
1907     }
1908 }
1909
1910 /**
1911  * 8.4.1
1912  */
1913 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1914                                 int prev_intra_luma_pred_flag)
1915 {
1916     HEVCLocalContext *lc = &s->HEVClc;
1917     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1918     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1919     int min_pu_width     = s->sps->min_pu_width;
1920     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1921     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1922     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1923
1924     int cand_up   = (lc->ctb_up_flag || y0b) ?
1925                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1926     int cand_left = (lc->ctb_left_flag || x0b) ?
1927                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1928
1929     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1930
1931     MvField *tab_mvf = s->ref->tab_mvf;
1932     int intra_pred_mode;
1933     int candidate[3];
1934     int i, j;
1935
1936     // intra_pred_mode prediction does not cross vertical CTB boundaries
1937     if ((y0 - 1) < y_ctb)
1938         cand_up = INTRA_DC;
1939
1940     if (cand_left == cand_up) {
1941         if (cand_left < 2) {
1942             candidate[0] = INTRA_PLANAR;
1943             candidate[1] = INTRA_DC;
1944             candidate[2] = INTRA_ANGULAR_26;
1945         } else {
1946             candidate[0] = cand_left;
1947             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1948             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1949         }
1950     } else {
1951         candidate[0] = cand_left;
1952         candidate[1] = cand_up;
1953         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1954             candidate[2] = INTRA_PLANAR;
1955         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1956             candidate[2] = INTRA_DC;
1957         } else {
1958             candidate[2] = INTRA_ANGULAR_26;
1959         }
1960     }
1961
1962     if (prev_intra_luma_pred_flag) {
1963         intra_pred_mode = candidate[lc->pu.mpm_idx];
1964     } else {
1965         if (candidate[0] > candidate[1])
1966             FFSWAP(uint8_t, candidate[0], candidate[1]);
1967         if (candidate[0] > candidate[2])
1968             FFSWAP(uint8_t, candidate[0], candidate[2]);
1969         if (candidate[1] > candidate[2])
1970             FFSWAP(uint8_t, candidate[1], candidate[2]);
1971
1972         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1973         for (i = 0; i < 3; i++)
1974             if (intra_pred_mode >= candidate[i])
1975                 intra_pred_mode++;
1976     }
1977
1978     /* write the intra prediction units into the mv array */
1979     if (!size_in_pus)
1980         size_in_pus = 1;
1981     for (i = 0; i < size_in_pus; i++) {
1982         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1983                intra_pred_mode, size_in_pus);
1984
1985         for (j = 0; j < size_in_pus; j++) {
1986             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1991             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1992             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1993             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1995         }
1996     }
1997
1998     return intra_pred_mode;
1999 }
2000
2001 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2002                                           int log2_cb_size, int ct_depth)
2003 {
2004     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2005     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2006     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2007     int y;
2008
2009     for (y = 0; y < length; y++)
2010         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2011                ct_depth, length);
2012 }
2013
2014 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2015                                   int log2_cb_size)
2016 {
2017     HEVCLocalContext *lc = &s->HEVClc;
2018     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2019     uint8_t prev_intra_luma_pred_flag[4];
2020     int split   = lc->cu.part_mode == PART_NxN;
2021     int pb_size = (1 << log2_cb_size) >> split;
2022     int side    = split + 1;
2023     int chroma_mode;
2024     int i, j;
2025
2026     for (i = 0; i < side; i++)
2027         for (j = 0; j < side; j++)
2028             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2029
2030     for (i = 0; i < side; i++) {
2031         for (j = 0; j < side; j++) {
2032             if (prev_intra_luma_pred_flag[2 * i + j])
2033                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2034             else
2035                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2036
2037             lc->pu.intra_pred_mode[2 * i + j] =
2038                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2039                                      prev_intra_luma_pred_flag[2 * i + j]);
2040         }
2041     }
2042
2043     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2044     if (chroma_mode != 4) {
2045         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2046             lc->pu.intra_pred_mode_c = 34;
2047         else
2048             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2049     } else {
2050         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2051     }
2052 }
2053
2054 static void intra_prediction_unit_default_value(HEVCContext *s,
2055                                                 int x0, int y0,
2056                                                 int log2_cb_size)
2057 {
2058     HEVCLocalContext *lc = &s->HEVClc;
2059     int pb_size          = 1 << log2_cb_size;
2060     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2061     int min_pu_width     = s->sps->min_pu_width;
2062     MvField *tab_mvf     = s->ref->tab_mvf;
2063     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2064     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2065     int j, k;
2066
2067     if (size_in_pus == 0)
2068         size_in_pus = 1;
2069     for (j = 0; j < size_in_pus; j++) {
2070         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2071         for (k = 0; k < size_in_pus; k++)
2072             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2073     }
2074 }
2075
2076 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2077 {
2078     int cb_size          = 1 << log2_cb_size;
2079     HEVCLocalContext *lc = &s->HEVClc;
2080     int log2_min_cb_size = s->sps->log2_min_cb_size;
2081     int length           = cb_size >> log2_min_cb_size;
2082     int min_cb_width     = s->sps->min_cb_width;
2083     int x_cb             = x0 >> log2_min_cb_size;
2084     int y_cb             = y0 >> log2_min_cb_size;
2085     int x, y, ret;
2086
2087     lc->cu.x                = x0;
2088     lc->cu.y                = y0;
2089     lc->cu.rqt_root_cbf     = 1;
2090     lc->cu.pred_mode        = MODE_INTRA;
2091     lc->cu.part_mode        = PART_2Nx2N;
2092     lc->cu.intra_split_flag = 0;
2093     lc->cu.pcm_flag         = 0;
2094
2095     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2096     for (x = 0; x < 4; x++)
2097         lc->pu.intra_pred_mode[x] = 1;
2098     if (s->pps->transquant_bypass_enable_flag) {
2099         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2100         if (lc->cu.cu_transquant_bypass_flag)
2101             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2102     } else
2103         lc->cu.cu_transquant_bypass_flag = 0;
2104
2105     if (s->sh.slice_type != I_SLICE) {
2106         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2107
2108         lc->cu.pred_mode = MODE_SKIP;
2109         x = y_cb * min_cb_width + x_cb;
2110         for (y = 0; y < length; y++) {
2111             memset(&s->skip_flag[x], skip_flag, length);
2112             x += min_cb_width;
2113         }
2114         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2115     }
2116
2117     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2118         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2119         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2120
2121         if (!s->sh.disable_deblocking_filter_flag)
2122             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2123                                                   lc->slice_or_tiles_up_boundary,
2124                                                   lc->slice_or_tiles_left_boundary);
2125     } else {
2126         if (s->sh.slice_type != I_SLICE)
2127             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2128         if (lc->cu.pred_mode != MODE_INTRA ||
2129             log2_cb_size == s->sps->log2_min_cb_size) {
2130             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2131             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2132                                       lc->cu.pred_mode == MODE_INTRA;
2133         }
2134
2135         if (lc->cu.pred_mode == MODE_INTRA) {
2136             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2137                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2138                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2139                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2140             }
2141             if (lc->cu.pcm_flag) {
2142                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2143                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2144                 if (s->sps->pcm.loop_filter_disable_flag)
2145                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2146
2147                 if (ret < 0)
2148                     return ret;
2149             } else {
2150                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2151             }
2152         } else {
2153             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2154             switch (lc->cu.part_mode) {
2155             case PART_2Nx2N:
2156                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2157                 break;
2158             case PART_2NxN:
2159                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2160                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2161                 break;
2162             case PART_Nx2N:
2163                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2164                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2165                 break;
2166             case PART_2NxnU:
2167                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2168                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2169                 break;
2170             case PART_2NxnD:
2171                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2172                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2173                 break;
2174             case PART_nLx2N:
2175                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2176                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2177                 break;
2178             case PART_nRx2N:
2179                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2180                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2181                 break;
2182             case PART_NxN:
2183                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2184                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2185                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2186                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2187                 break;
2188             }
2189         }
2190
2191         if (!lc->cu.pcm_flag) {
2192             if (lc->cu.pred_mode != MODE_INTRA &&
2193                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2194                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2195             }
2196             if (lc->cu.rqt_root_cbf) {
2197                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2198                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2199                                          s->sps->max_transform_hierarchy_depth_inter;
2200                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2201                                          log2_cb_size,
2202                                          log2_cb_size, 0, 0);
2203                 if (ret < 0)
2204                     return ret;
2205             } else {
2206                 if (!s->sh.disable_deblocking_filter_flag)
2207                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2208                                                           lc->slice_or_tiles_up_boundary,
2209                                                           lc->slice_or_tiles_left_boundary);
2210             }
2211         }
2212     }
2213
2214     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2215         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2216
2217     x = y_cb * min_cb_width + x_cb;
2218     for (y = 0; y < length; y++) {
2219         memset(&s->qp_y_tab[x], lc->qp_y, length);
2220         x += min_cb_width;
2221     }
2222
2223     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2224
2225     return 0;
2226 }
2227
2228 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2229                                int log2_cb_size, int cb_depth)
2230 {
2231     HEVCLocalContext *lc = &s->HEVClc;
2232     const int cb_size    = 1 << log2_cb_size;
2233
2234     lc->ct.depth = cb_depth;
2235     if (x0 + cb_size <= s->sps->width  &&
2236         y0 + cb_size <= s->sps->height &&
2237         log2_cb_size > s->sps->log2_min_cb_size) {
2238         SAMPLE(s->split_cu_flag, x0, y0) =
2239             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2240     } else {
2241         SAMPLE(s->split_cu_flag, x0, y0) =
2242             (log2_cb_size > s->sps->log2_min_cb_size);
2243     }
2244     if (s->pps->cu_qp_delta_enabled_flag &&
2245         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2246         lc->tu.is_cu_qp_delta_coded = 0;
2247         lc->tu.cu_qp_delta          = 0;
2248     }
2249
2250     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2251         const int cb_size_split = cb_size >> 1;
2252         const int x1 = x0 + cb_size_split;
2253         const int y1 = y0 + cb_size_split;
2254
2255         log2_cb_size--;
2256         cb_depth++;
2257
2258 #define SUBDIVIDE(x, y)                                                \
2259 do {                                                                   \
2260     if (x < s->sps->width && y < s->sps->height) {                     \
2261         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2262         if (ret < 0)                                                   \
2263             return ret;                                                \
2264     }                                                                  \
2265 } while (0)
2266
2267         SUBDIVIDE(x0, y0);
2268         SUBDIVIDE(x1, y0);
2269         SUBDIVIDE(x0, y1);
2270         SUBDIVIDE(x1, y1);
2271     } else {
2272         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2273         if (ret < 0)
2274             return ret;
2275     }
2276
2277     return 0;
2278 }
2279
2280 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2281                                  int ctb_addr_ts)
2282 {
2283     HEVCLocalContext *lc  = &s->HEVClc;
2284     int ctb_size          = 1 << s->sps->log2_ctb_size;
2285     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2286     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2287
2288     int tile_left_boundary, tile_up_boundary;
2289     int slice_left_boundary, slice_up_boundary;
2290
2291     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2292
2293     if (s->pps->entropy_coding_sync_enabled_flag) {
2294         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2295             lc->first_qp_group = 1;
2296         lc->end_of_tiles_x = s->sps->width;
2297     } else if (s->pps->tiles_enabled_flag) {
2298         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2299             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2300             lc->start_of_tiles_x = x_ctb;
2301             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2302             lc->first_qp_group   = 1;
2303         }
2304     } else {
2305         lc->end_of_tiles_x = s->sps->width;
2306     }
2307
2308     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2309
2310     if (s->pps->tiles_enabled_flag) {
2311         tile_left_boundary  = x_ctb > 0 &&
2312                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2313         slice_left_boundary = x_ctb > 0 &&
2314                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2315         tile_up_boundary  = y_ctb > 0 &&
2316                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2317         slice_up_boundary = y_ctb > 0 &&
2318                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2319     } else {
2320         tile_left_boundary  =
2321         tile_up_boundary    = 1;
2322         slice_left_boundary = ctb_addr_in_slice > 0;
2323         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2324     }
2325     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2326     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2327     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2328     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2329     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2330     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2331 }
2332
2333 static int hls_slice_data(HEVCContext *s)
2334 {
2335     int ctb_size    = 1 << s->sps->log2_ctb_size;
2336     int more_data   = 1;
2337     int x_ctb       = 0;
2338     int y_ctb       = 0;
2339     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2340     int ret;
2341
2342     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2343         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2344
2345         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2346         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2347         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2348
2349         ff_hevc_cabac_init(s, ctb_addr_ts);
2350
2351         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2352
2353         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2354         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2355         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2356
2357         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2358         if (ret < 0)
2359             return ret;
2360         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2361
2362         ctb_addr_ts++;
2363         ff_hevc_save_states(s, ctb_addr_ts);
2364         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2365     }
2366
2367     if (x_ctb + ctb_size >= s->sps->width &&
2368         y_ctb + ctb_size >= s->sps->height)
2369         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2370
2371     return ctb_addr_ts;
2372 }
2373
2374 /**
2375  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2376  * 0 if the unit should be skipped, 1 otherwise
2377  */
2378 static int hls_nal_unit(HEVCContext *s)
2379 {
2380     GetBitContext *gb = &s->HEVClc.gb;
2381     int nuh_layer_id;
2382
2383     if (get_bits1(gb) != 0)
2384         return AVERROR_INVALIDDATA;
2385
2386     s->nal_unit_type = get_bits(gb, 6);
2387
2388     nuh_layer_id   = get_bits(gb, 6);
2389     s->temporal_id = get_bits(gb, 3) - 1;
2390     if (s->temporal_id < 0)
2391         return AVERROR_INVALIDDATA;
2392
2393     av_log(s->avctx, AV_LOG_DEBUG,
2394            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2395            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2396
2397     return nuh_layer_id == 0;
2398 }
2399
2400 static void restore_tqb_pixels(HEVCContext *s)
2401 {
2402     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2403     int x, y, c_idx;
2404
2405     for (c_idx = 0; c_idx < 3; c_idx++) {
2406         ptrdiff_t stride = s->frame->linesize[c_idx];
2407         int hshift       = s->sps->hshift[c_idx];
2408         int vshift       = s->sps->vshift[c_idx];
2409         for (y = 0; y < s->sps->min_pu_height; y++) {
2410             for (x = 0; x < s->sps->min_pu_width; x++) {
2411                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2412                     int n;
2413                     int len      = min_pu_size >> hshift;
2414                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2415                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2416                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2417                         memcpy(dst, src, len);
2418                         src += stride;
2419                         dst += stride;
2420                     }
2421                 }
2422             }
2423         }
2424     }
2425 }
2426
2427 static int set_side_data(HEVCContext *s)
2428 {
2429     AVFrame *out = s->ref->frame;
2430
2431     if (s->sei_frame_packing_present &&
2432         s->frame_packing_arrangement_type >= 3 &&
2433         s->frame_packing_arrangement_type <= 5 &&
2434         s->content_interpretation_type > 0 &&
2435         s->content_interpretation_type < 3) {
2436         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2437         if (!stereo)
2438             return AVERROR(ENOMEM);
2439
2440         switch (s->frame_packing_arrangement_type) {
2441         case 3:
2442             if (s->quincunx_subsampling)
2443                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2444             else
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2446             break;
2447         case 4:
2448             stereo->type = AV_STEREO3D_TOPBOTTOM;
2449             break;
2450         case 5:
2451             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2452             break;
2453         }
2454
2455         if (s->content_interpretation_type == 2)
2456             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2457     }
2458
2459     return 0;
2460 }
2461
2462 static int hevc_frame_start(HEVCContext *s)
2463 {
2464     HEVCLocalContext *lc = &s->HEVClc;
2465     int ret;
2466
2467     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2468     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2469     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2470     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2471
2472     lc->start_of_tiles_x = 0;
2473     s->is_decoded        = 0;
2474
2475     if (s->pps->tiles_enabled_flag)
2476         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2477
2478     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2479                               s->poc);
2480     if (ret < 0)
2481         goto fail;
2482
2483     ret = ff_hevc_frame_rps(s);
2484     if (ret < 0) {
2485         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2486         goto fail;
2487     }
2488
2489     ret = set_side_data(s);
2490     if (ret < 0)
2491         goto fail;
2492
2493     av_frame_unref(s->output_frame);
2494     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2495     if (ret < 0)
2496         goto fail;
2497
2498     ff_thread_finish_setup(s->avctx);
2499
2500     return 0;
2501
2502 fail:
2503     if (s->ref)
2504         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2505     s->ref = NULL;
2506     return ret;
2507 }
2508
2509 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2510 {
2511     HEVCLocalContext *lc = &s->HEVClc;
2512     GetBitContext *gb    = &lc->gb;
2513     int ctb_addr_ts, ret;
2514
2515     ret = init_get_bits8(gb, nal, length);
2516     if (ret < 0)
2517         return ret;
2518
2519     ret = hls_nal_unit(s);
2520     if (ret < 0) {
2521         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2522                s->nal_unit_type);
2523         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2524             return ret;
2525         return 0;
2526     } else if (!ret)
2527         return 0;
2528
2529     switch (s->nal_unit_type) {
2530     case NAL_VPS:
2531         ret = ff_hevc_decode_nal_vps(s);
2532         if (ret < 0)
2533             return ret;
2534         break;
2535     case NAL_SPS:
2536         ret = ff_hevc_decode_nal_sps(s);
2537         if (ret < 0)
2538             return ret;
2539         break;
2540     case NAL_PPS:
2541         ret = ff_hevc_decode_nal_pps(s);
2542         if (ret < 0)
2543             return ret;
2544         break;
2545     case NAL_SEI_PREFIX:
2546     case NAL_SEI_SUFFIX:
2547         ret = ff_hevc_decode_nal_sei(s);
2548         if (ret < 0)
2549             return ret;
2550         break;
2551     case NAL_TRAIL_R:
2552     case NAL_TRAIL_N:
2553     case NAL_TSA_N:
2554     case NAL_TSA_R:
2555     case NAL_STSA_N:
2556     case NAL_STSA_R:
2557     case NAL_BLA_W_LP:
2558     case NAL_BLA_W_RADL:
2559     case NAL_BLA_N_LP:
2560     case NAL_IDR_W_RADL:
2561     case NAL_IDR_N_LP:
2562     case NAL_CRA_NUT:
2563     case NAL_RADL_N:
2564     case NAL_RADL_R:
2565     case NAL_RASL_N:
2566     case NAL_RASL_R:
2567         ret = hls_slice_header(s);
2568         if (ret < 0)
2569             return ret;
2570
2571         if (s->max_ra == INT_MAX) {
2572             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2573                 s->max_ra = s->poc;
2574             } else {
2575                 if (IS_IDR(s))
2576                     s->max_ra = INT_MIN;
2577             }
2578         }
2579
2580         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2581             s->poc <= s->max_ra) {
2582             s->is_decoded = 0;
2583             break;
2584         } else {
2585             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2586                 s->max_ra = INT_MIN;
2587         }
2588
2589         if (s->sh.first_slice_in_pic_flag) {
2590             ret = hevc_frame_start(s);
2591             if (ret < 0)
2592                 return ret;
2593         } else if (!s->ref) {
2594             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2595             return AVERROR_INVALIDDATA;
2596         }
2597
2598         if (!s->sh.dependent_slice_segment_flag &&
2599             s->sh.slice_type != I_SLICE) {
2600             ret = ff_hevc_slice_rpl(s);
2601             if (ret < 0) {
2602                 av_log(s->avctx, AV_LOG_WARNING,
2603                        "Error constructing the reference lists for the current slice.\n");
2604                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2605                     return ret;
2606             }
2607         }
2608
2609         ctb_addr_ts = hls_slice_data(s);
2610         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2611             s->is_decoded = 1;
2612             if ((s->pps->transquant_bypass_enable_flag ||
2613                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2614                 s->sps->sao_enabled)
2615                 restore_tqb_pixels(s);
2616         }
2617
2618         if (ctb_addr_ts < 0)
2619             return ctb_addr_ts;
2620         break;
2621     case NAL_EOS_NUT:
2622     case NAL_EOB_NUT:
2623         s->seq_decode = (s->seq_decode + 1) & 0xff;
2624         s->max_ra     = INT_MAX;
2625         break;
2626     case NAL_AUD:
2627     case NAL_FD_NUT:
2628         break;
2629     default:
2630         av_log(s->avctx, AV_LOG_INFO,
2631                "Skipping NAL unit %d\n", s->nal_unit_type);
2632     }
2633
2634     return 0;
2635 }
2636
2637 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2638  * between these functions would be nice. */
2639 static int extract_rbsp(const uint8_t *src, int length,
2640                         HEVCNAL *nal)
2641 {
2642     int i, si, di;
2643     uint8_t *dst;
2644
2645 #define STARTCODE_TEST                                                  \
2646         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2647             if (src[i + 2] != 3) {                                      \
2648                 /* startcode, so we must be past the end */             \
2649                 length = i;                                             \
2650             }                                                           \
2651             break;                                                      \
2652         }
2653 #if HAVE_FAST_UNALIGNED
2654 #define FIND_FIRST_ZERO                                                 \
2655         if (i > 0 && !src[i])                                           \
2656             i--;                                                        \
2657         while (src[i])                                                  \
2658             i++
2659 #if HAVE_FAST_64BIT
2660     for (i = 0; i + 1 < length; i += 9) {
2661         if (!((~AV_RN64A(src + i) &
2662                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2663               0x8000800080008080ULL))
2664             continue;
2665         FIND_FIRST_ZERO;
2666         STARTCODE_TEST;
2667         i -= 7;
2668     }
2669 #else
2670     for (i = 0; i + 1 < length; i += 5) {
2671         if (!((~AV_RN32A(src + i) &
2672                (AV_RN32A(src + i) - 0x01000101U)) &
2673               0x80008080U))
2674             continue;
2675         FIND_FIRST_ZERO;
2676         STARTCODE_TEST;
2677         i -= 3;
2678     }
2679 #endif /* HAVE_FAST_64BIT */
2680 #else
2681     for (i = 0; i + 1 < length; i += 2) {
2682         if (src[i])
2683             continue;
2684         if (i > 0 && src[i - 1] == 0)
2685             i--;
2686         STARTCODE_TEST;
2687     }
2688 #endif /* HAVE_FAST_UNALIGNED */
2689
2690     if (i >= length - 1) { // no escaped 0
2691         nal->data = src;
2692         nal->size = length;
2693         return length;
2694     }
2695
2696     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2697                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2698     if (!nal->rbsp_buffer)
2699         return AVERROR(ENOMEM);
2700
2701     dst = nal->rbsp_buffer;
2702
2703     memcpy(dst, src, i);
2704     si = di = i;
2705     while (si + 2 < length) {
2706         // remove escapes (very rare 1:2^22)
2707         if (src[si + 2] > 3) {
2708             dst[di++] = src[si++];
2709             dst[di++] = src[si++];
2710         } else if (src[si] == 0 && src[si + 1] == 0) {
2711             if (src[si + 2] == 3) { // escape
2712                 dst[di++] = 0;
2713                 dst[di++] = 0;
2714                 si       += 3;
2715
2716                 continue;
2717             } else // next start code
2718                 goto nsc;
2719         }
2720
2721         dst[di++] = src[si++];
2722     }
2723     while (si < length)
2724         dst[di++] = src[si++];
2725
2726 nsc:
2727     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2728
2729     nal->data = dst;
2730     nal->size = di;
2731     return si;
2732 }
2733
2734 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2735 {
2736     int i, consumed, ret = 0;
2737
2738     s->ref = NULL;
2739     s->eos = 0;
2740
2741     /* split the input packet into NAL units, so we know the upper bound on the
2742      * number of slices in the frame */
2743     s->nb_nals = 0;
2744     while (length >= 4) {
2745         HEVCNAL *nal;
2746         int extract_length = 0;
2747
2748         if (s->is_nalff) {
2749             int i;
2750             for (i = 0; i < s->nal_length_size; i++)
2751                 extract_length = (extract_length << 8) | buf[i];
2752             buf    += s->nal_length_size;
2753             length -= s->nal_length_size;
2754
2755             if (extract_length > length) {
2756                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2757                 ret = AVERROR_INVALIDDATA;
2758                 goto fail;
2759             }
2760         } else {
2761             if (buf[2] == 0) {
2762                 length--;
2763                 buf++;
2764                 continue;
2765             }
2766             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2767                 ret = AVERROR_INVALIDDATA;
2768                 goto fail;
2769             }
2770
2771             buf           += 3;
2772             length        -= 3;
2773             extract_length = length;
2774         }
2775
2776         if (s->nals_allocated < s->nb_nals + 1) {
2777             int new_size = s->nals_allocated + 1;
2778             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2779             if (!tmp) {
2780                 ret = AVERROR(ENOMEM);
2781                 goto fail;
2782             }
2783             s->nals = tmp;
2784             memset(s->nals + s->nals_allocated, 0,
2785                    (new_size - s->nals_allocated) * sizeof(*tmp));
2786             s->nals_allocated = new_size;
2787         }
2788         nal = &s->nals[s->nb_nals++];
2789
2790         consumed = extract_rbsp(buf, extract_length, nal);
2791         if (consumed < 0) {
2792             ret = consumed;
2793             goto fail;
2794         }
2795
2796         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2797         if (ret < 0)
2798             goto fail;
2799         hls_nal_unit(s);
2800
2801         if (s->nal_unit_type == NAL_EOB_NUT ||
2802             s->nal_unit_type == NAL_EOS_NUT)
2803             s->eos = 1;
2804
2805         buf    += consumed;
2806         length -= consumed;
2807     }
2808
2809     /* parse the NAL units */
2810     for (i = 0; i < s->nb_nals; i++) {
2811         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2812         if (ret < 0) {
2813             av_log(s->avctx, AV_LOG_WARNING,
2814                    "Error parsing NAL unit #%d.\n", i);
2815             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2816                 goto fail;
2817         }
2818     }
2819
2820 fail:
2821     if (s->ref)
2822         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2823
2824     return ret;
2825 }
2826
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without any
 * separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2833
2834 static int verify_md5(HEVCContext *s, AVFrame *frame)
2835 {
2836     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2837     int pixel_shift;
2838     int i, j;
2839
2840     if (!desc)
2841         return AVERROR(EINVAL);
2842
2843     pixel_shift = desc->comp[0].depth_minus1 > 7;
2844
2845     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2846            s->poc);
2847
2848     /* the checksums are LE, so we have to byteswap for >8bpp formats
2849      * on BE arches */
2850 #if HAVE_BIGENDIAN
2851     if (pixel_shift && !s->checksum_buf) {
2852         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2853                        FFMAX3(frame->linesize[0], frame->linesize[1],
2854                               frame->linesize[2]));
2855         if (!s->checksum_buf)
2856             return AVERROR(ENOMEM);
2857     }
2858 #endif
2859
2860     for (i = 0; frame->data[i]; i++) {
2861         int width  = s->avctx->coded_width;
2862         int height = s->avctx->coded_height;
2863         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2864         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2865         uint8_t md5[16];
2866
2867         av_md5_init(s->md5_ctx);
2868         for (j = 0; j < h; j++) {
2869             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2870 #if HAVE_BIGENDIAN
2871             if (pixel_shift) {
2872                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2873                                    (const uint16_t*)src, w);
2874                 src = s->checksum_buf;
2875             }
2876 #endif
2877             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2878         }
2879         av_md5_final(s->md5_ctx, md5);
2880
2881         if (!memcmp(md5, s->md5[i], 16)) {
2882             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2883             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2884             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2885         } else {
2886             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2887             print_md5(s->avctx, AV_LOG_ERROR, md5);
2888             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2889             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2890             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2891             return AVERROR_INVALIDDATA;
2892         }
2893     }
2894
2895     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2896
2897     return 0;
2898 }
2899
2900 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2901                              AVPacket *avpkt)
2902 {
2903     int ret;
2904     HEVCContext *s = avctx->priv_data;
2905
2906     if (!avpkt->size) {
2907         ret = ff_hevc_output_frame(s, data, 1);
2908         if (ret < 0)
2909             return ret;
2910
2911         *got_output = ret;
2912         return 0;
2913     }
2914
2915     s->ref = NULL;
2916     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2917     if (ret < 0)
2918         return ret;
2919
2920     /* verify the SEI checksum */
2921     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2922         s->is_md5) {
2923         ret = verify_md5(s, s->ref->frame);
2924         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2925             ff_hevc_unref_frame(s, s->ref, ~0);
2926             return ret;
2927         }
2928     }
2929     s->is_md5 = 0;
2930
2931     if (s->is_decoded) {
2932         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2933         s->is_decoded = 0;
2934     }
2935
2936     if (s->output_frame->buf[0]) {
2937         av_frame_move_ref(data, s->output_frame);
2938         *got_output = 1;
2939     }
2940
2941     return avpkt->size;
2942 }
2943
2944 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2945 {
2946     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2947     if (ret < 0)
2948         return ret;
2949
2950     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2951     if (!dst->tab_mvf_buf)
2952         goto fail;
2953     dst->tab_mvf = src->tab_mvf;
2954
2955     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2956     if (!dst->rpl_tab_buf)
2957         goto fail;
2958     dst->rpl_tab = src->rpl_tab;
2959
2960     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2961     if (!dst->rpl_buf)
2962         goto fail;
2963
2964     dst->poc        = src->poc;
2965     dst->ctb_count  = src->ctb_count;
2966     dst->window     = src->window;
2967     dst->flags      = src->flags;
2968     dst->sequence   = src->sequence;
2969
2970     return 0;
2971 fail:
2972     ff_hevc_unref_frame(s, dst, ~0);
2973     return AVERROR(ENOMEM);
2974 }
2975
2976 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2977 {
2978     HEVCContext       *s = avctx->priv_data;
2979     int i;
2980
2981     pic_arrays_free(s);
2982
2983     av_freep(&s->md5_ctx);
2984
2985     av_frame_free(&s->tmp_frame);
2986     av_frame_free(&s->output_frame);
2987
2988     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2989         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2990         av_frame_free(&s->DPB[i].frame);
2991     }
2992
2993     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2994         av_buffer_unref(&s->vps_list[i]);
2995     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2996         av_buffer_unref(&s->sps_list[i]);
2997     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2998         av_buffer_unref(&s->pps_list[i]);
2999
3000     for (i = 0; i < s->nals_allocated; i++)
3001         av_freep(&s->nals[i].rbsp_buffer);
3002     av_freep(&s->nals);
3003     s->nals_allocated = 0;
3004
3005     return 0;
3006 }
3007
3008 static av_cold int hevc_init_context(AVCodecContext *avctx)
3009 {
3010     HEVCContext *s = avctx->priv_data;
3011     int i;
3012
3013     s->avctx = avctx;
3014
3015     s->tmp_frame = av_frame_alloc();
3016     if (!s->tmp_frame)
3017         goto fail;
3018
3019     s->output_frame = av_frame_alloc();
3020     if (!s->output_frame)
3021         goto fail;
3022
3023     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3024         s->DPB[i].frame = av_frame_alloc();
3025         if (!s->DPB[i].frame)
3026             goto fail;
3027         s->DPB[i].tf.f = s->DPB[i].frame;
3028     }
3029
3030     s->max_ra = INT_MAX;
3031
3032     s->md5_ctx = av_md5_alloc();
3033     if (!s->md5_ctx)
3034         goto fail;
3035
3036     ff_dsputil_init(&s->dsp, avctx);
3037
3038     s->context_initialized = 1;
3039
3040     return 0;
3041
3042 fail:
3043     hevc_decode_free(avctx);
3044     return AVERROR(ENOMEM);
3045 }
3046
3047 static int hevc_update_thread_context(AVCodecContext *dst,
3048                                       const AVCodecContext *src)
3049 {
3050     HEVCContext *s  = dst->priv_data;
3051     HEVCContext *s0 = src->priv_data;
3052     int i, ret;
3053
3054     if (!s->context_initialized) {
3055         ret = hevc_init_context(dst);
3056         if (ret < 0)
3057             return ret;
3058     }
3059
3060     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3061         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3062         if (s0->DPB[i].frame->buf[0]) {
3063             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3064             if (ret < 0)
3065                 return ret;
3066         }
3067     }
3068
3069     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3070         av_buffer_unref(&s->vps_list[i]);
3071         if (s0->vps_list[i]) {
3072             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3073             if (!s->vps_list[i])
3074                 return AVERROR(ENOMEM);
3075         }
3076     }
3077
3078     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3079         av_buffer_unref(&s->sps_list[i]);
3080         if (s0->sps_list[i]) {
3081             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3082             if (!s->sps_list[i])
3083                 return AVERROR(ENOMEM);
3084         }
3085     }
3086
3087     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3088         av_buffer_unref(&s->pps_list[i]);
3089         if (s0->pps_list[i]) {
3090             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3091             if (!s->pps_list[i])
3092                 return AVERROR(ENOMEM);
3093         }
3094     }
3095
3096     if (s->sps != s0->sps)
3097         ret = set_sps(s, s0->sps);
3098
3099     s->seq_decode = s0->seq_decode;
3100     s->seq_output = s0->seq_output;
3101     s->pocTid0    = s0->pocTid0;
3102     s->max_ra     = s0->max_ra;
3103
3104     s->is_nalff        = s0->is_nalff;
3105     s->nal_length_size = s0->nal_length_size;
3106
3107     if (s0->eos) {
3108         s->seq_decode = (s->seq_decode + 1) & 0xff;
3109         s->max_ra = INT_MAX;
3110     }
3111
3112     return 0;
3113 }
3114
3115 static int hevc_decode_extradata(HEVCContext *s)
3116 {
3117     AVCodecContext *avctx = s->avctx;
3118     GetByteContext gb;
3119     int ret;
3120
3121     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3122
3123     if (avctx->extradata_size > 3 &&
3124         (avctx->extradata[0] || avctx->extradata[1] ||
3125          avctx->extradata[2] > 1)) {
3126         /* It seems the extradata is encoded as hvcC format.
3127          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3128          * is finalized. When finalized, configurationVersion will be 1 and we
3129          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3130         int i, j, num_arrays, nal_len_size;
3131
3132         s->is_nalff = 1;
3133
3134         bytestream2_skip(&gb, 21);
3135         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3136         num_arrays   = bytestream2_get_byte(&gb);
3137
3138         /* nal units in the hvcC always have length coded with 2 bytes,
3139          * so put a fake nal_length_size = 2 while parsing them */
3140         s->nal_length_size = 2;
3141
3142         /* Decode nal units from hvcC. */
3143         for (i = 0; i < num_arrays; i++) {
3144             int type = bytestream2_get_byte(&gb) & 0x3f;
3145             int cnt  = bytestream2_get_be16(&gb);
3146
3147             for (j = 0; j < cnt; j++) {
3148                 // +2 for the nal size field
3149                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3150                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3151                     av_log(s->avctx, AV_LOG_ERROR,
3152                            "Invalid NAL unit size in extradata.\n");
3153                     return AVERROR_INVALIDDATA;
3154                 }
3155
3156                 ret = decode_nal_units(s, gb.buffer, nalsize);
3157                 if (ret < 0) {
3158                     av_log(avctx, AV_LOG_ERROR,
3159                            "Decoding nal unit %d %d from hvcC failed\n",
3160                            type, i);
3161                     return ret;
3162                 }
3163                 bytestream2_skip(&gb, nalsize);
3164             }
3165         }
3166
3167         /* Now store right nal length size, that will be used to parse
3168          * all other nals */
3169         s->nal_length_size = nal_len_size;
3170     } else {
3171         s->is_nalff = 0;
3172         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3173         if (ret < 0)
3174             return ret;
3175     }
3176     return 0;
3177 }
3178
3179 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3180 {
3181     HEVCContext *s = avctx->priv_data;
3182     int ret;
3183
3184     ff_init_cabac_states();
3185
3186     avctx->internal->allocate_progress = 1;
3187
3188     ret = hevc_init_context(avctx);
3189     if (ret < 0)
3190         return ret;
3191
3192     if (avctx->extradata_size > 0 && avctx->extradata) {
3193         ret = hevc_decode_extradata(s);
3194         if (ret < 0) {
3195             hevc_decode_free(avctx);
3196             return ret;
3197         }
3198     }
3199
3200     return 0;
3201 }
3202
3203 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3204 {
3205     HEVCContext *s = avctx->priv_data;
3206     int ret;
3207
3208     memset(s, 0, sizeof(*s));
3209
3210     ret = hevc_init_context(avctx);
3211     if (ret < 0)
3212         return ret;
3213
3214     return 0;
3215 }
3216
3217 static void hevc_decode_flush(AVCodecContext *avctx)
3218 {
3219     HEVCContext *s = avctx->priv_data;
3220     ff_hevc_flush_dpb(s);
3221     s->max_ra = INT_MAX;
3222 }
3223
/* Byte offset of field x inside HEVCContext, used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags: video parameter that applies to decoding. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3226
3227 static const AVProfile profiles[] = {
3228     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3229     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3230     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3231     { FF_PROFILE_UNKNOWN },
3232 };
3233
3234 static const AVOption options[] = {
3235     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3236         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3237     { NULL },
3238 };
3239
3240 static const AVClass hevc_decoder_class = {
3241     .class_name = "HEVC decoder",
3242     .item_name  = av_default_item_name,
3243     .option     = options,
3244     .version    = LIBAVUTIL_VERSION_INT,
3245 };
3246
3247 AVCodec ff_hevc_decoder = {
3248     .name                  = "hevc",
3249     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3250     .type                  = AVMEDIA_TYPE_VIDEO,
3251     .id                    = AV_CODEC_ID_HEVC,
3252     .priv_data_size        = sizeof(HEVCContext),
3253     .priv_class            = &hevc_decoder_class,
3254     .init                  = hevc_decode_init,
3255     .close                 = hevc_decode_free,
3256     .decode                = hevc_decode_frame,
3257     .flush                 = hevc_decode_flush,
3258     .update_thread_context = hevc_update_thread_context,
3259     .init_thread_copy      = hevc_init_thread_copy,
3260     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3261                              CODEC_CAP_FRAME_THREADS,
3262     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3263 };