git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "libavutil/stereo3d.h"
  33
  34 #include "bytestream.h"
  35 #include "cabac_functions.h"
  36 #include "dsputil.h"
  37 #include "golomb.h"
  38 #include "hevc.h"
  39
  40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  41 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  42 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  43
  44 static const uint8_t scan_1x1[1] = { 0 };
  45
  46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  47
  48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  49
  50 static const uint8_t horiz_scan4x4_x[16] = {
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55 };
  56
  57 static const uint8_t horiz_scan4x4_y[16] = {
  58     0, 0, 0, 0,
  59     1, 1, 1, 1,
  60     2, 2, 2, 2,
  61     3, 3, 3, 3,
  62 };
  63
  64 static const uint8_t horiz_scan8x8_inv[8][8] = {
  65     {  0,  1,  2,  3, 16, 17, 18, 19, },
  66     {  4,  5,  6,  7, 20, 21, 22, 23, },
  67     {  8,  9, 10, 11, 24, 25, 26, 27, },
  68     { 12, 13, 14, 15, 28, 29, 30, 31, },
  69     { 32, 33, 34, 35, 48, 49, 50, 51, },
  70     { 36, 37, 38, 39, 52, 53, 54, 55, },
  71     { 40, 41, 42, 43, 56, 57, 58, 59, },
  72     { 44, 45, 46, 47, 60, 61, 62, 63, },
  73 };
  74
  75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  76
  77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  78
  79 static const uint8_t diag_scan2x2_inv[2][2] = {
  80     { 0, 2, },
  81     { 1, 3, },
  82 };
  83
  84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  85     0, 0, 1, 0,
  86     1, 2, 0, 1,
  87     2, 3, 1, 2,
  88     3, 2, 3, 3,
  89 };
  90
  91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  92     0, 1, 0, 2,
  93     1, 0, 3, 2,
  94     1, 0, 3, 2,
  95     1, 3, 2, 3,
  96 };
  97
  98 static const uint8_t diag_scan4x4_inv[4][4] = {
  99     { 0,  2,  5,  9, },
 100     { 1,  4,  8, 12, },
 101     { 3,  7, 11, 14, },
 102     { 6, 10, 13, 15, },
 103 };
 104
 105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 106     0, 0, 1, 0,
 107     1, 2, 0, 1,
 108     2, 3, 0, 1,
 109     2, 3, 4, 0,
 110     1, 2, 3, 4,
 111     5, 0, 1, 2,
 112     3, 4, 5, 6,
 113     0, 1, 2, 3,
 114     4, 5, 6, 7,
 115     1, 2, 3, 4,
 116     5, 6, 7, 2,
 117     3, 4, 5, 6,
 118     7, 3, 4, 5,
 119     6, 7, 4, 5,
 120     6, 7, 5, 6,
 121     7, 6, 7, 7,
 122 };
 123
 124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 125     0, 1, 0, 2,
 126     1, 0, 3, 2,
 127     1, 0, 4, 3,
 128     2, 1, 0, 5,
 129     4, 3, 2, 1,
 130     0, 6, 5, 4,
 131     3, 2, 1, 0,
 132     7, 6, 5, 4,
 133     3, 2, 1, 0,
 134     7, 6, 5, 4,
 135     3, 2, 1, 7,
 136     6, 5, 4, 3,
 137     2, 7, 6, 5,
 138     4, 3, 7, 6,
 139     5, 4, 7, 6,
 140     5, 7, 6, 7,
 141 };
 142
 143 static const uint8_t diag_scan8x8_inv[8][8] = {
 144     {  0,  2,  5,  9, 14, 20, 27, 35, },
 145     {  1,  4,  8, 13, 19, 26, 34, 42, },
 146     {  3,  7, 12, 18, 25, 33, 41, 48, },
 147     {  6, 11, 17, 24, 32, 40, 47, 53, },
 148     { 10, 16, 23, 31, 39, 46, 52, 57, },
 149     { 15, 22, 30, 38, 45, 51, 56, 60, },
 150     { 21, 29, 37, 44, 50, 55, 59, 62, },
 151     { 28, 36, 43, 49, 54, 58, 61, 63, },
 152 };
 153
 154 /**
 155  * NOTE: Each function hls_foo correspond to the function foo in the
 156  * specification (HLS stands for High Level Syntax).
 157  */
 158
 159 /**
 160  * Section 5.7
 161  */
 162
 163 /* free everything allocated  by pic_arrays_init() */
 164 static void pic_arrays_free(HEVCContext *s)
 165 {
 166     av_freep(&s->sao);
 167     av_freep(&s->deblock);
 168     av_freep(&s->split_cu_flag);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size         = width * height;
 195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 196                            ((height >> log2_min_cb_size) + 1);
 197     int ctb_count        = sps->ctb_width * sps->ctb_height;
 198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 199
 200     s->bs_width  = width  >> 3;
 201     s->bs_height = height >> 3;
 202
 203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 205     s->split_cu_flag = av_malloc(pic_size);
 206     if (!s->sao || !s->deblock || !s->split_cu_flag)
 207         goto fail;
 208
 209     s->skip_flag    = av_malloc(pic_size_in_ctb);
 210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 211     if (!s->skip_flag || !s->tab_ct_depth)
 212         goto fail;
 213
 214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 215     s->tab_ipm  = av_malloc(min_pu_size);
 216     s->is_pcm   = av_malloc(min_pu_size);
 217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 218         goto fail;
 219
 220     s->filter_slice_edges = av_malloc(ctb_count);
 221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->tab_slice_address));
 223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 224                                       sizeof(*s->qp_y_tab));
 225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 226         goto fail;
 227
 228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 230     if (!s->horizontal_bs || !s->vertical_bs)
 231         goto fail;
 232
 233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 234                                           av_buffer_alloc);
 235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 236                                           av_buffer_allocz);
 237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 238         goto fail;
 239
 240     return 0;
 241
 242 fail:
 243     pic_arrays_free(s);
 244     return AVERROR(ENOMEM);
 245 }
 246
 247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 248 {
 249     int i = 0;
 250     int j = 0;
 251     uint8_t luma_weight_l0_flag[16];
 252     uint8_t chroma_weight_l0_flag[16];
 253     uint8_t luma_weight_l1_flag[16];
 254     uint8_t chroma_weight_l1_flag[16];
 255
 256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 257     if (s->sps->chroma_format_idc != 0) {
 258         int delta = get_se_golomb(gb);
 259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 260     }
 261
 262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 263         luma_weight_l0_flag[i] = get_bits1(gb);
 264         if (!luma_weight_l0_flag[i]) {
 265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 266             s->sh.luma_offset_l0[i] = 0;
 267         }
 268     }
 269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 271             chroma_weight_l0_flag[i] = get_bits1(gb);
 272     } else {
 273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 274             chroma_weight_l0_flag[i] = 0;
 275     }
 276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 277         if (luma_weight_l0_flag[i]) {
 278             int delta_luma_weight_l0 = get_se_golomb(gb);
 279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 281         }
 282         if (chroma_weight_l0_flag[i]) {
 283             for (j = 0; j < 2; j++) {
 284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 289             }
 290         } else {
 291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][0] = 0;
 293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 294             s->sh.chroma_offset_l0[i][1] = 0;
 295         }
 296     }
 297     if (s->sh.slice_type == B_SLICE) {
 298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 299             luma_weight_l1_flag[i] = get_bits1(gb);
 300             if (!luma_weight_l1_flag[i]) {
 301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 302                 s->sh.luma_offset_l1[i] = 0;
 303             }
 304         }
 305         if (s->sps->chroma_format_idc != 0) {
 306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 307                 chroma_weight_l1_flag[i] = get_bits1(gb);
 308         } else {
 309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 310                 chroma_weight_l1_flag[i] = 0;
 311         }
 312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 313             if (luma_weight_l1_flag[i]) {
 314                 int delta_luma_weight_l1 = get_se_golomb(gb);
 315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 317             }
 318             if (chroma_weight_l1_flag[i]) {
 319                 for (j = 0; j < 2; j++) {
 320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 325                 }
 326             } else {
 327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][0] = 0;
 329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 330                 s->sh.chroma_offset_l1[i][1] = 0;
 331             }
 332         }
 333     }
 334 }
 335
 336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 337 {
 338     const HEVCSPS *sps = s->sps;
 339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 340     int prev_delta_msb = 0;
 341     unsigned int nb_sps = 0, nb_sh;
 342     int i;
 343
 344     rps->nb_refs = 0;
 345     if (!sps->long_term_ref_pics_present_flag)
 346         return 0;
 347
 348     if (sps->num_long_term_ref_pics_sps > 0)
 349         nb_sps = get_ue_golomb_long(gb);
 350     nb_sh = get_ue_golomb_long(gb);
 351
 352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 353         return AVERROR_INVALIDDATA;
 354
 355     rps->nb_refs = nb_sh + nb_sps;
 356
 357     for (i = 0; i < rps->nb_refs; i++) {
 358         uint8_t delta_poc_msb_present;
 359
 360         if (i < nb_sps) {
 361             uint8_t lt_idx_sps = 0;
 362
 363             if (sps->num_long_term_ref_pics_sps > 1)
 364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 365
 366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 368         } else {
 369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 370             rps->used[i] = get_bits1(gb);
 371         }
 372
 373         delta_poc_msb_present = get_bits1(gb);
 374         if (delta_poc_msb_present) {
 375             int delta = get_ue_golomb_long(gb);
 376
 377             if (i && i != nb_sps)
 378                 delta += prev_delta_msb;
 379
 380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 381             prev_delta_msb = delta;
 382         }
 383     }
 384
 385     return 0;
 386 }
 387
 388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 389 {
 390     int ret;
 391     int num = 0, den = 0;
 392
 393     pic_arrays_free(s);
 394     ret = pic_arrays_init(s, sps);
 395     if (ret < 0)
 396         goto fail;
 397
 398     s->avctx->coded_width         = sps->width;
 399     s->avctx->coded_height        = sps->height;
 400     s->avctx->width               = sps->output_width;
 401     s->avctx->height              = sps->output_height;
 402     s->avctx->pix_fmt             = sps->pix_fmt;
 403     s->avctx->sample_aspect_ratio = sps->vui.sar;
 404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 405
 406     if (sps->vui.video_signal_type_present_flag)
 407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 408                                                                : AVCOL_RANGE_MPEG;
 409     else
 410         s->avctx->color_range = AVCOL_RANGE_MPEG;
 411
 412     if (sps->vui.colour_description_present_flag) {
 413         s->avctx->color_primaries = sps->vui.colour_primaries;
 414         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 415         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 416     } else {
 417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 418         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 419         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 420     }
 421
 422     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 424     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 425
 426     if (sps->sao_enabled) {
 427         av_frame_unref(s->tmp_frame);
 428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 429         if (ret < 0)
 430             goto fail;
 431         s->frame = s->tmp_frame;
 432     }
 433
 434     s->sps = sps;
 435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 436
 437     if (s->vps->vps_timing_info_present_flag) {
 438         num = s->vps->vps_num_units_in_tick;
 439         den = s->vps->vps_time_scale;
 440     } else if (sps->vui.vui_timing_info_present_flag) {
 441         num = sps->vui.vui_num_units_in_tick;
 442         den = sps->vui.vui_time_scale;
 443     }
 444
 445     if (num != 0 && den != 0)
 446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 447                   num, den, 1 << 30);
 448
 449     return 0;
 450
 451 fail:
 452     pic_arrays_free(s);
 453     s->sps = NULL;
 454     return ret;
 455 }
 456
 457 static int hls_slice_header(HEVCContext *s)
 458 {
 459     GetBitContext *gb = &s->HEVClc.gb;
 460     SliceHeader *sh   = &s->sh;
 461     int i, ret;
 462
 463     // Coded parameters
 464     sh->first_slice_in_pic_flag = get_bits1(gb);
 465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 466         s->seq_decode = (s->seq_decode + 1) & 0xff;
 467         s->max_ra     = INT_MAX;
 468         if (IS_IDR(s))
 469             ff_hevc_clear_refs(s);
 470     }
 471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 473
 474     sh->pps_id = get_ue_golomb_long(gb);
 475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 477         return AVERROR_INVALIDDATA;
 478     }
 479     if (!sh->first_slice_in_pic_flag &&
 480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 482         return AVERROR_INVALIDDATA;
 483     }
 484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 485
 486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 488
 489         ff_hevc_clear_refs(s);
 490         ret = set_sps(s, s->sps);
 491         if (ret < 0)
 492             return ret;
 493
 494         s->seq_decode = (s->seq_decode + 1) & 0xff;
 495         s->max_ra     = INT_MAX;
 496     }
 497
 498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 499     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 500
 501     sh->dependent_slice_segment_flag = 0;
 502     if (!sh->first_slice_in_pic_flag) {
 503         int slice_address_length;
 504
 505         if (s->pps->dependent_slice_segments_enabled_flag)
 506             sh->dependent_slice_segment_flag = get_bits1(gb);
 507
 508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 509                                             s->sps->ctb_height);
 510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 512             av_log(s->avctx, AV_LOG_ERROR,
 513                    "Invalid slice segment address: %u.\n",
 514                    sh->slice_segment_addr);
 515             return AVERROR_INVALIDDATA;
 516         }
 517
 518         if (!sh->dependent_slice_segment_flag) {
 519             sh->slice_addr = sh->slice_segment_addr;
 520             s->slice_idx++;
 521         }
 522     } else {
 523         sh->slice_segment_addr = sh->slice_addr = 0;
 524         s->slice_idx           = 0;
 525         s->slice_initialized   = 0;
 526     }
 527
 528     if (!sh->dependent_slice_segment_flag) {
 529         s->slice_initialized = 0;
 530
 531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 533
 534         sh->slice_type = get_ue_golomb_long(gb);
 535         if (!(sh->slice_type == I_SLICE ||
 536               sh->slice_type == P_SLICE ||
 537               sh->slice_type == B_SLICE)) {
 538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 539                    sh->slice_type);
 540             return AVERROR_INVALIDDATA;
 541         }
 542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 544             return AVERROR_INVALIDDATA;
 545         }
 546
 547         if (s->pps->output_flag_present_flag)
 548             sh->pic_output_flag = get_bits1(gb);
 549
 550         if (s->sps->separate_colour_plane_flag)
 551             sh->colour_plane_id = get_bits(gb, 2);
 552
 553         if (!IS_IDR(s)) {
 554             int short_term_ref_pic_set_sps_flag, poc;
 555
 556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 559                 av_log(s->avctx, AV_LOG_WARNING,
 560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 562                     return AVERROR_INVALIDDATA;
 563                 poc = s->poc;
 564             }
 565             s->poc = poc;
 566
 567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 568             if (!short_term_ref_pic_set_sps_flag) {
 569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 570                 if (ret < 0)
 571                     return ret;
 572
 573                 sh->short_term_rps = &sh->slice_rps;
 574             } else {
 575                 int numbits, rps_idx;
 576
 577                 if (!s->sps->nb_st_rps) {
 578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 579                     return AVERROR_INVALIDDATA;
 580                 }
 581
 582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 585             }
 586
 587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 588             if (ret < 0) {
 589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 591                     return AVERROR_INVALIDDATA;
 592             }
 593
 594             if (s->sps->sps_temporal_mvp_enabled_flag)
 595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 596             else
 597                 sh->slice_temporal_mvp_enabled_flag = 0;
 598         } else {
 599             s->sh.short_term_rps = NULL;
 600             s->poc               = 0;
 601         }
 602
 603         /* 8.3.1 */
 604         if (s->temporal_id == 0 &&
 605             s->nal_unit_type != NAL_TRAIL_N &&
 606             s->nal_unit_type != NAL_TSA_N   &&
 607             s->nal_unit_type != NAL_STSA_N  &&
 608             s->nal_unit_type != NAL_RADL_N  &&
 609             s->nal_unit_type != NAL_RADL_R  &&
 610             s->nal_unit_type != NAL_RASL_N  &&
 611             s->nal_unit_type != NAL_RASL_R)
 612             s->pocTid0 = s->poc;
 613
 614         if (s->sps->sao_enabled) {
 615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 616             sh->slice_sample_adaptive_offset_flag[1] =
 617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 618         } else {
 619             sh->slice_sample_adaptive_offset_flag[0] = 0;
 620             sh->slice_sample_adaptive_offset_flag[1] = 0;
 621             sh->slice_sample_adaptive_offset_flag[2] = 0;
 622         }
 623
 624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 626             int nb_refs;
 627
 628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 629             if (sh->slice_type == B_SLICE)
 630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 631
 632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 634                 if (sh->slice_type == B_SLICE)
 635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 636             }
 637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 639                        sh->nb_refs[L0], sh->nb_refs[L1]);
 640                 return AVERROR_INVALIDDATA;
 641             }
 642
 643             sh->rpl_modification_flag[0] = 0;
 644             sh->rpl_modification_flag[1] = 0;
 645             nb_refs = ff_hevc_frame_nb_refs(s);
 646             if (!nb_refs) {
 647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 648                 return AVERROR_INVALIDDATA;
 649             }
 650
 651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 652                 sh->rpl_modification_flag[0] = get_bits1(gb);
 653                 if (sh->rpl_modification_flag[0]) {
 654                     for (i = 0; i < sh->nb_refs[L0]; i++)
 655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 656                 }
 657
 658                 if (sh->slice_type == B_SLICE) {
 659                     sh->rpl_modification_flag[1] = get_bits1(gb);
 660                     if (sh->rpl_modification_flag[1] == 1)
 661                         for (i = 0; i < sh->nb_refs[L1]; i++)
 662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 663                 }
 664             }
 665
 666             if (sh->slice_type == B_SLICE)
 667                 sh->mvd_l1_zero_flag = get_bits1(gb);
 668
 669             if (s->pps->cabac_init_present_flag)
 670                 sh->cabac_init_flag = get_bits1(gb);
 671             else
 672                 sh->cabac_init_flag = 0;
 673
 674             sh->collocated_ref_idx = 0;
 675             if (sh->slice_temporal_mvp_enabled_flag) {
 676                 sh->collocated_list = L0;
 677                 if (sh->slice_type == B_SLICE)
 678                     sh->collocated_list = !get_bits1(gb);
 679
 680                 if (sh->nb_refs[sh->collocated_list] > 1) {
 681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 683                         av_log(s->avctx, AV_LOG_ERROR,
 684                                "Invalid collocated_ref_idx: %d.\n",
 685                                sh->collocated_ref_idx);
 686                         return AVERROR_INVALIDDATA;
 687                     }
 688                 }
 689             }
 690
 691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 693                 pred_weight_table(s, gb);
 694             }
 695
 696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 698                 av_log(s->avctx, AV_LOG_ERROR,
 699                        "Invalid number of merging MVP candidates: %d.\n",
 700                        sh->max_num_merge_cand);
 701                 return AVERROR_INVALIDDATA;
 702             }
 703         }
 704
 705         sh->slice_qp_delta = get_se_golomb(gb);
 706         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 707             sh->slice_cb_qp_offset = get_se_golomb(gb);
 708             sh->slice_cr_qp_offset = get_se_golomb(gb);
 709         } else {
 710             sh->slice_cb_qp_offset = 0;
 711             sh->slice_cr_qp_offset = 0;
 712         }
 713
 714         if (s->pps->deblocking_filter_control_present_flag) {
 715             int deblocking_filter_override_flag = 0;
 716
 717             if (s->pps->deblocking_filter_override_enabled_flag)
 718                 deblocking_filter_override_flag = get_bits1(gb);
 719
 720             if (deblocking_filter_override_flag) {
 721                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 722                 if (!sh->disable_deblocking_filter_flag) {
 723                     sh->beta_offset = get_se_golomb(gb) * 2;
 724                     sh->tc_offset   = get_se_golomb(gb) * 2;
 725                 }
 726             } else {
 727                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 728                 sh->beta_offset                    = s->pps->beta_offset;
 729                 sh->tc_offset                      = s->pps->tc_offset;
 730             }
 731         } else {
 732             sh->disable_deblocking_filter_flag = 0;
 733             sh->beta_offset                    = 0;
 734             sh->tc_offset                      = 0;
 735         }
 736
 737         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 738             (sh->slice_sample_adaptive_offset_flag[0] ||
 739              sh->slice_sample_adaptive_offset_flag[1] ||
 740              !sh->disable_deblocking_filter_flag)) {
 741             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 742         } else {
 743             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 744         }
 745     } else if (!s->slice_initialized) {
 746         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 747         return AVERROR_INVALIDDATA;
 748     }
 749
 750     sh->num_entry_point_offsets = 0;
 751     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 752         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 753         if (sh->num_entry_point_offsets > 0) {
 754             int offset_len = get_ue_golomb_long(gb) + 1;
 755
 756             for (i = 0; i < sh->num_entry_point_offsets; i++)
 757                 skip_bits(gb, offset_len);
 758         }
 759     }
 760
 761     if (s->pps->slice_header_extension_present_flag) {
 762         unsigned int length = get_ue_golomb_long(gb);
 763         for (i = 0; i < length; i++)
 764             skip_bits(gb, 8);  // slice_header_extension_data_byte
 765     }
 766
 767     // Inferred parameters
 768     sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 769     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 770
 771     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 772
 773     if (!s->pps->cu_qp_delta_enabled_flag)
 774         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 775                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 776
 777     s->slice_initialized = 1;
 778
 779     return 0;
 780 }
 781
 782 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 783
 784 #define SET_SAO(elem, value)                            \
 785 do {                                                    \
 786     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 787         sao->elem = value;                              \
 788     else if (sao_merge_left_flag)                       \
 789         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 790     else if (sao_merge_up_flag)                         \
 791         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 792     else                                                \
 793         sao->elem = 0;                                  \
 794 } while (0)
 795
 796 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 797 {
 798     HEVCLocalContext *lc    = &s->HEVClc;
 799     int sao_merge_left_flag = 0;
 800     int sao_merge_up_flag   = 0;
 801     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 802     SAOParams *sao          = &CTB(s->sao, rx, ry);
 803     int c_idx, i;
 804
 805     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 806         s->sh.slice_sample_adaptive_offset_flag[1]) {
 807         if (rx > 0) {
 808             if (lc->ctb_left_flag)
 809                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 810         }
 811         if (ry > 0 && !sao_merge_left_flag) {
 812             if (lc->ctb_up_flag)
 813                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 814         }
 815     }
 816
 817     for (c_idx = 0; c_idx < 3; c_idx++) {
 818         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 819             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 820             continue;
 821         }
 822
 823         if (c_idx == 2) {
 824             sao->type_idx[2] = sao->type_idx[1];
 825             sao->eo_class[2] = sao->eo_class[1];
 826         } else {
 827             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 828         }
 829
 830         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 831             continue;
 832
 833         for (i = 0; i < 4; i++)
 834             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 835
 836         if (sao->type_idx[c_idx] == SAO_BAND) {
 837             for (i = 0; i < 4; i++) {
 838                 if (sao->offset_abs[c_idx][i]) {
 839                     SET_SAO(offset_sign[c_idx][i],
 840                             ff_hevc_sao_offset_sign_decode(s));
 841                 } else {
 842                     sao->offset_sign[c_idx][i] = 0;
 843                 }
 844             }
 845             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 846         } else if (c_idx != 2) {
 847             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 848         }
 849
 850         // Inferred parameters
 851         sao->offset_val[c_idx][0] = 0;
 852         for (i = 0; i < 4; i++) {
 853             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 854             if (sao->type_idx[c_idx] == SAO_EDGE) {
 855                 if (i > 1)
 856                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 857             } else if (sao->offset_sign[c_idx][i]) {
 858                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 859             }
 860         }
 861     }
 862 }
 863
 864 #undef SET_SAO
 865 #undef CTB
 866
 867 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 868                                 int log2_trafo_size, enum ScanType scan_idx,
 869                                 int c_idx)
 870 {
 871 #define GET_COORD(offset, n)                                    \
 872     do {                                                        \
 873         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 874         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 875     } while (0)
 876     HEVCLocalContext *lc    = &s->HEVClc;
 877     int transform_skip_flag = 0;
 878
 879     int last_significant_coeff_x, last_significant_coeff_y;
 880     int last_scan_pos;
 881     int n_end;
 882     int num_coeff    = 0;
 883     int greater1_ctx = 1;
 884
 885     int num_last_subset;
 886     int x_cg_last_sig, y_cg_last_sig;
 887
 888     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 889
 890     ptrdiff_t stride = s->frame->linesize[c_idx];
 891     int hshift       = s->sps->hshift[c_idx];
 892     int vshift       = s->sps->vshift[c_idx];
 893     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 894                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 895     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 896     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 897
 898     int trafo_size = 1 << log2_trafo_size;
 899     int i, qp, shift, add, scale, scale_m;
 900     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 901     const uint8_t *scale_matrix;
 902     uint8_t dc_scale;
 903
 904     // Derive QP for dequant
 905     if (!lc->cu.cu_transquant_bypass_flag) {
 906         static const int qp_c[] = {
 907             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 908         };
 909
 910         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 911             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 912             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 913             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 914         };
 915
 916         static const uint8_t div6[51 + 2 * 6 + 1] = {
 917             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 918             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 919             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 920         };
 921         int qp_y = lc->qp_y;
 922
 923         if (c_idx == 0) {
 924             qp = qp_y + s->sps->qp_bd_offset;
 925         } else {
 926             int qp_i, offset;
 927
 928             if (c_idx == 1)
 929                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 930             else
 931                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 932
 933             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 934             if (qp_i < 30)
 935                 qp = qp_i;
 936             else if (qp_i > 43)
 937                 qp = qp_i - 6;
 938             else
 939                 qp = qp_c[qp_i - 30];
 940
 941             qp += s->sps->qp_bd_offset;
 942         }
 943
 944         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 945         add      = 1 << (shift - 1);
 946         scale    = level_scale[rem6[qp]] << (div6[qp]);
 947         scale_m  = 16; // default when no custom scaling lists.
 948         dc_scale = 16;
 949
 950         if (s->sps->scaling_list_enable_flag) {
 951             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 952                                     &s->pps->scaling_list : &s->sps->scaling_list;
 953             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 954
 955             if (log2_trafo_size != 5)
 956                 matrix_id = 3 * matrix_id + c_idx;
 957
 958             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 959             if (log2_trafo_size >= 4)
 960                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 961         }
 962     }
 963
 964     if (s->pps->transform_skip_enabled_flag &&
 965         !lc->cu.cu_transquant_bypass_flag   &&
 966         log2_trafo_size == 2) {
 967         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 968     }
 969
 970     last_significant_coeff_x =
 971         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 972     last_significant_coeff_y =
 973         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 974
 975     if (last_significant_coeff_x > 3) {
 976         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 977         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 978                                    (2 + (last_significant_coeff_x & 1)) +
 979                                    suffix;
 980     }
 981
 982     if (last_significant_coeff_y > 3) {
 983         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 984         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 985                                    (2 + (last_significant_coeff_y & 1)) +
 986                                    suffix;
 987     }
 988
 989     if (scan_idx == SCAN_VERT)
 990         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 991
 992     x_cg_last_sig = last_significant_coeff_x >> 2;
 993     y_cg_last_sig = last_significant_coeff_y >> 2;
 994
 995     switch (scan_idx) {
 996     case SCAN_DIAG: {
 997         int last_x_c = last_significant_coeff_x & 3;
 998         int last_y_c = last_significant_coeff_y & 3;
 999
1000         scan_x_off = ff_hevc_diag_scan4x4_x;
1001         scan_y_off = ff_hevc_diag_scan4x4_y;
1002         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1003         if (trafo_size == 4) {
1004             scan_x_cg = scan_1x1;
1005             scan_y_cg = scan_1x1;
1006         } else if (trafo_size == 8) {
1007             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1008             scan_x_cg  = diag_scan2x2_x;
1009             scan_y_cg  = diag_scan2x2_y;
1010         } else if (trafo_size == 16) {
1011             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1012             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1013             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1014         } else { // trafo_size == 32
1015             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1016             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1017             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1018         }
1019         break;
1020     }
1021     case SCAN_HORIZ:
1022         scan_x_cg  = horiz_scan2x2_x;
1023         scan_y_cg  = horiz_scan2x2_y;
1024         scan_x_off = horiz_scan4x4_x;
1025         scan_y_off = horiz_scan4x4_y;
1026         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1027         break;
1028     default: //SCAN_VERT
1029         scan_x_cg  = horiz_scan2x2_y;
1030         scan_y_cg  = horiz_scan2x2_x;
1031         scan_x_off = horiz_scan4x4_y;
1032         scan_y_off = horiz_scan4x4_x;
1033         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1034         break;
1035     }
1036     num_coeff++;
1037     num_last_subset = (num_coeff - 1) >> 4;
1038
1039     for (i = num_last_subset; i >= 0; i--) {
1040         int n, m;
1041         int x_cg, y_cg, x_c, y_c;
1042         int implicit_non_zero_coeff = 0;
1043         int64_t trans_coeff_level;
1044         int prev_sig = 0;
1045         int offset   = i << 4;
1046
1047         uint8_t significant_coeff_flag_idx[16];
1048         uint8_t nb_significant_coeff_flag = 0;
1049
1050         x_cg = scan_x_cg[i];
1051         y_cg = scan_y_cg[i];
1052
1053         if (i < num_last_subset && i > 0) {
1054             int ctx_cg = 0;
1055             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1056                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1057             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1058                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1059
1060             significant_coeff_group_flag[x_cg][y_cg] =
1061                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1062             implicit_non_zero_coeff = 1;
1063         } else {
1064             significant_coeff_group_flag[x_cg][y_cg] =
1065                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1066                  (x_cg == 0 && y_cg == 0));
1067         }
1068
1069         last_scan_pos = num_coeff - offset - 1;
1070
1071         if (i == num_last_subset) {
1072             n_end                         = last_scan_pos - 1;
1073             significant_coeff_flag_idx[0] = last_scan_pos;
1074             nb_significant_coeff_flag     = 1;
1075         } else {
1076             n_end = 15;
1077         }
1078
1079         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1080             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1081         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1082             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1083
1084         for (n = n_end; n >= 0; n--) {
1085             GET_COORD(offset, n);
1086
1087             if (significant_coeff_group_flag[x_cg][y_cg] &&
1088                 (n > 0 || implicit_non_zero_coeff == 0)) {
1089                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1090                                                           log2_trafo_size,
1091                                                           scan_idx,
1092                                                           prev_sig) == 1) {
1093                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1094                     nb_significant_coeff_flag++;
1095                     implicit_non_zero_coeff = 0;
1096                 }
1097             } else {
1098                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1099                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1100                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1101                     nb_significant_coeff_flag++;
1102                 }
1103             }
1104         }
1105
1106         n_end = nb_significant_coeff_flag;
1107
1108         if (n_end) {
1109             int first_nz_pos_in_cg = 16;
1110             int last_nz_pos_in_cg = -1;
1111             int c_rice_param = 0;
1112             int first_greater1_coeff_idx = -1;
1113             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1114             uint16_t coeff_sign_flag;
1115             int sum_abs = 0;
1116             int sign_hidden = 0;
1117
1118             // initialize first elem of coeff_bas_level_greater1_flag
1119             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1120
1121             if (!(i == num_last_subset) && greater1_ctx == 0)
1122                 ctx_set++;
1123             greater1_ctx      = 1;
1124             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1125
1126             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1127                 int n_idx = significant_coeff_flag_idx[m];
1128                 int inc   = (ctx_set << 2) + greater1_ctx;
1129                 coeff_abs_level_greater1_flag[n_idx] =
1130                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1131                 if (coeff_abs_level_greater1_flag[n_idx]) {
1132                     greater1_ctx = 0;
1133                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1134                     greater1_ctx++;
1135                 }
1136
1137                 if (coeff_abs_level_greater1_flag[n_idx] &&
1138                     first_greater1_coeff_idx == -1)
1139                     first_greater1_coeff_idx = n_idx;
1140             }
1141             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1142             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1143                                  !lc->cu.cu_transquant_bypass_flag;
1144
1145             if (first_greater1_coeff_idx != -1) {
1146                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1147             }
1148             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1149                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1150             } else {
1151                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1152             }
1153
1154             for (m = 0; m < n_end; m++) {
1155                 n = significant_coeff_flag_idx[m];
1156                 GET_COORD(offset, n);
1157                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1158                 if (trans_coeff_level == ((m < 8) ?
1159                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1160                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1161
1162                     trans_coeff_level += last_coeff_abs_level_remaining;
1163                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1164                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1165                 }
1166                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1167                     sum_abs += trans_coeff_level;
1168                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1169                         trans_coeff_level = -trans_coeff_level;
1170                 }
1171                 if (coeff_sign_flag >> 15)
1172                     trans_coeff_level = -trans_coeff_level;
1173                 coeff_sign_flag <<= 1;
1174                 if (!lc->cu.cu_transquant_bypass_flag) {
1175                     if (s->sps->scaling_list_enable_flag) {
1176                         if (y_c || x_c || log2_trafo_size < 4) {
1177                             int pos;
1178                             switch (log2_trafo_size) {
1179                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1180                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1181                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1182                             default: pos = (y_c        << 2) +  x_c;
1183                             }
1184                             scale_m = scale_matrix[pos];
1185                         } else {
1186                             scale_m = dc_scale;
1187                         }
1188                     }
1189                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1190                     if(trans_coeff_level < 0) {
1191                         if((~trans_coeff_level) & 0xFffffffffff8000)
1192                             trans_coeff_level = -32768;
1193                     } else {
1194                         if (trans_coeff_level & 0xffffffffffff8000)
1195                             trans_coeff_level = 32767;
1196                     }
1197                 }
1198                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1199             }
1200         }
1201     }
1202
1203     if (lc->cu.cu_transquant_bypass_flag) {
1204         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1205     } else {
1206         if (transform_skip_flag)
1207             s->hevcdsp.transform_skip(dst, coeffs, stride);
1208         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1209                  log2_trafo_size == 2)
1210             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1211         else
1212             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1213     }
1214 }
1215
1216 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1217                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1218                                int log2_cb_size, int log2_trafo_size,
1219                                int trafo_depth, int blk_idx)
1220 {
1221     HEVCLocalContext *lc = &s->HEVClc;
1222
1223     if (lc->cu.pred_mode == MODE_INTRA) {
1224         int trafo_size = 1 << log2_trafo_size;
1225         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1226
1227         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1228         if (log2_trafo_size > 2) {
1229             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1230             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1231             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1232             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1233         } else if (blk_idx == 3) {
1234             trafo_size = trafo_size << s->sps->hshift[1];
1235             ff_hevc_set_neighbour_available(s, xBase, yBase,
1236                                             trafo_size, trafo_size);
1237             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1238             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1239         }
1240     }
1241
1242     if (lc->tt.cbf_luma ||
1243         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1244         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1245         int scan_idx   = SCAN_DIAG;
1246         int scan_idx_c = SCAN_DIAG;
1247
1248         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1249             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1250             if (lc->tu.cu_qp_delta != 0)
1251                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1252                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1253             lc->tu.is_cu_qp_delta_coded = 1;
1254             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1255         }
1256
1257         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1258             if (lc->tu.cur_intra_pred_mode >= 6 &&
1259                 lc->tu.cur_intra_pred_mode <= 14) {
1260                 scan_idx = SCAN_VERT;
1261             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1262                        lc->tu.cur_intra_pred_mode <= 30) {
1263                 scan_idx = SCAN_HORIZ;
1264             }
1265
1266             if (lc->pu.intra_pred_mode_c >=  6 &&
1267                 lc->pu.intra_pred_mode_c <= 14) {
1268                 scan_idx_c = SCAN_VERT;
1269             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1270                        lc->pu.intra_pred_mode_c <= 30) {
1271                 scan_idx_c = SCAN_HORIZ;
1272             }
1273         }
1274
1275         if (lc->tt.cbf_luma)
1276             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1277         if (log2_trafo_size > 2) {
1278             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1279                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1280             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1281                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1282         } else if (blk_idx == 3) {
1283             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1284                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1285             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1286                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1287         }
1288     }
1289 }
1290
1291 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1292 {
1293     int cb_size          = 1 << log2_cb_size;
1294     int log2_min_pu_size = s->sps->log2_min_pu_size;
1295
1296     int min_pu_width     = s->sps->min_pu_width;
1297     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1298     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1299     int i, j;
1300
1301     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1302         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1303             s->is_pcm[i + j * min_pu_width] = 2;
1304 }
1305
1306 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1307                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1308                                int log2_cb_size, int log2_trafo_size,
1309                                int trafo_depth, int blk_idx)
1310 {
1311     HEVCLocalContext *lc = &s->HEVClc;
1312     uint8_t split_transform_flag;
1313
1314     if (trafo_depth > 0 && log2_trafo_size == 2) {
1315         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1316             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1317         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1318             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1319     } else {
1320         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1321         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1322     }
1323
1324     if (lc->cu.intra_split_flag) {
1325         if (trafo_depth == 1)
1326             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1327     } else {
1328         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1329     }
1330
1331     lc->tt.cbf_luma = 1;
1332
1333     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1334                               lc->cu.pred_mode == MODE_INTER &&
1335                               lc->cu.part_mode != PART_2Nx2N &&
1336                               trafo_depth == 0;
1337
1338     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1339         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1340         trafo_depth     < lc->cu.max_trafo_depth       &&
1341         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1342         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1343     } else {
1344         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1345                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1346                                lc->tt.inter_split_flag;
1347     }
1348
1349     if (log2_trafo_size > 2) {
1350         if (trafo_depth == 0 ||
1351             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1352             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1353                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1354         }
1355
1356         if (trafo_depth == 0 ||
1357             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1358             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1359                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1360         }
1361     }
1362
1363     if (split_transform_flag) {
1364         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1365         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1366
1367         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1368                            log2_trafo_size - 1, trafo_depth + 1, 0);
1369         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1370                            log2_trafo_size - 1, trafo_depth + 1, 1);
1371         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1372                            log2_trafo_size - 1, trafo_depth + 1, 2);
1373         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1374                            log2_trafo_size - 1, trafo_depth + 1, 3);
1375     } else {
1376         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1377         int log2_min_tu_size = s->sps->log2_min_tb_size;
1378         int min_tu_width     = s->sps->min_tb_width;
1379
1380         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1381             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1382             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1383             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1384         }
1385
1386         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1387                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1388
1389         // TODO: store cbf_luma somewhere else
1390         if (lc->tt.cbf_luma) {
1391             int i, j;
1392             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1393                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1394                     int x_tu = (x0 + j) >> log2_min_tu_size;
1395                     int y_tu = (y0 + i) >> log2_min_tu_size;
1396                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1397                 }
1398         }
1399         if (!s->sh.disable_deblocking_filter_flag) {
1400             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1401                                                   lc->slice_or_tiles_up_boundary,
1402                                                   lc->slice_or_tiles_left_boundary);
1403             if (s->pps->transquant_bypass_enable_flag &&
1404                 lc->cu.cu_transquant_bypass_flag)
1405                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1406         }
1407     }
1408 }
1409
1410 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1411 {
1412     //TODO: non-4:2:0 support
1413     HEVCLocalContext *lc = &s->HEVClc;
1414     GetBitContext gb;
1415     int cb_size   = 1 << log2_cb_size;
1416     int stride0   = s->frame->linesize[0];
1417     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1418     int   stride1 = s->frame->linesize[1];
1419     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1420     int   stride2 = s->frame->linesize[2];
1421     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1422
1423     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1424     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1425     int ret;
1426
1427     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1428                                           lc->slice_or_tiles_up_boundary,
1429                                           lc->slice_or_tiles_left_boundary);
1430
1431     ret = init_get_bits(&gb, pcm, length);
1432     if (ret < 0)
1433         return ret;
1434
1435     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1436     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1437     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1438     return 0;
1439 }
1440
1441 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1442 {
1443     HEVCLocalContext *lc = &s->HEVClc;
1444     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1445     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1446
1447     if (x)
1448         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1449     if (y)
1450         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1451
1452     switch (x) {
1453     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1454     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1455     case 0: lc->pu.mvd.x = 0;                               break;
1456     }
1457
1458     switch (y) {
1459     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1460     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1461     case 0: lc->pu.mvd.y = 0;                               break;
1462     }
1463 }
1464
1465 /**
1466  * 8.5.3.2.2.1 Luma sample interpolation process
1467  *
1468  * @param s HEVC decoding context
1469  * @param dst target buffer for block data at block position
1470  * @param dststride stride of the dst buffer
1471  * @param ref reference picture buffer at origin (0, 0)
1472  * @param mv motion vector (relative to block position) to get pixel data from
1473  * @param x_off horizontal position of block from origin (0, 0)
1474  * @param y_off vertical position of block from origin (0, 0)
1475  * @param block_w width of block
1476  * @param block_h height of block
1477  */
1478 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1479                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1480                     int block_w, int block_h)
1481 {
1482     HEVCLocalContext *lc = &s->HEVClc;
1483     uint8_t *src         = ref->data[0];
1484     ptrdiff_t srcstride  = ref->linesize[0];
1485     int pic_width        = s->sps->width;
1486     int pic_height       = s->sps->height;
1487
1488     int mx         = mv->x & 3;
1489     int my         = mv->y & 3;
1490     int extra_left = ff_hevc_qpel_extra_before[mx];
1491     int extra_top  = ff_hevc_qpel_extra_before[my];
1492
1493     x_off += mv->x >> 2;
1494     y_off += mv->y >> 2;
1495     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1496
1497     if (x_off < extra_left || y_off < extra_top ||
1498         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1499         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1500         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1501         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1502         int buf_offset = extra_top *
1503                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1504
1505         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1506                                  edge_emu_stride, srcstride,
1507                                  block_w + ff_hevc_qpel_extra[mx],
1508                                  block_h + ff_hevc_qpel_extra[my],
1509                                  x_off - extra_left, y_off - extra_top,
1510                                  pic_width, pic_height);
1511         src = lc->edge_emu_buffer + buf_offset;
1512         srcstride = edge_emu_stride;
1513     }
1514     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1515                                      block_h, lc->mc_buffer);
1516 }
1517
1518 /**
1519  * 8.5.3.2.2.2 Chroma sample interpolation process
1520  *
1521  * @param s HEVC decoding context
1522  * @param dst1 target buffer for block data at block position (U plane)
1523  * @param dst2 target buffer for block data at block position (V plane)
1524  * @param dststride stride of the dst1 and dst2 buffers
1525  * @param ref reference picture buffer at origin (0, 0)
1526  * @param mv motion vector (relative to block position) to get pixel data from
1527  * @param x_off horizontal position of block from origin (0, 0)
1528  * @param y_off vertical position of block from origin (0, 0)
1529  * @param block_w width of block
1530  * @param block_h height of block
1531  */
1532 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1533                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1534                       int x_off, int y_off, int block_w, int block_h)
1535 {
1536     HEVCLocalContext *lc = &s->HEVClc;
1537     uint8_t *src1        = ref->data[1];
1538     uint8_t *src2        = ref->data[2];
1539     ptrdiff_t src1stride = ref->linesize[1];
1540     ptrdiff_t src2stride = ref->linesize[2];
1541     int pic_width        = s->sps->width >> 1;
1542     int pic_height       = s->sps->height >> 1;
1543
1544     int mx = mv->x & 7;
1545     int my = mv->y & 7;
1546
1547     x_off += mv->x >> 3;
1548     y_off += mv->y >> 3;
1549     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1550     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1551
1552     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1553         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1554         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1555         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1556         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1557         int buf_offset1 = EPEL_EXTRA_BEFORE *
1558                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1559         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1560         int buf_offset2 = EPEL_EXTRA_BEFORE *
1561                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1562
1563         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1564                                  edge_emu_stride, src1stride,
1565                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1566                                  x_off - EPEL_EXTRA_BEFORE,
1567                                  y_off - EPEL_EXTRA_BEFORE,
1568                                  pic_width, pic_height);
1569
1570         src1 = lc->edge_emu_buffer + buf_offset1;
1571         src1stride = edge_emu_stride;
1572         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1573                                              block_w, block_h, mx, my, lc->mc_buffer);
1574
1575         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1576                                  edge_emu_stride, src2stride,
1577                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1578                                  x_off - EPEL_EXTRA_BEFORE,
1579                                  y_off - EPEL_EXTRA_BEFORE,
1580                                  pic_width, pic_height);
1581         src2 = lc->edge_emu_buffer + buf_offset2;
1582         src2stride = edge_emu_stride;
1583
1584         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1585                                              block_w, block_h, mx, my,
1586                                              lc->mc_buffer);
1587     } else {
1588         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1589                                              block_w, block_h, mx, my,
1590                                              lc->mc_buffer);
1591         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1592                                              block_w, block_h, mx, my,
1593                                              lc->mc_buffer);
1594     }
1595 }
1596
1597 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1598                                 const Mv *mv, int y0, int height)
1599 {
1600     int y = (mv->y >> 2) + y0 + height + 9;
1601     ff_thread_await_progress(&ref->tf, y, 0);
1602 }
1603
1604 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1605                                 int nPbW, int nPbH,
1606                                 int log2_cb_size, int partIdx)
1607 {
1608 #define POS(c_idx, x, y)                                                              \
1609     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1610                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1611     HEVCLocalContext *lc = &s->HEVClc;
1612     int merge_idx = 0;
1613     struct MvField current_mv = {{{ 0 }}};
1614
1615     int min_pu_width = s->sps->min_pu_width;
1616
1617     MvField *tab_mvf = s->ref->tab_mvf;
1618     RefPicList  *refPicList = s->ref->refPicList;
1619     HEVCFrame *ref0, *ref1;
1620
1621     int tmpstride = MAX_PB_SIZE;
1622
1623     uint8_t *dst0 = POS(0, x0, y0);
1624     uint8_t *dst1 = POS(1, x0, y0);
1625     uint8_t *dst2 = POS(2, x0, y0);
1626     int log2_min_cb_size = s->sps->log2_min_cb_size;
1627     int min_cb_width     = s->sps->min_cb_width;
1628     int x_cb             = x0 >> log2_min_cb_size;
1629     int y_cb             = y0 >> log2_min_cb_size;
1630     int ref_idx[2];
1631     int mvp_flag[2];
1632     int x_pu, y_pu;
1633     int i, j;
1634
1635     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1636         if (s->sh.max_num_merge_cand > 1)
1637             merge_idx = ff_hevc_merge_idx_decode(s);
1638         else
1639             merge_idx = 0;
1640
1641         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1642                                    1 << log2_cb_size,
1643                                    1 << log2_cb_size,
1644                                    log2_cb_size, partIdx,
1645                                    merge_idx, &current_mv);
1646         x_pu = x0 >> s->sps->log2_min_pu_size;
1647         y_pu = y0 >> s->sps->log2_min_pu_size;
1648
1649         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1650             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1651                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1652     } else { /* MODE_INTER */
1653         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1654         if (lc->pu.merge_flag) {
1655             if (s->sh.max_num_merge_cand > 1)
1656                 merge_idx = ff_hevc_merge_idx_decode(s);
1657             else
1658                 merge_idx = 0;
1659
1660             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1661                                        partIdx, merge_idx, &current_mv);
1662             x_pu = x0 >> s->sps->log2_min_pu_size;
1663             y_pu = y0 >> s->sps->log2_min_pu_size;
1664
1665             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1666                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1667                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1668         } else {
1669             enum InterPredIdc inter_pred_idc = PRED_L0;
1670             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1671             if (s->sh.slice_type == B_SLICE)
1672                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1673
1674             if (inter_pred_idc != PRED_L1) {
1675                 if (s->sh.nb_refs[L0]) {
1676                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1677                     current_mv.ref_idx[0] = ref_idx[0];
1678                 }
1679                 current_mv.pred_flag[0] = 1;
1680                 hls_mvd_coding(s, x0, y0, 0);
1681                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1682                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1683                                          partIdx, merge_idx, &current_mv,
1684                                          mvp_flag[0], 0);
1685                 current_mv.mv[0].x += lc->pu.mvd.x;
1686                 current_mv.mv[0].y += lc->pu.mvd.y;
1687             }
1688
1689             if (inter_pred_idc != PRED_L0) {
1690                 if (s->sh.nb_refs[L1]) {
1691                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1692                     current_mv.ref_idx[1] = ref_idx[1];
1693                 }
1694
1695                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1696                     lc->pu.mvd.x = 0;
1697                     lc->pu.mvd.y = 0;
1698                 } else {
1699                     hls_mvd_coding(s, x0, y0, 1);
1700                 }
1701
1702                 current_mv.pred_flag[1] = 1;
1703                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1704                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1705                                          partIdx, merge_idx, &current_mv,
1706                                          mvp_flag[1], 1);
1707                 current_mv.mv[1].x += lc->pu.mvd.x;
1708                 current_mv.mv[1].y += lc->pu.mvd.y;
1709             }
1710
1711             x_pu = x0 >> s->sps->log2_min_pu_size;
1712             y_pu = y0 >> s->sps->log2_min_pu_size;
1713
1714             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1715                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1716                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1717         }
1718     }
1719
1720     if (current_mv.pred_flag[0]) {
1721         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1722         if (!ref0)
1723             return;
1724         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1725     }
1726     if (current_mv.pred_flag[1]) {
1727         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1728         if (!ref1)
1729             return;
1730         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1731     }
1732
1733     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1734         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1735         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1736
1737         luma_mc(s, tmp, tmpstride, ref0->frame,
1738                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1739
1740         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1741             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1742             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1743                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1744                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1745                                      dst0, s->frame->linesize[0], tmp,
1746                                      tmpstride, nPbW, nPbH);
1747         } else {
1748             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1749         }
1750         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1751                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1752
1753         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1754             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1755             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1756                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1757                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1758                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1759                                      nPbW / 2, nPbH / 2);
1760             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1761                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1762                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1763                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1764                                      nPbW / 2, nPbH / 2);
1765         } else {
1766             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1767             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1768         }
1769     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1770         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1771         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1772
1773         if (!ref1)
1774             return;
1775
1776         luma_mc(s, tmp, tmpstride, ref1->frame,
1777                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1778
1779         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1780             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1781             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1782                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1783                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1784                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1785                                       nPbW, nPbH);
1786         } else {
1787             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1788         }
1789
1790         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1791                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1792
1793         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1794             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1795             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1796                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1797                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1798                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1799             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1800                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1801                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1802                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1803         } else {
1804             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1805             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1806         }
1807     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1808         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1809         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1810         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1811         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1812         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1813         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1814
1815         if (!ref0 || !ref1)
1816             return;
1817
1818         luma_mc(s, tmp, tmpstride, ref0->frame,
1819                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1820         luma_mc(s, tmp2, tmpstride, ref1->frame,
1821                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1822
1823         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1824             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1825             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1826                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1827                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1828                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1829                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1830                                          dst0, s->frame->linesize[0],
1831                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1832         } else {
1833             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1834                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1835         }
1836
1837         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1838                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1839         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1840                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1841
1842         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1843             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1844             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1845                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1846                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1847                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1848                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1849                                          dst1, s->frame->linesize[1], tmp, tmp3,
1850                                          tmpstride, nPbW / 2, nPbH / 2);
1851             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1852                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1853                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1854                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1855                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1856                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1857                                          tmpstride, nPbW / 2, nPbH / 2);
1858         } else {
1859             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1860             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1861         }
1862     }
1863 }
1864
1865 /**
1866  * 8.4.1
1867  */
1868 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1869                                 int prev_intra_luma_pred_flag)
1870 {
1871     HEVCLocalContext *lc = &s->HEVClc;
1872     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1873     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1874     int min_pu_width     = s->sps->min_pu_width;
1875     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1876     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1877     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1878
1879     int cand_up   = (lc->ctb_up_flag || y0b) ?
1880                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1881     int cand_left = (lc->ctb_left_flag || x0b) ?
1882                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1883
1884     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1885
1886     MvField *tab_mvf = s->ref->tab_mvf;
1887     int intra_pred_mode;
1888     int candidate[3];
1889     int i, j;
1890
1891     // intra_pred_mode prediction does not cross vertical CTB boundaries
1892     if ((y0 - 1) < y_ctb)
1893         cand_up = INTRA_DC;
1894
1895     if (cand_left == cand_up) {
1896         if (cand_left < 2) {
1897             candidate[0] = INTRA_PLANAR;
1898             candidate[1] = INTRA_DC;
1899             candidate[2] = INTRA_ANGULAR_26;
1900         } else {
1901             candidate[0] = cand_left;
1902             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1903             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1904         }
1905     } else {
1906         candidate[0] = cand_left;
1907         candidate[1] = cand_up;
1908         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1909             candidate[2] = INTRA_PLANAR;
1910         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1911             candidate[2] = INTRA_DC;
1912         } else {
1913             candidate[2] = INTRA_ANGULAR_26;
1914         }
1915     }
1916
1917     if (prev_intra_luma_pred_flag) {
1918         intra_pred_mode = candidate[lc->pu.mpm_idx];
1919     } else {
1920         if (candidate[0] > candidate[1])
1921             FFSWAP(uint8_t, candidate[0], candidate[1]);
1922         if (candidate[0] > candidate[2])
1923             FFSWAP(uint8_t, candidate[0], candidate[2]);
1924         if (candidate[1] > candidate[2])
1925             FFSWAP(uint8_t, candidate[1], candidate[2]);
1926
1927         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1928         for (i = 0; i < 3; i++)
1929             if (intra_pred_mode >= candidate[i])
1930                 intra_pred_mode++;
1931     }
1932
1933     /* write the intra prediction units into the mv array */
1934     if (!size_in_pus)
1935         size_in_pus = 1;
1936     for (i = 0; i < size_in_pus; i++) {
1937         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1938                intra_pred_mode, size_in_pus);
1939
1940         for (j = 0; j < size_in_pus; j++) {
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1946             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1947             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1948             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1949             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1950         }
1951     }
1952
1953     return intra_pred_mode;
1954 }
1955
1956 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1957                                           int log2_cb_size, int ct_depth)
1958 {
1959     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1960     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1961     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1962     int y;
1963
1964     for (y = 0; y < length; y++)
1965         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1966                ct_depth, length);
1967 }
1968
1969 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1970                                   int log2_cb_size)
1971 {
1972     HEVCLocalContext *lc = &s->HEVClc;
1973     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1974     uint8_t prev_intra_luma_pred_flag[4];
1975     int split   = lc->cu.part_mode == PART_NxN;
1976     int pb_size = (1 << log2_cb_size) >> split;
1977     int side    = split + 1;
1978     int chroma_mode;
1979     int i, j;
1980
1981     for (i = 0; i < side; i++)
1982         for (j = 0; j < side; j++)
1983             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1984
1985     for (i = 0; i < side; i++) {
1986         for (j = 0; j < side; j++) {
1987             if (prev_intra_luma_pred_flag[2 * i + j])
1988                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1989             else
1990                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1991
1992             lc->pu.intra_pred_mode[2 * i + j] =
1993                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1994                                      prev_intra_luma_pred_flag[2 * i + j]);
1995         }
1996     }
1997
1998     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1999     if (chroma_mode != 4) {
2000         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2001             lc->pu.intra_pred_mode_c = 34;
2002         else
2003             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2004     } else {
2005         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2006     }
2007 }
2008
2009 static void intra_prediction_unit_default_value(HEVCContext *s,
2010                                                 int x0, int y0,
2011                                                 int log2_cb_size)
2012 {
2013     HEVCLocalContext *lc = &s->HEVClc;
2014     int pb_size          = 1 << log2_cb_size;
2015     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2016     int min_pu_width     = s->sps->min_pu_width;
2017     MvField *tab_mvf     = s->ref->tab_mvf;
2018     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2019     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2020     int j, k;
2021
2022     if (size_in_pus == 0)
2023         size_in_pus = 1;
2024     for (j = 0; j < size_in_pus; j++) {
2025         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2026         for (k = 0; k < size_in_pus; k++)
2027             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2028     }
2029 }
2030
2031 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2032 {
2033     int cb_size          = 1 << log2_cb_size;
2034     HEVCLocalContext *lc = &s->HEVClc;
2035     int log2_min_cb_size = s->sps->log2_min_cb_size;
2036     int length           = cb_size >> log2_min_cb_size;
2037     int min_cb_width     = s->sps->min_cb_width;
2038     int x_cb             = x0 >> log2_min_cb_size;
2039     int y_cb             = y0 >> log2_min_cb_size;
2040     int x, y;
2041
2042     lc->cu.x                = x0;
2043     lc->cu.y                = y0;
2044     lc->cu.rqt_root_cbf     = 1;
2045     lc->cu.pred_mode        = MODE_INTRA;
2046     lc->cu.part_mode        = PART_2Nx2N;
2047     lc->cu.intra_split_flag = 0;
2048     lc->cu.pcm_flag         = 0;
2049
2050     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2051     for (x = 0; x < 4; x++)
2052         lc->pu.intra_pred_mode[x] = 1;
2053     if (s->pps->transquant_bypass_enable_flag) {
2054         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2055         if (lc->cu.cu_transquant_bypass_flag)
2056             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2057     } else
2058         lc->cu.cu_transquant_bypass_flag = 0;
2059
2060     if (s->sh.slice_type != I_SLICE) {
2061         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2062
2063         lc->cu.pred_mode = MODE_SKIP;
2064         x = y_cb * min_cb_width + x_cb;
2065         for (y = 0; y < length; y++) {
2066             memset(&s->skip_flag[x], skip_flag, length);
2067             x += min_cb_width;
2068         }
2069         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2070     }
2071
2072     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2073         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2074         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2075
2076         if (!s->sh.disable_deblocking_filter_flag)
2077             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2078                                                   lc->slice_or_tiles_up_boundary,
2079                                                   lc->slice_or_tiles_left_boundary);
2080     } else {
2081         if (s->sh.slice_type != I_SLICE)
2082             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2083         if (lc->cu.pred_mode != MODE_INTRA ||
2084             log2_cb_size == s->sps->log2_min_cb_size) {
2085             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2086             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2087                                       lc->cu.pred_mode == MODE_INTRA;
2088         }
2089
2090         if (lc->cu.pred_mode == MODE_INTRA) {
2091             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2092                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2093                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2094                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2095             }
2096             if (lc->cu.pcm_flag) {
2097                 int ret;
2098                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2099                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2100                 if (s->sps->pcm.loop_filter_disable_flag)
2101                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2102
2103                 if (ret < 0)
2104                     return ret;
2105             } else {
2106                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2107             }
2108         } else {
2109             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2110             switch (lc->cu.part_mode) {
2111             case PART_2Nx2N:
2112                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2113                 break;
2114             case PART_2NxN:
2115                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2116                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2117                 break;
2118             case PART_Nx2N:
2119                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2120                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2121                 break;
2122             case PART_2NxnU:
2123                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2124                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2125                 break;
2126             case PART_2NxnD:
2127                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2128                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2129                 break;
2130             case PART_nLx2N:
2131                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2132                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2133                 break;
2134             case PART_nRx2N:
2135                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2136                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2137                 break;
2138             case PART_NxN:
2139                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2140                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2141                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2142                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2143                 break;
2144             }
2145         }
2146
2147         if (!lc->cu.pcm_flag) {
2148             if (lc->cu.pred_mode != MODE_INTRA &&
2149                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2150                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2151             }
2152             if (lc->cu.rqt_root_cbf) {
2153                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2154                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2155                                          s->sps->max_transform_hierarchy_depth_inter;
2156                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2157                                    log2_cb_size, 0, 0);
2158             } else {
2159                 if (!s->sh.disable_deblocking_filter_flag)
2160                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2161                                                           lc->slice_or_tiles_up_boundary,
2162                                                           lc->slice_or_tiles_left_boundary);
2163             }
2164         }
2165     }
2166
2167     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2168         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2169
2170     x = y_cb * min_cb_width + x_cb;
2171     for (y = 0; y < length; y++) {
2172         memset(&s->qp_y_tab[x], lc->qp_y, length);
2173         x += min_cb_width;
2174     }
2175
2176     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2177
2178     return 0;
2179 }
2180
2181 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2182                                int log2_cb_size, int cb_depth)
2183 {
2184     HEVCLocalContext *lc = &s->HEVClc;
2185     const int cb_size    = 1 << log2_cb_size;
2186
2187     lc->ct.depth = cb_depth;
2188     if (x0 + cb_size <= s->sps->width  &&
2189         y0 + cb_size <= s->sps->height &&
2190         log2_cb_size > s->sps->log2_min_cb_size) {
2191         SAMPLE(s->split_cu_flag, x0, y0) =
2192             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2193     } else {
2194         SAMPLE(s->split_cu_flag, x0, y0) =
2195             (log2_cb_size > s->sps->log2_min_cb_size);
2196     }
2197     if (s->pps->cu_qp_delta_enabled_flag &&
2198         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2199         lc->tu.is_cu_qp_delta_coded = 0;
2200         lc->tu.cu_qp_delta          = 0;
2201     }
2202
2203     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2204         const int cb_size_split = cb_size >> 1;
2205         const int x1 = x0 + cb_size_split;
2206         const int y1 = y0 + cb_size_split;
2207
2208         log2_cb_size--;
2209         cb_depth++;
2210
2211 #define SUBDIVIDE(x, y)                                                \
2212 do {                                                                   \
2213     if (x < s->sps->width && y < s->sps->height) {                     \
2214         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2215         if (ret < 0)                                                   \
2216             return ret;                                                \
2217     }                                                                  \
2218 } while (0)
2219
2220         SUBDIVIDE(x0, y0);
2221         SUBDIVIDE(x1, y0);
2222         SUBDIVIDE(x0, y1);
2223         SUBDIVIDE(x1, y1);
2224     } else {
2225         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2226         if (ret < 0)
2227             return ret;
2228     }
2229
2230     return 0;
2231 }
2232
2233 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2234                                  int ctb_addr_ts)
2235 {
2236     HEVCLocalContext *lc  = &s->HEVClc;
2237     int ctb_size          = 1 << s->sps->log2_ctb_size;
2238     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2239     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2240
2241     int tile_left_boundary, tile_up_boundary;
2242     int slice_left_boundary, slice_up_boundary;
2243
2244     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2245
2246     if (s->pps->entropy_coding_sync_enabled_flag) {
2247         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2248             lc->first_qp_group = 1;
2249         lc->end_of_tiles_x = s->sps->width;
2250     } else if (s->pps->tiles_enabled_flag) {
2251         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2252             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2253             lc->start_of_tiles_x = x_ctb;
2254             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2255             lc->first_qp_group   = 1;
2256         }
2257     } else {
2258         lc->end_of_tiles_x = s->sps->width;
2259     }
2260
2261     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2262
2263     if (s->pps->tiles_enabled_flag) {
2264         tile_left_boundary  = x_ctb > 0 &&
2265                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2266         slice_left_boundary = x_ctb > 0 &&
2267                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2268         tile_up_boundary  = y_ctb > 0 &&
2269                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2270         slice_up_boundary = y_ctb > 0 &&
2271                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2272     } else {
2273         tile_left_boundary  =
2274         tile_up_boundary    = 1;
2275         slice_left_boundary = ctb_addr_in_slice > 0;
2276         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2277     }
2278     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2279     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2280     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2281     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2282     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2283     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2284 }
2285
2286 static int hls_slice_data(HEVCContext *s)
2287 {
2288     int ctb_size    = 1 << s->sps->log2_ctb_size;
2289     int more_data   = 1;
2290     int x_ctb       = 0;
2291     int y_ctb       = 0;
2292     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2293     int ret;
2294
2295     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2296         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2297
2298         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2299         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2300         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2301
2302         ff_hevc_cabac_init(s, ctb_addr_ts);
2303
2304         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2305
2306         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2307         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2308         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2309
2310         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2311         if (ret < 0)
2312             return ret;
2313         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2314
2315         ctb_addr_ts++;
2316         ff_hevc_save_states(s, ctb_addr_ts);
2317         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2318     }
2319
2320     if (x_ctb + ctb_size >= s->sps->width &&
2321         y_ctb + ctb_size >= s->sps->height)
2322         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2323
2324     return ctb_addr_ts;
2325 }
2326
2327 /**
2328  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2329  * 0 if the unit should be skipped, 1 otherwise
2330  */
2331 static int hls_nal_unit(HEVCContext *s)
2332 {
2333     GetBitContext *gb = &s->HEVClc.gb;
2334     int nuh_layer_id;
2335
2336     if (get_bits1(gb) != 0)
2337         return AVERROR_INVALIDDATA;
2338
2339     s->nal_unit_type = get_bits(gb, 6);
2340
2341     nuh_layer_id   = get_bits(gb, 6);
2342     s->temporal_id = get_bits(gb, 3) - 1;
2343     if (s->temporal_id < 0)
2344         return AVERROR_INVALIDDATA;
2345
2346     av_log(s->avctx, AV_LOG_DEBUG,
2347            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2348            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2349
2350     return nuh_layer_id == 0;
2351 }
2352
2353 static void restore_tqb_pixels(HEVCContext *s)
2354 {
2355     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2356     int x, y, c_idx;
2357
2358     for (c_idx = 0; c_idx < 3; c_idx++) {
2359         ptrdiff_t stride = s->frame->linesize[c_idx];
2360         int hshift       = s->sps->hshift[c_idx];
2361         int vshift       = s->sps->vshift[c_idx];
2362         for (y = 0; y < s->sps->min_pu_height; y++) {
2363             for (x = 0; x < s->sps->min_pu_width; x++) {
2364                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2365                     int n;
2366                     int len      = min_pu_size >> hshift;
2367                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2368                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2369                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2370                         memcpy(dst, src, len);
2371                         src += stride;
2372                         dst += stride;
2373                     }
2374                 }
2375             }
2376         }
2377     }
2378 }
2379
2380 static int set_side_data(HEVCContext *s)
2381 {
2382     AVFrame *out = s->ref->frame;
2383
2384     if (s->sei_frame_packing_present &&
2385         s->frame_packing_arrangement_type >= 3 &&
2386         s->frame_packing_arrangement_type <= 5 &&
2387         s->content_interpretation_type > 0 &&
2388         s->content_interpretation_type < 3) {
2389         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2390         if (!stereo)
2391             return AVERROR(ENOMEM);
2392
2393         switch (s->frame_packing_arrangement_type) {
2394         case 3:
2395             if (s->quincunx_subsampling)
2396                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2397             else
2398                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2399             break;
2400         case 4:
2401             stereo->type = AV_STEREO3D_TOPBOTTOM;
2402             break;
2403         case 5:
2404             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2405             break;
2406         }
2407
2408         if (s->content_interpretation_type == 2)
2409             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2410     }
2411
2412     return 0;
2413 }
2414
2415 static int hevc_frame_start(HEVCContext *s)
2416 {
2417     HEVCLocalContext *lc = &s->HEVClc;
2418     int ret;
2419
2420     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2421     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2422     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2423     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2424
2425     lc->start_of_tiles_x = 0;
2426     s->is_decoded        = 0;
2427
2428     if (s->pps->tiles_enabled_flag)
2429         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2430
2431     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2432                               s->poc);
2433     if (ret < 0)
2434         goto fail;
2435
2436     ret = ff_hevc_frame_rps(s);
2437     if (ret < 0) {
2438         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2439         goto fail;
2440     }
2441
2442     ret = set_side_data(s);
2443     if (ret < 0)
2444         goto fail;
2445
2446     av_frame_unref(s->output_frame);
2447     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2448     if (ret < 0)
2449         goto fail;
2450
2451     ff_thread_finish_setup(s->avctx);
2452
2453     return 0;
2454
2455 fail:
2456     if (s->ref)
2457         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2458     s->ref = NULL;
2459     return ret;
2460 }
2461
2462 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2463 {
2464     HEVCLocalContext *lc = &s->HEVClc;
2465     GetBitContext *gb    = &lc->gb;
2466     int ctb_addr_ts, ret;
2467
2468     ret = init_get_bits8(gb, nal, length);
2469     if (ret < 0)
2470         return ret;
2471
2472     ret = hls_nal_unit(s);
2473     if (ret < 0) {
2474         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2475                s->nal_unit_type);
2476         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2477             return ret;
2478         return 0;
2479     } else if (!ret)
2480         return 0;
2481
2482     switch (s->nal_unit_type) {
2483     case NAL_VPS:
2484         ret = ff_hevc_decode_nal_vps(s);
2485         if (ret < 0)
2486             return ret;
2487         break;
2488     case NAL_SPS:
2489         ret = ff_hevc_decode_nal_sps(s);
2490         if (ret < 0)
2491             return ret;
2492         break;
2493     case NAL_PPS:
2494         ret = ff_hevc_decode_nal_pps(s);
2495         if (ret < 0)
2496             return ret;
2497         break;
2498     case NAL_SEI_PREFIX:
2499     case NAL_SEI_SUFFIX:
2500         ret = ff_hevc_decode_nal_sei(s);
2501         if (ret < 0)
2502             return ret;
2503         break;
2504     case NAL_TRAIL_R:
2505     case NAL_TRAIL_N:
2506     case NAL_TSA_N:
2507     case NAL_TSA_R:
2508     case NAL_STSA_N:
2509     case NAL_STSA_R:
2510     case NAL_BLA_W_LP:
2511     case NAL_BLA_W_RADL:
2512     case NAL_BLA_N_LP:
2513     case NAL_IDR_W_RADL:
2514     case NAL_IDR_N_LP:
2515     case NAL_CRA_NUT:
2516     case NAL_RADL_N:
2517     case NAL_RADL_R:
2518     case NAL_RASL_N:
2519     case NAL_RASL_R:
2520         ret = hls_slice_header(s);
2521         if (ret < 0)
2522             return ret;
2523
2524         if (s->max_ra == INT_MAX) {
2525             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2526                 s->max_ra = s->poc;
2527             } else {
2528                 if (IS_IDR(s))
2529                     s->max_ra = INT_MIN;
2530             }
2531         }
2532
2533         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2534             s->poc <= s->max_ra) {
2535             s->is_decoded = 0;
2536             break;
2537         } else {
2538             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2539                 s->max_ra = INT_MIN;
2540         }
2541
2542         if (s->sh.first_slice_in_pic_flag) {
2543             ret = hevc_frame_start(s);
2544             if (ret < 0)
2545                 return ret;
2546         } else if (!s->ref) {
2547             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2548             return AVERROR_INVALIDDATA;
2549         }
2550
2551         if (!s->sh.dependent_slice_segment_flag &&
2552             s->sh.slice_type != I_SLICE) {
2553             ret = ff_hevc_slice_rpl(s);
2554             if (ret < 0) {
2555                 av_log(s->avctx, AV_LOG_WARNING,
2556                        "Error constructing the reference lists for the current slice.\n");
2557                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2558                     return ret;
2559             }
2560         }
2561
2562         ctb_addr_ts = hls_slice_data(s);
2563         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2564             s->is_decoded = 1;
2565             if ((s->pps->transquant_bypass_enable_flag ||
2566                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2567                 s->sps->sao_enabled)
2568                 restore_tqb_pixels(s);
2569         }
2570
2571         if (ctb_addr_ts < 0)
2572             return ctb_addr_ts;
2573         break;
2574     case NAL_EOS_NUT:
2575     case NAL_EOB_NUT:
2576         s->seq_decode = (s->seq_decode + 1) & 0xff;
2577         s->max_ra     = INT_MAX;
2578         break;
2579     case NAL_AUD:
2580     case NAL_FD_NUT:
2581         break;
2582     default:
2583         av_log(s->avctx, AV_LOG_INFO,
2584                "Skipping NAL unit %d\n", s->nal_unit_type);
2585     }
2586
2587     return 0;
2588 }
2589
2590 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2591  * between these functions would be nice. */
2592 static int extract_rbsp(const uint8_t *src, int length,
2593                         HEVCNAL *nal)
2594 {
2595     int i, si, di;
2596     uint8_t *dst;
2597
2598 #define STARTCODE_TEST                                                  \
2599         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2600             if (src[i + 2] != 3) {                                      \
2601                 /* startcode, so we must be past the end */             \
2602                 length = i;                                             \
2603             }                                                           \
2604             break;                                                      \
2605         }
2606 #if HAVE_FAST_UNALIGNED
2607 #define FIND_FIRST_ZERO                                                 \
2608         if (i > 0 && !src[i])                                           \
2609             i--;                                                        \
2610         while (src[i])                                                  \
2611             i++
2612 #if HAVE_FAST_64BIT
2613     for (i = 0; i + 1 < length; i += 9) {
2614         if (!((~AV_RN64A(src + i) &
2615                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2616               0x8000800080008080ULL))
2617             continue;
2618         FIND_FIRST_ZERO;
2619         STARTCODE_TEST;
2620         i -= 7;
2621     }
2622 #else
2623     for (i = 0; i + 1 < length; i += 5) {
2624         if (!((~AV_RN32A(src + i) &
2625                (AV_RN32A(src + i) - 0x01000101U)) &
2626               0x80008080U))
2627             continue;
2628         FIND_FIRST_ZERO;
2629         STARTCODE_TEST;
2630         i -= 3;
2631     }
2632 #endif /* HAVE_FAST_64BIT */
2633 #else
2634     for (i = 0; i + 1 < length; i += 2) {
2635         if (src[i])
2636             continue;
2637         if (i > 0 && src[i - 1] == 0)
2638             i--;
2639         STARTCODE_TEST;
2640     }
2641 #endif /* HAVE_FAST_UNALIGNED */
2642
2643     if (i >= length - 1) { // no escaped 0
2644         nal->data = src;
2645         nal->size = length;
2646         return length;
2647     }
2648
2649     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2650                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2651     if (!nal->rbsp_buffer)
2652         return AVERROR(ENOMEM);
2653
2654     dst = nal->rbsp_buffer;
2655
2656     memcpy(dst, src, i);
2657     si = di = i;
2658     while (si + 2 < length) {
2659         // remove escapes (very rare 1:2^22)
2660         if (src[si + 2] > 3) {
2661             dst[di++] = src[si++];
2662             dst[di++] = src[si++];
2663         } else if (src[si] == 0 && src[si + 1] == 0) {
2664             if (src[si + 2] == 3) { // escape
2665                 dst[di++] = 0;
2666                 dst[di++] = 0;
2667                 si       += 3;
2668
2669                 continue;
2670             } else // next start code
2671                 goto nsc;
2672         }
2673
2674         dst[di++] = src[si++];
2675     }
2676     while (si < length)
2677         dst[di++] = src[si++];
2678
2679 nsc:
2680     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2681
2682     nal->data = dst;
2683     nal->size = di;
2684     return si;
2685 }
2686
2687 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2688 {
2689     int i, consumed, ret = 0;
2690
2691     s->ref = NULL;
2692     s->eos = 0;
2693
2694     /* split the input packet into NAL units, so we know the upper bound on the
2695      * number of slices in the frame */
2696     s->nb_nals = 0;
2697     while (length >= 4) {
2698         HEVCNAL *nal;
2699         int extract_length = 0;
2700
2701         if (s->is_nalff) {
2702             int i;
2703             for (i = 0; i < s->nal_length_size; i++)
2704                 extract_length = (extract_length << 8) | buf[i];
2705             buf    += s->nal_length_size;
2706             length -= s->nal_length_size;
2707
2708             if (extract_length > length) {
2709                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2710                 ret = AVERROR_INVALIDDATA;
2711                 goto fail;
2712             }
2713         } else {
2714             if (buf[2] == 0) {
2715                 length--;
2716                 buf++;
2717                 continue;
2718             }
2719             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2720                 ret = AVERROR_INVALIDDATA;
2721                 goto fail;
2722             }
2723
2724             buf           += 3;
2725             length        -= 3;
2726             extract_length = length;
2727         }
2728
2729         if (s->nals_allocated < s->nb_nals + 1) {
2730             int new_size = s->nals_allocated + 1;
2731             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2732             if (!tmp) {
2733                 ret = AVERROR(ENOMEM);
2734                 goto fail;
2735             }
2736             s->nals = tmp;
2737             memset(s->nals + s->nals_allocated, 0,
2738                    (new_size - s->nals_allocated) * sizeof(*tmp));
2739             s->nals_allocated = new_size;
2740         }
2741         nal = &s->nals[s->nb_nals++];
2742
2743         consumed = extract_rbsp(buf, extract_length, nal);
2744         if (consumed < 0) {
2745             ret = consumed;
2746             goto fail;
2747         }
2748
2749         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2750         if (ret < 0)
2751             goto fail;
2752         hls_nal_unit(s);
2753
2754         if (s->nal_unit_type == NAL_EOB_NUT ||
2755             s->nal_unit_type == NAL_EOS_NUT)
2756             s->eos = 1;
2757
2758         buf    += consumed;
2759         length -= consumed;
2760     }
2761
2762     /* parse the NAL units */
2763     for (i = 0; i < s->nb_nals; i++) {
2764         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2765         if (ret < 0) {
2766             av_log(s->avctx, AV_LOG_WARNING,
2767                    "Error parsing NAL unit #%d.\n", i);
2768             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2769                 goto fail;
2770         }
2771     }
2772
2773 fail:
2774     if (s->ref)
2775         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2776
2777     return ret;
2778 }
2779
2780 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2781 {
2782     int i;
2783     for (i = 0; i < 16; i++)
2784         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
2785 }
2786
2787 static int verify_md5(HEVCContext *s, AVFrame *frame)
2788 {
2789     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2790     int pixel_shift;
2791     int i, j;
2792
2793     if (!desc)
2794         return AVERROR(EINVAL);
2795
2796     pixel_shift = desc->comp[0].depth_minus1 > 7;
2797
2798     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2799            s->poc);
2800
2801     /* the checksums are LE, so we have to byteswap for >8bpp formats
2802      * on BE arches */
2803 #if HAVE_BIGENDIAN
2804     if (pixel_shift && !s->checksum_buf) {
2805         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2806                        FFMAX3(frame->linesize[0], frame->linesize[1],
2807                               frame->linesize[2]));
2808         if (!s->checksum_buf)
2809             return AVERROR(ENOMEM);
2810     }
2811 #endif
2812
2813     for (i = 0; frame->data[i]; i++) {
2814         int width  = s->avctx->coded_width;
2815         int height = s->avctx->coded_height;
2816         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2817         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2818         uint8_t md5[16];
2819
2820         av_md5_init(s->md5_ctx);
2821         for (j = 0; j < h; j++) {
2822             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2823 #if HAVE_BIGENDIAN
2824             if (pixel_shift) {
2825                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2826                                    (const uint16_t*)src, w);
2827                 src = s->checksum_buf;
2828             }
2829 #endif
2830             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2831         }
2832         av_md5_final(s->md5_ctx, md5);
2833
2834         if (!memcmp(md5, s->md5[i], 16)) {
2835             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2836             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2837             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2838         } else {
2839             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2840             print_md5(s->avctx, AV_LOG_ERROR, md5);
2841             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2842             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2843             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2844             return AVERROR_INVALIDDATA;
2845         }
2846     }
2847
2848     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2849
2850     return 0;
2851 }
2852
2853 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2854                              AVPacket *avpkt)
2855 {
2856     int ret;
2857     HEVCContext *s = avctx->priv_data;
2858
2859     if (!avpkt->size) {
2860         ret = ff_hevc_output_frame(s, data, 1);
2861         if (ret < 0)
2862             return ret;
2863
2864         *got_output = ret;
2865         return 0;
2866     }
2867
2868     s->ref = NULL;
2869     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2870     if (ret < 0)
2871         return ret;
2872
2873     /* verify the SEI checksum */
2874     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2875         s->is_md5) {
2876         ret = verify_md5(s, s->ref->frame);
2877         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2878             ff_hevc_unref_frame(s, s->ref, ~0);
2879             return ret;
2880         }
2881     }
2882     s->is_md5 = 0;
2883
2884     if (s->is_decoded) {
2885         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2886         s->is_decoded = 0;
2887     }
2888
2889     if (s->output_frame->buf[0]) {
2890         av_frame_move_ref(data, s->output_frame);
2891         *got_output = 1;
2892     }
2893
2894     return avpkt->size;
2895 }
2896
2897 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2898 {
2899     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2900     if (ret < 0)
2901         return ret;
2902
2903     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2904     if (!dst->tab_mvf_buf)
2905         goto fail;
2906     dst->tab_mvf = src->tab_mvf;
2907
2908     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2909     if (!dst->rpl_tab_buf)
2910         goto fail;
2911     dst->rpl_tab = src->rpl_tab;
2912
2913     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2914     if (!dst->rpl_buf)
2915         goto fail;
2916
2917     dst->poc        = src->poc;
2918     dst->ctb_count  = src->ctb_count;
2919     dst->window     = src->window;
2920     dst->flags      = src->flags;
2921     dst->sequence   = src->sequence;
2922
2923     return 0;
2924 fail:
2925     ff_hevc_unref_frame(s, dst, ~0);
2926     return AVERROR(ENOMEM);
2927 }
2928
2929 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2930 {
2931     HEVCContext       *s = avctx->priv_data;
2932     int i;
2933
2934     pic_arrays_free(s);
2935
2936     av_freep(&s->md5_ctx);
2937
2938     av_frame_free(&s->tmp_frame);
2939     av_frame_free(&s->output_frame);
2940
2941     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2942         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2943         av_frame_free(&s->DPB[i].frame);
2944     }
2945
2946     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2947         av_buffer_unref(&s->vps_list[i]);
2948     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2949         av_buffer_unref(&s->sps_list[i]);
2950     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2951         av_buffer_unref(&s->pps_list[i]);
2952
2953     for (i = 0; i < s->nals_allocated; i++)
2954         av_freep(&s->nals[i].rbsp_buffer);
2955     av_freep(&s->nals);
2956     s->nals_allocated = 0;
2957
2958     return 0;
2959 }
2960
2961 static av_cold int hevc_init_context(AVCodecContext *avctx)
2962 {
2963     HEVCContext *s = avctx->priv_data;
2964     int i;
2965
2966     s->avctx = avctx;
2967
2968     s->tmp_frame = av_frame_alloc();
2969     if (!s->tmp_frame)
2970         goto fail;
2971
2972     s->output_frame = av_frame_alloc();
2973     if (!s->output_frame)
2974         goto fail;
2975
2976     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2977         s->DPB[i].frame = av_frame_alloc();
2978         if (!s->DPB[i].frame)
2979             goto fail;
2980         s->DPB[i].tf.f = s->DPB[i].frame;
2981     }
2982
2983     s->max_ra = INT_MAX;
2984
2985     s->md5_ctx = av_md5_alloc();
2986     if (!s->md5_ctx)
2987         goto fail;
2988
2989     ff_dsputil_init(&s->dsp, avctx);
2990
2991     s->context_initialized = 1;
2992
2993     return 0;
2994
2995 fail:
2996     hevc_decode_free(avctx);
2997     return AVERROR(ENOMEM);
2998 }
2999
3000 static int hevc_update_thread_context(AVCodecContext *dst,
3001                                       const AVCodecContext *src)
3002 {
3003     HEVCContext *s  = dst->priv_data;
3004     HEVCContext *s0 = src->priv_data;
3005     int i, ret;
3006
3007     if (!s->context_initialized) {
3008         ret = hevc_init_context(dst);
3009         if (ret < 0)
3010             return ret;
3011     }
3012
3013     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3014         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3015         if (s0->DPB[i].frame->buf[0]) {
3016             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3017             if (ret < 0)
3018                 return ret;
3019         }
3020     }
3021
3022     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3023         av_buffer_unref(&s->vps_list[i]);
3024         if (s0->vps_list[i]) {
3025             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3026             if (!s->vps_list[i])
3027                 return AVERROR(ENOMEM);
3028         }
3029     }
3030
3031     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3032         av_buffer_unref(&s->sps_list[i]);
3033         if (s0->sps_list[i]) {
3034             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3035             if (!s->sps_list[i])
3036                 return AVERROR(ENOMEM);
3037         }
3038     }
3039
3040     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3041         av_buffer_unref(&s->pps_list[i]);
3042         if (s0->pps_list[i]) {
3043             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3044             if (!s->pps_list[i])
3045                 return AVERROR(ENOMEM);
3046         }
3047     }
3048
3049     if (s->sps != s0->sps)
3050         ret = set_sps(s, s0->sps);
3051
3052     s->seq_decode = s0->seq_decode;
3053     s->seq_output = s0->seq_output;
3054     s->pocTid0    = s0->pocTid0;
3055     s->max_ra     = s0->max_ra;
3056
3057     s->is_nalff        = s0->is_nalff;
3058     s->nal_length_size = s0->nal_length_size;
3059
3060     if (s0->eos) {
3061         s->seq_decode = (s->seq_decode + 1) & 0xff;
3062         s->max_ra = INT_MAX;
3063     }
3064
3065     return 0;
3066 }
3067
3068 static int hevc_decode_extradata(HEVCContext *s)
3069 {
3070     AVCodecContext *avctx = s->avctx;
3071     GetByteContext gb;
3072     int ret;
3073
3074     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3075
3076     if (avctx->extradata_size > 3 &&
3077         (avctx->extradata[0] || avctx->extradata[1] ||
3078          avctx->extradata[2] > 1)) {
3079         /* It seems the extradata is encoded as hvcC format.
3080          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3081          * is finalized. When finalized, configurationVersion will be 1 and we
3082          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3083         int i, j, num_arrays, nal_len_size;
3084
3085         s->is_nalff = 1;
3086
3087         bytestream2_skip(&gb, 21);
3088         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3089         num_arrays   = bytestream2_get_byte(&gb);
3090
3091         /* nal units in the hvcC always have length coded with 2 bytes,
3092          * so put a fake nal_length_size = 2 while parsing them */
3093         s->nal_length_size = 2;
3094
3095         /* Decode nal units from hvcC. */
3096         for (i = 0; i < num_arrays; i++) {
3097             int type = bytestream2_get_byte(&gb) & 0x3f;
3098             int cnt  = bytestream2_get_be16(&gb);
3099
3100             for (j = 0; j < cnt; j++) {
3101                 // +2 for the nal size field
3102                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3103                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3104                     av_log(s->avctx, AV_LOG_ERROR,
3105                            "Invalid NAL unit size in extradata.\n");
3106                     return AVERROR_INVALIDDATA;
3107                 }
3108
3109                 ret = decode_nal_units(s, gb.buffer, nalsize);
3110                 if (ret < 0) {
3111                     av_log(avctx, AV_LOG_ERROR,
3112                            "Decoding nal unit %d %d from hvcC failed\n",
3113                            type, i);
3114                     return ret;
3115                 }
3116                 bytestream2_skip(&gb, nalsize);
3117             }
3118         }
3119
3120         /* Now store right nal length size, that will be used to parse
3121          * all other nals */
3122         s->nal_length_size = nal_len_size;
3123     } else {
3124         s->is_nalff = 0;
3125         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3126         if (ret < 0)
3127             return ret;
3128     }
3129     return 0;
3130 }
3131
3132 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3133 {
3134     HEVCContext *s = avctx->priv_data;
3135     int ret;
3136
3137     ff_init_cabac_states();
3138
3139     avctx->internal->allocate_progress = 1;
3140
3141     ret = hevc_init_context(avctx);
3142     if (ret < 0)
3143         return ret;
3144
3145     if (avctx->extradata_size > 0 && avctx->extradata) {
3146         ret = hevc_decode_extradata(s);
3147         if (ret < 0) {
3148             hevc_decode_free(avctx);
3149             return ret;
3150         }
3151     }
3152
3153     return 0;
3154 }
3155
3156 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3157 {
3158     HEVCContext *s = avctx->priv_data;
3159     int ret;
3160
3161     memset(s, 0, sizeof(*s));
3162
3163     ret = hevc_init_context(avctx);
3164     if (ret < 0)
3165         return ret;
3166
3167     return 0;
3168 }
3169
3170 static void hevc_decode_flush(AVCodecContext *avctx)
3171 {
3172     HEVCContext *s = avctx->priv_data;
3173     ff_hevc_flush_dpb(s);
3174     s->max_ra = INT_MAX;
3175 }
3176
3177 #define OFFSET(x) offsetof(HEVCContext, x)
3178 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3179
3180 static const AVProfile profiles[] = {
3181     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3182     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3183     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3184     { FF_PROFILE_UNKNOWN },
3185 };
3186
3187 static const AVOption options[] = {
3188     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3189         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3190     { NULL },
3191 };
3192
3193 static const AVClass hevc_decoder_class = {
3194     .class_name = "HEVC decoder",
3195     .item_name  = av_default_item_name,
3196     .option     = options,
3197     .version    = LIBAVUTIL_VERSION_INT,
3198 };
3199
3200 AVCodec ff_hevc_decoder = {
3201     .name                  = "hevc",
3202     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3203     .type                  = AVMEDIA_TYPE_VIDEO,
3204     .id                    = AV_CODEC_ID_HEVC,
3205     .priv_data_size        = sizeof(HEVCContext),
3206     .priv_class            = &hevc_decoder_class,
3207     .init                  = hevc_decode_init,
3208     .close                 = hevc_decode_free,
3209     .decode                = hevc_decode_frame,
3210     .flush                 = hevc_decode_flush,
3211     .update_thread_context = hevc_update_thread_context,
3212     .init_thread_copy      = hevc_init_thread_copy,
3213     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3214                              CODEC_CAP_FRAME_THREADS,
3215     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3216 };