git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "libavutil/stereo3d.h"
  33
  34 #include "bytestream.h"
  35 #include "cabac_functions.h"
  36 #include "dsputil.h"
  37 #include "golomb.h"
  38 #include "hevc.h"
  39
  40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  41 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  42 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  43
  44 static const uint8_t scan_1x1[1] = { 0 };
  45
  46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  47
  48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  49
  50 static const uint8_t horiz_scan4x4_x[16] = {
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55 };
  56
  57 static const uint8_t horiz_scan4x4_y[16] = {
  58     0, 0, 0, 0,
  59     1, 1, 1, 1,
  60     2, 2, 2, 2,
  61     3, 3, 3, 3,
  62 };
  63
  64 static const uint8_t horiz_scan8x8_inv[8][8] = {
  65     {  0,  1,  2,  3, 16, 17, 18, 19, },
  66     {  4,  5,  6,  7, 20, 21, 22, 23, },
  67     {  8,  9, 10, 11, 24, 25, 26, 27, },
  68     { 12, 13, 14, 15, 28, 29, 30, 31, },
  69     { 32, 33, 34, 35, 48, 49, 50, 51, },
  70     { 36, 37, 38, 39, 52, 53, 54, 55, },
  71     { 40, 41, 42, 43, 56, 57, 58, 59, },
  72     { 44, 45, 46, 47, 60, 61, 62, 63, },
  73 };
  74
  75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  76
  77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  78
  79 static const uint8_t diag_scan2x2_inv[2][2] = {
  80     { 0, 2, },
  81     { 1, 3, },
  82 };
  83
  84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  85     0, 0, 1, 0,
  86     1, 2, 0, 1,
  87     2, 3, 1, 2,
  88     3, 2, 3, 3,
  89 };
  90
  91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  92     0, 1, 0, 2,
  93     1, 0, 3, 2,
  94     1, 0, 3, 2,
  95     1, 3, 2, 3,
  96 };
  97
  98 static const uint8_t diag_scan4x4_inv[4][4] = {
  99     { 0,  2,  5,  9, },
 100     { 1,  4,  8, 12, },
 101     { 3,  7, 11, 14, },
 102     { 6, 10, 13, 15, },
 103 };
 104
 105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 106     0, 0, 1, 0,
 107     1, 2, 0, 1,
 108     2, 3, 0, 1,
 109     2, 3, 4, 0,
 110     1, 2, 3, 4,
 111     5, 0, 1, 2,
 112     3, 4, 5, 6,
 113     0, 1, 2, 3,
 114     4, 5, 6, 7,
 115     1, 2, 3, 4,
 116     5, 6, 7, 2,
 117     3, 4, 5, 6,
 118     7, 3, 4, 5,
 119     6, 7, 4, 5,
 120     6, 7, 5, 6,
 121     7, 6, 7, 7,
 122 };
 123
 124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 125     0, 1, 0, 2,
 126     1, 0, 3, 2,
 127     1, 0, 4, 3,
 128     2, 1, 0, 5,
 129     4, 3, 2, 1,
 130     0, 6, 5, 4,
 131     3, 2, 1, 0,
 132     7, 6, 5, 4,
 133     3, 2, 1, 0,
 134     7, 6, 5, 4,
 135     3, 2, 1, 7,
 136     6, 5, 4, 3,
 137     2, 7, 6, 5,
 138     4, 3, 7, 6,
 139     5, 4, 7, 6,
 140     5, 7, 6, 7,
 141 };
 142
 143 static const uint8_t diag_scan8x8_inv[8][8] = {
 144     {  0,  2,  5,  9, 14, 20, 27, 35, },
 145     {  1,  4,  8, 13, 19, 26, 34, 42, },
 146     {  3,  7, 12, 18, 25, 33, 41, 48, },
 147     {  6, 11, 17, 24, 32, 40, 47, 53, },
 148     { 10, 16, 23, 31, 39, 46, 52, 57, },
 149     { 15, 22, 30, 38, 45, 51, 56, 60, },
 150     { 21, 29, 37, 44, 50, 55, 59, 62, },
 151     { 28, 36, 43, 49, 54, 58, 61, 63, },
 152 };
 153
 154 /**
 155  * NOTE: Each function hls_foo corresponds to the function foo in the
 156  * specification (HLS stands for High Level Syntax).
 157  */
 158
 159 /**
 160  * Section 5.7
 161  */
 162
 163 /* free everything allocated  by pic_arrays_init() */
 164 static void pic_arrays_free(HEVCContext *s)
 165 {
 166     av_freep(&s->sao);
 167     av_freep(&s->deblock);
 168     av_freep(&s->split_cu_flag);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size         = width * height;
 195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 196                            ((height >> log2_min_cb_size) + 1);
 197     int ctb_count        = sps->ctb_width * sps->ctb_height;
 198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 199
 200     s->bs_width  = width  >> 3;
 201     s->bs_height = height >> 3;
 202
 203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 205     s->split_cu_flag = av_malloc(pic_size);
 206     if (!s->sao || !s->deblock || !s->split_cu_flag)
 207         goto fail;
 208
 209     s->skip_flag    = av_malloc(pic_size_in_ctb);
 210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 211     if (!s->skip_flag || !s->tab_ct_depth)
 212         goto fail;
 213
 214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 215     s->tab_ipm  = av_malloc(min_pu_size);
 216     s->is_pcm   = av_malloc(min_pu_size);
 217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 218         goto fail;
 219
 220     s->filter_slice_edges = av_malloc(ctb_count);
 221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->tab_slice_address));
 223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 224                                       sizeof(*s->qp_y_tab));
 225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 226         goto fail;
 227
 228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 230     if (!s->horizontal_bs || !s->vertical_bs)
 231         goto fail;
 232
 233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 234                                           av_buffer_alloc);
 235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 236                                           av_buffer_allocz);
 237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 238         goto fail;
 239
 240     return 0;
 241
 242 fail:
 243     pic_arrays_free(s);
 244     return AVERROR(ENOMEM);
 245 }
 246
 247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 248 {
 249     int i = 0;
 250     int j = 0;
 251     uint8_t luma_weight_l0_flag[16];
 252     uint8_t chroma_weight_l0_flag[16];
 253     uint8_t luma_weight_l1_flag[16];
 254     uint8_t chroma_weight_l1_flag[16];
 255
 256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 257     if (s->sps->chroma_format_idc != 0) {
 258         int delta = get_se_golomb(gb);
 259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 260     }
 261
 262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 263         luma_weight_l0_flag[i] = get_bits1(gb);
 264         if (!luma_weight_l0_flag[i]) {
 265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 266             s->sh.luma_offset_l0[i] = 0;
 267         }
 268     }
 269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 271             chroma_weight_l0_flag[i] = get_bits1(gb);
 272     } else {
 273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 274             chroma_weight_l0_flag[i] = 0;
 275     }
 276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 277         if (luma_weight_l0_flag[i]) {
 278             int delta_luma_weight_l0 = get_se_golomb(gb);
 279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 281         }
 282         if (chroma_weight_l0_flag[i]) {
 283             for (j = 0; j < 2; j++) {
 284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 289             }
 290         } else {
 291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][0] = 0;
 293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 294             s->sh.chroma_offset_l0[i][1] = 0;
 295         }
 296     }
 297     if (s->sh.slice_type == B_SLICE) {
 298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 299             luma_weight_l1_flag[i] = get_bits1(gb);
 300             if (!luma_weight_l1_flag[i]) {
 301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 302                 s->sh.luma_offset_l1[i] = 0;
 303             }
 304         }
 305         if (s->sps->chroma_format_idc != 0) {
 306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 307                 chroma_weight_l1_flag[i] = get_bits1(gb);
 308         } else {
 309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 310                 chroma_weight_l1_flag[i] = 0;
 311         }
 312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 313             if (luma_weight_l1_flag[i]) {
 314                 int delta_luma_weight_l1 = get_se_golomb(gb);
 315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 317             }
 318             if (chroma_weight_l1_flag[i]) {
 319                 for (j = 0; j < 2; j++) {
 320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 325                 }
 326             } else {
 327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][0] = 0;
 329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 330                 s->sh.chroma_offset_l1[i][1] = 0;
 331             }
 332         }
 333     }
 334 }
 335
 336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 337 {
 338     const HEVCSPS *sps = s->sps;
 339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 340     int prev_delta_msb = 0;
 341     unsigned int nb_sps = 0, nb_sh;
 342     int i;
 343
 344     rps->nb_refs = 0;
 345     if (!sps->long_term_ref_pics_present_flag)
 346         return 0;
 347
 348     if (sps->num_long_term_ref_pics_sps > 0)
 349         nb_sps = get_ue_golomb_long(gb);
 350     nb_sh = get_ue_golomb_long(gb);
 351
 352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 353         return AVERROR_INVALIDDATA;
 354
 355     rps->nb_refs = nb_sh + nb_sps;
 356
 357     for (i = 0; i < rps->nb_refs; i++) {
 358         uint8_t delta_poc_msb_present;
 359
 360         if (i < nb_sps) {
 361             uint8_t lt_idx_sps = 0;
 362
 363             if (sps->num_long_term_ref_pics_sps > 1)
 364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 365
 366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 368         } else {
 369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 370             rps->used[i] = get_bits1(gb);
 371         }
 372
 373         delta_poc_msb_present = get_bits1(gb);
 374         if (delta_poc_msb_present) {
 375             int delta = get_ue_golomb_long(gb);
 376
 377             if (i && i != nb_sps)
 378                 delta += prev_delta_msb;
 379
 380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 381             prev_delta_msb = delta;
 382         }
 383     }
 384
 385     return 0;
 386 }
 387
 388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 389 {
 390     int ret;
 391     int num = 0, den = 0;
 392
 393     pic_arrays_free(s);
 394     ret = pic_arrays_init(s, sps);
 395     if (ret < 0)
 396         goto fail;
 397
 398     s->avctx->coded_width         = sps->width;
 399     s->avctx->coded_height        = sps->height;
 400     s->avctx->width               = sps->output_width;
 401     s->avctx->height              = sps->output_height;
 402     s->avctx->pix_fmt             = sps->pix_fmt;
 403     s->avctx->sample_aspect_ratio = sps->vui.sar;
 404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 405
 406     if (sps->vui.video_signal_type_present_flag)
 407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 408                                                                : AVCOL_RANGE_MPEG;
 409     else
 410         s->avctx->color_range = AVCOL_RANGE_MPEG;
 411
 412     if (sps->vui.colour_description_present_flag) {
 413         s->avctx->color_primaries = sps->vui.colour_primaries;
 414         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 415         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 416     } else {
 417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 418         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 419         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 420     }
 421
 422     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 424     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 425
 426     if (sps->sao_enabled) {
 427         av_frame_unref(s->tmp_frame);
 428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 429         if (ret < 0)
 430             goto fail;
 431         s->frame = s->tmp_frame;
 432     }
 433
 434     s->sps = sps;
 435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 436
 437     if (s->vps->vps_timing_info_present_flag) {
 438         num = s->vps->vps_num_units_in_tick;
 439         den = s->vps->vps_time_scale;
 440     } else if (sps->vui.vui_timing_info_present_flag) {
 441         num = sps->vui.vui_num_units_in_tick;
 442         den = sps->vui.vui_time_scale;
 443     }
 444
 445     if (num != 0 && den != 0)
 446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 447                   num, den, 1 << 30);
 448
 449     return 0;
 450
 451 fail:
 452     pic_arrays_free(s);
 453     s->sps = NULL;
 454     return ret;
 455 }
 456
 457 static int hls_slice_header(HEVCContext *s)
 458 {
 459     GetBitContext *gb = &s->HEVClc.gb;
 460     SliceHeader *sh   = &s->sh;
 461     int i, ret;
 462
 463     // Coded parameters
 464     sh->first_slice_in_pic_flag = get_bits1(gb);
 465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 466         s->seq_decode = (s->seq_decode + 1) & 0xff;
 467         s->max_ra     = INT_MAX;
 468         if (IS_IDR(s))
 469             ff_hevc_clear_refs(s);
 470     }
 471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 473
 474     sh->pps_id = get_ue_golomb_long(gb);
 475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 477         return AVERROR_INVALIDDATA;
 478     }
 479     if (!sh->first_slice_in_pic_flag &&
 480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 482         return AVERROR_INVALIDDATA;
 483     }
 484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 485
 486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 488
 489         ff_hevc_clear_refs(s);
 490         ret = set_sps(s, s->sps);
 491         if (ret < 0)
 492             return ret;
 493
 494         s->seq_decode = (s->seq_decode + 1) & 0xff;
 495         s->max_ra     = INT_MAX;
 496     }
 497
 498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 499     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 500
 501     sh->dependent_slice_segment_flag = 0;
 502     if (!sh->first_slice_in_pic_flag) {
 503         int slice_address_length;
 504
 505         if (s->pps->dependent_slice_segments_enabled_flag)
 506             sh->dependent_slice_segment_flag = get_bits1(gb);
 507
 508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 509                                             s->sps->ctb_height);
 510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 512             av_log(s->avctx, AV_LOG_ERROR,
 513                    "Invalid slice segment address: %u.\n",
 514                    sh->slice_segment_addr);
 515             return AVERROR_INVALIDDATA;
 516         }
 517
 518         if (!sh->dependent_slice_segment_flag) {
 519             sh->slice_addr = sh->slice_segment_addr;
 520             s->slice_idx++;
 521         }
 522     } else {
 523         sh->slice_segment_addr = sh->slice_addr = 0;
 524         s->slice_idx           = 0;
 525         s->slice_initialized   = 0;
 526     }
 527
 528     if (!sh->dependent_slice_segment_flag) {
 529         s->slice_initialized = 0;
 530
 531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 533
 534         sh->slice_type = get_ue_golomb_long(gb);
 535         if (!(sh->slice_type == I_SLICE ||
 536               sh->slice_type == P_SLICE ||
 537               sh->slice_type == B_SLICE)) {
 538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 539                    sh->slice_type);
 540             return AVERROR_INVALIDDATA;
 541         }
 542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 544             return AVERROR_INVALIDDATA;
 545         }
 546
 547         if (s->pps->output_flag_present_flag)
 548             sh->pic_output_flag = get_bits1(gb);
 549
 550         if (s->sps->separate_colour_plane_flag)
 551             sh->colour_plane_id = get_bits(gb, 2);
 552
 553         if (!IS_IDR(s)) {
 554             int short_term_ref_pic_set_sps_flag, poc;
 555
 556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 559                 av_log(s->avctx, AV_LOG_WARNING,
 560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 562                     return AVERROR_INVALIDDATA;
 563                 poc = s->poc;
 564             }
 565             s->poc = poc;
 566
 567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 568             if (!short_term_ref_pic_set_sps_flag) {
 569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 570                 if (ret < 0)
 571                     return ret;
 572
 573                 sh->short_term_rps = &sh->slice_rps;
 574             } else {
 575                 int numbits, rps_idx;
 576
 577                 if (!s->sps->nb_st_rps) {
 578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 579                     return AVERROR_INVALIDDATA;
 580                 }
 581
 582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 585             }
 586
 587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 588             if (ret < 0) {
 589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 591                     return AVERROR_INVALIDDATA;
 592             }
 593
 594             if (s->sps->sps_temporal_mvp_enabled_flag)
 595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 596             else
 597                 sh->slice_temporal_mvp_enabled_flag = 0;
 598         } else {
 599             s->sh.short_term_rps = NULL;
 600             s->poc               = 0;
 601         }
 602
 603         /* 8.3.1 */
 604         if (s->temporal_id == 0 &&
 605             s->nal_unit_type != NAL_TRAIL_N &&
 606             s->nal_unit_type != NAL_TSA_N   &&
 607             s->nal_unit_type != NAL_STSA_N  &&
 608             s->nal_unit_type != NAL_RADL_N  &&
 609             s->nal_unit_type != NAL_RADL_R  &&
 610             s->nal_unit_type != NAL_RASL_N  &&
 611             s->nal_unit_type != NAL_RASL_R)
 612             s->pocTid0 = s->poc;
 613
 614         if (s->sps->sao_enabled) {
 615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 616             sh->slice_sample_adaptive_offset_flag[1] =
 617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 618         } else {
 619             sh->slice_sample_adaptive_offset_flag[0] = 0;
 620             sh->slice_sample_adaptive_offset_flag[1] = 0;
 621             sh->slice_sample_adaptive_offset_flag[2] = 0;
 622         }
 623
 624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 626             int nb_refs;
 627
 628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 629             if (sh->slice_type == B_SLICE)
 630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 631
 632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 634                 if (sh->slice_type == B_SLICE)
 635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 636             }
 637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 639                        sh->nb_refs[L0], sh->nb_refs[L1]);
 640                 return AVERROR_INVALIDDATA;
 641             }
 642
 643             sh->rpl_modification_flag[0] = 0;
 644             sh->rpl_modification_flag[1] = 0;
 645             nb_refs = ff_hevc_frame_nb_refs(s);
 646             if (!nb_refs) {
 647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 648                 return AVERROR_INVALIDDATA;
 649             }
 650
 651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 652                 sh->rpl_modification_flag[0] = get_bits1(gb);
 653                 if (sh->rpl_modification_flag[0]) {
 654                     for (i = 0; i < sh->nb_refs[L0]; i++)
 655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 656                 }
 657
 658                 if (sh->slice_type == B_SLICE) {
 659                     sh->rpl_modification_flag[1] = get_bits1(gb);
 660                     if (sh->rpl_modification_flag[1] == 1)
 661                         for (i = 0; i < sh->nb_refs[L1]; i++)
 662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 663                 }
 664             }
 665
 666             if (sh->slice_type == B_SLICE)
 667                 sh->mvd_l1_zero_flag = get_bits1(gb);
 668
 669             if (s->pps->cabac_init_present_flag)
 670                 sh->cabac_init_flag = get_bits1(gb);
 671             else
 672                 sh->cabac_init_flag = 0;
 673
 674             sh->collocated_ref_idx = 0;
 675             if (sh->slice_temporal_mvp_enabled_flag) {
 676                 sh->collocated_list = L0;
 677                 if (sh->slice_type == B_SLICE)
 678                     sh->collocated_list = !get_bits1(gb);
 679
 680                 if (sh->nb_refs[sh->collocated_list] > 1) {
 681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 683                         av_log(s->avctx, AV_LOG_ERROR,
 684                                "Invalid collocated_ref_idx: %d.\n",
 685                                sh->collocated_ref_idx);
 686                         return AVERROR_INVALIDDATA;
 687                     }
 688                 }
 689             }
 690
 691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 693                 pred_weight_table(s, gb);
 694             }
 695
 696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 698                 av_log(s->avctx, AV_LOG_ERROR,
 699                        "Invalid number of merging MVP candidates: %d.\n",
 700                        sh->max_num_merge_cand);
 701                 return AVERROR_INVALIDDATA;
 702             }
 703         }
 704
 705         sh->slice_qp_delta = get_se_golomb(gb);
 706
 707         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 708             sh->slice_cb_qp_offset = get_se_golomb(gb);
 709             sh->slice_cr_qp_offset = get_se_golomb(gb);
 710         } else {
 711             sh->slice_cb_qp_offset = 0;
 712             sh->slice_cr_qp_offset = 0;
 713         }
 714
 715         if (s->pps->deblocking_filter_control_present_flag) {
 716             int deblocking_filter_override_flag = 0;
 717
 718             if (s->pps->deblocking_filter_override_enabled_flag)
 719                 deblocking_filter_override_flag = get_bits1(gb);
 720
 721             if (deblocking_filter_override_flag) {
 722                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 723                 if (!sh->disable_deblocking_filter_flag) {
 724                     sh->beta_offset = get_se_golomb(gb) * 2;
 725                     sh->tc_offset   = get_se_golomb(gb) * 2;
 726                 }
 727             } else {
 728                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 729                 sh->beta_offset                    = s->pps->beta_offset;
 730                 sh->tc_offset                      = s->pps->tc_offset;
 731             }
 732         } else {
 733             sh->disable_deblocking_filter_flag = 0;
 734             sh->beta_offset                    = 0;
 735             sh->tc_offset                      = 0;
 736         }
 737
 738         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 739             (sh->slice_sample_adaptive_offset_flag[0] ||
 740              sh->slice_sample_adaptive_offset_flag[1] ||
 741              !sh->disable_deblocking_filter_flag)) {
 742             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 743         } else {
 744             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 745         }
 746     } else if (!s->slice_initialized) {
 747         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 748         return AVERROR_INVALIDDATA;
 749     }
 750
 751     sh->num_entry_point_offsets = 0;
 752     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 753         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 754         if (sh->num_entry_point_offsets > 0) {
 755             int offset_len = get_ue_golomb_long(gb) + 1;
 756
 757             for (i = 0; i < sh->num_entry_point_offsets; i++)
 758                 skip_bits(gb, offset_len);
 759         }
 760     }
 761
 762     if (s->pps->slice_header_extension_present_flag) {
 763         unsigned int length = get_ue_golomb_long(gb);
 764         for (i = 0; i < length; i++)
 765             skip_bits(gb, 8);  // slice_header_extension_data_byte
 766     }
 767
 768     // Inferred parameters
 769     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 770     if (sh->slice_qp > 51 ||
 771         sh->slice_qp < -s->sps->qp_bd_offset) {
 772         av_log(s->avctx, AV_LOG_ERROR,
 773                "The slice_qp %d is outside the valid range "
 774                "[%d, 51].\n",
 775                sh->slice_qp,
 776                -s->sps->qp_bd_offset);
 777         return AVERROR_INVALIDDATA;
 778     }
 779
 780     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 781
 782     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 783
 784     if (!s->pps->cu_qp_delta_enabled_flag)
 785         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 786                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 787
 788     s->slice_initialized = 1;
 789
 790     return 0;
 791 }
 792
 793 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 794
 795 #define SET_SAO(elem, value)                            \
 796 do {                                                    \
 797     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 798         sao->elem = value;                              \
 799     else if (sao_merge_left_flag)                       \
 800         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 801     else if (sao_merge_up_flag)                         \
 802         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 803     else                                                \
 804         sao->elem = 0;                                  \
 805 } while (0)
 806
 807 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 808 {
 809     HEVCLocalContext *lc    = &s->HEVClc;
 810     int sao_merge_left_flag = 0;
 811     int sao_merge_up_flag   = 0;
 812     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 813     SAOParams *sao          = &CTB(s->sao, rx, ry);
 814     int c_idx, i;
 815
 816     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 817         s->sh.slice_sample_adaptive_offset_flag[1]) {
 818         if (rx > 0) {
 819             if (lc->ctb_left_flag)
 820                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 821         }
 822         if (ry > 0 && !sao_merge_left_flag) {
 823             if (lc->ctb_up_flag)
 824                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 825         }
 826     }
 827
 828     for (c_idx = 0; c_idx < 3; c_idx++) {
 829         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 830             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 831             continue;
 832         }
 833
 834         if (c_idx == 2) {
 835             sao->type_idx[2] = sao->type_idx[1];
 836             sao->eo_class[2] = sao->eo_class[1];
 837         } else {
 838             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 839         }
 840
 841         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 842             continue;
 843
 844         for (i = 0; i < 4; i++)
 845             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 846
 847         if (sao->type_idx[c_idx] == SAO_BAND) {
 848             for (i = 0; i < 4; i++) {
 849                 if (sao->offset_abs[c_idx][i]) {
 850                     SET_SAO(offset_sign[c_idx][i],
 851                             ff_hevc_sao_offset_sign_decode(s));
 852                 } else {
 853                     sao->offset_sign[c_idx][i] = 0;
 854                 }
 855             }
 856             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 857         } else if (c_idx != 2) {
 858             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 859         }
 860
 861         // Inferred parameters
 862         sao->offset_val[c_idx][0] = 0;
 863         for (i = 0; i < 4; i++) {
 864             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 865             if (sao->type_idx[c_idx] == SAO_EDGE) {
 866                 if (i > 1)
 867                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 868             } else if (sao->offset_sign[c_idx][i]) {
 869                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 870             }
 871         }
 872     }
 873 }
 874
 875 #undef SET_SAO
 876 #undef CTB
 877
 878 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 879                                 int log2_trafo_size, enum ScanType scan_idx,
 880                                 int c_idx)
 881 {
 882 #define GET_COORD(offset, n)                                    \
 883     do {                                                        \
 884         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 885         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 886     } while (0)
 887     HEVCLocalContext *lc    = &s->HEVClc;
 888     int transform_skip_flag = 0;
 889
 890     int last_significant_coeff_x, last_significant_coeff_y;
 891     int last_scan_pos;
 892     int n_end;
 893     int num_coeff    = 0;
 894     int greater1_ctx = 1;
 895
 896     int num_last_subset;
 897     int x_cg_last_sig, y_cg_last_sig;
 898
 899     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 900
 901     ptrdiff_t stride = s->frame->linesize[c_idx];
 902     int hshift       = s->sps->hshift[c_idx];
 903     int vshift       = s->sps->vshift[c_idx];
 904     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 905                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 906     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 907     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 908
 909     int trafo_size = 1 << log2_trafo_size;
 910     int i, qp, shift, add, scale, scale_m;
 911     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 912     const uint8_t *scale_matrix;
 913     uint8_t dc_scale;
 914
 915     // Derive QP for dequant
 916     if (!lc->cu.cu_transquant_bypass_flag) {
 917         static const int qp_c[] = {
 918             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 919         };
 920
 921         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 922             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 923             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 924             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 925         };
 926
 927         static const uint8_t div6[51 + 2 * 6 + 1] = {
 928             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 929             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 930             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 931         };
 932         int qp_y = lc->qp_y;
 933
 934         if (c_idx == 0) {
 935             qp = qp_y + s->sps->qp_bd_offset;
 936         } else {
 937             int qp_i, offset;
 938
 939             if (c_idx == 1)
 940                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 941             else
 942                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 943
 944             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 945             if (qp_i < 30)
 946                 qp = qp_i;
 947             else if (qp_i > 43)
 948                 qp = qp_i - 6;
 949             else
 950                 qp = qp_c[qp_i - 30];
 951
 952             qp += s->sps->qp_bd_offset;
 953         }
 954
 955         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 956         add      = 1 << (shift - 1);
 957         scale    = level_scale[rem6[qp]] << (div6[qp]);
 958         scale_m  = 16; // default when no custom scaling lists.
 959         dc_scale = 16;
 960
 961         if (s->sps->scaling_list_enable_flag) {
 962             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 963                                     &s->pps->scaling_list : &s->sps->scaling_list;
 964             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 965
 966             if (log2_trafo_size != 5)
 967                 matrix_id = 3 * matrix_id + c_idx;
 968
 969             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 970             if (log2_trafo_size >= 4)
 971                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 972         }
 973     }
 974
 975     if (s->pps->transform_skip_enabled_flag &&
 976         !lc->cu.cu_transquant_bypass_flag   &&
 977         log2_trafo_size == 2) {
 978         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 979     }
 980
 981     last_significant_coeff_x =
 982         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 983     last_significant_coeff_y =
 984         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 985
 986     if (last_significant_coeff_x > 3) {
 987         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 988         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 989                                    (2 + (last_significant_coeff_x & 1)) +
 990                                    suffix;
 991     }
 992
 993     if (last_significant_coeff_y > 3) {
 994         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 995         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 996                                    (2 + (last_significant_coeff_y & 1)) +
 997                                    suffix;
 998     }
 999
1000     if (scan_idx == SCAN_VERT)
1001         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1002
1003     x_cg_last_sig = last_significant_coeff_x >> 2;
1004     y_cg_last_sig = last_significant_coeff_y >> 2;
1005
1006     switch (scan_idx) {
1007     case SCAN_DIAG: {
1008         int last_x_c = last_significant_coeff_x & 3;
1009         int last_y_c = last_significant_coeff_y & 3;
1010
1011         scan_x_off = ff_hevc_diag_scan4x4_x;
1012         scan_y_off = ff_hevc_diag_scan4x4_y;
1013         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1014         if (trafo_size == 4) {
1015             scan_x_cg = scan_1x1;
1016             scan_y_cg = scan_1x1;
1017         } else if (trafo_size == 8) {
1018             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1019             scan_x_cg  = diag_scan2x2_x;
1020             scan_y_cg  = diag_scan2x2_y;
1021         } else if (trafo_size == 16) {
1022             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1023             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1024             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1025         } else { // trafo_size == 32
1026             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1027             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1028             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1029         }
1030         break;
1031     }
1032     case SCAN_HORIZ:
1033         scan_x_cg  = horiz_scan2x2_x;
1034         scan_y_cg  = horiz_scan2x2_y;
1035         scan_x_off = horiz_scan4x4_x;
1036         scan_y_off = horiz_scan4x4_y;
1037         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1038         break;
1039     default: //SCAN_VERT
1040         scan_x_cg  = horiz_scan2x2_y;
1041         scan_y_cg  = horiz_scan2x2_x;
1042         scan_x_off = horiz_scan4x4_y;
1043         scan_y_off = horiz_scan4x4_x;
1044         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1045         break;
1046     }
1047     num_coeff++;
1048     num_last_subset = (num_coeff - 1) >> 4;
1049
1050     for (i = num_last_subset; i >= 0; i--) {
1051         int n, m;
1052         int x_cg, y_cg, x_c, y_c;
1053         int implicit_non_zero_coeff = 0;
1054         int64_t trans_coeff_level;
1055         int prev_sig = 0;
1056         int offset   = i << 4;
1057
1058         uint8_t significant_coeff_flag_idx[16];
1059         uint8_t nb_significant_coeff_flag = 0;
1060
1061         x_cg = scan_x_cg[i];
1062         y_cg = scan_y_cg[i];
1063
1064         if (i < num_last_subset && i > 0) {
1065             int ctx_cg = 0;
1066             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1067                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1068             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1069                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1070
1071             significant_coeff_group_flag[x_cg][y_cg] =
1072                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1073             implicit_non_zero_coeff = 1;
1074         } else {
1075             significant_coeff_group_flag[x_cg][y_cg] =
1076                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1077                  (x_cg == 0 && y_cg == 0));
1078         }
1079
1080         last_scan_pos = num_coeff - offset - 1;
1081
1082         if (i == num_last_subset) {
1083             n_end                         = last_scan_pos - 1;
1084             significant_coeff_flag_idx[0] = last_scan_pos;
1085             nb_significant_coeff_flag     = 1;
1086         } else {
1087             n_end = 15;
1088         }
1089
1090         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1091             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1092         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1093             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1094
1095         for (n = n_end; n >= 0; n--) {
1096             GET_COORD(offset, n);
1097
1098             if (significant_coeff_group_flag[x_cg][y_cg] &&
1099                 (n > 0 || implicit_non_zero_coeff == 0)) {
1100                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1101                                                           log2_trafo_size,
1102                                                           scan_idx,
1103                                                           prev_sig) == 1) {
1104                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1105                     nb_significant_coeff_flag++;
1106                     implicit_non_zero_coeff = 0;
1107                 }
1108             } else {
1109                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1110                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1111                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1112                     nb_significant_coeff_flag++;
1113                 }
1114             }
1115         }
1116
1117         n_end = nb_significant_coeff_flag;
1118
1119         if (n_end) {
1120             int first_nz_pos_in_cg = 16;
1121             int last_nz_pos_in_cg = -1;
1122             int c_rice_param = 0;
1123             int first_greater1_coeff_idx = -1;
1124             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1125             uint16_t coeff_sign_flag;
1126             int sum_abs = 0;
1127             int sign_hidden = 0;
1128
1129             // initialize first elem of coeff_bas_level_greater1_flag
1130             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1131
1132             if (!(i == num_last_subset) && greater1_ctx == 0)
1133                 ctx_set++;
1134             greater1_ctx      = 1;
1135             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1136
1137             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1138                 int n_idx = significant_coeff_flag_idx[m];
1139                 int inc   = (ctx_set << 2) + greater1_ctx;
1140                 coeff_abs_level_greater1_flag[n_idx] =
1141                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1142                 if (coeff_abs_level_greater1_flag[n_idx]) {
1143                     greater1_ctx = 0;
1144                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1145                     greater1_ctx++;
1146                 }
1147
1148                 if (coeff_abs_level_greater1_flag[n_idx] &&
1149                     first_greater1_coeff_idx == -1)
1150                     first_greater1_coeff_idx = n_idx;
1151             }
1152             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1153             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1154                                  !lc->cu.cu_transquant_bypass_flag;
1155
1156             if (first_greater1_coeff_idx != -1) {
1157                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1158             }
1159             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1160                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1161             } else {
1162                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1163             }
1164
1165             for (m = 0; m < n_end; m++) {
1166                 n = significant_coeff_flag_idx[m];
1167                 GET_COORD(offset, n);
1168                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1169                 if (trans_coeff_level == ((m < 8) ?
1170                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1171                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1172
1173                     trans_coeff_level += last_coeff_abs_level_remaining;
1174                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1175                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1176                 }
1177                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1178                     sum_abs += trans_coeff_level;
1179                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1180                         trans_coeff_level = -trans_coeff_level;
1181                 }
1182                 if (coeff_sign_flag >> 15)
1183                     trans_coeff_level = -trans_coeff_level;
1184                 coeff_sign_flag <<= 1;
1185                 if (!lc->cu.cu_transquant_bypass_flag) {
1186                     if (s->sps->scaling_list_enable_flag) {
1187                         if (y_c || x_c || log2_trafo_size < 4) {
1188                             int pos;
1189                             switch (log2_trafo_size) {
1190                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1191                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1192                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1193                             default: pos = (y_c        << 2) +  x_c;
1194                             }
1195                             scale_m = scale_matrix[pos];
1196                         } else {
1197                             scale_m = dc_scale;
1198                         }
1199                     }
1200                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1201                     if(trans_coeff_level < 0) {
1202                         if((~trans_coeff_level) & 0xFffffffffff8000)
1203                             trans_coeff_level = -32768;
1204                     } else {
1205                         if (trans_coeff_level & 0xffffffffffff8000)
1206                             trans_coeff_level = 32767;
1207                     }
1208                 }
1209                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1210             }
1211         }
1212     }
1213
1214     if (lc->cu.cu_transquant_bypass_flag) {
1215         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1216     } else {
1217         if (transform_skip_flag)
1218             s->hevcdsp.transform_skip(dst, coeffs, stride);
1219         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1220                  log2_trafo_size == 2)
1221             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1222         else
1223             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1224     }
1225 }
1226
1227 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1228                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1229                               int log2_cb_size, int log2_trafo_size,
1230                               int trafo_depth, int blk_idx)
1231 {
1232     HEVCLocalContext *lc = &s->HEVClc;
1233
1234     if (lc->cu.pred_mode == MODE_INTRA) {
1235         int trafo_size = 1 << log2_trafo_size;
1236         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1237
1238         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1239         if (log2_trafo_size > 2) {
1240             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1241             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1242             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1243             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1244         } else if (blk_idx == 3) {
1245             trafo_size = trafo_size << s->sps->hshift[1];
1246             ff_hevc_set_neighbour_available(s, xBase, yBase,
1247                                             trafo_size, trafo_size);
1248             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1249             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1250         }
1251     }
1252
1253     if (lc->tt.cbf_luma ||
1254         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1255         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1256         int scan_idx   = SCAN_DIAG;
1257         int scan_idx_c = SCAN_DIAG;
1258
1259         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1260             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1261             if (lc->tu.cu_qp_delta != 0)
1262                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1263                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1264             lc->tu.is_cu_qp_delta_coded = 1;
1265
1266             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1267                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1268                 av_log(s->avctx, AV_LOG_ERROR,
1269                        "The cu_qp_delta %d is outside the valid range "
1270                        "[%d, %d].\n",
1271                        lc->tu.cu_qp_delta,
1272                        -(26 + s->sps->qp_bd_offset / 2),
1273                         (25 + s->sps->qp_bd_offset / 2));
1274                 return AVERROR_INVALIDDATA;
1275             }
1276
1277             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1278         }
1279
1280         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1281             if (lc->tu.cur_intra_pred_mode >= 6 &&
1282                 lc->tu.cur_intra_pred_mode <= 14) {
1283                 scan_idx = SCAN_VERT;
1284             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1285                        lc->tu.cur_intra_pred_mode <= 30) {
1286                 scan_idx = SCAN_HORIZ;
1287             }
1288
1289             if (lc->pu.intra_pred_mode_c >=  6 &&
1290                 lc->pu.intra_pred_mode_c <= 14) {
1291                 scan_idx_c = SCAN_VERT;
1292             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1293                        lc->pu.intra_pred_mode_c <= 30) {
1294                 scan_idx_c = SCAN_HORIZ;
1295             }
1296         }
1297
1298         if (lc->tt.cbf_luma)
1299             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1300         if (log2_trafo_size > 2) {
1301             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1302                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1303             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1304                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1305         } else if (blk_idx == 3) {
1306             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1307                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1308             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1309                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1310         }
1311     }
1312     return 0;
1313 }
1314
1315 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1316 {
1317     int cb_size          = 1 << log2_cb_size;
1318     int log2_min_pu_size = s->sps->log2_min_pu_size;
1319
1320     int min_pu_width     = s->sps->min_pu_width;
1321     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1322     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1323     int i, j;
1324
1325     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1326         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1327             s->is_pcm[i + j * min_pu_width] = 2;
1328 }
1329
1330 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1331                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1332                               int log2_cb_size, int log2_trafo_size,
1333                               int trafo_depth, int blk_idx)
1334 {
1335     HEVCLocalContext *lc = &s->HEVClc;
1336     uint8_t split_transform_flag;
1337     int ret;
1338
1339     if (trafo_depth > 0 && log2_trafo_size == 2) {
1340         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1341             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1342         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1343             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1344     } else {
1345         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1346         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1347     }
1348
1349     if (lc->cu.intra_split_flag) {
1350         if (trafo_depth == 1)
1351             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1352     } else {
1353         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1354     }
1355
1356     lc->tt.cbf_luma = 1;
1357
1358     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1359                               lc->cu.pred_mode == MODE_INTER &&
1360                               lc->cu.part_mode != PART_2Nx2N &&
1361                               trafo_depth == 0;
1362
1363     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1364         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1365         trafo_depth     < lc->cu.max_trafo_depth       &&
1366         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1367         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1368     } else {
1369         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1370                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1371                                lc->tt.inter_split_flag;
1372     }
1373
1374     if (log2_trafo_size > 2) {
1375         if (trafo_depth == 0 ||
1376             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1377             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1378                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1379         }
1380
1381         if (trafo_depth == 0 ||
1382             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1383             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1384                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1385         }
1386     }
1387
1388     if (split_transform_flag) {
1389         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1390         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1391
1392         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1393                                  log2_cb_size, log2_trafo_size - 1,
1394                                  trafo_depth + 1, 0);
1395         if (ret < 0)
1396             return ret;
1397         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1398                                  log2_cb_size, log2_trafo_size - 1,
1399                                  trafo_depth + 1, 1);
1400         if (ret < 0)
1401             return ret;
1402         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1403                                  log2_cb_size, log2_trafo_size - 1,
1404                                  trafo_depth + 1, 2);
1405         if (ret < 0)
1406             return ret;
1407         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1408                                  log2_cb_size, log2_trafo_size - 1,
1409                                  trafo_depth + 1, 3);
1410         if (ret < 0)
1411             return ret;
1412     } else {
1413         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1414         int log2_min_tu_size = s->sps->log2_min_tb_size;
1415         int min_tu_width     = s->sps->min_tb_width;
1416
1417         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1418             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1419             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1420             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1421         }
1422
1423         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1424                                  log2_cb_size, log2_trafo_size, trafo_depth,
1425                                  blk_idx);
1426         if (ret < 0)
1427             return ret;
1428         // TODO: store cbf_luma somewhere else
1429         if (lc->tt.cbf_luma) {
1430             int i, j;
1431             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1432                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1433                     int x_tu = (x0 + j) >> log2_min_tu_size;
1434                     int y_tu = (y0 + i) >> log2_min_tu_size;
1435                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1436                 }
1437         }
1438         if (!s->sh.disable_deblocking_filter_flag) {
1439             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1440                                                   lc->slice_or_tiles_up_boundary,
1441                                                   lc->slice_or_tiles_left_boundary);
1442             if (s->pps->transquant_bypass_enable_flag &&
1443                 lc->cu.cu_transquant_bypass_flag)
1444                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1445         }
1446     }
1447     return 0;
1448 }
1449
1450 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1451 {
1452     //TODO: non-4:2:0 support
1453     HEVCLocalContext *lc = &s->HEVClc;
1454     GetBitContext gb;
1455     int cb_size   = 1 << log2_cb_size;
1456     int stride0   = s->frame->linesize[0];
1457     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1458     int   stride1 = s->frame->linesize[1];
1459     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1460     int   stride2 = s->frame->linesize[2];
1461     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1462
1463     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1464     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1465     int ret;
1466
1467     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1468                                           lc->slice_or_tiles_up_boundary,
1469                                           lc->slice_or_tiles_left_boundary);
1470
1471     ret = init_get_bits(&gb, pcm, length);
1472     if (ret < 0)
1473         return ret;
1474
1475     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1476     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1477     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1478     return 0;
1479 }
1480
1481 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1482 {
1483     HEVCLocalContext *lc = &s->HEVClc;
1484     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1485     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1486
1487     if (x)
1488         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1489     if (y)
1490         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1491
1492     switch (x) {
1493     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1494     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1495     case 0: lc->pu.mvd.x = 0;                               break;
1496     }
1497
1498     switch (y) {
1499     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1500     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1501     case 0: lc->pu.mvd.y = 0;                               break;
1502     }
1503 }
1504
1505 /**
1506  * 8.5.3.2.2.1 Luma sample interpolation process
1507  *
1508  * @param s HEVC decoding context
1509  * @param dst target buffer for block data at block position
1510  * @param dststride stride of the dst buffer
1511  * @param ref reference picture buffer at origin (0, 0)
1512  * @param mv motion vector (relative to block position) to get pixel data from
1513  * @param x_off horizontal position of block from origin (0, 0)
1514  * @param y_off vertical position of block from origin (0, 0)
1515  * @param block_w width of block
1516  * @param block_h height of block
1517  */
1518 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1519                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1520                     int block_w, int block_h)
1521 {
1522     HEVCLocalContext *lc = &s->HEVClc;
1523     uint8_t *src         = ref->data[0];
1524     ptrdiff_t srcstride  = ref->linesize[0];
1525     int pic_width        = s->sps->width;
1526     int pic_height       = s->sps->height;
1527
1528     int mx         = mv->x & 3;
1529     int my         = mv->y & 3;
1530     int extra_left = ff_hevc_qpel_extra_before[mx];
1531     int extra_top  = ff_hevc_qpel_extra_before[my];
1532
1533     x_off += mv->x >> 2;
1534     y_off += mv->y >> 2;
1535     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1536
1537     if (x_off < extra_left || y_off < extra_top ||
1538         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1539         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1540         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1541         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1542         int buf_offset = extra_top *
1543                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1544
1545         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1546                                  edge_emu_stride, srcstride,
1547                                  block_w + ff_hevc_qpel_extra[mx],
1548                                  block_h + ff_hevc_qpel_extra[my],
1549                                  x_off - extra_left, y_off - extra_top,
1550                                  pic_width, pic_height);
1551         src = lc->edge_emu_buffer + buf_offset;
1552         srcstride = edge_emu_stride;
1553     }
1554     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1555                                      block_h, lc->mc_buffer);
1556 }
1557
1558 /**
1559  * 8.5.3.2.2.2 Chroma sample interpolation process
1560  *
1561  * @param s HEVC decoding context
1562  * @param dst1 target buffer for block data at block position (U plane)
1563  * @param dst2 target buffer for block data at block position (V plane)
1564  * @param dststride stride of the dst1 and dst2 buffers
1565  * @param ref reference picture buffer at origin (0, 0)
1566  * @param mv motion vector (relative to block position) to get pixel data from
1567  * @param x_off horizontal position of block from origin (0, 0)
1568  * @param y_off vertical position of block from origin (0, 0)
1569  * @param block_w width of block
1570  * @param block_h height of block
1571  */
1572 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1573                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1574                       int x_off, int y_off, int block_w, int block_h)
1575 {
1576     HEVCLocalContext *lc = &s->HEVClc;
1577     uint8_t *src1        = ref->data[1];
1578     uint8_t *src2        = ref->data[2];
1579     ptrdiff_t src1stride = ref->linesize[1];
1580     ptrdiff_t src2stride = ref->linesize[2];
1581     int pic_width        = s->sps->width >> 1;
1582     int pic_height       = s->sps->height >> 1;
1583
1584     int mx = mv->x & 7;
1585     int my = mv->y & 7;
1586
1587     x_off += mv->x >> 3;
1588     y_off += mv->y >> 3;
1589     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1590     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1591
1592     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1593         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1594         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1595         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1596         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1597         int buf_offset1 = EPEL_EXTRA_BEFORE *
1598                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1599         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1600         int buf_offset2 = EPEL_EXTRA_BEFORE *
1601                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1602
1603         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1604                                  edge_emu_stride, src1stride,
1605                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1606                                  x_off - EPEL_EXTRA_BEFORE,
1607                                  y_off - EPEL_EXTRA_BEFORE,
1608                                  pic_width, pic_height);
1609
1610         src1 = lc->edge_emu_buffer + buf_offset1;
1611         src1stride = edge_emu_stride;
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1613                                              block_w, block_h, mx, my, lc->mc_buffer);
1614
1615         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1616                                  edge_emu_stride, src2stride,
1617                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1618                                  x_off - EPEL_EXTRA_BEFORE,
1619                                  y_off - EPEL_EXTRA_BEFORE,
1620                                  pic_width, pic_height);
1621         src2 = lc->edge_emu_buffer + buf_offset2;
1622         src2stride = edge_emu_stride;
1623
1624         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1625                                              block_w, block_h, mx, my,
1626                                              lc->mc_buffer);
1627     } else {
1628         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1629                                              block_w, block_h, mx, my,
1630                                              lc->mc_buffer);
1631         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1632                                              block_w, block_h, mx, my,
1633                                              lc->mc_buffer);
1634     }
1635 }
1636
1637 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1638                                 const Mv *mv, int y0, int height)
1639 {
1640     int y = (mv->y >> 2) + y0 + height + 9;
1641     ff_thread_await_progress(&ref->tf, y, 0);
1642 }
1643
1644 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1645                                 int nPbW, int nPbH,
1646                                 int log2_cb_size, int partIdx)
1647 {
1648 #define POS(c_idx, x, y)                                                              \
1649     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1650                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1651     HEVCLocalContext *lc = &s->HEVClc;
1652     int merge_idx = 0;
1653     struct MvField current_mv = {{{ 0 }}};
1654
1655     int min_pu_width = s->sps->min_pu_width;
1656
1657     MvField *tab_mvf = s->ref->tab_mvf;
1658     RefPicList  *refPicList = s->ref->refPicList;
1659     HEVCFrame *ref0, *ref1;
1660
1661     int tmpstride = MAX_PB_SIZE;
1662
1663     uint8_t *dst0 = POS(0, x0, y0);
1664     uint8_t *dst1 = POS(1, x0, y0);
1665     uint8_t *dst2 = POS(2, x0, y0);
1666     int log2_min_cb_size = s->sps->log2_min_cb_size;
1667     int min_cb_width     = s->sps->min_cb_width;
1668     int x_cb             = x0 >> log2_min_cb_size;
1669     int y_cb             = y0 >> log2_min_cb_size;
1670     int ref_idx[2];
1671     int mvp_flag[2];
1672     int x_pu, y_pu;
1673     int i, j;
1674
1675     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1676         if (s->sh.max_num_merge_cand > 1)
1677             merge_idx = ff_hevc_merge_idx_decode(s);
1678         else
1679             merge_idx = 0;
1680
1681         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1682                                    1 << log2_cb_size,
1683                                    1 << log2_cb_size,
1684                                    log2_cb_size, partIdx,
1685                                    merge_idx, &current_mv);
1686         x_pu = x0 >> s->sps->log2_min_pu_size;
1687         y_pu = y0 >> s->sps->log2_min_pu_size;
1688
1689         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1690             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1691                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1692     } else { /* MODE_INTER */
1693         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1694         if (lc->pu.merge_flag) {
1695             if (s->sh.max_num_merge_cand > 1)
1696                 merge_idx = ff_hevc_merge_idx_decode(s);
1697             else
1698                 merge_idx = 0;
1699
1700             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1701                                        partIdx, merge_idx, &current_mv);
1702             x_pu = x0 >> s->sps->log2_min_pu_size;
1703             y_pu = y0 >> s->sps->log2_min_pu_size;
1704
1705             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1706                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1707                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1708         } else {
1709             enum InterPredIdc inter_pred_idc = PRED_L0;
1710             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1711             if (s->sh.slice_type == B_SLICE)
1712                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1713
1714             if (inter_pred_idc != PRED_L1) {
1715                 if (s->sh.nb_refs[L0]) {
1716                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1717                     current_mv.ref_idx[0] = ref_idx[0];
1718                 }
1719                 current_mv.pred_flag[0] = 1;
1720                 hls_mvd_coding(s, x0, y0, 0);
1721                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1722                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1723                                          partIdx, merge_idx, &current_mv,
1724                                          mvp_flag[0], 0);
1725                 current_mv.mv[0].x += lc->pu.mvd.x;
1726                 current_mv.mv[0].y += lc->pu.mvd.y;
1727             }
1728
1729             if (inter_pred_idc != PRED_L0) {
1730                 if (s->sh.nb_refs[L1]) {
1731                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1732                     current_mv.ref_idx[1] = ref_idx[1];
1733                 }
1734
1735                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1736                     lc->pu.mvd.x = 0;
1737                     lc->pu.mvd.y = 0;
1738                 } else {
1739                     hls_mvd_coding(s, x0, y0, 1);
1740                 }
1741
1742                 current_mv.pred_flag[1] = 1;
1743                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1744                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1745                                          partIdx, merge_idx, &current_mv,
1746                                          mvp_flag[1], 1);
1747                 current_mv.mv[1].x += lc->pu.mvd.x;
1748                 current_mv.mv[1].y += lc->pu.mvd.y;
1749             }
1750
1751             x_pu = x0 >> s->sps->log2_min_pu_size;
1752             y_pu = y0 >> s->sps->log2_min_pu_size;
1753
1754             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1755                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1756                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1757         }
1758     }
1759
1760     if (current_mv.pred_flag[0]) {
1761         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1762         if (!ref0)
1763             return;
1764         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1765     }
1766     if (current_mv.pred_flag[1]) {
1767         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1768         if (!ref1)
1769             return;
1770         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1771     }
1772
1773     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1774         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1775         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1776
1777         luma_mc(s, tmp, tmpstride, ref0->frame,
1778                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1779
1780         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1781             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1782             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1783                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1784                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1785                                      dst0, s->frame->linesize[0], tmp,
1786                                      tmpstride, nPbW, nPbH);
1787         } else {
1788             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1789         }
1790         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1791                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1792
1793         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1794             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1795             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1796                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1797                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1798                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1799                                      nPbW / 2, nPbH / 2);
1800             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1801                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1802                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1803                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1804                                      nPbW / 2, nPbH / 2);
1805         } else {
1806             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1807             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1808         }
1809     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1810         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1811         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1812
1813         if (!ref1)
1814             return;
1815
1816         luma_mc(s, tmp, tmpstride, ref1->frame,
1817                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1818
1819         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1820             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1821             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1822                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1823                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1824                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1825                                       nPbW, nPbH);
1826         } else {
1827             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1828         }
1829
1830         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1831                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1832
1833         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1834             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1835             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1836                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1837                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1838                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1839             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1840                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1841                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1842                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1843         } else {
1844             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1845             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1846         }
1847     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1848         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1849         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1850         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1851         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1852         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1853         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1854
1855         if (!ref0 || !ref1)
1856             return;
1857
1858         luma_mc(s, tmp, tmpstride, ref0->frame,
1859                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1860         luma_mc(s, tmp2, tmpstride, ref1->frame,
1861                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1862
1863         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1864             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1865             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1866                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1867                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1868                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1869                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1870                                          dst0, s->frame->linesize[0],
1871                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1872         } else {
1873             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1874                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1875         }
1876
1877         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1878                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1879         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1880                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1881
1882         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1883             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1884             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1885                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1886                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1887                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1888                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1889                                          dst1, s->frame->linesize[1], tmp, tmp3,
1890                                          tmpstride, nPbW / 2, nPbH / 2);
1891             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1892                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1893                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1894                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1895                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1896                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1897                                          tmpstride, nPbW / 2, nPbH / 2);
1898         } else {
1899             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1900             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1901         }
1902     }
1903 }
1904
1905 /**
1906  * 8.4.1
1907  */
1908 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1909                                 int prev_intra_luma_pred_flag)
1910 {
1911     HEVCLocalContext *lc = &s->HEVClc;
1912     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1913     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1914     int min_pu_width     = s->sps->min_pu_width;
1915     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1916     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1917     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1918
1919     int cand_up   = (lc->ctb_up_flag || y0b) ?
1920                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1921     int cand_left = (lc->ctb_left_flag || x0b) ?
1922                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1923
1924     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1925
1926     MvField *tab_mvf = s->ref->tab_mvf;
1927     int intra_pred_mode;
1928     int candidate[3];
1929     int i, j;
1930
1931     // intra_pred_mode prediction does not cross vertical CTB boundaries
1932     if ((y0 - 1) < y_ctb)
1933         cand_up = INTRA_DC;
1934
1935     if (cand_left == cand_up) {
1936         if (cand_left < 2) {
1937             candidate[0] = INTRA_PLANAR;
1938             candidate[1] = INTRA_DC;
1939             candidate[2] = INTRA_ANGULAR_26;
1940         } else {
1941             candidate[0] = cand_left;
1942             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1943             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1944         }
1945     } else {
1946         candidate[0] = cand_left;
1947         candidate[1] = cand_up;
1948         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1949             candidate[2] = INTRA_PLANAR;
1950         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1951             candidate[2] = INTRA_DC;
1952         } else {
1953             candidate[2] = INTRA_ANGULAR_26;
1954         }
1955     }
1956
1957     if (prev_intra_luma_pred_flag) {
1958         intra_pred_mode = candidate[lc->pu.mpm_idx];
1959     } else {
1960         if (candidate[0] > candidate[1])
1961             FFSWAP(uint8_t, candidate[0], candidate[1]);
1962         if (candidate[0] > candidate[2])
1963             FFSWAP(uint8_t, candidate[0], candidate[2]);
1964         if (candidate[1] > candidate[2])
1965             FFSWAP(uint8_t, candidate[1], candidate[2]);
1966
1967         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1968         for (i = 0; i < 3; i++)
1969             if (intra_pred_mode >= candidate[i])
1970                 intra_pred_mode++;
1971     }
1972
1973     /* write the intra prediction units into the mv array */
1974     if (!size_in_pus)
1975         size_in_pus = 1;
1976     for (i = 0; i < size_in_pus; i++) {
1977         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1978                intra_pred_mode, size_in_pus);
1979
1980         for (j = 0; j < size_in_pus; j++) {
1981             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1982             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1983             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1984             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1985             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1986             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1990         }
1991     }
1992
1993     return intra_pred_mode;
1994 }
1995
1996 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1997                                           int log2_cb_size, int ct_depth)
1998 {
1999     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2000     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2001     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2002     int y;
2003
2004     for (y = 0; y < length; y++)
2005         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2006                ct_depth, length);
2007 }
2008
2009 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2010                                   int log2_cb_size)
2011 {
2012     HEVCLocalContext *lc = &s->HEVClc;
2013     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2014     uint8_t prev_intra_luma_pred_flag[4];
2015     int split   = lc->cu.part_mode == PART_NxN;
2016     int pb_size = (1 << log2_cb_size) >> split;
2017     int side    = split + 1;
2018     int chroma_mode;
2019     int i, j;
2020
2021     for (i = 0; i < side; i++)
2022         for (j = 0; j < side; j++)
2023             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2024
2025     for (i = 0; i < side; i++) {
2026         for (j = 0; j < side; j++) {
2027             if (prev_intra_luma_pred_flag[2 * i + j])
2028                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2029             else
2030                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2031
2032             lc->pu.intra_pred_mode[2 * i + j] =
2033                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2034                                      prev_intra_luma_pred_flag[2 * i + j]);
2035         }
2036     }
2037
2038     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2039     if (chroma_mode != 4) {
2040         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2041             lc->pu.intra_pred_mode_c = 34;
2042         else
2043             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2044     } else {
2045         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2046     }
2047 }
2048
2049 static void intra_prediction_unit_default_value(HEVCContext *s,
2050                                                 int x0, int y0,
2051                                                 int log2_cb_size)
2052 {
2053     HEVCLocalContext *lc = &s->HEVClc;
2054     int pb_size          = 1 << log2_cb_size;
2055     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2056     int min_pu_width     = s->sps->min_pu_width;
2057     MvField *tab_mvf     = s->ref->tab_mvf;
2058     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2059     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2060     int j, k;
2061
2062     if (size_in_pus == 0)
2063         size_in_pus = 1;
2064     for (j = 0; j < size_in_pus; j++) {
2065         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2066         for (k = 0; k < size_in_pus; k++)
2067             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2068     }
2069 }
2070
2071 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2072 {
2073     int cb_size          = 1 << log2_cb_size;
2074     HEVCLocalContext *lc = &s->HEVClc;
2075     int log2_min_cb_size = s->sps->log2_min_cb_size;
2076     int length           = cb_size >> log2_min_cb_size;
2077     int min_cb_width     = s->sps->min_cb_width;
2078     int x_cb             = x0 >> log2_min_cb_size;
2079     int y_cb             = y0 >> log2_min_cb_size;
2080     int x, y, ret;
2081
2082     lc->cu.x                = x0;
2083     lc->cu.y                = y0;
2084     lc->cu.rqt_root_cbf     = 1;
2085     lc->cu.pred_mode        = MODE_INTRA;
2086     lc->cu.part_mode        = PART_2Nx2N;
2087     lc->cu.intra_split_flag = 0;
2088     lc->cu.pcm_flag         = 0;
2089
2090     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2091     for (x = 0; x < 4; x++)
2092         lc->pu.intra_pred_mode[x] = 1;
2093     if (s->pps->transquant_bypass_enable_flag) {
2094         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2095         if (lc->cu.cu_transquant_bypass_flag)
2096             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2097     } else
2098         lc->cu.cu_transquant_bypass_flag = 0;
2099
2100     if (s->sh.slice_type != I_SLICE) {
2101         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2102
2103         lc->cu.pred_mode = MODE_SKIP;
2104         x = y_cb * min_cb_width + x_cb;
2105         for (y = 0; y < length; y++) {
2106             memset(&s->skip_flag[x], skip_flag, length);
2107             x += min_cb_width;
2108         }
2109         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2110     }
2111
2112     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2113         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2114         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2115
2116         if (!s->sh.disable_deblocking_filter_flag)
2117             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2118                                                   lc->slice_or_tiles_up_boundary,
2119                                                   lc->slice_or_tiles_left_boundary);
2120     } else {
2121         if (s->sh.slice_type != I_SLICE)
2122             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2123         if (lc->cu.pred_mode != MODE_INTRA ||
2124             log2_cb_size == s->sps->log2_min_cb_size) {
2125             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2126             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2127                                       lc->cu.pred_mode == MODE_INTRA;
2128         }
2129
2130         if (lc->cu.pred_mode == MODE_INTRA) {
2131             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2132                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2133                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2134                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2135             }
2136             if (lc->cu.pcm_flag) {
2137                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2138                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2139                 if (s->sps->pcm.loop_filter_disable_flag)
2140                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2141
2142                 if (ret < 0)
2143                     return ret;
2144             } else {
2145                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2146             }
2147         } else {
2148             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2149             switch (lc->cu.part_mode) {
2150             case PART_2Nx2N:
2151                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2152                 break;
2153             case PART_2NxN:
2154                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2155                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2156                 break;
2157             case PART_Nx2N:
2158                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2159                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2160                 break;
2161             case PART_2NxnU:
2162                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2163                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2164                 break;
2165             case PART_2NxnD:
2166                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2167                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2168                 break;
2169             case PART_nLx2N:
2170                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2171                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2172                 break;
2173             case PART_nRx2N:
2174                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2175                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2176                 break;
2177             case PART_NxN:
2178                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2179                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2180                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2181                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2182                 break;
2183             }
2184         }
2185
2186         if (!lc->cu.pcm_flag) {
2187             if (lc->cu.pred_mode != MODE_INTRA &&
2188                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2189                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2190             }
2191             if (lc->cu.rqt_root_cbf) {
2192                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2193                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2194                                          s->sps->max_transform_hierarchy_depth_inter;
2195                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2196                                          log2_cb_size,
2197                                          log2_cb_size, 0, 0);
2198                 if (ret < 0)
2199                     return ret;
2200             } else {
2201                 if (!s->sh.disable_deblocking_filter_flag)
2202                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2203                                                           lc->slice_or_tiles_up_boundary,
2204                                                           lc->slice_or_tiles_left_boundary);
2205             }
2206         }
2207     }
2208
2209     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2210         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2211
2212     x = y_cb * min_cb_width + x_cb;
2213     for (y = 0; y < length; y++) {
2214         memset(&s->qp_y_tab[x], lc->qp_y, length);
2215         x += min_cb_width;
2216     }
2217
2218     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2219
2220     return 0;
2221 }
2222
2223 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2224                                int log2_cb_size, int cb_depth)
2225 {
2226     HEVCLocalContext *lc = &s->HEVClc;
2227     const int cb_size    = 1 << log2_cb_size;
2228
2229     lc->ct.depth = cb_depth;
2230     if (x0 + cb_size <= s->sps->width  &&
2231         y0 + cb_size <= s->sps->height &&
2232         log2_cb_size > s->sps->log2_min_cb_size) {
2233         SAMPLE(s->split_cu_flag, x0, y0) =
2234             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2235     } else {
2236         SAMPLE(s->split_cu_flag, x0, y0) =
2237             (log2_cb_size > s->sps->log2_min_cb_size);
2238     }
2239     if (s->pps->cu_qp_delta_enabled_flag &&
2240         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2241         lc->tu.is_cu_qp_delta_coded = 0;
2242         lc->tu.cu_qp_delta          = 0;
2243     }
2244
2245     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2246         const int cb_size_split = cb_size >> 1;
2247         const int x1 = x0 + cb_size_split;
2248         const int y1 = y0 + cb_size_split;
2249
2250         log2_cb_size--;
2251         cb_depth++;
2252
2253 #define SUBDIVIDE(x, y)                                                \
2254 do {                                                                   \
2255     if (x < s->sps->width && y < s->sps->height) {                     \
2256         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2257         if (ret < 0)                                                   \
2258             return ret;                                                \
2259     }                                                                  \
2260 } while (0)
2261
2262         SUBDIVIDE(x0, y0);
2263         SUBDIVIDE(x1, y0);
2264         SUBDIVIDE(x0, y1);
2265         SUBDIVIDE(x1, y1);
2266     } else {
2267         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2268         if (ret < 0)
2269             return ret;
2270     }
2271
2272     return 0;
2273 }
2274
2275 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2276                                  int ctb_addr_ts)
2277 {
2278     HEVCLocalContext *lc  = &s->HEVClc;
2279     int ctb_size          = 1 << s->sps->log2_ctb_size;
2280     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2281     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2282
2283     int tile_left_boundary, tile_up_boundary;
2284     int slice_left_boundary, slice_up_boundary;
2285
2286     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2287
2288     if (s->pps->entropy_coding_sync_enabled_flag) {
2289         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2290             lc->first_qp_group = 1;
2291         lc->end_of_tiles_x = s->sps->width;
2292     } else if (s->pps->tiles_enabled_flag) {
2293         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2294             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2295             lc->start_of_tiles_x = x_ctb;
2296             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2297             lc->first_qp_group   = 1;
2298         }
2299     } else {
2300         lc->end_of_tiles_x = s->sps->width;
2301     }
2302
2303     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2304
2305     if (s->pps->tiles_enabled_flag) {
2306         tile_left_boundary  = x_ctb > 0 &&
2307                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2308         slice_left_boundary = x_ctb > 0 &&
2309                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2310         tile_up_boundary  = y_ctb > 0 &&
2311                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2312         slice_up_boundary = y_ctb > 0 &&
2313                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2314     } else {
2315         tile_left_boundary  =
2316         tile_up_boundary    = 1;
2317         slice_left_boundary = ctb_addr_in_slice > 0;
2318         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2319     }
2320     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2321     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2322     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2323     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2324     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2325     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2326 }
2327
2328 static int hls_slice_data(HEVCContext *s)
2329 {
2330     int ctb_size    = 1 << s->sps->log2_ctb_size;
2331     int more_data   = 1;
2332     int x_ctb       = 0;
2333     int y_ctb       = 0;
2334     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2335     int ret;
2336
2337     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2338         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2339
2340         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2341         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2342         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2343
2344         ff_hevc_cabac_init(s, ctb_addr_ts);
2345
2346         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2347
2348         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2349         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2350         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2351
2352         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2353         if (ret < 0)
2354             return ret;
2355         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2356
2357         ctb_addr_ts++;
2358         ff_hevc_save_states(s, ctb_addr_ts);
2359         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2360     }
2361
2362     if (x_ctb + ctb_size >= s->sps->width &&
2363         y_ctb + ctb_size >= s->sps->height)
2364         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2365
2366     return ctb_addr_ts;
2367 }
2368
2369 /**
2370  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2371  * 0 if the unit should be skipped, 1 otherwise
2372  */
2373 static int hls_nal_unit(HEVCContext *s)
2374 {
2375     GetBitContext *gb = &s->HEVClc.gb;
2376     int nuh_layer_id;
2377
2378     if (get_bits1(gb) != 0)
2379         return AVERROR_INVALIDDATA;
2380
2381     s->nal_unit_type = get_bits(gb, 6);
2382
2383     nuh_layer_id   = get_bits(gb, 6);
2384     s->temporal_id = get_bits(gb, 3) - 1;
2385     if (s->temporal_id < 0)
2386         return AVERROR_INVALIDDATA;
2387
2388     av_log(s->avctx, AV_LOG_DEBUG,
2389            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2390            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2391
2392     return nuh_layer_id == 0;
2393 }
2394
2395 static void restore_tqb_pixels(HEVCContext *s)
2396 {
2397     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2398     int x, y, c_idx;
2399
2400     for (c_idx = 0; c_idx < 3; c_idx++) {
2401         ptrdiff_t stride = s->frame->linesize[c_idx];
2402         int hshift       = s->sps->hshift[c_idx];
2403         int vshift       = s->sps->vshift[c_idx];
2404         for (y = 0; y < s->sps->min_pu_height; y++) {
2405             for (x = 0; x < s->sps->min_pu_width; x++) {
2406                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2407                     int n;
2408                     int len      = min_pu_size >> hshift;
2409                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2410                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2411                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2412                         memcpy(dst, src, len);
2413                         src += stride;
2414                         dst += stride;
2415                     }
2416                 }
2417             }
2418         }
2419     }
2420 }
2421
2422 static int set_side_data(HEVCContext *s)
2423 {
2424     AVFrame *out = s->ref->frame;
2425
2426     if (s->sei_frame_packing_present &&
2427         s->frame_packing_arrangement_type >= 3 &&
2428         s->frame_packing_arrangement_type <= 5 &&
2429         s->content_interpretation_type > 0 &&
2430         s->content_interpretation_type < 3) {
2431         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2432         if (!stereo)
2433             return AVERROR(ENOMEM);
2434
2435         switch (s->frame_packing_arrangement_type) {
2436         case 3:
2437             if (s->quincunx_subsampling)
2438                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2439             else
2440                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2441             break;
2442         case 4:
2443             stereo->type = AV_STEREO3D_TOPBOTTOM;
2444             break;
2445         case 5:
2446             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2447             break;
2448         }
2449
2450         if (s->content_interpretation_type == 2)
2451             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2452     }
2453
2454     return 0;
2455 }
2456
2457 static int hevc_frame_start(HEVCContext *s)
2458 {
2459     HEVCLocalContext *lc = &s->HEVClc;
2460     int ret;
2461
2462     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2463     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2464     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2465     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2466
2467     lc->start_of_tiles_x = 0;
2468     s->is_decoded        = 0;
2469
2470     if (s->pps->tiles_enabled_flag)
2471         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2472
2473     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2474                               s->poc);
2475     if (ret < 0)
2476         goto fail;
2477
2478     ret = ff_hevc_frame_rps(s);
2479     if (ret < 0) {
2480         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2481         goto fail;
2482     }
2483
2484     ret = set_side_data(s);
2485     if (ret < 0)
2486         goto fail;
2487
2488     av_frame_unref(s->output_frame);
2489     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2490     if (ret < 0)
2491         goto fail;
2492
2493     ff_thread_finish_setup(s->avctx);
2494
2495     return 0;
2496
2497 fail:
2498     if (s->ref)
2499         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2500     s->ref = NULL;
2501     return ret;
2502 }
2503
2504 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2505 {
2506     HEVCLocalContext *lc = &s->HEVClc;
2507     GetBitContext *gb    = &lc->gb;
2508     int ctb_addr_ts, ret;
2509
2510     ret = init_get_bits8(gb, nal, length);
2511     if (ret < 0)
2512         return ret;
2513
2514     ret = hls_nal_unit(s);
2515     if (ret < 0) {
2516         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2517                s->nal_unit_type);
2518         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2519             return ret;
2520         return 0;
2521     } else if (!ret)
2522         return 0;
2523
2524     switch (s->nal_unit_type) {
2525     case NAL_VPS:
2526         ret = ff_hevc_decode_nal_vps(s);
2527         if (ret < 0)
2528             return ret;
2529         break;
2530     case NAL_SPS:
2531         ret = ff_hevc_decode_nal_sps(s);
2532         if (ret < 0)
2533             return ret;
2534         break;
2535     case NAL_PPS:
2536         ret = ff_hevc_decode_nal_pps(s);
2537         if (ret < 0)
2538             return ret;
2539         break;
2540     case NAL_SEI_PREFIX:
2541     case NAL_SEI_SUFFIX:
2542         ret = ff_hevc_decode_nal_sei(s);
2543         if (ret < 0)
2544             return ret;
2545         break;
2546     case NAL_TRAIL_R:
2547     case NAL_TRAIL_N:
2548     case NAL_TSA_N:
2549     case NAL_TSA_R:
2550     case NAL_STSA_N:
2551     case NAL_STSA_R:
2552     case NAL_BLA_W_LP:
2553     case NAL_BLA_W_RADL:
2554     case NAL_BLA_N_LP:
2555     case NAL_IDR_W_RADL:
2556     case NAL_IDR_N_LP:
2557     case NAL_CRA_NUT:
2558     case NAL_RADL_N:
2559     case NAL_RADL_R:
2560     case NAL_RASL_N:
2561     case NAL_RASL_R:
2562         ret = hls_slice_header(s);
2563         if (ret < 0)
2564             return ret;
2565
2566         if (s->max_ra == INT_MAX) {
2567             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2568                 s->max_ra = s->poc;
2569             } else {
2570                 if (IS_IDR(s))
2571                     s->max_ra = INT_MIN;
2572             }
2573         }
2574
2575         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2576             s->poc <= s->max_ra) {
2577             s->is_decoded = 0;
2578             break;
2579         } else {
2580             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2581                 s->max_ra = INT_MIN;
2582         }
2583
2584         if (s->sh.first_slice_in_pic_flag) {
2585             ret = hevc_frame_start(s);
2586             if (ret < 0)
2587                 return ret;
2588         } else if (!s->ref) {
2589             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2590             return AVERROR_INVALIDDATA;
2591         }
2592
2593         if (!s->sh.dependent_slice_segment_flag &&
2594             s->sh.slice_type != I_SLICE) {
2595             ret = ff_hevc_slice_rpl(s);
2596             if (ret < 0) {
2597                 av_log(s->avctx, AV_LOG_WARNING,
2598                        "Error constructing the reference lists for the current slice.\n");
2599                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2600                     return ret;
2601             }
2602         }
2603
2604         ctb_addr_ts = hls_slice_data(s);
2605         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2606             s->is_decoded = 1;
2607             if ((s->pps->transquant_bypass_enable_flag ||
2608                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2609                 s->sps->sao_enabled)
2610                 restore_tqb_pixels(s);
2611         }
2612
2613         if (ctb_addr_ts < 0)
2614             return ctb_addr_ts;
2615         break;
2616     case NAL_EOS_NUT:
2617     case NAL_EOB_NUT:
2618         s->seq_decode = (s->seq_decode + 1) & 0xff;
2619         s->max_ra     = INT_MAX;
2620         break;
2621     case NAL_AUD:
2622     case NAL_FD_NUT:
2623         break;
2624     default:
2625         av_log(s->avctx, AV_LOG_INFO,
2626                "Skipping NAL unit %d\n", s->nal_unit_type);
2627     }
2628
2629     return 0;
2630 }
2631
2632 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2633  * between these functions would be nice. */
2634 static int extract_rbsp(const uint8_t *src, int length,
2635                         HEVCNAL *nal)
2636 {
2637     int i, si, di;
2638     uint8_t *dst;
2639
2640 #define STARTCODE_TEST                                                  \
2641         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2642             if (src[i + 2] != 3) {                                      \
2643                 /* startcode, so we must be past the end */             \
2644                 length = i;                                             \
2645             }                                                           \
2646             break;                                                      \
2647         }
2648 #if HAVE_FAST_UNALIGNED
2649 #define FIND_FIRST_ZERO                                                 \
2650         if (i > 0 && !src[i])                                           \
2651             i--;                                                        \
2652         while (src[i])                                                  \
2653             i++
2654 #if HAVE_FAST_64BIT
2655     for (i = 0; i + 1 < length; i += 9) {
2656         if (!((~AV_RN64A(src + i) &
2657                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2658               0x8000800080008080ULL))
2659             continue;
2660         FIND_FIRST_ZERO;
2661         STARTCODE_TEST;
2662         i -= 7;
2663     }
2664 #else
2665     for (i = 0; i + 1 < length; i += 5) {
2666         if (!((~AV_RN32A(src + i) &
2667                (AV_RN32A(src + i) - 0x01000101U)) &
2668               0x80008080U))
2669             continue;
2670         FIND_FIRST_ZERO;
2671         STARTCODE_TEST;
2672         i -= 3;
2673     }
2674 #endif /* HAVE_FAST_64BIT */
2675 #else
2676     for (i = 0; i + 1 < length; i += 2) {
2677         if (src[i])
2678             continue;
2679         if (i > 0 && src[i - 1] == 0)
2680             i--;
2681         STARTCODE_TEST;
2682     }
2683 #endif /* HAVE_FAST_UNALIGNED */
2684
2685     if (i >= length - 1) { // no escaped 0
2686         nal->data = src;
2687         nal->size = length;
2688         return length;
2689     }
2690
2691     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2692                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2693     if (!nal->rbsp_buffer)
2694         return AVERROR(ENOMEM);
2695
2696     dst = nal->rbsp_buffer;
2697
2698     memcpy(dst, src, i);
2699     si = di = i;
2700     while (si + 2 < length) {
2701         // remove escapes (very rare 1:2^22)
2702         if (src[si + 2] > 3) {
2703             dst[di++] = src[si++];
2704             dst[di++] = src[si++];
2705         } else if (src[si] == 0 && src[si + 1] == 0) {
2706             if (src[si + 2] == 3) { // escape
2707                 dst[di++] = 0;
2708                 dst[di++] = 0;
2709                 si       += 3;
2710
2711                 continue;
2712             } else // next start code
2713                 goto nsc;
2714         }
2715
2716         dst[di++] = src[si++];
2717     }
2718     while (si < length)
2719         dst[di++] = src[si++];
2720
2721 nsc:
2722     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2723
2724     nal->data = dst;
2725     nal->size = di;
2726     return si;
2727 }
2728
2729 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2730 {
2731     int i, consumed, ret = 0;
2732
2733     s->ref = NULL;
2734     s->eos = 0;
2735
2736     /* split the input packet into NAL units, so we know the upper bound on the
2737      * number of slices in the frame */
2738     s->nb_nals = 0;
2739     while (length >= 4) {
2740         HEVCNAL *nal;
2741         int extract_length = 0;
2742
2743         if (s->is_nalff) {
2744             int i;
2745             for (i = 0; i < s->nal_length_size; i++)
2746                 extract_length = (extract_length << 8) | buf[i];
2747             buf    += s->nal_length_size;
2748             length -= s->nal_length_size;
2749
2750             if (extract_length > length) {
2751                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2752                 ret = AVERROR_INVALIDDATA;
2753                 goto fail;
2754             }
2755         } else {
2756             if (buf[2] == 0) {
2757                 length--;
2758                 buf++;
2759                 continue;
2760             }
2761             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2762                 ret = AVERROR_INVALIDDATA;
2763                 goto fail;
2764             }
2765
2766             buf           += 3;
2767             length        -= 3;
2768             extract_length = length;
2769         }
2770
2771         if (s->nals_allocated < s->nb_nals + 1) {
2772             int new_size = s->nals_allocated + 1;
2773             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2774             if (!tmp) {
2775                 ret = AVERROR(ENOMEM);
2776                 goto fail;
2777             }
2778             s->nals = tmp;
2779             memset(s->nals + s->nals_allocated, 0,
2780                    (new_size - s->nals_allocated) * sizeof(*tmp));
2781             s->nals_allocated = new_size;
2782         }
2783         nal = &s->nals[s->nb_nals++];
2784
2785         consumed = extract_rbsp(buf, extract_length, nal);
2786         if (consumed < 0) {
2787             ret = consumed;
2788             goto fail;
2789         }
2790
2791         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2792         if (ret < 0)
2793             goto fail;
2794         hls_nal_unit(s);
2795
2796         if (s->nal_unit_type == NAL_EOB_NUT ||
2797             s->nal_unit_type == NAL_EOS_NUT)
2798             s->eos = 1;
2799
2800         buf    += consumed;
2801         length -= consumed;
2802     }
2803
2804     /* parse the NAL units */
2805     for (i = 0; i < s->nb_nals; i++) {
2806         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2807         if (ret < 0) {
2808             av_log(s->avctx, AV_LOG_WARNING,
2809                    "Error parsing NAL unit #%d.\n", i);
2810             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2811                 goto fail;
2812         }
2813     }
2814
2815 fail:
2816     if (s->ref)
2817         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2818
2819     return ret;
2820 }
2821
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without a
 * trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2828
2829 static int verify_md5(HEVCContext *s, AVFrame *frame)
2830 {
2831     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2832     int pixel_shift;
2833     int i, j;
2834
2835     if (!desc)
2836         return AVERROR(EINVAL);
2837
2838     pixel_shift = desc->comp[0].depth_minus1 > 7;
2839
2840     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2841            s->poc);
2842
2843     /* the checksums are LE, so we have to byteswap for >8bpp formats
2844      * on BE arches */
2845 #if HAVE_BIGENDIAN
2846     if (pixel_shift && !s->checksum_buf) {
2847         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2848                        FFMAX3(frame->linesize[0], frame->linesize[1],
2849                               frame->linesize[2]));
2850         if (!s->checksum_buf)
2851             return AVERROR(ENOMEM);
2852     }
2853 #endif
2854
2855     for (i = 0; frame->data[i]; i++) {
2856         int width  = s->avctx->coded_width;
2857         int height = s->avctx->coded_height;
2858         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2859         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2860         uint8_t md5[16];
2861
2862         av_md5_init(s->md5_ctx);
2863         for (j = 0; j < h; j++) {
2864             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2865 #if HAVE_BIGENDIAN
2866             if (pixel_shift) {
2867                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2868                                    (const uint16_t*)src, w);
2869                 src = s->checksum_buf;
2870             }
2871 #endif
2872             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2873         }
2874         av_md5_final(s->md5_ctx, md5);
2875
2876         if (!memcmp(md5, s->md5[i], 16)) {
2877             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2878             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2879             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2880         } else {
2881             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2882             print_md5(s->avctx, AV_LOG_ERROR, md5);
2883             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2884             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2885             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2886             return AVERROR_INVALIDDATA;
2887         }
2888     }
2889
2890     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2891
2892     return 0;
2893 }
2894
2895 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2896                              AVPacket *avpkt)
2897 {
2898     int ret;
2899     HEVCContext *s = avctx->priv_data;
2900
2901     if (!avpkt->size) {
2902         ret = ff_hevc_output_frame(s, data, 1);
2903         if (ret < 0)
2904             return ret;
2905
2906         *got_output = ret;
2907         return 0;
2908     }
2909
2910     s->ref = NULL;
2911     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2912     if (ret < 0)
2913         return ret;
2914
2915     /* verify the SEI checksum */
2916     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2917         s->is_md5) {
2918         ret = verify_md5(s, s->ref->frame);
2919         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2920             ff_hevc_unref_frame(s, s->ref, ~0);
2921             return ret;
2922         }
2923     }
2924     s->is_md5 = 0;
2925
2926     if (s->is_decoded) {
2927         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2928         s->is_decoded = 0;
2929     }
2930
2931     if (s->output_frame->buf[0]) {
2932         av_frame_move_ref(data, s->output_frame);
2933         *got_output = 1;
2934     }
2935
2936     return avpkt->size;
2937 }
2938
2939 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2940 {
2941     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2942     if (ret < 0)
2943         return ret;
2944
2945     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2946     if (!dst->tab_mvf_buf)
2947         goto fail;
2948     dst->tab_mvf = src->tab_mvf;
2949
2950     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2951     if (!dst->rpl_tab_buf)
2952         goto fail;
2953     dst->rpl_tab = src->rpl_tab;
2954
2955     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2956     if (!dst->rpl_buf)
2957         goto fail;
2958
2959     dst->poc        = src->poc;
2960     dst->ctb_count  = src->ctb_count;
2961     dst->window     = src->window;
2962     dst->flags      = src->flags;
2963     dst->sequence   = src->sequence;
2964
2965     return 0;
2966 fail:
2967     ff_hevc_unref_frame(s, dst, ~0);
2968     return AVERROR(ENOMEM);
2969 }
2970
2971 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2972 {
2973     HEVCContext       *s = avctx->priv_data;
2974     int i;
2975
2976     pic_arrays_free(s);
2977
2978     av_freep(&s->md5_ctx);
2979
2980     av_frame_free(&s->tmp_frame);
2981     av_frame_free(&s->output_frame);
2982
2983     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2984         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2985         av_frame_free(&s->DPB[i].frame);
2986     }
2987
2988     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2989         av_buffer_unref(&s->vps_list[i]);
2990     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2991         av_buffer_unref(&s->sps_list[i]);
2992     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2993         av_buffer_unref(&s->pps_list[i]);
2994
2995     for (i = 0; i < s->nals_allocated; i++)
2996         av_freep(&s->nals[i].rbsp_buffer);
2997     av_freep(&s->nals);
2998     s->nals_allocated = 0;
2999
3000     return 0;
3001 }
3002
3003 static av_cold int hevc_init_context(AVCodecContext *avctx)
3004 {
3005     HEVCContext *s = avctx->priv_data;
3006     int i;
3007
3008     s->avctx = avctx;
3009
3010     s->tmp_frame = av_frame_alloc();
3011     if (!s->tmp_frame)
3012         goto fail;
3013
3014     s->output_frame = av_frame_alloc();
3015     if (!s->output_frame)
3016         goto fail;
3017
3018     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3019         s->DPB[i].frame = av_frame_alloc();
3020         if (!s->DPB[i].frame)
3021             goto fail;
3022         s->DPB[i].tf.f = s->DPB[i].frame;
3023     }
3024
3025     s->max_ra = INT_MAX;
3026
3027     s->md5_ctx = av_md5_alloc();
3028     if (!s->md5_ctx)
3029         goto fail;
3030
3031     ff_dsputil_init(&s->dsp, avctx);
3032
3033     s->context_initialized = 1;
3034
3035     return 0;
3036
3037 fail:
3038     hevc_decode_free(avctx);
3039     return AVERROR(ENOMEM);
3040 }
3041
3042 static int hevc_update_thread_context(AVCodecContext *dst,
3043                                       const AVCodecContext *src)
3044 {
3045     HEVCContext *s  = dst->priv_data;
3046     HEVCContext *s0 = src->priv_data;
3047     int i, ret;
3048
3049     if (!s->context_initialized) {
3050         ret = hevc_init_context(dst);
3051         if (ret < 0)
3052             return ret;
3053     }
3054
3055     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3056         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3057         if (s0->DPB[i].frame->buf[0]) {
3058             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3059             if (ret < 0)
3060                 return ret;
3061         }
3062     }
3063
3064     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3065         av_buffer_unref(&s->vps_list[i]);
3066         if (s0->vps_list[i]) {
3067             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3068             if (!s->vps_list[i])
3069                 return AVERROR(ENOMEM);
3070         }
3071     }
3072
3073     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3074         av_buffer_unref(&s->sps_list[i]);
3075         if (s0->sps_list[i]) {
3076             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3077             if (!s->sps_list[i])
3078                 return AVERROR(ENOMEM);
3079         }
3080     }
3081
3082     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3083         av_buffer_unref(&s->pps_list[i]);
3084         if (s0->pps_list[i]) {
3085             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3086             if (!s->pps_list[i])
3087                 return AVERROR(ENOMEM);
3088         }
3089     }
3090
3091     if (s->sps != s0->sps)
3092         ret = set_sps(s, s0->sps);
3093
3094     s->seq_decode = s0->seq_decode;
3095     s->seq_output = s0->seq_output;
3096     s->pocTid0    = s0->pocTid0;
3097     s->max_ra     = s0->max_ra;
3098
3099     s->is_nalff        = s0->is_nalff;
3100     s->nal_length_size = s0->nal_length_size;
3101
3102     if (s0->eos) {
3103         s->seq_decode = (s->seq_decode + 1) & 0xff;
3104         s->max_ra = INT_MAX;
3105     }
3106
3107     return 0;
3108 }
3109
3110 static int hevc_decode_extradata(HEVCContext *s)
3111 {
3112     AVCodecContext *avctx = s->avctx;
3113     GetByteContext gb;
3114     int ret;
3115
3116     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3117
3118     if (avctx->extradata_size > 3 &&
3119         (avctx->extradata[0] || avctx->extradata[1] ||
3120          avctx->extradata[2] > 1)) {
3121         /* It seems the extradata is encoded as hvcC format.
3122          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3123          * is finalized. When finalized, configurationVersion will be 1 and we
3124          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3125         int i, j, num_arrays, nal_len_size;
3126
3127         s->is_nalff = 1;
3128
3129         bytestream2_skip(&gb, 21);
3130         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3131         num_arrays   = bytestream2_get_byte(&gb);
3132
3133         /* nal units in the hvcC always have length coded with 2 bytes,
3134          * so put a fake nal_length_size = 2 while parsing them */
3135         s->nal_length_size = 2;
3136
3137         /* Decode nal units from hvcC. */
3138         for (i = 0; i < num_arrays; i++) {
3139             int type = bytestream2_get_byte(&gb) & 0x3f;
3140             int cnt  = bytestream2_get_be16(&gb);
3141
3142             for (j = 0; j < cnt; j++) {
3143                 // +2 for the nal size field
3144                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3145                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3146                     av_log(s->avctx, AV_LOG_ERROR,
3147                            "Invalid NAL unit size in extradata.\n");
3148                     return AVERROR_INVALIDDATA;
3149                 }
3150
3151                 ret = decode_nal_units(s, gb.buffer, nalsize);
3152                 if (ret < 0) {
3153                     av_log(avctx, AV_LOG_ERROR,
3154                            "Decoding nal unit %d %d from hvcC failed\n",
3155                            type, i);
3156                     return ret;
3157                 }
3158                 bytestream2_skip(&gb, nalsize);
3159             }
3160         }
3161
3162         /* Now store right nal length size, that will be used to parse
3163          * all other nals */
3164         s->nal_length_size = nal_len_size;
3165     } else {
3166         s->is_nalff = 0;
3167         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3168         if (ret < 0)
3169             return ret;
3170     }
3171     return 0;
3172 }
3173
3174 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3175 {
3176     HEVCContext *s = avctx->priv_data;
3177     int ret;
3178
3179     ff_init_cabac_states();
3180
3181     avctx->internal->allocate_progress = 1;
3182
3183     ret = hevc_init_context(avctx);
3184     if (ret < 0)
3185         return ret;
3186
3187     if (avctx->extradata_size > 0 && avctx->extradata) {
3188         ret = hevc_decode_extradata(s);
3189         if (ret < 0) {
3190             hevc_decode_free(avctx);
3191             return ret;
3192         }
3193     }
3194
3195     return 0;
3196 }
3197
3198 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3199 {
3200     HEVCContext *s = avctx->priv_data;
3201     int ret;
3202
3203     memset(s, 0, sizeof(*s));
3204
3205     ret = hevc_init_context(avctx);
3206     if (ret < 0)
3207         return ret;
3208
3209     return 0;
3210 }
3211
3212 static void hevc_decode_flush(AVCodecContext *avctx)
3213 {
3214     HEVCContext *s = avctx->priv_data;
3215     ff_hevc_flush_dpb(s);
3216     s->max_ra = INT_MAX;
3217 }
3218
/* Byte offset of an option field inside HEVCContext. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flags shared by the decoder options: video decoding parameters. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3221
3222 static const AVProfile profiles[] = {
3223     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3224     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3225     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3226     { FF_PROFILE_UNKNOWN },
3227 };
3228
3229 static const AVOption options[] = {
3230     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3231         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3232     { NULL },
3233 };
3234
3235 static const AVClass hevc_decoder_class = {
3236     .class_name = "HEVC decoder",
3237     .item_name  = av_default_item_name,
3238     .option     = options,
3239     .version    = LIBAVUTIL_VERSION_INT,
3240 };
3241
3242 AVCodec ff_hevc_decoder = {
3243     .name                  = "hevc",
3244     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3245     .type                  = AVMEDIA_TYPE_VIDEO,
3246     .id                    = AV_CODEC_ID_HEVC,
3247     .priv_data_size        = sizeof(HEVCContext),
3248     .priv_class            = &hevc_decoder_class,
3249     .init                  = hevc_decode_init,
3250     .close                 = hevc_decode_free,
3251     .decode                = hevc_decode_frame,
3252     .flush                 = hevc_decode_flush,
3253     .update_thread_context = hevc_update_thread_context,
3254     .init_thread_copy      = hevc_init_thread_copy,
3255     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3256                              CODEC_CAP_FRAME_THREADS,
3257     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3258 };