git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  86     0, 0, 1, 0,
  87     1, 2, 0, 1,
  88     2, 3, 1, 2,
  89     3, 2, 3, 3,
  90 };
  91
  92 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  93     0, 1, 0, 2,
  94     1, 0, 3, 2,
  95     1, 0, 3, 2,
  96     1, 3, 2, 3,
  97 };
  98
  99 static const uint8_t diag_scan4x4_inv[4][4] = {
 100     { 0,  2,  5,  9, },
 101     { 1,  4,  8, 12, },
 102     { 3,  7, 11, 14, },
 103     { 6, 10, 13, 15, },
 104 };
 105
 106 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 107     0, 0, 1, 0,
 108     1, 2, 0, 1,
 109     2, 3, 0, 1,
 110     2, 3, 4, 0,
 111     1, 2, 3, 4,
 112     5, 0, 1, 2,
 113     3, 4, 5, 6,
 114     0, 1, 2, 3,
 115     4, 5, 6, 7,
 116     1, 2, 3, 4,
 117     5, 6, 7, 2,
 118     3, 4, 5, 6,
 119     7, 3, 4, 5,
 120     6, 7, 4, 5,
 121     6, 7, 5, 6,
 122     7, 6, 7, 7,
 123 };
 124
 125 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 126     0, 1, 0, 2,
 127     1, 0, 3, 2,
 128     1, 0, 4, 3,
 129     2, 1, 0, 5,
 130     4, 3, 2, 1,
 131     0, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 0,
 135     7, 6, 5, 4,
 136     3, 2, 1, 7,
 137     6, 5, 4, 3,
 138     2, 7, 6, 5,
 139     4, 3, 7, 6,
 140     5, 4, 7, 6,
 141     5, 7, 6, 7,
 142 };
 143
 144 static const uint8_t diag_scan8x8_inv[8][8] = {
 145     {  0,  2,  5,  9, 14, 20, 27, 35, },
 146     {  1,  4,  8, 13, 19, 26, 34, 42, },
 147     {  3,  7, 12, 18, 25, 33, 41, 48, },
 148     {  6, 11, 17, 24, 32, 40, 47, 53, },
 149     { 10, 16, 23, 31, 39, 46, 52, 57, },
 150     { 15, 22, 30, 38, 45, 51, 56, 60, },
 151     { 21, 29, 37, 44, 50, 55, 59, 62, },
 152     { 28, 36, 43, 49, 54, 58, 61, 63, },
 153 };
 154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     if (!s->sao || !s->deblock)
 205         goto fail;
 206
 207     s->skip_flag    = av_malloc(pic_size_in_ctb);
 208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 209     if (!s->skip_flag || !s->tab_ct_depth)
 210         goto fail;
 211
 212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 213     s->tab_ipm  = av_mallocz(min_pu_size);
 214     s->is_pcm   = av_malloc(min_pu_size);
 215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 216         goto fail;
 217
 218     s->filter_slice_edges = av_malloc(ctb_count);
 219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 220                                       sizeof(*s->tab_slice_address));
 221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->qp_y_tab));
 223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 224         goto fail;
 225
 226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     if (!s->horizontal_bs || !s->vertical_bs)
 229         goto fail;
 230
 231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 232                                           av_buffer_alloc);
 233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 234                                           av_buffer_allocz);
 235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 236         goto fail;
 237
 238     return 0;
 239
 240 fail:
 241     pic_arrays_free(s);
 242     return AVERROR(ENOMEM);
 243 }
 244
 245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 246 {
 247     int i = 0;
 248     int j = 0;
 249     uint8_t luma_weight_l0_flag[16];
 250     uint8_t chroma_weight_l0_flag[16];
 251     uint8_t luma_weight_l1_flag[16];
 252     uint8_t chroma_weight_l1_flag[16];
 253
 254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 255     if (s->sps->chroma_format_idc != 0) {
 256         int delta = get_se_golomb(gb);
 257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 258     }
 259
 260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 261         luma_weight_l0_flag[i] = get_bits1(gb);
 262         if (!luma_weight_l0_flag[i]) {
 263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 264             s->sh.luma_offset_l0[i] = 0;
 265         }
 266     }
 267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 269             chroma_weight_l0_flag[i] = get_bits1(gb);
 270     } else {
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = 0;
 273     }
 274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 275         if (luma_weight_l0_flag[i]) {
 276             int delta_luma_weight_l0 = get_se_golomb(gb);
 277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 279         }
 280         if (chroma_weight_l0_flag[i]) {
 281             for (j = 0; j < 2; j++) {
 282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 287             }
 288         } else {
 289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 290             s->sh.chroma_offset_l0[i][0] = 0;
 291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][1] = 0;
 293         }
 294     }
 295     if (s->sh.slice_type == B_SLICE) {
 296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 297             luma_weight_l1_flag[i] = get_bits1(gb);
 298             if (!luma_weight_l1_flag[i]) {
 299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 300                 s->sh.luma_offset_l1[i] = 0;
 301             }
 302         }
 303         if (s->sps->chroma_format_idc != 0) {
 304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 305                 chroma_weight_l1_flag[i] = get_bits1(gb);
 306         } else {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = 0;
 309         }
 310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 311             if (luma_weight_l1_flag[i]) {
 312                 int delta_luma_weight_l1 = get_se_golomb(gb);
 313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 315             }
 316             if (chroma_weight_l1_flag[i]) {
 317                 for (j = 0; j < 2; j++) {
 318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 323                 }
 324             } else {
 325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 326                 s->sh.chroma_offset_l1[i][0] = 0;
 327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][1] = 0;
 329             }
 330         }
 331     }
 332 }
 333
 334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 335 {
 336     const HEVCSPS *sps = s->sps;
 337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 338     int prev_delta_msb = 0;
 339     unsigned int nb_sps = 0, nb_sh;
 340     int i;
 341
 342     rps->nb_refs = 0;
 343     if (!sps->long_term_ref_pics_present_flag)
 344         return 0;
 345
 346     if (sps->num_long_term_ref_pics_sps > 0)
 347         nb_sps = get_ue_golomb_long(gb);
 348     nb_sh = get_ue_golomb_long(gb);
 349
 350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 351         return AVERROR_INVALIDDATA;
 352
 353     rps->nb_refs = nb_sh + nb_sps;
 354
 355     for (i = 0; i < rps->nb_refs; i++) {
 356         uint8_t delta_poc_msb_present;
 357
 358         if (i < nb_sps) {
 359             uint8_t lt_idx_sps = 0;
 360
 361             if (sps->num_long_term_ref_pics_sps > 1)
 362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 363
 364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 366         } else {
 367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 368             rps->used[i] = get_bits1(gb);
 369         }
 370
 371         delta_poc_msb_present = get_bits1(gb);
 372         if (delta_poc_msb_present) {
 373             int delta = get_ue_golomb_long(gb);
 374
 375             if (i && i != nb_sps)
 376                 delta += prev_delta_msb;
 377
 378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 379             prev_delta_msb = delta;
 380         }
 381     }
 382
 383     return 0;
 384 }
 385
 386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 387 {
 388     int ret;
 389     unsigned int num = 0, den = 0;
 390
 391     pic_arrays_free(s);
 392     ret = pic_arrays_init(s, sps);
 393     if (ret < 0)
 394         goto fail;
 395
 396     s->avctx->coded_width         = sps->width;
 397     s->avctx->coded_height        = sps->height;
 398     s->avctx->width               = sps->output_width;
 399     s->avctx->height              = sps->output_height;
 400     s->avctx->pix_fmt             = sps->pix_fmt;
 401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 402
 403     ff_set_sar(s->avctx, sps->vui.sar);
 404
 405     if (sps->vui.video_signal_type_present_flag)
 406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 407                                                                : AVCOL_RANGE_MPEG;
 408     else
 409         s->avctx->color_range = AVCOL_RANGE_MPEG;
 410
 411     if (sps->vui.colour_description_present_flag) {
 412         s->avctx->color_primaries = sps->vui.colour_primaries;
 413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 415     } else {
 416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 419     }
 420
 421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 424
 425     if (sps->sao_enabled) {
 426         av_frame_unref(s->tmp_frame);
 427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 428         if (ret < 0)
 429             goto fail;
 430         s->frame = s->tmp_frame;
 431     }
 432
 433     s->sps = sps;
 434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 435
 436     if (s->vps->vps_timing_info_present_flag) {
 437         num = s->vps->vps_num_units_in_tick;
 438         den = s->vps->vps_time_scale;
 439     } else if (sps->vui.vui_timing_info_present_flag) {
 440         num = sps->vui.vui_num_units_in_tick;
 441         den = sps->vui.vui_time_scale;
 442     }
 443
 444     if (num != 0 && den != 0)
 445         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 446                   num, den, 1 << 30);
 447
 448     return 0;
 449
 450 fail:
 451     pic_arrays_free(s);
 452     s->sps = NULL;
 453     return ret;
 454 }
 455
 456 static int hls_slice_header(HEVCContext *s)
 457 {
 458     GetBitContext *gb = &s->HEVClc.gb;
 459     SliceHeader *sh   = &s->sh;
 460     int i, ret;
 461
 462     // Coded parameters
 463     sh->first_slice_in_pic_flag = get_bits1(gb);
 464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 465         s->seq_decode = (s->seq_decode + 1) & 0xff;
 466         s->max_ra     = INT_MAX;
 467         if (IS_IDR(s))
 468             ff_hevc_clear_refs(s);
 469     }
 470     if (IS_IRAP(s))
 471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 472
 473     sh->pps_id = get_ue_golomb_long(gb);
 474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 476         return AVERROR_INVALIDDATA;
 477     }
 478     if (!sh->first_slice_in_pic_flag &&
 479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 481         return AVERROR_INVALIDDATA;
 482     }
 483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 484
 485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 487
 488         ff_hevc_clear_refs(s);
 489         ret = set_sps(s, s->sps);
 490         if (ret < 0)
 491             return ret;
 492
 493         s->seq_decode = (s->seq_decode + 1) & 0xff;
 494         s->max_ra     = INT_MAX;
 495     }
 496
 497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 499
 500     sh->dependent_slice_segment_flag = 0;
 501     if (!sh->first_slice_in_pic_flag) {
 502         int slice_address_length;
 503
 504         if (s->pps->dependent_slice_segments_enabled_flag)
 505             sh->dependent_slice_segment_flag = get_bits1(gb);
 506
 507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 508                                             s->sps->ctb_height);
 509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 511             av_log(s->avctx, AV_LOG_ERROR,
 512                    "Invalid slice segment address: %u.\n",
 513                    sh->slice_segment_addr);
 514             return AVERROR_INVALIDDATA;
 515         }
 516
 517         if (!sh->dependent_slice_segment_flag) {
 518             sh->slice_addr = sh->slice_segment_addr;
 519             s->slice_idx++;
 520         }
 521     } else {
 522         sh->slice_segment_addr = sh->slice_addr = 0;
 523         s->slice_idx           = 0;
 524         s->slice_initialized   = 0;
 525     }
 526
 527     if (!sh->dependent_slice_segment_flag) {
 528         s->slice_initialized = 0;
 529
 530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 532
 533         sh->slice_type = get_ue_golomb_long(gb);
 534         if (!(sh->slice_type == I_SLICE ||
 535               sh->slice_type == P_SLICE ||
 536               sh->slice_type == B_SLICE)) {
 537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 538                    sh->slice_type);
 539             return AVERROR_INVALIDDATA;
 540         }
 541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 543             return AVERROR_INVALIDDATA;
 544         }
 545
 546         // when flag is not present, picture is inferred to be output
 547         sh->pic_output_flag = 1;
 548         if (s->pps->output_flag_present_flag)
 549             sh->pic_output_flag = get_bits1(gb);
 550
 551         if (s->sps->separate_colour_plane_flag)
 552             sh->colour_plane_id = get_bits(gb, 2);
 553
 554         if (!IS_IDR(s)) {
 555             int short_term_ref_pic_set_sps_flag, poc;
 556
 557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 560                 av_log(s->avctx, AV_LOG_WARNING,
 561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 563                     return AVERROR_INVALIDDATA;
 564                 poc = s->poc;
 565             }
 566             s->poc = poc;
 567
 568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 569             if (!short_term_ref_pic_set_sps_flag) {
 570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 571                 if (ret < 0)
 572                     return ret;
 573
 574                 sh->short_term_rps = &sh->slice_rps;
 575             } else {
 576                 int numbits, rps_idx;
 577
 578                 if (!s->sps->nb_st_rps) {
 579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 580                     return AVERROR_INVALIDDATA;
 581                 }
 582
 583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 586             }
 587
 588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 589             if (ret < 0) {
 590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 592                     return AVERROR_INVALIDDATA;
 593             }
 594
 595             if (s->sps->sps_temporal_mvp_enabled_flag)
 596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 597             else
 598                 sh->slice_temporal_mvp_enabled_flag = 0;
 599         } else {
 600             s->sh.short_term_rps = NULL;
 601             s->poc               = 0;
 602         }
 603
 604         /* 8.3.1 */
 605         if (s->temporal_id == 0 &&
 606             s->nal_unit_type != NAL_TRAIL_N &&
 607             s->nal_unit_type != NAL_TSA_N   &&
 608             s->nal_unit_type != NAL_STSA_N  &&
 609             s->nal_unit_type != NAL_RADL_N  &&
 610             s->nal_unit_type != NAL_RADL_R  &&
 611             s->nal_unit_type != NAL_RASL_N  &&
 612             s->nal_unit_type != NAL_RASL_R)
 613             s->pocTid0 = s->poc;
 614
 615         if (s->sps->sao_enabled) {
 616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 617             sh->slice_sample_adaptive_offset_flag[1] =
 618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 619         } else {
 620             sh->slice_sample_adaptive_offset_flag[0] = 0;
 621             sh->slice_sample_adaptive_offset_flag[1] = 0;
 622             sh->slice_sample_adaptive_offset_flag[2] = 0;
 623         }
 624
 625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 627             int nb_refs;
 628
 629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 630             if (sh->slice_type == B_SLICE)
 631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 632
 633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 635                 if (sh->slice_type == B_SLICE)
 636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 637             }
 638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 640                        sh->nb_refs[L0], sh->nb_refs[L1]);
 641                 return AVERROR_INVALIDDATA;
 642             }
 643
 644             sh->rpl_modification_flag[0] = 0;
 645             sh->rpl_modification_flag[1] = 0;
 646             nb_refs = ff_hevc_frame_nb_refs(s);
 647             if (!nb_refs) {
 648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 649                 return AVERROR_INVALIDDATA;
 650             }
 651
 652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 653                 sh->rpl_modification_flag[0] = get_bits1(gb);
 654                 if (sh->rpl_modification_flag[0]) {
 655                     for (i = 0; i < sh->nb_refs[L0]; i++)
 656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 657                 }
 658
 659                 if (sh->slice_type == B_SLICE) {
 660                     sh->rpl_modification_flag[1] = get_bits1(gb);
 661                     if (sh->rpl_modification_flag[1] == 1)
 662                         for (i = 0; i < sh->nb_refs[L1]; i++)
 663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 664                 }
 665             }
 666
 667             if (sh->slice_type == B_SLICE)
 668                 sh->mvd_l1_zero_flag = get_bits1(gb);
 669
 670             if (s->pps->cabac_init_present_flag)
 671                 sh->cabac_init_flag = get_bits1(gb);
 672             else
 673                 sh->cabac_init_flag = 0;
 674
 675             sh->collocated_ref_idx = 0;
 676             if (sh->slice_temporal_mvp_enabled_flag) {
 677                 sh->collocated_list = L0;
 678                 if (sh->slice_type == B_SLICE)
 679                     sh->collocated_list = !get_bits1(gb);
 680
 681                 if (sh->nb_refs[sh->collocated_list] > 1) {
 682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 684                         av_log(s->avctx, AV_LOG_ERROR,
 685                                "Invalid collocated_ref_idx: %d.\n",
 686                                sh->collocated_ref_idx);
 687                         return AVERROR_INVALIDDATA;
 688                     }
 689                 }
 690             }
 691
 692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 694                 pred_weight_table(s, gb);
 695             }
 696
 697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 699                 av_log(s->avctx, AV_LOG_ERROR,
 700                        "Invalid number of merging MVP candidates: %d.\n",
 701                        sh->max_num_merge_cand);
 702                 return AVERROR_INVALIDDATA;
 703             }
 704         }
 705
 706         sh->slice_qp_delta = get_se_golomb(gb);
 707
 708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 709             sh->slice_cb_qp_offset = get_se_golomb(gb);
 710             sh->slice_cr_qp_offset = get_se_golomb(gb);
 711         } else {
 712             sh->slice_cb_qp_offset = 0;
 713             sh->slice_cr_qp_offset = 0;
 714         }
 715
 716         if (s->pps->deblocking_filter_control_present_flag) {
 717             int deblocking_filter_override_flag = 0;
 718
 719             if (s->pps->deblocking_filter_override_enabled_flag)
 720                 deblocking_filter_override_flag = get_bits1(gb);
 721
 722             if (deblocking_filter_override_flag) {
 723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 724                 if (!sh->disable_deblocking_filter_flag) {
 725                     sh->beta_offset = get_se_golomb(gb) * 2;
 726                     sh->tc_offset   = get_se_golomb(gb) * 2;
 727                 }
 728             } else {
 729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 730                 sh->beta_offset                    = s->pps->beta_offset;
 731                 sh->tc_offset                      = s->pps->tc_offset;
 732             }
 733         } else {
 734             sh->disable_deblocking_filter_flag = 0;
 735             sh->beta_offset                    = 0;
 736             sh->tc_offset                      = 0;
 737         }
 738
 739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 740             (sh->slice_sample_adaptive_offset_flag[0] ||
 741              sh->slice_sample_adaptive_offset_flag[1] ||
 742              !sh->disable_deblocking_filter_flag)) {
 743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 744         } else {
 745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 746         }
 747     } else if (!s->slice_initialized) {
 748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 749         return AVERROR_INVALIDDATA;
 750     }
 751
 752     sh->num_entry_point_offsets = 0;
 753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 755         if (sh->num_entry_point_offsets > 0) {
 756             int offset_len = get_ue_golomb_long(gb) + 1;
 757
 758             for (i = 0; i < sh->num_entry_point_offsets; i++)
 759                 skip_bits(gb, offset_len);
 760         }
 761     }
 762
 763     if (s->pps->slice_header_extension_present_flag) {
 764         unsigned int length = get_ue_golomb_long(gb);
 765         for (i = 0; i < length; i++)
 766             skip_bits(gb, 8);  // slice_header_extension_data_byte
 767     }
 768
 769     // Inferred parameters
 770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 771     if (sh->slice_qp > 51 ||
 772         sh->slice_qp < -s->sps->qp_bd_offset) {
 773         av_log(s->avctx, AV_LOG_ERROR,
 774                "The slice_qp %d is outside the valid range "
 775                "[%d, 51].\n",
 776                sh->slice_qp,
 777                -s->sps->qp_bd_offset);
 778         return AVERROR_INVALIDDATA;
 779     }
 780
 781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 782
 783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 785         return AVERROR_INVALIDDATA;
 786     }
 787
 788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 789
 790     if (!s->pps->cu_qp_delta_enabled_flag)
 791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 793
 794     s->slice_initialized = 1;
 795
 796     return 0;
 797 }
 798
 799 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 800
 801 #define SET_SAO(elem, value)                            \
 802 do {                                                    \
 803     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 804         sao->elem = value;                              \
 805     else if (sao_merge_left_flag)                       \
 806         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 807     else if (sao_merge_up_flag)                         \
 808         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 809     else                                                \
 810         sao->elem = 0;                                  \
 811 } while (0)
 812
 813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 814 {
 815     HEVCLocalContext *lc    = &s->HEVClc;
 816     int sao_merge_left_flag = 0;
 817     int sao_merge_up_flag   = 0;
 818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 819     SAOParams *sao          = &CTB(s->sao, rx, ry);
 820     int c_idx, i;
 821
 822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 823         s->sh.slice_sample_adaptive_offset_flag[1]) {
 824         if (rx > 0) {
 825             if (lc->ctb_left_flag)
 826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 827         }
 828         if (ry > 0 && !sao_merge_left_flag) {
 829             if (lc->ctb_up_flag)
 830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 831         }
 832     }
 833
 834     for (c_idx = 0; c_idx < 3; c_idx++) {
 835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 837             continue;
 838         }
 839
 840         if (c_idx == 2) {
 841             sao->type_idx[2] = sao->type_idx[1];
 842             sao->eo_class[2] = sao->eo_class[1];
 843         } else {
 844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 845         }
 846
 847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 848             continue;
 849
 850         for (i = 0; i < 4; i++)
 851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 852
 853         if (sao->type_idx[c_idx] == SAO_BAND) {
 854             for (i = 0; i < 4; i++) {
 855                 if (sao->offset_abs[c_idx][i]) {
 856                     SET_SAO(offset_sign[c_idx][i],
 857                             ff_hevc_sao_offset_sign_decode(s));
 858                 } else {
 859                     sao->offset_sign[c_idx][i] = 0;
 860                 }
 861             }
 862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 863         } else if (c_idx != 2) {
 864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 865         }
 866
 867         // Inferred parameters
 868         sao->offset_val[c_idx][0] = 0;
 869         for (i = 0; i < 4; i++) {
 870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 871             if (sao->type_idx[c_idx] == SAO_EDGE) {
 872                 if (i > 1)
 873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 874             } else if (sao->offset_sign[c_idx][i]) {
 875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 876             }
 877         }
 878     }
 879 }
 880
 881 #undef SET_SAO
 882 #undef CTB
 883
 884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 885                                 int log2_trafo_size, enum ScanType scan_idx,
 886                                 int c_idx)
 887 {
 888 #define GET_COORD(offset, n)                                    \
 889     do {                                                        \
 890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 892     } while (0)
 893     HEVCLocalContext *lc    = &s->HEVClc;
 894     int transform_skip_flag = 0;
 895
 896     int last_significant_coeff_x, last_significant_coeff_y;
 897     int last_scan_pos;
 898     int n_end;
 899     int num_coeff    = 0;
 900     int greater1_ctx = 1;
 901
 902     int num_last_subset;
 903     int x_cg_last_sig, y_cg_last_sig;
 904
 905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 906
 907     ptrdiff_t stride = s->frame->linesize[c_idx];
 908     int hshift       = s->sps->hshift[c_idx];
 909     int vshift       = s->sps->vshift[c_idx];
 910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 914
 915     int trafo_size = 1 << log2_trafo_size;
 916     int i, qp, shift, add, scale, scale_m;
 917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 918     const uint8_t *scale_matrix;
 919     uint8_t dc_scale;
 920
 921     // Derive QP for dequant
 922     if (!lc->cu.cu_transquant_bypass_flag) {
 923         static const int qp_c[] = {
 924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 925         };
 926
 927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 931         };
 932
 933         static const uint8_t div6[51 + 2 * 6 + 1] = {
 934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 937         };
 938         int qp_y = lc->qp_y;
 939
 940         if (c_idx == 0) {
 941             qp = qp_y + s->sps->qp_bd_offset;
 942         } else {
 943             int qp_i, offset;
 944
 945             if (c_idx == 1)
 946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 947             else
 948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 949
 950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 951             if (qp_i < 30)
 952                 qp = qp_i;
 953             else if (qp_i > 43)
 954                 qp = qp_i - 6;
 955             else
 956                 qp = qp_c[qp_i - 30];
 957
 958             qp += s->sps->qp_bd_offset;
 959         }
 960
 961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 962         add      = 1 << (shift - 1);
 963         scale    = level_scale[rem6[qp]] << (div6[qp]);
 964         scale_m  = 16; // default when no custom scaling lists.
 965         dc_scale = 16;
 966
 967         if (s->sps->scaling_list_enable_flag) {
 968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 969                                     &s->pps->scaling_list : &s->sps->scaling_list;
 970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 971
 972             if (log2_trafo_size != 5)
 973                 matrix_id = 3 * matrix_id + c_idx;
 974
 975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 976             if (log2_trafo_size >= 4)
 977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 978         }
 979     }
 980
 981     if (s->pps->transform_skip_enabled_flag &&
 982         !lc->cu.cu_transquant_bypass_flag   &&
 983         log2_trafo_size == 2) {
 984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 985     }
 986
 987     last_significant_coeff_x =
 988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 989     last_significant_coeff_y =
 990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 991
 992     if (last_significant_coeff_x > 3) {
 993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 995                                    (2 + (last_significant_coeff_x & 1)) +
 996                                    suffix;
 997     }
 998
 999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx)
1237 {
1238     HEVCLocalContext *lc = &s->HEVClc;
1239
1240     if (lc->cu.pred_mode == MODE_INTRA) {
1241         int trafo_size = 1 << log2_trafo_size;
1242         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1243
1244         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1245         if (log2_trafo_size > 2) {
1246             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1247             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1250         } else if (blk_idx == 3) {
1251             trafo_size = trafo_size << s->sps->hshift[1];
1252             ff_hevc_set_neighbour_available(s, xBase, yBase,
1253                                             trafo_size, trafo_size);
1254             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1256         }
1257     }
1258
1259     if (lc->tt.cbf_luma ||
1260         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1261         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1262         int scan_idx   = SCAN_DIAG;
1263         int scan_idx_c = SCAN_DIAG;
1264
1265         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1266             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1267             if (lc->tu.cu_qp_delta != 0)
1268                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1269                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1270             lc->tu.is_cu_qp_delta_coded = 1;
1271
1272             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1273                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1274                 av_log(s->avctx, AV_LOG_ERROR,
1275                        "The cu_qp_delta %d is outside the valid range "
1276                        "[%d, %d].\n",
1277                        lc->tu.cu_qp_delta,
1278                        -(26 + s->sps->qp_bd_offset / 2),
1279                         (25 + s->sps->qp_bd_offset / 2));
1280                 return AVERROR_INVALIDDATA;
1281             }
1282
1283             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1284         }
1285
1286         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1287             if (lc->tu.cur_intra_pred_mode >= 6 &&
1288                 lc->tu.cur_intra_pred_mode <= 14) {
1289                 scan_idx = SCAN_VERT;
1290             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1291                        lc->tu.cur_intra_pred_mode <= 30) {
1292                 scan_idx = SCAN_HORIZ;
1293             }
1294
1295             if (lc->pu.intra_pred_mode_c >=  6 &&
1296                 lc->pu.intra_pred_mode_c <= 14) {
1297                 scan_idx_c = SCAN_VERT;
1298             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1299                        lc->pu.intra_pred_mode_c <= 30) {
1300                 scan_idx_c = SCAN_HORIZ;
1301             }
1302         }
1303
1304         if (lc->tt.cbf_luma)
1305             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1306         if (log2_trafo_size > 2) {
1307             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1309             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1310                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1311         } else if (blk_idx == 3) {
1312             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1314             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1315                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1316         }
1317     }
1318     return 0;
1319 }
1320
1321 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1322 {
1323     int cb_size          = 1 << log2_cb_size;
1324     int log2_min_pu_size = s->sps->log2_min_pu_size;
1325
1326     int min_pu_width     = s->sps->min_pu_width;
1327     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1328     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1329     int i, j;
1330
1331     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1332         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1333             s->is_pcm[i + j * min_pu_width] = 2;
1334 }
1335
1336 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1337                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1338                               int log2_cb_size, int log2_trafo_size,
1339                               int trafo_depth, int blk_idx)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (trafo_depth > 0 && log2_trafo_size == 2) {
1346         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1347             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1348         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1349             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1350     } else {
1351         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1352         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1353     }
1354
1355     if (lc->cu.intra_split_flag) {
1356         if (trafo_depth == 1)
1357             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1358     } else {
1359         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1360     }
1361
1362     lc->tt.cbf_luma = 1;
1363
1364     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1365                               lc->cu.pred_mode == MODE_INTER &&
1366                               lc->cu.part_mode != PART_2Nx2N &&
1367                               trafo_depth == 0;
1368
1369     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1370         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1371         trafo_depth     < lc->cu.max_trafo_depth       &&
1372         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1373         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1374     } else {
1375         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1376                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1377                                lc->tt.inter_split_flag;
1378     }
1379
1380     if (log2_trafo_size > 2) {
1381         if (trafo_depth == 0 ||
1382             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1383             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1384                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1385         }
1386
1387         if (trafo_depth == 0 ||
1388             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1389             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1390                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1391         }
1392     }
1393
1394     if (split_transform_flag) {
1395         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1396         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1397
1398         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1399                                  log2_cb_size, log2_trafo_size - 1,
1400                                  trafo_depth + 1, 0);
1401         if (ret < 0)
1402             return ret;
1403         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1404                                  log2_cb_size, log2_trafo_size - 1,
1405                                  trafo_depth + 1, 1);
1406         if (ret < 0)
1407             return ret;
1408         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1409                                  log2_cb_size, log2_trafo_size - 1,
1410                                  trafo_depth + 1, 2);
1411         if (ret < 0)
1412             return ret;
1413         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1414                                  log2_cb_size, log2_trafo_size - 1,
1415                                  trafo_depth + 1, 3);
1416         if (ret < 0)
1417             return ret;
1418     } else {
1419         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1420         int log2_min_tu_size = s->sps->log2_min_tb_size;
1421         int min_tu_width     = s->sps->min_tb_width;
1422
1423         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1424             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1425             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1426             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1427         }
1428
1429         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1430                                  log2_cb_size, log2_trafo_size, trafo_depth,
1431                                  blk_idx);
1432         if (ret < 0)
1433             return ret;
1434         // TODO: store cbf_luma somewhere else
1435         if (lc->tt.cbf_luma) {
1436             int i, j;
1437             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1438                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1439                     int x_tu = (x0 + j) >> log2_min_tu_size;
1440                     int y_tu = (y0 + i) >> log2_min_tu_size;
1441                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1442                 }
1443         }
1444         if (!s->sh.disable_deblocking_filter_flag) {
1445             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1446                                                   lc->slice_or_tiles_up_boundary,
1447                                                   lc->slice_or_tiles_left_boundary);
1448             if (s->pps->transquant_bypass_enable_flag &&
1449                 lc->cu.cu_transquant_bypass_flag)
1450                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1451         }
1452     }
1453     return 0;
1454 }
1455
1456 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1457 {
1458     //TODO: non-4:2:0 support
1459     HEVCLocalContext *lc = &s->HEVClc;
1460     GetBitContext gb;
1461     int cb_size   = 1 << log2_cb_size;
1462     int stride0   = s->frame->linesize[0];
1463     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1464     int   stride1 = s->frame->linesize[1];
1465     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1466     int   stride2 = s->frame->linesize[2];
1467     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1468
1469     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1470     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1471     int ret;
1472
1473     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1474                                           lc->slice_or_tiles_up_boundary,
1475                                           lc->slice_or_tiles_left_boundary);
1476
1477     ret = init_get_bits(&gb, pcm, length);
1478     if (ret < 0)
1479         return ret;
1480
1481     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1482     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1483     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1484     return 0;
1485 }
1486
1487 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1488 {
1489     HEVCLocalContext *lc = &s->HEVClc;
1490     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1491     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1492
1493     if (x)
1494         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1495     if (y)
1496         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1497
1498     switch (x) {
1499     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1500     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1501     case 0: lc->pu.mvd.x = 0;                               break;
1502     }
1503
1504     switch (y) {
1505     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1506     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1507     case 0: lc->pu.mvd.y = 0;                               break;
1508     }
1509 }
1510
1511 /**
1512  * 8.5.3.2.2.1 Luma sample interpolation process
1513  *
1514  * @param s HEVC decoding context
1515  * @param dst target buffer for block data at block position
1516  * @param dststride stride of the dst buffer
1517  * @param ref reference picture buffer at origin (0, 0)
1518  * @param mv motion vector (relative to block position) to get pixel data from
1519  * @param x_off horizontal position of block from origin (0, 0)
1520  * @param y_off vertical position of block from origin (0, 0)
1521  * @param block_w width of block
1522  * @param block_h height of block
1523  */
1524 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1525                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1526                     int block_w, int block_h)
1527 {
1528     HEVCLocalContext *lc = &s->HEVClc;
1529     uint8_t *src         = ref->data[0];
1530     ptrdiff_t srcstride  = ref->linesize[0];
1531     int pic_width        = s->sps->width;
1532     int pic_height       = s->sps->height;
1533
1534     int mx         = mv->x & 3;
1535     int my         = mv->y & 3;
1536     int extra_left = ff_hevc_qpel_extra_before[mx];
1537     int extra_top  = ff_hevc_qpel_extra_before[my];
1538
1539     x_off += mv->x >> 2;
1540     y_off += mv->y >> 2;
1541     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1542
1543     if (x_off < extra_left || y_off < extra_top ||
1544         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1545         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1546         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1547         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1548         int buf_offset = extra_top *
1549                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1550
1551         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1552                                  edge_emu_stride, srcstride,
1553                                  block_w + ff_hevc_qpel_extra[mx],
1554                                  block_h + ff_hevc_qpel_extra[my],
1555                                  x_off - extra_left, y_off - extra_top,
1556                                  pic_width, pic_height);
1557         src = lc->edge_emu_buffer + buf_offset;
1558         srcstride = edge_emu_stride;
1559     }
1560     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1561                                      block_h, lc->mc_buffer);
1562 }
1563
1564 /**
1565  * 8.5.3.2.2.2 Chroma sample interpolation process
1566  *
1567  * @param s HEVC decoding context
1568  * @param dst1 target buffer for block data at block position (U plane)
1569  * @param dst2 target buffer for block data at block position (V plane)
1570  * @param dststride stride of the dst1 and dst2 buffers
1571  * @param ref reference picture buffer at origin (0, 0)
1572  * @param mv motion vector (relative to block position) to get pixel data from
1573  * @param x_off horizontal position of block from origin (0, 0)
1574  * @param y_off vertical position of block from origin (0, 0)
1575  * @param block_w width of block
1576  * @param block_h height of block
1577  */
1578 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1579                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1580                       int x_off, int y_off, int block_w, int block_h)
1581 {
1582     HEVCLocalContext *lc = &s->HEVClc;
1583     uint8_t *src1        = ref->data[1];
1584     uint8_t *src2        = ref->data[2];
1585     ptrdiff_t src1stride = ref->linesize[1];
1586     ptrdiff_t src2stride = ref->linesize[2];
1587     int pic_width        = s->sps->width >> 1;
1588     int pic_height       = s->sps->height >> 1;
1589
1590     int mx = mv->x & 7;
1591     int my = mv->y & 7;
1592
1593     x_off += mv->x >> 3;
1594     y_off += mv->y >> 3;
1595     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1596     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1597
1598     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1599         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1600         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1601         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1602         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1603         int buf_offset1 = EPEL_EXTRA_BEFORE *
1604                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1605         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1606         int buf_offset2 = EPEL_EXTRA_BEFORE *
1607                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1608
1609         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1610                                  edge_emu_stride, src1stride,
1611                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1612                                  x_off - EPEL_EXTRA_BEFORE,
1613                                  y_off - EPEL_EXTRA_BEFORE,
1614                                  pic_width, pic_height);
1615
1616         src1 = lc->edge_emu_buffer + buf_offset1;
1617         src1stride = edge_emu_stride;
1618         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1619                                              block_w, block_h, mx, my, lc->mc_buffer);
1620
1621         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1622                                  edge_emu_stride, src2stride,
1623                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1624                                  x_off - EPEL_EXTRA_BEFORE,
1625                                  y_off - EPEL_EXTRA_BEFORE,
1626                                  pic_width, pic_height);
1627         src2 = lc->edge_emu_buffer + buf_offset2;
1628         src2stride = edge_emu_stride;
1629
1630         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1631                                              block_w, block_h, mx, my,
1632                                              lc->mc_buffer);
1633     } else {
1634         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1635                                              block_w, block_h, mx, my,
1636                                              lc->mc_buffer);
1637         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1638                                              block_w, block_h, mx, my,
1639                                              lc->mc_buffer);
1640     }
1641 }
1642
1643 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1644                                 const Mv *mv, int y0, int height)
1645 {
1646     int y = (mv->y >> 2) + y0 + height + 9;
1647     ff_thread_await_progress(&ref->tf, y, 0);
1648 }
1649
1650 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1651                                 int nPbW, int nPbH,
1652                                 int log2_cb_size, int partIdx)
1653 {
1654 #define POS(c_idx, x, y)                                                              \
1655     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1656                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1657     HEVCLocalContext *lc = &s->HEVClc;
1658     int merge_idx = 0;
1659     struct MvField current_mv = {{{ 0 }}};
1660
1661     int min_pu_width = s->sps->min_pu_width;
1662
1663     MvField *tab_mvf = s->ref->tab_mvf;
1664     RefPicList  *refPicList = s->ref->refPicList;
1665     HEVCFrame *ref0, *ref1;
1666
1667     int tmpstride = MAX_PB_SIZE;
1668
1669     uint8_t *dst0 = POS(0, x0, y0);
1670     uint8_t *dst1 = POS(1, x0, y0);
1671     uint8_t *dst2 = POS(2, x0, y0);
1672     int log2_min_cb_size = s->sps->log2_min_cb_size;
1673     int min_cb_width     = s->sps->min_cb_width;
1674     int x_cb             = x0 >> log2_min_cb_size;
1675     int y_cb             = y0 >> log2_min_cb_size;
1676     int ref_idx[2];
1677     int mvp_flag[2];
1678     int x_pu, y_pu;
1679     int i, j;
1680
1681     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1682         if (s->sh.max_num_merge_cand > 1)
1683             merge_idx = ff_hevc_merge_idx_decode(s);
1684         else
1685             merge_idx = 0;
1686
1687         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1688                                    1 << log2_cb_size,
1689                                    1 << log2_cb_size,
1690                                    log2_cb_size, partIdx,
1691                                    merge_idx, &current_mv);
1692         x_pu = x0 >> s->sps->log2_min_pu_size;
1693         y_pu = y0 >> s->sps->log2_min_pu_size;
1694
1695         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1696             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1697                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1698     } else { /* MODE_INTER */
1699         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1700         if (lc->pu.merge_flag) {
1701             if (s->sh.max_num_merge_cand > 1)
1702                 merge_idx = ff_hevc_merge_idx_decode(s);
1703             else
1704                 merge_idx = 0;
1705
1706             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1707                                        partIdx, merge_idx, &current_mv);
1708             x_pu = x0 >> s->sps->log2_min_pu_size;
1709             y_pu = y0 >> s->sps->log2_min_pu_size;
1710
1711             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1712                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1713                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1714         } else {
1715             enum InterPredIdc inter_pred_idc = PRED_L0;
1716             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1717             if (s->sh.slice_type == B_SLICE)
1718                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1719
1720             if (inter_pred_idc != PRED_L1) {
1721                 if (s->sh.nb_refs[L0]) {
1722                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1723                     current_mv.ref_idx[0] = ref_idx[0];
1724                 }
1725                 current_mv.pred_flag[0] = 1;
1726                 hls_mvd_coding(s, x0, y0, 0);
1727                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1728                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1729                                          partIdx, merge_idx, &current_mv,
1730                                          mvp_flag[0], 0);
1731                 current_mv.mv[0].x += lc->pu.mvd.x;
1732                 current_mv.mv[0].y += lc->pu.mvd.y;
1733             }
1734
1735             if (inter_pred_idc != PRED_L0) {
1736                 if (s->sh.nb_refs[L1]) {
1737                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1738                     current_mv.ref_idx[1] = ref_idx[1];
1739                 }
1740
1741                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1742                     lc->pu.mvd.x = 0;
1743                     lc->pu.mvd.y = 0;
1744                 } else {
1745                     hls_mvd_coding(s, x0, y0, 1);
1746                 }
1747
1748                 current_mv.pred_flag[1] = 1;
1749                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1750                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1751                                          partIdx, merge_idx, &current_mv,
1752                                          mvp_flag[1], 1);
1753                 current_mv.mv[1].x += lc->pu.mvd.x;
1754                 current_mv.mv[1].y += lc->pu.mvd.y;
1755             }
1756
1757             x_pu = x0 >> s->sps->log2_min_pu_size;
1758             y_pu = y0 >> s->sps->log2_min_pu_size;
1759
1760             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1761                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1762                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1763         }
1764     }
1765
1766     if (current_mv.pred_flag[0]) {
1767         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1768         if (!ref0)
1769             return;
1770         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1771     }
1772     if (current_mv.pred_flag[1]) {
1773         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1774         if (!ref1)
1775             return;
1776         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1777     }
1778
1779     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1780         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1781         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1782
1783         luma_mc(s, tmp, tmpstride, ref0->frame,
1784                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1785
1786         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1787             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1788             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1789                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1790                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1791                                      dst0, s->frame->linesize[0], tmp,
1792                                      tmpstride, nPbW, nPbH);
1793         } else {
1794             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1795         }
1796         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1797                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1798
1799         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1800             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1801             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1802                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1803                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1804                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1805                                      nPbW / 2, nPbH / 2);
1806             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1807                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1808                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1809                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1810                                      nPbW / 2, nPbH / 2);
1811         } else {
1812             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1813             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1814         }
1815     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1816         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1817         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1818
1819         if (!ref1)
1820             return;
1821
1822         luma_mc(s, tmp, tmpstride, ref1->frame,
1823                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1824
1825         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1826             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1827             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1828                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1829                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1830                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1831                                       nPbW, nPbH);
1832         } else {
1833             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1834         }
1835
1836         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1837                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1838
1839         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1840             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1841             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1842                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1843                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1844                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1845             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1846                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1847                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1848                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1849         } else {
1850             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1851             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1852         }
1853     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1854         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1855         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1856         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1857         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1858         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1859         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1860
1861         if (!ref0 || !ref1)
1862             return;
1863
1864         luma_mc(s, tmp, tmpstride, ref0->frame,
1865                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1866         luma_mc(s, tmp2, tmpstride, ref1->frame,
1867                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1868
1869         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1870             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1871             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1872                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1873                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1874                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1875                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1876                                          dst0, s->frame->linesize[0],
1877                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1878         } else {
1879             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1880                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1881         }
1882
1883         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1884                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1885         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1886                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1887
1888         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1889             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1890             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1891                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1892                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1893                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1894                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1895                                          dst1, s->frame->linesize[1], tmp, tmp3,
1896                                          tmpstride, nPbW / 2, nPbH / 2);
1897             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1898                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1899                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1900                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1901                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1902                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1903                                          tmpstride, nPbW / 2, nPbH / 2);
1904         } else {
1905             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1906             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1907         }
1908     }
1909 }
1910
1911 /**
1912  * 8.4.1
1913  */
1914 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1915                                 int prev_intra_luma_pred_flag)
1916 {
1917     HEVCLocalContext *lc = &s->HEVClc;
1918     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1919     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1920     int min_pu_width     = s->sps->min_pu_width;
1921     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1922     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1923     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1924
1925     int cand_up   = (lc->ctb_up_flag || y0b) ?
1926                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1927     int cand_left = (lc->ctb_left_flag || x0b) ?
1928                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1929
1930     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1931
1932     MvField *tab_mvf = s->ref->tab_mvf;
1933     int intra_pred_mode;
1934     int candidate[3];
1935     int i, j;
1936
1937     // intra_pred_mode prediction does not cross vertical CTB boundaries
1938     if ((y0 - 1) < y_ctb)
1939         cand_up = INTRA_DC;
1940
1941     if (cand_left == cand_up) {
1942         if (cand_left < 2) {
1943             candidate[0] = INTRA_PLANAR;
1944             candidate[1] = INTRA_DC;
1945             candidate[2] = INTRA_ANGULAR_26;
1946         } else {
1947             candidate[0] = cand_left;
1948             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1949             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1950         }
1951     } else {
1952         candidate[0] = cand_left;
1953         candidate[1] = cand_up;
1954         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1955             candidate[2] = INTRA_PLANAR;
1956         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1957             candidate[2] = INTRA_DC;
1958         } else {
1959             candidate[2] = INTRA_ANGULAR_26;
1960         }
1961     }
1962
1963     if (prev_intra_luma_pred_flag) {
1964         intra_pred_mode = candidate[lc->pu.mpm_idx];
1965     } else {
1966         if (candidate[0] > candidate[1])
1967             FFSWAP(uint8_t, candidate[0], candidate[1]);
1968         if (candidate[0] > candidate[2])
1969             FFSWAP(uint8_t, candidate[0], candidate[2]);
1970         if (candidate[1] > candidate[2])
1971             FFSWAP(uint8_t, candidate[1], candidate[2]);
1972
1973         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1974         for (i = 0; i < 3; i++)
1975             if (intra_pred_mode >= candidate[i])
1976                 intra_pred_mode++;
1977     }
1978
1979     /* write the intra prediction units into the mv array */
1980     if (!size_in_pus)
1981         size_in_pus = 1;
1982     for (i = 0; i < size_in_pus; i++) {
1983         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1984                intra_pred_mode, size_in_pus);
1985
1986         for (j = 0; j < size_in_pus; j++) {
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1991             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1992             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1993             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1995             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1996         }
1997     }
1998
1999     return intra_pred_mode;
2000 }
2001
2002 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2003                                           int log2_cb_size, int ct_depth)
2004 {
2005     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2006     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2007     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2008     int y;
2009
2010     for (y = 0; y < length; y++)
2011         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2012                ct_depth, length);
2013 }
2014
2015 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2016                                   int log2_cb_size)
2017 {
2018     HEVCLocalContext *lc = &s->HEVClc;
2019     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2020     uint8_t prev_intra_luma_pred_flag[4];
2021     int split   = lc->cu.part_mode == PART_NxN;
2022     int pb_size = (1 << log2_cb_size) >> split;
2023     int side    = split + 1;
2024     int chroma_mode;
2025     int i, j;
2026
2027     for (i = 0; i < side; i++)
2028         for (j = 0; j < side; j++)
2029             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2030
2031     for (i = 0; i < side; i++) {
2032         for (j = 0; j < side; j++) {
2033             if (prev_intra_luma_pred_flag[2 * i + j])
2034                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2035             else
2036                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2037
2038             lc->pu.intra_pred_mode[2 * i + j] =
2039                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2040                                      prev_intra_luma_pred_flag[2 * i + j]);
2041         }
2042     }
2043
2044     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2045     if (chroma_mode != 4) {
2046         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2047             lc->pu.intra_pred_mode_c = 34;
2048         else
2049             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2050     } else {
2051         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2052     }
2053 }
2054
2055 static void intra_prediction_unit_default_value(HEVCContext *s,
2056                                                 int x0, int y0,
2057                                                 int log2_cb_size)
2058 {
2059     HEVCLocalContext *lc = &s->HEVClc;
2060     int pb_size          = 1 << log2_cb_size;
2061     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2062     int min_pu_width     = s->sps->min_pu_width;
2063     MvField *tab_mvf     = s->ref->tab_mvf;
2064     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2065     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2066     int j, k;
2067
2068     if (size_in_pus == 0)
2069         size_in_pus = 1;
2070     for (j = 0; j < size_in_pus; j++) {
2071         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2072         for (k = 0; k < size_in_pus; k++)
2073             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2074     }
2075 }
2076
2077 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2078 {
2079     int cb_size          = 1 << log2_cb_size;
2080     HEVCLocalContext *lc = &s->HEVClc;
2081     int log2_min_cb_size = s->sps->log2_min_cb_size;
2082     int length           = cb_size >> log2_min_cb_size;
2083     int min_cb_width     = s->sps->min_cb_width;
2084     int x_cb             = x0 >> log2_min_cb_size;
2085     int y_cb             = y0 >> log2_min_cb_size;
2086     int x, y, ret;
2087
2088     lc->cu.x                = x0;
2089     lc->cu.y                = y0;
2090     lc->cu.rqt_root_cbf     = 1;
2091     lc->cu.pred_mode        = MODE_INTRA;
2092     lc->cu.part_mode        = PART_2Nx2N;
2093     lc->cu.intra_split_flag = 0;
2094     lc->cu.pcm_flag         = 0;
2095
2096     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2097     for (x = 0; x < 4; x++)
2098         lc->pu.intra_pred_mode[x] = 1;
2099     if (s->pps->transquant_bypass_enable_flag) {
2100         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2101         if (lc->cu.cu_transquant_bypass_flag)
2102             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2103     } else
2104         lc->cu.cu_transquant_bypass_flag = 0;
2105
2106     if (s->sh.slice_type != I_SLICE) {
2107         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2108
2109         lc->cu.pred_mode = MODE_SKIP;
2110         x = y_cb * min_cb_width + x_cb;
2111         for (y = 0; y < length; y++) {
2112             memset(&s->skip_flag[x], skip_flag, length);
2113             x += min_cb_width;
2114         }
2115         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2116     }
2117
2118     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2119         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2120         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2121
2122         if (!s->sh.disable_deblocking_filter_flag)
2123             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2124                                                   lc->slice_or_tiles_up_boundary,
2125                                                   lc->slice_or_tiles_left_boundary);
2126     } else {
2127         if (s->sh.slice_type != I_SLICE)
2128             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2129         if (lc->cu.pred_mode != MODE_INTRA ||
2130             log2_cb_size == s->sps->log2_min_cb_size) {
2131             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2132             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2133                                       lc->cu.pred_mode == MODE_INTRA;
2134         }
2135
2136         if (lc->cu.pred_mode == MODE_INTRA) {
2137             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2138                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2139                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2140                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2141             }
2142             if (lc->cu.pcm_flag) {
2143                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2144                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2145                 if (s->sps->pcm.loop_filter_disable_flag)
2146                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2147
2148                 if (ret < 0)
2149                     return ret;
2150             } else {
2151                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2152             }
2153         } else {
2154             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2155             switch (lc->cu.part_mode) {
2156             case PART_2Nx2N:
2157                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2158                 break;
2159             case PART_2NxN:
2160                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2161                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2162                 break;
2163             case PART_Nx2N:
2164                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2165                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2166                 break;
2167             case PART_2NxnU:
2168                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2169                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2170                 break;
2171             case PART_2NxnD:
2172                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2173                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2174                 break;
2175             case PART_nLx2N:
2176                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2177                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2178                 break;
2179             case PART_nRx2N:
2180                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2181                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2182                 break;
2183             case PART_NxN:
2184                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2185                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2186                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2187                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2188                 break;
2189             }
2190         }
2191
2192         if (!lc->cu.pcm_flag) {
2193             if (lc->cu.pred_mode != MODE_INTRA &&
2194                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2195                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2196             }
2197             if (lc->cu.rqt_root_cbf) {
2198                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2199                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2200                                          s->sps->max_transform_hierarchy_depth_inter;
2201                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2202                                          log2_cb_size,
2203                                          log2_cb_size, 0, 0);
2204                 if (ret < 0)
2205                     return ret;
2206             } else {
2207                 if (!s->sh.disable_deblocking_filter_flag)
2208                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2209                                                           lc->slice_or_tiles_up_boundary,
2210                                                           lc->slice_or_tiles_left_boundary);
2211             }
2212         }
2213     }
2214
2215     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2216         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2217
2218     x = y_cb * min_cb_width + x_cb;
2219     for (y = 0; y < length; y++) {
2220         memset(&s->qp_y_tab[x], lc->qp_y, length);
2221         x += min_cb_width;
2222     }
2223
2224     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2225
2226     return 0;
2227 }
2228
2229 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2230                                int log2_cb_size, int cb_depth)
2231 {
2232     HEVCLocalContext *lc = &s->HEVClc;
2233     const int cb_size    = 1 << log2_cb_size;
2234     int split_cu;
2235
2236     lc->ct.depth = cb_depth;
2237     if (x0 + cb_size <= s->sps->width  &&
2238         y0 + cb_size <= s->sps->height &&
2239         log2_cb_size > s->sps->log2_min_cb_size) {
2240         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2241     } else {
2242         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2243     }
2244     if (s->pps->cu_qp_delta_enabled_flag &&
2245         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2246         lc->tu.is_cu_qp_delta_coded = 0;
2247         lc->tu.cu_qp_delta          = 0;
2248     }
2249
2250     if (split_cu) {
2251         const int cb_size_split = cb_size >> 1;
2252         const int x1 = x0 + cb_size_split;
2253         const int y1 = y0 + cb_size_split;
2254
2255         log2_cb_size--;
2256         cb_depth++;
2257
2258 #define SUBDIVIDE(x, y)                                                \
2259 do {                                                                   \
2260     if (x < s->sps->width && y < s->sps->height) {                     \
2261         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2262         if (ret < 0)                                                   \
2263             return ret;                                                \
2264     }                                                                  \
2265 } while (0)
2266
2267         SUBDIVIDE(x0, y0);
2268         SUBDIVIDE(x1, y0);
2269         SUBDIVIDE(x0, y1);
2270         SUBDIVIDE(x1, y1);
2271     } else {
2272         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2273         if (ret < 0)
2274             return ret;
2275     }
2276
2277     return 0;
2278 }
2279
2280 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2281                                  int ctb_addr_ts)
2282 {
2283     HEVCLocalContext *lc  = &s->HEVClc;
2284     int ctb_size          = 1 << s->sps->log2_ctb_size;
2285     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2286     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2287
2288     int tile_left_boundary, tile_up_boundary;
2289     int slice_left_boundary, slice_up_boundary;
2290
2291     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2292
2293     if (s->pps->entropy_coding_sync_enabled_flag) {
2294         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2295             lc->first_qp_group = 1;
2296         lc->end_of_tiles_x = s->sps->width;
2297     } else if (s->pps->tiles_enabled_flag) {
2298         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2299             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2300             lc->start_of_tiles_x = x_ctb;
2301             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2302             lc->first_qp_group   = 1;
2303         }
2304     } else {
2305         lc->end_of_tiles_x = s->sps->width;
2306     }
2307
2308     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2309
2310     if (s->pps->tiles_enabled_flag) {
2311         tile_left_boundary  = x_ctb > 0 &&
2312                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2313         slice_left_boundary = x_ctb > 0 &&
2314                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2315         tile_up_boundary  = y_ctb > 0 &&
2316                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2317         slice_up_boundary = y_ctb > 0 &&
2318                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2319     } else {
2320         tile_left_boundary  =
2321         tile_up_boundary    = 1;
2322         slice_left_boundary = ctb_addr_in_slice > 0;
2323         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2324     }
2325     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2326     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2327     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2328     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2329     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2330     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2331 }
2332
2333 static int hls_slice_data(HEVCContext *s)
2334 {
2335     int ctb_size    = 1 << s->sps->log2_ctb_size;
2336     int more_data   = 1;
2337     int x_ctb       = 0;
2338     int y_ctb       = 0;
2339     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2340     int ret;
2341
2342     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2343         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2344
2345         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2346         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2347         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2348
2349         ff_hevc_cabac_init(s, ctb_addr_ts);
2350
2351         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2352
2353         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2354         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2355         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2356
2357         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2358         if (ret < 0)
2359             return ret;
2360         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2361
2362         ctb_addr_ts++;
2363         ff_hevc_save_states(s, ctb_addr_ts);
2364         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2365     }
2366
2367     if (x_ctb + ctb_size >= s->sps->width &&
2368         y_ctb + ctb_size >= s->sps->height)
2369         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2370
2371     return ctb_addr_ts;
2372 }
2373
2374 /**
2375  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2376  * 0 if the unit should be skipped, 1 otherwise
2377  */
2378 static int hls_nal_unit(HEVCContext *s)
2379 {
2380     GetBitContext *gb = &s->HEVClc.gb;
2381     int nuh_layer_id;
2382
2383     if (get_bits1(gb) != 0)
2384         return AVERROR_INVALIDDATA;
2385
2386     s->nal_unit_type = get_bits(gb, 6);
2387
2388     nuh_layer_id   = get_bits(gb, 6);
2389     s->temporal_id = get_bits(gb, 3) - 1;
2390     if (s->temporal_id < 0)
2391         return AVERROR_INVALIDDATA;
2392
2393     av_log(s->avctx, AV_LOG_DEBUG,
2394            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2395            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2396
2397     return nuh_layer_id == 0;
2398 }
2399
2400 static void restore_tqb_pixels(HEVCContext *s)
2401 {
2402     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2403     int x, y, c_idx;
2404
2405     for (c_idx = 0; c_idx < 3; c_idx++) {
2406         ptrdiff_t stride = s->frame->linesize[c_idx];
2407         int hshift       = s->sps->hshift[c_idx];
2408         int vshift       = s->sps->vshift[c_idx];
2409         for (y = 0; y < s->sps->min_pu_height; y++) {
2410             for (x = 0; x < s->sps->min_pu_width; x++) {
2411                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2412                     int n;
2413                     int len      = min_pu_size >> hshift;
2414                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2415                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2416                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2417                         memcpy(dst, src, len);
2418                         src += stride;
2419                         dst += stride;
2420                     }
2421                 }
2422             }
2423         }
2424     }
2425 }
2426
2427 static int set_side_data(HEVCContext *s)
2428 {
2429     AVFrame *out = s->ref->frame;
2430
2431     if (s->sei_frame_packing_present &&
2432         s->frame_packing_arrangement_type >= 3 &&
2433         s->frame_packing_arrangement_type <= 5 &&
2434         s->content_interpretation_type > 0 &&
2435         s->content_interpretation_type < 3) {
2436         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2437         if (!stereo)
2438             return AVERROR(ENOMEM);
2439
2440         switch (s->frame_packing_arrangement_type) {
2441         case 3:
2442             if (s->quincunx_subsampling)
2443                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2444             else
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2446             break;
2447         case 4:
2448             stereo->type = AV_STEREO3D_TOPBOTTOM;
2449             break;
2450         case 5:
2451             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2452             break;
2453         }
2454
2455         if (s->content_interpretation_type == 2)
2456             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2457     }
2458
2459     if (s->sei_display_orientation_present &&
2460         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2461         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2462         AVFrameSideData *rotation = av_frame_new_side_data(out,
2463                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2464                                                            sizeof(int32_t) * 9);
2465         if (!rotation)
2466             return AVERROR(ENOMEM);
2467
2468         av_display_rotation_set((int32_t *)rotation->data, angle);
2469         av_display_matrix_flip((int32_t *)rotation->data,
2470                                s->sei_vflip, s->sei_hflip);
2471     }
2472
2473     return 0;
2474 }
2475
2476 static int hevc_frame_start(HEVCContext *s)
2477 {
2478     HEVCLocalContext *lc = &s->HEVClc;
2479     int ret;
2480
2481     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2482     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2483     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2484     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2485
2486     lc->start_of_tiles_x = 0;
2487     s->is_decoded        = 0;
2488     s->first_nal_type    = s->nal_unit_type;
2489
2490     if (s->pps->tiles_enabled_flag)
2491         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2492
2493     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2494                               s->poc);
2495     if (ret < 0)
2496         goto fail;
2497
2498     ret = ff_hevc_frame_rps(s);
2499     if (ret < 0) {
2500         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2501         goto fail;
2502     }
2503
2504     s->ref->frame->key_frame = IS_IRAP(s);
2505
2506     ret = set_side_data(s);
2507     if (ret < 0)
2508         goto fail;
2509
2510     av_frame_unref(s->output_frame);
2511     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2512     if (ret < 0)
2513         goto fail;
2514
2515     ff_thread_finish_setup(s->avctx);
2516
2517     return 0;
2518
2519 fail:
2520     if (s->ref)
2521         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2522     s->ref = NULL;
2523     return ret;
2524 }
2525
2526 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2527 {
2528     HEVCLocalContext *lc = &s->HEVClc;
2529     GetBitContext *gb    = &lc->gb;
2530     int ctb_addr_ts, ret;
2531
2532     ret = init_get_bits8(gb, nal, length);
2533     if (ret < 0)
2534         return ret;
2535
2536     ret = hls_nal_unit(s);
2537     if (ret < 0) {
2538         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2539                s->nal_unit_type);
2540         goto fail;
2541     } else if (!ret)
2542         return 0;
2543
2544     switch (s->nal_unit_type) {
2545     case NAL_VPS:
2546         ret = ff_hevc_decode_nal_vps(s);
2547         if (ret < 0)
2548             goto fail;
2549         break;
2550     case NAL_SPS:
2551         ret = ff_hevc_decode_nal_sps(s);
2552         if (ret < 0)
2553             goto fail;
2554         break;
2555     case NAL_PPS:
2556         ret = ff_hevc_decode_nal_pps(s);
2557         if (ret < 0)
2558             goto fail;
2559         break;
2560     case NAL_SEI_PREFIX:
2561     case NAL_SEI_SUFFIX:
2562         ret = ff_hevc_decode_nal_sei(s);
2563         if (ret < 0)
2564             goto fail;
2565         break;
2566     case NAL_TRAIL_R:
2567     case NAL_TRAIL_N:
2568     case NAL_TSA_N:
2569     case NAL_TSA_R:
2570     case NAL_STSA_N:
2571     case NAL_STSA_R:
2572     case NAL_BLA_W_LP:
2573     case NAL_BLA_W_RADL:
2574     case NAL_BLA_N_LP:
2575     case NAL_IDR_W_RADL:
2576     case NAL_IDR_N_LP:
2577     case NAL_CRA_NUT:
2578     case NAL_RADL_N:
2579     case NAL_RADL_R:
2580     case NAL_RASL_N:
2581     case NAL_RASL_R:
2582         ret = hls_slice_header(s);
2583         if (ret < 0)
2584             return ret;
2585
2586         if (s->max_ra == INT_MAX) {
2587             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2588                 s->max_ra = s->poc;
2589             } else {
2590                 if (IS_IDR(s))
2591                     s->max_ra = INT_MIN;
2592             }
2593         }
2594
2595         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2596             s->poc <= s->max_ra) {
2597             s->is_decoded = 0;
2598             break;
2599         } else {
2600             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2601                 s->max_ra = INT_MIN;
2602         }
2603
2604         if (s->sh.first_slice_in_pic_flag) {
2605             ret = hevc_frame_start(s);
2606             if (ret < 0)
2607                 return ret;
2608         } else if (!s->ref) {
2609             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2610             goto fail;
2611         }
2612
2613         if (s->nal_unit_type != s->first_nal_type) {
2614             av_log(s->avctx, AV_LOG_ERROR,
2615                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2616                    s->first_nal_type, s->nal_unit_type);
2617             return AVERROR_INVALIDDATA;
2618         }
2619
2620         if (!s->sh.dependent_slice_segment_flag &&
2621             s->sh.slice_type != I_SLICE) {
2622             ret = ff_hevc_slice_rpl(s);
2623             if (ret < 0) {
2624                 av_log(s->avctx, AV_LOG_WARNING,
2625                        "Error constructing the reference lists for the current slice.\n");
2626                 goto fail;
2627             }
2628         }
2629
2630         ctb_addr_ts = hls_slice_data(s);
2631         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2632             s->is_decoded = 1;
2633             if ((s->pps->transquant_bypass_enable_flag ||
2634                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2635                 s->sps->sao_enabled)
2636                 restore_tqb_pixels(s);
2637         }
2638
2639         if (ctb_addr_ts < 0) {
2640             ret = ctb_addr_ts;
2641             goto fail;
2642         }
2643         break;
2644     case NAL_EOS_NUT:
2645     case NAL_EOB_NUT:
2646         s->seq_decode = (s->seq_decode + 1) & 0xff;
2647         s->max_ra     = INT_MAX;
2648         break;
2649     case NAL_AUD:
2650     case NAL_FD_NUT:
2651         break;
2652     default:
2653         av_log(s->avctx, AV_LOG_INFO,
2654                "Skipping NAL unit %d\n", s->nal_unit_type);
2655     }
2656
2657     return 0;
2658 fail:
2659     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2660         return ret;
2661     return 0;
2662 }
2663
/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
 * between these functions would be nice. */
/**
 * Locate the end of a NAL unit in src and strip its escape bytes.
 *
 * The input is scanned for the first 00 00 0x sequence with x <= 3. If x is
 * not 3 the sequence is treated as the next start code and the unit is
 * truncated there. When no 00 00 03 escape is found, nal->data points
 * directly into src. Otherwise the payload is copied into nal->rbsp_buffer
 * (grown with av_fast_malloc) with the 03 byte of every 00 00 03 sequence
 * dropped, and FF_INPUT_BUFFER_PADDING_SIZE zero bytes are appended.
 *
 * @param src    start of the NAL unit data
 * @param length number of bytes available at src
 * @param nal    receives the payload pointer and size (nal->data, nal->size)
 * @return the number of bytes of src that were consumed, or AVERROR(ENOMEM)
 *         if the output buffer cannot be allocated
 */
static int extract_rbsp(const uint8_t *src, int length,
                        HEVCNAL *nal)
{
    int i, si, di;
    uint8_t *dst;

/* Leaves the enclosing scan loop at position i when src[i + 1] is 0 and
 * src[i + 2] is at most 3; unless that third byte is 3, length is cut to i. */
#define STARTCODE_TEST                                                  \
        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
            if (src[i + 2] != 3) {                                      \
                /* startcode, so we must be past the end */             \
                length = i;                                             \
            }                                                           \
            break;                                                      \
        }
#if HAVE_FAST_UNALIGNED
/* Steps back one byte if already on a zero, then advances i to the next
 * zero byte. */
#define FIND_FIRST_ZERO                                                 \
        if (i > 0 && !src[i])                                           \
            i--;                                                        \
        while (src[i])                                                  \
            i++
#if HAVE_FAST_64BIT
    /* NOTE(review): word-at-a-time filter that presumably rejects 8-byte
     * groups without a zero byte in the tested positions - confirm against
     * ff_h264_decode_nal. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
#else
    /* 32-bit variant of the same filter */
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif /* HAVE_FAST_64BIT */
#else
    /* portable scan: look at every second byte, stepping back by one when
     * the preceding byte is zero as well */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        STARTCODE_TEST;
    }
#endif /* HAVE_FAST_UNALIGNED */

    /* the scan ran to the (possibly truncated) end: nothing to unescape,
     * so the payload can be used in place */
    if (i >= length - 1) { // no escaped 0
        nal->data = src;
        nal->size = length;
        return length;
    }

    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
                   length + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!nal->rbsp_buffer)
        return AVERROR(ENOMEM);

    dst = nal->rbsp_buffer;

    /* everything before the first escape candidate is copied verbatim */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            /* no 00 00 0x pattern can end at si + 2: copy two bytes here
             * and a third one at the bottom of the loop */
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                /* keep the two zero bytes, drop the 03 */
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;

                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    /* copy the final bytes, which are too few to hold an escape sequence */
    while (si < length)
        dst[di++] = src[si++];

nsc:
    /* zero the padding after the payload */
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    nal->data = dst;
    nal->size = di;
    return si;
}
2760
2761 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2762 {
2763     int i, consumed, ret = 0;
2764
2765     s->ref = NULL;
2766     s->eos = 0;
2767
2768     /* split the input packet into NAL units, so we know the upper bound on the
2769      * number of slices in the frame */
2770     s->nb_nals = 0;
2771     while (length >= 4) {
2772         HEVCNAL *nal;
2773         int extract_length = 0;
2774
2775         if (s->is_nalff) {
2776             int i;
2777             for (i = 0; i < s->nal_length_size; i++)
2778                 extract_length = (extract_length << 8) | buf[i];
2779             buf    += s->nal_length_size;
2780             length -= s->nal_length_size;
2781
2782             if (extract_length > length) {
2783                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2784                 ret = AVERROR_INVALIDDATA;
2785                 goto fail;
2786             }
2787         } else {
2788             if (buf[2] == 0) {
2789                 length--;
2790                 buf++;
2791                 continue;
2792             }
2793             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2794                 ret = AVERROR_INVALIDDATA;
2795                 goto fail;
2796             }
2797
2798             buf           += 3;
2799             length        -= 3;
2800             extract_length = length;
2801         }
2802
2803         if (s->nals_allocated < s->nb_nals + 1) {
2804             int new_size = s->nals_allocated + 1;
2805             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2806             if (!tmp) {
2807                 ret = AVERROR(ENOMEM);
2808                 goto fail;
2809             }
2810             s->nals = tmp;
2811             memset(s->nals + s->nals_allocated, 0,
2812                    (new_size - s->nals_allocated) * sizeof(*tmp));
2813             s->nals_allocated = new_size;
2814         }
2815         nal = &s->nals[s->nb_nals++];
2816
2817         consumed = extract_rbsp(buf, extract_length, nal);
2818         if (consumed < 0) {
2819             ret = consumed;
2820             goto fail;
2821         }
2822
2823         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2824         if (ret < 0)
2825             goto fail;
2826         hls_nal_unit(s);
2827
2828         if (s->nal_unit_type == NAL_EOB_NUT ||
2829             s->nal_unit_type == NAL_EOS_NUT)
2830             s->eos = 1;
2831
2832         buf    += consumed;
2833         length -= consumed;
2834     }
2835
2836     /* parse the NAL units */
2837     for (i = 0; i < s->nb_nals; i++) {
2838         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2839         if (ret < 0) {
2840             av_log(s->avctx, AV_LOG_WARNING,
2841                    "Error parsing NAL unit #%d.\n", i);
2842             goto fail;
2843         }
2844     }
2845
2846 fail:
2847     if (s->ref)
2848         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2849
2850     return ret;
2851 }
2852
/**
 * Log a 16-byte MD5 digest as 32 hexadecimal digits, with no separators and
 * no trailing newline.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     the digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2859
2860 static int verify_md5(HEVCContext *s, AVFrame *frame)
2861 {
2862     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2863     int pixel_shift;
2864     int i, j;
2865
2866     if (!desc)
2867         return AVERROR(EINVAL);
2868
2869     pixel_shift = desc->comp[0].depth_minus1 > 7;
2870
2871     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2872            s->poc);
2873
2874     /* the checksums are LE, so we have to byteswap for >8bpp formats
2875      * on BE arches */
2876 #if HAVE_BIGENDIAN
2877     if (pixel_shift && !s->checksum_buf) {
2878         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2879                        FFMAX3(frame->linesize[0], frame->linesize[1],
2880                               frame->linesize[2]));
2881         if (!s->checksum_buf)
2882             return AVERROR(ENOMEM);
2883     }
2884 #endif
2885
2886     for (i = 0; frame->data[i]; i++) {
2887         int width  = s->avctx->coded_width;
2888         int height = s->avctx->coded_height;
2889         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2890         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2891         uint8_t md5[16];
2892
2893         av_md5_init(s->md5_ctx);
2894         for (j = 0; j < h; j++) {
2895             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2896 #if HAVE_BIGENDIAN
2897             if (pixel_shift) {
2898                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2899                                     (const uint16_t *) src, w);
2900                 src = s->checksum_buf;
2901             }
2902 #endif
2903             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2904         }
2905         av_md5_final(s->md5_ctx, md5);
2906
2907         if (!memcmp(md5, s->md5[i], 16)) {
2908             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2909             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2910             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2911         } else {
2912             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2913             print_md5(s->avctx, AV_LOG_ERROR, md5);
2914             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2915             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2916             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2917             return AVERROR_INVALIDDATA;
2918         }
2919     }
2920
2921     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2922
2923     return 0;
2924 }
2925
2926 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2927                              AVPacket *avpkt)
2928 {
2929     int ret;
2930     HEVCContext *s = avctx->priv_data;
2931
2932     if (!avpkt->size) {
2933         ret = ff_hevc_output_frame(s, data, 1);
2934         if (ret < 0)
2935             return ret;
2936
2937         *got_output = ret;
2938         return 0;
2939     }
2940
2941     s->ref = NULL;
2942     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2943     if (ret < 0)
2944         return ret;
2945
2946     /* verify the SEI checksum */
2947     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2948         s->is_md5) {
2949         ret = verify_md5(s, s->ref->frame);
2950         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2951             ff_hevc_unref_frame(s, s->ref, ~0);
2952             return ret;
2953         }
2954     }
2955     s->is_md5 = 0;
2956
2957     if (s->is_decoded) {
2958         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2959         s->is_decoded = 0;
2960     }
2961
2962     if (s->output_frame->buf[0]) {
2963         av_frame_move_ref(data, s->output_frame);
2964         *got_output = 1;
2965     }
2966
2967     return avpkt->size;
2968 }
2969
2970 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2971 {
2972     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2973     if (ret < 0)
2974         return ret;
2975
2976     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2977     if (!dst->tab_mvf_buf)
2978         goto fail;
2979     dst->tab_mvf = src->tab_mvf;
2980
2981     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2982     if (!dst->rpl_tab_buf)
2983         goto fail;
2984     dst->rpl_tab = src->rpl_tab;
2985
2986     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2987     if (!dst->rpl_buf)
2988         goto fail;
2989
2990     dst->poc        = src->poc;
2991     dst->ctb_count  = src->ctb_count;
2992     dst->window     = src->window;
2993     dst->flags      = src->flags;
2994     dst->sequence   = src->sequence;
2995
2996     return 0;
2997 fail:
2998     ff_hevc_unref_frame(s, dst, ~0);
2999     return AVERROR(ENOMEM);
3000 }
3001
3002 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3003 {
3004     HEVCContext       *s = avctx->priv_data;
3005     int i;
3006
3007     pic_arrays_free(s);
3008
3009     av_freep(&s->md5_ctx);
3010
3011     av_frame_free(&s->tmp_frame);
3012     av_frame_free(&s->output_frame);
3013
3014     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3015         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3016         av_frame_free(&s->DPB[i].frame);
3017     }
3018
3019     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3020         av_buffer_unref(&s->vps_list[i]);
3021     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3022         av_buffer_unref(&s->sps_list[i]);
3023     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3024         av_buffer_unref(&s->pps_list[i]);
3025
3026     for (i = 0; i < s->nals_allocated; i++)
3027         av_freep(&s->nals[i].rbsp_buffer);
3028     av_freep(&s->nals);
3029     s->nals_allocated = 0;
3030
3031     return 0;
3032 }
3033
3034 static av_cold int hevc_init_context(AVCodecContext *avctx)
3035 {
3036     HEVCContext *s = avctx->priv_data;
3037     int i;
3038
3039     s->avctx = avctx;
3040
3041     s->tmp_frame = av_frame_alloc();
3042     if (!s->tmp_frame)
3043         goto fail;
3044
3045     s->output_frame = av_frame_alloc();
3046     if (!s->output_frame)
3047         goto fail;
3048
3049     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3050         s->DPB[i].frame = av_frame_alloc();
3051         if (!s->DPB[i].frame)
3052             goto fail;
3053         s->DPB[i].tf.f = s->DPB[i].frame;
3054     }
3055
3056     s->max_ra = INT_MAX;
3057
3058     s->md5_ctx = av_md5_alloc();
3059     if (!s->md5_ctx)
3060         goto fail;
3061
3062     ff_bswapdsp_init(&s->bdsp);
3063
3064     s->context_initialized = 1;
3065
3066     return 0;
3067
3068 fail:
3069     hevc_decode_free(avctx);
3070     return AVERROR(ENOMEM);
3071 }
3072
3073 static int hevc_update_thread_context(AVCodecContext *dst,
3074                                       const AVCodecContext *src)
3075 {
3076     HEVCContext *s  = dst->priv_data;
3077     HEVCContext *s0 = src->priv_data;
3078     int i, ret;
3079
3080     if (!s->context_initialized) {
3081         ret = hevc_init_context(dst);
3082         if (ret < 0)
3083             return ret;
3084     }
3085
3086     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3087         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3088         if (s0->DPB[i].frame->buf[0]) {
3089             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3090             if (ret < 0)
3091                 return ret;
3092         }
3093     }
3094
3095     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3096         av_buffer_unref(&s->vps_list[i]);
3097         if (s0->vps_list[i]) {
3098             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3099             if (!s->vps_list[i])
3100                 return AVERROR(ENOMEM);
3101         }
3102     }
3103
3104     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3105         av_buffer_unref(&s->sps_list[i]);
3106         if (s0->sps_list[i]) {
3107             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3108             if (!s->sps_list[i])
3109                 return AVERROR(ENOMEM);
3110         }
3111     }
3112
3113     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3114         av_buffer_unref(&s->pps_list[i]);
3115         if (s0->pps_list[i]) {
3116             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3117             if (!s->pps_list[i])
3118                 return AVERROR(ENOMEM);
3119         }
3120     }
3121
3122     if (s->sps != s0->sps)
3123         ret = set_sps(s, s0->sps);
3124
3125     s->seq_decode = s0->seq_decode;
3126     s->seq_output = s0->seq_output;
3127     s->pocTid0    = s0->pocTid0;
3128     s->max_ra     = s0->max_ra;
3129
3130     s->is_nalff        = s0->is_nalff;
3131     s->nal_length_size = s0->nal_length_size;
3132
3133     if (s0->eos) {
3134         s->seq_decode = (s->seq_decode + 1) & 0xff;
3135         s->max_ra = INT_MAX;
3136     }
3137
3138     return 0;
3139 }
3140
3141 static int hevc_decode_extradata(HEVCContext *s)
3142 {
3143     AVCodecContext *avctx = s->avctx;
3144     GetByteContext gb;
3145     int ret;
3146
3147     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3148
3149     if (avctx->extradata_size > 3 &&
3150         (avctx->extradata[0] || avctx->extradata[1] ||
3151          avctx->extradata[2] > 1)) {
3152         /* It seems the extradata is encoded as hvcC format.
3153          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3154          * is finalized. When finalized, configurationVersion will be 1 and we
3155          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3156         int i, j, num_arrays, nal_len_size;
3157
3158         s->is_nalff = 1;
3159
3160         bytestream2_skip(&gb, 21);
3161         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3162         num_arrays   = bytestream2_get_byte(&gb);
3163
3164         /* nal units in the hvcC always have length coded with 2 bytes,
3165          * so put a fake nal_length_size = 2 while parsing them */
3166         s->nal_length_size = 2;
3167
3168         /* Decode nal units from hvcC. */
3169         for (i = 0; i < num_arrays; i++) {
3170             int type = bytestream2_get_byte(&gb) & 0x3f;
3171             int cnt  = bytestream2_get_be16(&gb);
3172
3173             for (j = 0; j < cnt; j++) {
3174                 // +2 for the nal size field
3175                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3176                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3177                     av_log(s->avctx, AV_LOG_ERROR,
3178                            "Invalid NAL unit size in extradata.\n");
3179                     return AVERROR_INVALIDDATA;
3180                 }
3181
3182                 ret = decode_nal_units(s, gb.buffer, nalsize);
3183                 if (ret < 0) {
3184                     av_log(avctx, AV_LOG_ERROR,
3185                            "Decoding nal unit %d %d from hvcC failed\n",
3186                            type, i);
3187                     return ret;
3188                 }
3189                 bytestream2_skip(&gb, nalsize);
3190             }
3191         }
3192
3193         /* Now store right nal length size, that will be used to parse
3194          * all other nals */
3195         s->nal_length_size = nal_len_size;
3196     } else {
3197         s->is_nalff = 0;
3198         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3199         if (ret < 0)
3200             return ret;
3201     }
3202     return 0;
3203 }
3204
3205 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3206 {
3207     HEVCContext *s = avctx->priv_data;
3208     int ret;
3209
3210     ff_init_cabac_states();
3211
3212     avctx->internal->allocate_progress = 1;
3213
3214     ret = hevc_init_context(avctx);
3215     if (ret < 0)
3216         return ret;
3217
3218     if (avctx->extradata_size > 0 && avctx->extradata) {
3219         ret = hevc_decode_extradata(s);
3220         if (ret < 0) {
3221             hevc_decode_free(avctx);
3222             return ret;
3223         }
3224     }
3225
3226     return 0;
3227 }
3228
3229 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3230 {
3231     HEVCContext *s = avctx->priv_data;
3232     int ret;
3233
3234     memset(s, 0, sizeof(*s));
3235
3236     ret = hevc_init_context(avctx);
3237     if (ret < 0)
3238         return ret;
3239
3240     return 0;
3241 }
3242
3243 static void hevc_decode_flush(AVCodecContext *avctx)
3244 {
3245     HEVCContext *s = avctx->priv_data;
3246     ff_hevc_flush_dpb(s);
3247     s->max_ra = INT_MAX;
3248 }
3249
/* Byte offset of an option field inside HEVCContext. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags shared by the entries of the options table below. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3252
3253 static const AVProfile profiles[] = {
3254     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3255     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3256     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3257     { FF_PROFILE_UNKNOWN },
3258 };
3259
3260 static const AVOption options[] = {
3261     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3262         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3263     { NULL },
3264 };
3265
3266 static const AVClass hevc_decoder_class = {
3267     .class_name = "HEVC decoder",
3268     .item_name  = av_default_item_name,
3269     .option     = options,
3270     .version    = LIBAVUTIL_VERSION_INT,
3271 };
3272
3273 AVCodec ff_hevc_decoder = {
3274     .name                  = "hevc",
3275     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3276     .type                  = AVMEDIA_TYPE_VIDEO,
3277     .id                    = AV_CODEC_ID_HEVC,
3278     .priv_data_size        = sizeof(HEVCContext),
3279     .priv_class            = &hevc_decoder_class,
3280     .init                  = hevc_decode_init,
3281     .close                 = hevc_decode_free,
3282     .decode                = hevc_decode_frame,
3283     .flush                 = hevc_decode_flush,
3284     .update_thread_context = hevc_update_thread_context,
3285     .init_thread_copy      = hevc_init_thread_copy,
3286     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3287                              CODEC_CAP_FRAME_THREADS,
3288     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3289 };