git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  86     0, 0, 1, 0,
  87     1, 2, 0, 1,
  88     2, 3, 1, 2,
  89     3, 2, 3, 3,
  90 };
  91
  92 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  93     0, 1, 0, 2,
  94     1, 0, 3, 2,
  95     1, 0, 3, 2,
  96     1, 3, 2, 3,
  97 };
  98
  99 static const uint8_t diag_scan4x4_inv[4][4] = {
 100     { 0,  2,  5,  9, },
 101     { 1,  4,  8, 12, },
 102     { 3,  7, 11, 14, },
 103     { 6, 10, 13, 15, },
 104 };
 105
 106 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 107     0, 0, 1, 0,
 108     1, 2, 0, 1,
 109     2, 3, 0, 1,
 110     2, 3, 4, 0,
 111     1, 2, 3, 4,
 112     5, 0, 1, 2,
 113     3, 4, 5, 6,
 114     0, 1, 2, 3,
 115     4, 5, 6, 7,
 116     1, 2, 3, 4,
 117     5, 6, 7, 2,
 118     3, 4, 5, 6,
 119     7, 3, 4, 5,
 120     6, 7, 4, 5,
 121     6, 7, 5, 6,
 122     7, 6, 7, 7,
 123 };
 124
 125 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 126     0, 1, 0, 2,
 127     1, 0, 3, 2,
 128     1, 0, 4, 3,
 129     2, 1, 0, 5,
 130     4, 3, 2, 1,
 131     0, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 0,
 135     7, 6, 5, 4,
 136     3, 2, 1, 7,
 137     6, 5, 4, 3,
 138     2, 7, 6, 5,
 139     4, 3, 7, 6,
 140     5, 4, 7, 6,
 141     5, 7, 6, 7,
 142 };
 143
 144 static const uint8_t diag_scan8x8_inv[8][8] = {
 145     {  0,  2,  5,  9, 14, 20, 27, 35, },
 146     {  1,  4,  8, 13, 19, 26, 34, 42, },
 147     {  3,  7, 12, 18, 25, 33, 41, 48, },
 148     {  6, 11, 17, 24, 32, 40, 47, 53, },
 149     { 10, 16, 23, 31, 39, 46, 52, 57, },
 150     { 15, 22, 30, 38, 45, 51, 56, 60, },
 151     { 21, 29, 37, 44, 50, 55, 59, 62, },
 152     { 28, 36, 43, 49, 54, 58, 61, 63, },
 153 };
 154
 155 /**
 156  * NOTE: Each function hls_foo correspond to the function foo in the
 157  * specification (HLS stands for High Level Syntax).
 158  */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     if (!s->sao || !s->deblock)
 205         goto fail;
 206
 207     s->skip_flag    = av_malloc(pic_size_in_ctb);
 208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 209     if (!s->skip_flag || !s->tab_ct_depth)
 210         goto fail;
 211
 212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 213     s->tab_ipm  = av_mallocz(min_pu_size);
 214     s->is_pcm   = av_malloc(min_pu_size);
 215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 216         goto fail;
 217
 218     s->filter_slice_edges = av_malloc(ctb_count);
 219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 220                                       sizeof(*s->tab_slice_address));
 221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->qp_y_tab));
 223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 224         goto fail;
 225
 226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     if (!s->horizontal_bs || !s->vertical_bs)
 229         goto fail;
 230
 231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 232                                           av_buffer_alloc);
 233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 234                                           av_buffer_allocz);
 235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 236         goto fail;
 237
 238     return 0;
 239
 240 fail:
 241     pic_arrays_free(s);
 242     return AVERROR(ENOMEM);
 243 }
 244
 245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 246 {
 247     int i = 0;
 248     int j = 0;
 249     uint8_t luma_weight_l0_flag[16];
 250     uint8_t chroma_weight_l0_flag[16];
 251     uint8_t luma_weight_l1_flag[16];
 252     uint8_t chroma_weight_l1_flag[16];
 253
 254     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 255     if (s->ps.sps->chroma_format_idc != 0) {
 256         int delta = get_se_golomb(gb);
 257         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 258     }
 259
 260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 261         luma_weight_l0_flag[i] = get_bits1(gb);
 262         if (!luma_weight_l0_flag[i]) {
 263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 264             s->sh.luma_offset_l0[i] = 0;
 265         }
 266     }
 267     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 269             chroma_weight_l0_flag[i] = get_bits1(gb);
 270     } else {
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = 0;
 273     }
 274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 275         if (luma_weight_l0_flag[i]) {
 276             int delta_luma_weight_l0 = get_se_golomb(gb);
 277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 279         }
 280         if (chroma_weight_l0_flag[i]) {
 281             for (j = 0; j < 2; j++) {
 282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 285                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 287             }
 288         } else {
 289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 290             s->sh.chroma_offset_l0[i][0] = 0;
 291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][1] = 0;
 293         }
 294     }
 295     if (s->sh.slice_type == B_SLICE) {
 296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 297             luma_weight_l1_flag[i] = get_bits1(gb);
 298             if (!luma_weight_l1_flag[i]) {
 299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 300                 s->sh.luma_offset_l1[i] = 0;
 301             }
 302         }
 303         if (s->ps.sps->chroma_format_idc != 0) {
 304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 305                 chroma_weight_l1_flag[i] = get_bits1(gb);
 306         } else {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = 0;
 309         }
 310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 311             if (luma_weight_l1_flag[i]) {
 312                 int delta_luma_weight_l1 = get_se_golomb(gb);
 313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 315             }
 316             if (chroma_weight_l1_flag[i]) {
 317                 for (j = 0; j < 2; j++) {
 318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 321                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 323                 }
 324             } else {
 325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 326                 s->sh.chroma_offset_l1[i][0] = 0;
 327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][1] = 0;
 329             }
 330         }
 331     }
 332 }
 333
 334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 335 {
 336     const HEVCSPS *sps = s->ps.sps;
 337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 338     int prev_delta_msb = 0;
 339     unsigned int nb_sps = 0, nb_sh;
 340     int i;
 341
 342     rps->nb_refs = 0;
 343     if (!sps->long_term_ref_pics_present_flag)
 344         return 0;
 345
 346     if (sps->num_long_term_ref_pics_sps > 0)
 347         nb_sps = get_ue_golomb_long(gb);
 348     nb_sh = get_ue_golomb_long(gb);
 349
 350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 351         return AVERROR_INVALIDDATA;
 352
 353     rps->nb_refs = nb_sh + nb_sps;
 354
 355     for (i = 0; i < rps->nb_refs; i++) {
 356         uint8_t delta_poc_msb_present;
 357
 358         if (i < nb_sps) {
 359             uint8_t lt_idx_sps = 0;
 360
 361             if (sps->num_long_term_ref_pics_sps > 1)
 362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 363
 364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 366         } else {
 367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 368             rps->used[i] = get_bits1(gb);
 369         }
 370
 371         delta_poc_msb_present = get_bits1(gb);
 372         if (delta_poc_msb_present) {
 373             int delta = get_ue_golomb_long(gb);
 374
 375             if (i && i != nb_sps)
 376                 delta += prev_delta_msb;
 377
 378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 379             prev_delta_msb = delta;
 380         }
 381     }
 382
 383     return 0;
 384 }
 385
 386 static void export_stream_params(AVCodecContext *avctx,
 387                                  const HEVCContext *s, const HEVCSPS *sps)
 388 {
 389     const HEVCVPS *vps = (const HEVCVPS*)s->ps.vps_list[sps->vps_id]->data;
 390     unsigned int num = 0, den = 0;
 391
 392     avctx->pix_fmt             = sps->pix_fmt;
 393     avctx->coded_width         = sps->width;
 394     avctx->coded_height        = sps->height;
 395     avctx->width               = sps->output_width;
 396     avctx->height              = sps->output_height;
 397     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 398     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 399     avctx->level               = sps->ptl.general_ptl.level_idc;
 400
 401     ff_set_sar(avctx, sps->vui.sar);
 402
 403     if (sps->vui.video_signal_type_present_flag)
 404         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 405                                                             : AVCOL_RANGE_MPEG;
 406     else
 407         avctx->color_range = AVCOL_RANGE_MPEG;
 408
 409     if (sps->vui.colour_description_present_flag) {
 410         avctx->color_primaries = sps->vui.colour_primaries;
 411         avctx->color_trc       = sps->vui.transfer_characteristic;
 412         avctx->colorspace      = sps->vui.matrix_coeffs;
 413     } else {
 414         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 415         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 416         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 417     }
 418
 419     if (vps->vps_timing_info_present_flag) {
 420         num = vps->vps_num_units_in_tick;
 421         den = vps->vps_time_scale;
 422     } else if (sps->vui.vui_timing_info_present_flag) {
 423         num = sps->vui.vui_num_units_in_tick;
 424         den = sps->vui.vui_time_scale;
 425     }
 426
 427     if (num != 0 && den != 0)
 428         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 429                   num, den, 1 << 30);
 430 }
 431
 432 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 433 {
 434     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
 435     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 436     int ret;
 437
 438     export_stream_params(s->avctx, s, sps);
 439
 440     pic_arrays_free(s);
 441     ret = pic_arrays_init(s, sps);
 442     if (ret < 0)
 443         goto fail;
 444
 445     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 446 #if CONFIG_HEVC_DXVA2_HWACCEL
 447         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 448 #endif
 449 #if CONFIG_HEVC_D3D11VA_HWACCEL
 450         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 451 #endif
 452     }
 453
 454     *fmt++ = sps->pix_fmt;
 455     *fmt = AV_PIX_FMT_NONE;
 456
 457     ret = ff_get_format(s->avctx, pix_fmts);
 458     if (ret < 0)
 459         goto fail;
 460     s->avctx->pix_fmt = ret;
 461
 462     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 463     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 464     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 465
 466     if (sps->sao_enabled && !s->avctx->hwaccel) {
 467         av_frame_unref(s->tmp_frame);
 468         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 469         if (ret < 0)
 470             goto fail;
 471         s->frame = s->tmp_frame;
 472     }
 473
 474     s->ps.sps = sps;
 475     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 476
 477     return 0;
 478
 479 fail:
 480     pic_arrays_free(s);
 481     s->ps.sps = NULL;
 482     return ret;
 483 }
 484
 485 static int hls_slice_header(HEVCContext *s)
 486 {
 487     GetBitContext *gb = &s->HEVClc.gb;
 488     SliceHeader *sh   = &s->sh;
 489     int i, ret;
 490
 491     // Coded parameters
 492     sh->first_slice_in_pic_flag = get_bits1(gb);
 493     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 494         s->seq_decode = (s->seq_decode + 1) & 0xff;
 495         s->max_ra     = INT_MAX;
 496         if (IS_IDR(s))
 497             ff_hevc_clear_refs(s);
 498     }
 499     if (IS_IRAP(s))
 500         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 501
 502     sh->pps_id = get_ue_golomb_long(gb);
 503     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 504         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 505         return AVERROR_INVALIDDATA;
 506     }
 507     if (!sh->first_slice_in_pic_flag &&
 508         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 509         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 510         return AVERROR_INVALIDDATA;
 511     }
 512     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 513
 514     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 515         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 516
 517         ff_hevc_clear_refs(s);
 518         ret = set_sps(s, s->ps.sps);
 519         if (ret < 0)
 520             return ret;
 521
 522         s->seq_decode = (s->seq_decode + 1) & 0xff;
 523         s->max_ra     = INT_MAX;
 524     }
 525
 526     sh->dependent_slice_segment_flag = 0;
 527     if (!sh->first_slice_in_pic_flag) {
 528         int slice_address_length;
 529
 530         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 531             sh->dependent_slice_segment_flag = get_bits1(gb);
 532
 533         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 534                                             s->ps.sps->ctb_height);
 535         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 536         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 537             av_log(s->avctx, AV_LOG_ERROR,
 538                    "Invalid slice segment address: %u.\n",
 539                    sh->slice_segment_addr);
 540             return AVERROR_INVALIDDATA;
 541         }
 542
 543         if (!sh->dependent_slice_segment_flag) {
 544             sh->slice_addr = sh->slice_segment_addr;
 545             s->slice_idx++;
 546         }
 547     } else {
 548         sh->slice_segment_addr = sh->slice_addr = 0;
 549         s->slice_idx           = 0;
 550         s->slice_initialized   = 0;
 551     }
 552
 553     if (!sh->dependent_slice_segment_flag) {
 554         s->slice_initialized = 0;
 555
 556         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 557             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 558
 559         sh->slice_type = get_ue_golomb_long(gb);
 560         if (!(sh->slice_type == I_SLICE ||
 561               sh->slice_type == P_SLICE ||
 562               sh->slice_type == B_SLICE)) {
 563             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 564                    sh->slice_type);
 565             return AVERROR_INVALIDDATA;
 566         }
 567         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 568             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 569             return AVERROR_INVALIDDATA;
 570         }
 571
 572         // when flag is not present, picture is inferred to be output
 573         sh->pic_output_flag = 1;
 574         if (s->ps.pps->output_flag_present_flag)
 575             sh->pic_output_flag = get_bits1(gb);
 576
 577         if (s->ps.sps->separate_colour_plane_flag)
 578             sh->colour_plane_id = get_bits(gb, 2);
 579
 580         if (!IS_IDR(s)) {
 581             int poc;
 582
 583             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 584             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 585             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 586                 av_log(s->avctx, AV_LOG_WARNING,
 587                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 588                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 589                     return AVERROR_INVALIDDATA;
 590                 poc = s->poc;
 591             }
 592             s->poc = poc;
 593
 594             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 595             if (!sh->short_term_ref_pic_set_sps_flag) {
 596                 int pos = get_bits_left(gb);
 597                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 598                 if (ret < 0)
 599                     return ret;
 600
 601                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 602                 sh->short_term_rps = &sh->slice_rps;
 603             } else {
 604                 int numbits, rps_idx;
 605
 606                 if (!s->ps.sps->nb_st_rps) {
 607                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 608                     return AVERROR_INVALIDDATA;
 609                 }
 610
 611                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 612                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 613                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 614             }
 615
 616             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 617             if (ret < 0) {
 618                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 619                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 620                     return AVERROR_INVALIDDATA;
 621             }
 622
 623             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 624                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 625             else
 626                 sh->slice_temporal_mvp_enabled_flag = 0;
 627         } else {
 628             s->sh.short_term_rps = NULL;
 629             s->poc               = 0;
 630         }
 631
 632         /* 8.3.1 */
 633         if (s->temporal_id == 0 &&
 634             s->nal_unit_type != NAL_TRAIL_N &&
 635             s->nal_unit_type != NAL_TSA_N   &&
 636             s->nal_unit_type != NAL_STSA_N  &&
 637             s->nal_unit_type != NAL_RADL_N  &&
 638             s->nal_unit_type != NAL_RADL_R  &&
 639             s->nal_unit_type != NAL_RASL_N  &&
 640             s->nal_unit_type != NAL_RASL_R)
 641             s->pocTid0 = s->poc;
 642
 643         if (s->ps.sps->sao_enabled) {
 644             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 645             sh->slice_sample_adaptive_offset_flag[1] =
 646             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 647         } else {
 648             sh->slice_sample_adaptive_offset_flag[0] = 0;
 649             sh->slice_sample_adaptive_offset_flag[1] = 0;
 650             sh->slice_sample_adaptive_offset_flag[2] = 0;
 651         }
 652
 653         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 654         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 655             int nb_refs;
 656
 657             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 658             if (sh->slice_type == B_SLICE)
 659                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 660
 661             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 662                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 663                 if (sh->slice_type == B_SLICE)
 664                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 665             }
 666             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 667                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 668                        sh->nb_refs[L0], sh->nb_refs[L1]);
 669                 return AVERROR_INVALIDDATA;
 670             }
 671
 672             sh->rpl_modification_flag[0] = 0;
 673             sh->rpl_modification_flag[1] = 0;
 674             nb_refs = ff_hevc_frame_nb_refs(s);
 675             if (!nb_refs) {
 676                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 677                 return AVERROR_INVALIDDATA;
 678             }
 679
 680             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 681                 sh->rpl_modification_flag[0] = get_bits1(gb);
 682                 if (sh->rpl_modification_flag[0]) {
 683                     for (i = 0; i < sh->nb_refs[L0]; i++)
 684                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 685                 }
 686
 687                 if (sh->slice_type == B_SLICE) {
 688                     sh->rpl_modification_flag[1] = get_bits1(gb);
 689                     if (sh->rpl_modification_flag[1] == 1)
 690                         for (i = 0; i < sh->nb_refs[L1]; i++)
 691                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 692                 }
 693             }
 694
 695             if (sh->slice_type == B_SLICE)
 696                 sh->mvd_l1_zero_flag = get_bits1(gb);
 697
 698             if (s->ps.pps->cabac_init_present_flag)
 699                 sh->cabac_init_flag = get_bits1(gb);
 700             else
 701                 sh->cabac_init_flag = 0;
 702
 703             sh->collocated_ref_idx = 0;
 704             if (sh->slice_temporal_mvp_enabled_flag) {
 705                 sh->collocated_list = L0;
 706                 if (sh->slice_type == B_SLICE)
 707                     sh->collocated_list = !get_bits1(gb);
 708
 709                 if (sh->nb_refs[sh->collocated_list] > 1) {
 710                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 711                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 712                         av_log(s->avctx, AV_LOG_ERROR,
 713                                "Invalid collocated_ref_idx: %d.\n",
 714                                sh->collocated_ref_idx);
 715                         return AVERROR_INVALIDDATA;
 716                     }
 717                 }
 718             }
 719
 720             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 721                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 722                 pred_weight_table(s, gb);
 723             }
 724
 725             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 726             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 727                 av_log(s->avctx, AV_LOG_ERROR,
 728                        "Invalid number of merging MVP candidates: %d.\n",
 729                        sh->max_num_merge_cand);
 730                 return AVERROR_INVALIDDATA;
 731             }
 732         }
 733
 734         sh->slice_qp_delta = get_se_golomb(gb);
 735
 736         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 737             sh->slice_cb_qp_offset = get_se_golomb(gb);
 738             sh->slice_cr_qp_offset = get_se_golomb(gb);
 739         } else {
 740             sh->slice_cb_qp_offset = 0;
 741             sh->slice_cr_qp_offset = 0;
 742         }
 743
 744         if (s->ps.pps->deblocking_filter_control_present_flag) {
 745             int deblocking_filter_override_flag = 0;
 746
 747             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 748                 deblocking_filter_override_flag = get_bits1(gb);
 749
 750             if (deblocking_filter_override_flag) {
 751                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 752                 if (!sh->disable_deblocking_filter_flag) {
 753                     sh->beta_offset = get_se_golomb(gb) * 2;
 754                     sh->tc_offset   = get_se_golomb(gb) * 2;
 755                 }
 756             } else {
 757                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 758                 sh->beta_offset                    = s->ps.pps->beta_offset;
 759                 sh->tc_offset                      = s->ps.pps->tc_offset;
 760             }
 761         } else {
 762             sh->disable_deblocking_filter_flag = 0;
 763             sh->beta_offset                    = 0;
 764             sh->tc_offset                      = 0;
 765         }
 766
 767         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 768             (sh->slice_sample_adaptive_offset_flag[0] ||
 769              sh->slice_sample_adaptive_offset_flag[1] ||
 770              !sh->disable_deblocking_filter_flag)) {
 771             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 772         } else {
 773             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 774         }
 775     } else if (!s->slice_initialized) {
 776         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 777         return AVERROR_INVALIDDATA;
 778     }
 779
 780     sh->num_entry_point_offsets = 0;
 781     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 782         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 783         if (sh->num_entry_point_offsets > 0) {
 784             int offset_len = get_ue_golomb_long(gb) + 1;
 785
 786             for (i = 0; i < sh->num_entry_point_offsets; i++)
 787                 skip_bits(gb, offset_len);
 788         }
 789     }
 790
 791     if (s->ps.pps->slice_header_extension_present_flag) {
 792         unsigned int length = get_ue_golomb_long(gb);
 793         for (i = 0; i < length; i++)
 794             skip_bits(gb, 8);  // slice_header_extension_data_byte
 795     }
 796
 797     // Inferred parameters
 798     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 799     if (sh->slice_qp > 51 ||
 800         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 801         av_log(s->avctx, AV_LOG_ERROR,
 802                "The slice_qp %d is outside the valid range "
 803                "[%d, 51].\n",
 804                sh->slice_qp,
 805                -s->ps.sps->qp_bd_offset);
 806         return AVERROR_INVALIDDATA;
 807     }
 808
 809     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 810
 811     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 812         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 813         return AVERROR_INVALIDDATA;
 814     }
 815
 816     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 817
 818     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 819         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 820                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 821
 822     s->slice_initialized = 1;
 823
 824     return 0;
 825 }
 826
 827 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 828
 829 #define SET_SAO(elem, value)                            \
 830 do {                                                    \
 831     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 832         sao->elem = value;                              \
 833     else if (sao_merge_left_flag)                       \
 834         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 835     else if (sao_merge_up_flag)                         \
 836         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 837     else                                                \
 838         sao->elem = 0;                                  \
 839 } while (0)
 840
 841 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 842 {
 843     HEVCLocalContext *lc    = &s->HEVClc;
 844     int sao_merge_left_flag = 0;
 845     int sao_merge_up_flag   = 0;
 846     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 847     SAOParams *sao          = &CTB(s->sao, rx, ry);
 848     int c_idx, i;
 849
 850     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 851         s->sh.slice_sample_adaptive_offset_flag[1]) {
 852         if (rx > 0) {
 853             if (lc->ctb_left_flag)
 854                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 855         }
 856         if (ry > 0 && !sao_merge_left_flag) {
 857             if (lc->ctb_up_flag)
 858                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 859         }
 860     }
 861
 862     for (c_idx = 0; c_idx < 3; c_idx++) {
 863         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 864             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 865             continue;
 866         }
 867
 868         if (c_idx == 2) {
 869             sao->type_idx[2] = sao->type_idx[1];
 870             sao->eo_class[2] = sao->eo_class[1];
 871         } else {
 872             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 873         }
 874
 875         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 876             continue;
 877
 878         for (i = 0; i < 4; i++)
 879             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 880
 881         if (sao->type_idx[c_idx] == SAO_BAND) {
 882             for (i = 0; i < 4; i++) {
 883                 if (sao->offset_abs[c_idx][i]) {
 884                     SET_SAO(offset_sign[c_idx][i],
 885                             ff_hevc_sao_offset_sign_decode(s));
 886                 } else {
 887                     sao->offset_sign[c_idx][i] = 0;
 888                 }
 889             }
 890             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 891         } else if (c_idx != 2) {
 892             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 893         }
 894
 895         // Inferred parameters
 896         sao->offset_val[c_idx][0] = 0;
 897         for (i = 0; i < 4; i++) {
 898             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 899             if (sao->type_idx[c_idx] == SAO_EDGE) {
 900                 if (i > 1)
 901                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 902             } else if (sao->offset_sign[c_idx][i]) {
 903                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 904             }
 905         }
 906     }
 907 }
 908
 909 #undef SET_SAO
 910 #undef CTB
 911
 912 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 913                                 int log2_trafo_size, enum ScanType scan_idx,
 914                                 int c_idx)
 915 {
 916 #define GET_COORD(offset, n)                                    \
 917     do {                                                        \
 918         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 919         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 920     } while (0)
 921     HEVCLocalContext *lc    = &s->HEVClc;
 922     int transform_skip_flag = 0;
 923
 924     int last_significant_coeff_x, last_significant_coeff_y;
 925     int last_scan_pos;
 926     int n_end;
 927     int num_coeff    = 0;
 928     int greater1_ctx = 1;
 929
 930     int num_last_subset;
 931     int x_cg_last_sig, y_cg_last_sig;
 932
 933     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 934
 935     ptrdiff_t stride = s->frame->linesize[c_idx];
 936     int hshift       = s->ps.sps->hshift[c_idx];
 937     int vshift       = s->ps.sps->vshift[c_idx];
 938     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 939                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 940     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 941     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 942
 943     int trafo_size = 1 << log2_trafo_size;
 944     int i, qp, shift, add, scale, scale_m;
 945     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 946     const uint8_t *scale_matrix;
 947     uint8_t dc_scale;
 948
 949     // Derive QP for dequant
 950     if (!lc->cu.cu_transquant_bypass_flag) {
 951         static const int qp_c[] = {
 952             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 953         };
 954
 955         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 956             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 957             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 958             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 959         };
 960
 961         static const uint8_t div6[51 + 2 * 6 + 1] = {
 962             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 963             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 964             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 965         };
 966         int qp_y = lc->qp_y;
 967
 968         if (c_idx == 0) {
 969             qp = qp_y + s->ps.sps->qp_bd_offset;
 970         } else {
 971             int qp_i, offset;
 972
 973             if (c_idx == 1)
 974                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 975             else
 976                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 977
 978             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 979             if (qp_i < 30)
 980                 qp = qp_i;
 981             else if (qp_i > 43)
 982                 qp = qp_i - 6;
 983             else
 984                 qp = qp_c[qp_i - 30];
 985
 986             qp += s->ps.sps->qp_bd_offset;
 987         }
 988
 989         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 990         add      = 1 << (shift - 1);
 991         scale    = level_scale[rem6[qp]] << (div6[qp]);
 992         scale_m  = 16; // default when no custom scaling lists.
 993         dc_scale = 16;
 994
 995         if (s->ps.sps->scaling_list_enable_flag) {
 996             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 997                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 998             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 999
1000             if (log2_trafo_size != 5)
1001                 matrix_id = 3 * matrix_id + c_idx;
1002
1003             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
1004             if (log2_trafo_size >= 4)
1005                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
1006         }
1007     }
1008
1009     if (s->ps.pps->transform_skip_enabled_flag &&
1010         !lc->cu.cu_transquant_bypass_flag   &&
1011         log2_trafo_size == 2) {
1012         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
1013     }
1014
1015     last_significant_coeff_x =
1016         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
1017     last_significant_coeff_y =
1018         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
1019
1020     if (last_significant_coeff_x > 3) {
1021         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
1022         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
1023                                    (2 + (last_significant_coeff_x & 1)) +
1024                                    suffix;
1025     }
1026
1027     if (last_significant_coeff_y > 3) {
1028         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1029         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1030                                    (2 + (last_significant_coeff_y & 1)) +
1031                                    suffix;
1032     }
1033
1034     if (scan_idx == SCAN_VERT)
1035         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1036
1037     x_cg_last_sig = last_significant_coeff_x >> 2;
1038     y_cg_last_sig = last_significant_coeff_y >> 2;
1039
1040     switch (scan_idx) {
1041     case SCAN_DIAG: {
1042         int last_x_c = last_significant_coeff_x & 3;
1043         int last_y_c = last_significant_coeff_y & 3;
1044
1045         scan_x_off = ff_hevc_diag_scan4x4_x;
1046         scan_y_off = ff_hevc_diag_scan4x4_y;
1047         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1048         if (trafo_size == 4) {
1049             scan_x_cg = scan_1x1;
1050             scan_y_cg = scan_1x1;
1051         } else if (trafo_size == 8) {
1052             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1053             scan_x_cg  = diag_scan2x2_x;
1054             scan_y_cg  = diag_scan2x2_y;
1055         } else if (trafo_size == 16) {
1056             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1057             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1058             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1059         } else { // trafo_size == 32
1060             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1061             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1062             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1063         }
1064         break;
1065     }
1066     case SCAN_HORIZ:
1067         scan_x_cg  = horiz_scan2x2_x;
1068         scan_y_cg  = horiz_scan2x2_y;
1069         scan_x_off = horiz_scan4x4_x;
1070         scan_y_off = horiz_scan4x4_y;
1071         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1072         break;
1073     default: //SCAN_VERT
1074         scan_x_cg  = horiz_scan2x2_y;
1075         scan_y_cg  = horiz_scan2x2_x;
1076         scan_x_off = horiz_scan4x4_y;
1077         scan_y_off = horiz_scan4x4_x;
1078         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1079         break;
1080     }
1081     num_coeff++;
1082     num_last_subset = (num_coeff - 1) >> 4;
1083
1084     for (i = num_last_subset; i >= 0; i--) {
1085         int n, m;
1086         int x_cg, y_cg, x_c, y_c;
1087         int implicit_non_zero_coeff = 0;
1088         int64_t trans_coeff_level;
1089         int prev_sig = 0;
1090         int offset   = i << 4;
1091
1092         uint8_t significant_coeff_flag_idx[16];
1093         uint8_t nb_significant_coeff_flag = 0;
1094
1095         x_cg = scan_x_cg[i];
1096         y_cg = scan_y_cg[i];
1097
1098         if (i < num_last_subset && i > 0) {
1099             int ctx_cg = 0;
1100             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1101                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1102             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1103                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1104
1105             significant_coeff_group_flag[x_cg][y_cg] =
1106                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1107             implicit_non_zero_coeff = 1;
1108         } else {
1109             significant_coeff_group_flag[x_cg][y_cg] =
1110                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1111                  (x_cg == 0 && y_cg == 0));
1112         }
1113
1114         last_scan_pos = num_coeff - offset - 1;
1115
1116         if (i == num_last_subset) {
1117             n_end                         = last_scan_pos - 1;
1118             significant_coeff_flag_idx[0] = last_scan_pos;
1119             nb_significant_coeff_flag     = 1;
1120         } else {
1121             n_end = 15;
1122         }
1123
1124         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1125             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1126         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1127             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1128
1129         for (n = n_end; n >= 0; n--) {
1130             GET_COORD(offset, n);
1131
1132             if (significant_coeff_group_flag[x_cg][y_cg] &&
1133                 (n > 0 || implicit_non_zero_coeff == 0)) {
1134                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1135                                                           log2_trafo_size,
1136                                                           scan_idx,
1137                                                           prev_sig) == 1) {
1138                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1139                     nb_significant_coeff_flag++;
1140                     implicit_non_zero_coeff = 0;
1141                 }
1142             } else {
1143                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1144                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1145                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1146                     nb_significant_coeff_flag++;
1147                 }
1148             }
1149         }
1150
1151         n_end = nb_significant_coeff_flag;
1152
1153         if (n_end) {
1154             int first_nz_pos_in_cg = 16;
1155             int last_nz_pos_in_cg = -1;
1156             int c_rice_param = 0;
1157             int first_greater1_coeff_idx = -1;
1158             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1159             uint16_t coeff_sign_flag;
1160             int sum_abs = 0;
1161             int sign_hidden = 0;
1162
1163             // initialize first elem of coeff_bas_level_greater1_flag
1164             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1165
1166             if (!(i == num_last_subset) && greater1_ctx == 0)
1167                 ctx_set++;
1168             greater1_ctx      = 1;
1169             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1170
1171             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1172                 int n_idx = significant_coeff_flag_idx[m];
1173                 int inc   = (ctx_set << 2) + greater1_ctx;
1174                 coeff_abs_level_greater1_flag[n_idx] =
1175                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1176                 if (coeff_abs_level_greater1_flag[n_idx]) {
1177                     greater1_ctx = 0;
1178                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1179                     greater1_ctx++;
1180                 }
1181
1182                 if (coeff_abs_level_greater1_flag[n_idx] &&
1183                     first_greater1_coeff_idx == -1)
1184                     first_greater1_coeff_idx = n_idx;
1185             }
1186             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1187             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1188                                  !lc->cu.cu_transquant_bypass_flag;
1189
1190             if (first_greater1_coeff_idx != -1) {
1191                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1192             }
1193             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1194                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1195             } else {
1196                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1197             }
1198
1199             for (m = 0; m < n_end; m++) {
1200                 n = significant_coeff_flag_idx[m];
1201                 GET_COORD(offset, n);
1202                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1203                 if (trans_coeff_level == ((m < 8) ?
1204                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1205                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1206
1207                     trans_coeff_level += last_coeff_abs_level_remaining;
1208                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1209                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1210                 }
1211                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1212                     sum_abs += trans_coeff_level;
1213                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1214                         trans_coeff_level = -trans_coeff_level;
1215                 }
1216                 if (coeff_sign_flag >> 15)
1217                     trans_coeff_level = -trans_coeff_level;
1218                 coeff_sign_flag <<= 1;
1219                 if (!lc->cu.cu_transquant_bypass_flag) {
1220                     if (s->ps.sps->scaling_list_enable_flag) {
1221                         if (y_c || x_c || log2_trafo_size < 4) {
1222                             int pos;
1223                             switch (log2_trafo_size) {
1224                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1225                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1226                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1227                             default: pos = (y_c        << 2) +  x_c;
1228                             }
1229                             scale_m = scale_matrix[pos];
1230                         } else {
1231                             scale_m = dc_scale;
1232                         }
1233                     }
1234                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1235                     if(trans_coeff_level < 0) {
1236                         if((~trans_coeff_level) & 0xFffffffffff8000)
1237                             trans_coeff_level = -32768;
1238                     } else {
1239                         if (trans_coeff_level & 0xffffffffffff8000)
1240                             trans_coeff_level = 32767;
1241                     }
1242                 }
1243                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1244             }
1245         }
1246     }
1247
1248     if (lc->cu.cu_transquant_bypass_flag) {
1249         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1250     } else {
1251         if (transform_skip_flag)
1252             s->hevcdsp.transform_skip(dst, coeffs, stride);
1253         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1254                  log2_trafo_size == 2)
1255             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1256         else
1257             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1258     }
1259 }
1260
1261 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1262                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1263                               int log2_cb_size, int log2_trafo_size,
1264                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1265 {
1266     HEVCLocalContext *lc = &s->HEVClc;
1267
1268     if (lc->cu.pred_mode == MODE_INTRA) {
1269         int trafo_size = 1 << log2_trafo_size;
1270         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1271
1272         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1273         if (log2_trafo_size > 2) {
1274             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1275             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1276             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1277             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1278         } else if (blk_idx == 3) {
1279             trafo_size = trafo_size << s->ps.sps->hshift[1];
1280             ff_hevc_set_neighbour_available(s, xBase, yBase,
1281                                             trafo_size, trafo_size);
1282             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1283             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1284         }
1285     }
1286
1287     if (cbf_luma || cbf_cb || cbf_cr) {
1288         int scan_idx   = SCAN_DIAG;
1289         int scan_idx_c = SCAN_DIAG;
1290
1291         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1292             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1293             if (lc->tu.cu_qp_delta != 0)
1294                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1295                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1296             lc->tu.is_cu_qp_delta_coded = 1;
1297
1298             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1299                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1300                 av_log(s->avctx, AV_LOG_ERROR,
1301                        "The cu_qp_delta %d is outside the valid range "
1302                        "[%d, %d].\n",
1303                        lc->tu.cu_qp_delta,
1304                        -(26 + s->ps.sps->qp_bd_offset / 2),
1305                         (25 + s->ps.sps->qp_bd_offset / 2));
1306                 return AVERROR_INVALIDDATA;
1307             }
1308
1309             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1310         }
1311
1312         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1313             if (lc->tu.cur_intra_pred_mode >= 6 &&
1314                 lc->tu.cur_intra_pred_mode <= 14) {
1315                 scan_idx = SCAN_VERT;
1316             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1317                        lc->tu.cur_intra_pred_mode <= 30) {
1318                 scan_idx = SCAN_HORIZ;
1319             }
1320
1321             if (lc->pu.intra_pred_mode_c >=  6 &&
1322                 lc->pu.intra_pred_mode_c <= 14) {
1323                 scan_idx_c = SCAN_VERT;
1324             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1325                        lc->pu.intra_pred_mode_c <= 30) {
1326                 scan_idx_c = SCAN_HORIZ;
1327             }
1328         }
1329
1330         if (cbf_luma)
1331             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1332         if (log2_trafo_size > 2) {
1333             if (cbf_cb)
1334                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1335             if (cbf_cr)
1336                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1337         } else if (blk_idx == 3) {
1338             if (cbf_cb)
1339                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1340             if (cbf_cr)
1341                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1342         }
1343     }
1344     return 0;
1345 }
1346
1347 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1348 {
1349     int cb_size          = 1 << log2_cb_size;
1350     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1351
1352     int min_pu_width     = s->ps.sps->min_pu_width;
1353     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1354     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1355     int i, j;
1356
1357     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1358         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1359             s->is_pcm[i + j * min_pu_width] = 2;
1360 }
1361
1362 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1363                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1364                               int log2_cb_size, int log2_trafo_size,
1365                               int trafo_depth, int blk_idx,
1366                               int cbf_cb, int cbf_cr)
1367 {
1368     HEVCLocalContext *lc = &s->HEVClc;
1369     uint8_t split_transform_flag;
1370     int ret;
1371
1372     if (lc->cu.intra_split_flag) {
1373         if (trafo_depth == 1)
1374             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1375     } else {
1376         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1377     }
1378
1379     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1380         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1381         trafo_depth     < lc->cu.max_trafo_depth       &&
1382         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1383         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1384     } else {
1385         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1386                           lc->cu.pred_mode == MODE_INTER &&
1387                           lc->cu.part_mode != PART_2Nx2N &&
1388                           trafo_depth == 0;
1389
1390         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1391                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1392                                inter_split;
1393     }
1394
1395     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1396         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1397     else if (log2_trafo_size > 2 || trafo_depth == 0)
1398         cbf_cb = 0;
1399     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1400         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1401     else if (log2_trafo_size > 2 || trafo_depth == 0)
1402         cbf_cr = 0;
1403
1404     if (split_transform_flag) {
1405         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1406         const int x1 = x0 + trafo_size_split;
1407         const int y1 = y0 + trafo_size_split;
1408
1409 #define SUBDIVIDE(x, y, idx)                                                    \
1410 do {                                                                            \
1411     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1412                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1413                              cbf_cb, cbf_cr);                                   \
1414     if (ret < 0)                                                                \
1415         return ret;                                                             \
1416 } while (0)
1417
1418         SUBDIVIDE(x0, y0, 0);
1419         SUBDIVIDE(x1, y0, 1);
1420         SUBDIVIDE(x0, y1, 2);
1421         SUBDIVIDE(x1, y1, 3);
1422
1423 #undef SUBDIVIDE
1424     } else {
1425         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1426         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1427         int min_tu_width     = s->ps.sps->min_tb_width;
1428         int cbf_luma         = 1;
1429
1430         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1431             cbf_cb || cbf_cr)
1432             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1433
1434         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1435                                  log2_cb_size, log2_trafo_size,
1436                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1437         if (ret < 0)
1438             return ret;
1439         // TODO: store cbf_luma somewhere else
1440         if (cbf_luma) {
1441             int i, j;
1442             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1443                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1444                     int x_tu = (x0 + j) >> log2_min_tu_size;
1445                     int y_tu = (y0 + i) >> log2_min_tu_size;
1446                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1447                 }
1448         }
1449         if (!s->sh.disable_deblocking_filter_flag) {
1450             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1451             if (s->ps.pps->transquant_bypass_enable_flag &&
1452                 lc->cu.cu_transquant_bypass_flag)
1453                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1454         }
1455     }
1456     return 0;
1457 }
1458
1459 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1460 {
1461     //TODO: non-4:2:0 support
1462     HEVCLocalContext *lc = &s->HEVClc;
1463     GetBitContext gb;
1464     int cb_size   = 1 << log2_cb_size;
1465     int stride0   = s->frame->linesize[0];
1466     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1467     int   stride1 = s->frame->linesize[1];
1468     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1469     int   stride2 = s->frame->linesize[2];
1470     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1471
1472     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1473     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1474     int ret;
1475
1476     if (!s->sh.disable_deblocking_filter_flag)
1477         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1478
1479     ret = init_get_bits(&gb, pcm, length);
1480     if (ret < 0)
1481         return ret;
1482
1483     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1484     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1485     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1486     return 0;
1487 }
1488
1489 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1490 {
1491     HEVCLocalContext *lc = &s->HEVClc;
1492     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1493     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1494
1495     if (x)
1496         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1497     if (y)
1498         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1499
1500     switch (x) {
1501     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1502     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1503     case 0: lc->pu.mvd.x = 0;                               break;
1504     }
1505
1506     switch (y) {
1507     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1508     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1509     case 0: lc->pu.mvd.y = 0;                               break;
1510     }
1511 }
1512
1513 /**
1514  * 8.5.3.2.2.1 Luma sample interpolation process
1515  *
1516  * @param s HEVC decoding context
1517  * @param dst target buffer for block data at block position
1518  * @param dststride stride of the dst buffer
1519  * @param ref reference picture buffer at origin (0, 0)
1520  * @param mv motion vector (relative to block position) to get pixel data from
1521  * @param x_off horizontal position of block from origin (0, 0)
1522  * @param y_off vertical position of block from origin (0, 0)
1523  * @param block_w width of block
1524  * @param block_h height of block
1525  */
1526 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1527                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1528                     int block_w, int block_h)
1529 {
1530     HEVCLocalContext *lc = &s->HEVClc;
1531     uint8_t *src         = ref->data[0];
1532     ptrdiff_t srcstride  = ref->linesize[0];
1533     int pic_width        = s->ps.sps->width;
1534     int pic_height       = s->ps.sps->height;
1535
1536     int mx         = mv->x & 3;
1537     int my         = mv->y & 3;
1538     int extra_left = ff_hevc_qpel_extra_before[mx];
1539     int extra_top  = ff_hevc_qpel_extra_before[my];
1540
1541     x_off += mv->x >> 2;
1542     y_off += mv->y >> 2;
1543     src   += y_off * srcstride + (x_off << s->ps.sps->pixel_shift);
1544
1545     if (x_off < extra_left || y_off < extra_top ||
1546         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1547         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1548         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1549         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1550         int buf_offset = extra_top *
1551                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1552
1553         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1554                                  edge_emu_stride, srcstride,
1555                                  block_w + ff_hevc_qpel_extra[mx],
1556                                  block_h + ff_hevc_qpel_extra[my],
1557                                  x_off - extra_left, y_off - extra_top,
1558                                  pic_width, pic_height);
1559         src = lc->edge_emu_buffer + buf_offset;
1560         srcstride = edge_emu_stride;
1561     }
1562     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1563                                      block_h, lc->mc_buffer);
1564 }
1565
1566 /**
1567  * 8.5.3.2.2.2 Chroma sample interpolation process
1568  *
1569  * @param s HEVC decoding context
1570  * @param dst1 target buffer for block data at block position (U plane)
1571  * @param dst2 target buffer for block data at block position (V plane)
1572  * @param dststride stride of the dst1 and dst2 buffers
1573  * @param ref reference picture buffer at origin (0, 0)
1574  * @param mv motion vector (relative to block position) to get pixel data from
1575  * @param x_off horizontal position of block from origin (0, 0)
1576  * @param y_off vertical position of block from origin (0, 0)
1577  * @param block_w width of block
1578  * @param block_h height of block
1579  */
1580 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1581                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1582                       int x_off, int y_off, int block_w, int block_h)
1583 {
1584     HEVCLocalContext *lc = &s->HEVClc;
1585     uint8_t *src1        = ref->data[1];
1586     uint8_t *src2        = ref->data[2];
1587     ptrdiff_t src1stride = ref->linesize[1];
1588     ptrdiff_t src2stride = ref->linesize[2];
1589     int pic_width        = s->ps.sps->width >> 1;
1590     int pic_height       = s->ps.sps->height >> 1;
1591
1592     int mx = mv->x & 7;
1593     int my = mv->y & 7;
1594
1595     x_off += mv->x >> 3;
1596     y_off += mv->y >> 3;
1597     src1  += y_off * src1stride + (x_off << s->ps.sps->pixel_shift);
1598     src2  += y_off * src2stride + (x_off << s->ps.sps->pixel_shift);
1599
1600     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1601         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1602         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1603         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1604         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1605         int buf_offset1 = EPEL_EXTRA_BEFORE *
1606                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1607         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1608         int buf_offset2 = EPEL_EXTRA_BEFORE *
1609                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1610
1611         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1612                                  edge_emu_stride, src1stride,
1613                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1614                                  x_off - EPEL_EXTRA_BEFORE,
1615                                  y_off - EPEL_EXTRA_BEFORE,
1616                                  pic_width, pic_height);
1617
1618         src1 = lc->edge_emu_buffer + buf_offset1;
1619         src1stride = edge_emu_stride;
1620         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1621                                              block_w, block_h, mx, my, lc->mc_buffer);
1622
1623         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1624                                  edge_emu_stride, src2stride,
1625                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1626                                  x_off - EPEL_EXTRA_BEFORE,
1627                                  y_off - EPEL_EXTRA_BEFORE,
1628                                  pic_width, pic_height);
1629         src2 = lc->edge_emu_buffer + buf_offset2;
1630         src2stride = edge_emu_stride;
1631
1632         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1633                                              block_w, block_h, mx, my,
1634                                              lc->mc_buffer);
1635     } else {
1636         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1637                                              block_w, block_h, mx, my,
1638                                              lc->mc_buffer);
1639         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1640                                              block_w, block_h, mx, my,
1641                                              lc->mc_buffer);
1642     }
1643 }
1644
1645 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1646                                 const Mv *mv, int y0, int height)
1647 {
1648     int y = (mv->y >> 2) + y0 + height + 9;
1649     ff_thread_await_progress(&ref->tf, y, 0);
1650 }
1651
1652 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1653                                   int nPbH, int log2_cb_size, int part_idx,
1654                                   int merge_idx, MvField *mv)
1655 {
1656     HEVCLocalContext *lc             = &s->HEVClc;
1657     enum InterPredIdc inter_pred_idc = PRED_L0;
1658     int mvp_flag;
1659
1660     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1661     if (s->sh.slice_type == B_SLICE)
1662         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1663
1664     if (inter_pred_idc != PRED_L1) {
1665         if (s->sh.nb_refs[L0])
1666             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1667
1668         mv->pred_flag[0] = 1;
1669         hls_mvd_coding(s, x0, y0, 0);
1670         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1671         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672                                  part_idx, merge_idx, mv, mvp_flag, 0);
1673         mv->mv[0].x += lc->pu.mvd.x;
1674         mv->mv[0].y += lc->pu.mvd.y;
1675     }
1676
1677     if (inter_pred_idc != PRED_L0) {
1678         if (s->sh.nb_refs[L1])
1679             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1680
1681         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1682             AV_ZERO32(&lc->pu.mvd);
1683         } else {
1684             hls_mvd_coding(s, x0, y0, 1);
1685         }
1686
1687         mv->pred_flag[1] = 1;
1688         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1689         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1690                                  part_idx, merge_idx, mv, mvp_flag, 1);
1691         mv->mv[1].x += lc->pu.mvd.x;
1692         mv->mv[1].y += lc->pu.mvd.y;
1693     }
1694 }
1695
1696 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1697                                 int nPbW, int nPbH,
1698                                 int log2_cb_size, int partIdx)
1699 {
1700 #define POS(c_idx, x, y)                                                              \
1701     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1702                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1703     HEVCLocalContext *lc = &s->HEVClc;
1704     int merge_idx = 0;
1705     struct MvField current_mv = {{{ 0 }}};
1706
1707     int min_pu_width = s->ps.sps->min_pu_width;
1708
1709     MvField *tab_mvf = s->ref->tab_mvf;
1710     RefPicList  *refPicList = s->ref->refPicList;
1711     HEVCFrame *ref0, *ref1;
1712
1713     int tmpstride = MAX_PB_SIZE;
1714
1715     uint8_t *dst0 = POS(0, x0, y0);
1716     uint8_t *dst1 = POS(1, x0, y0);
1717     uint8_t *dst2 = POS(2, x0, y0);
1718     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1719     int min_cb_width     = s->ps.sps->min_cb_width;
1720     int x_cb             = x0 >> log2_min_cb_size;
1721     int y_cb             = y0 >> log2_min_cb_size;
1722     int x_pu, y_pu;
1723     int i, j;
1724
1725     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1726
1727     if (!skip_flag)
1728         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1729
1730     if (skip_flag || lc->pu.merge_flag) {
1731         if (s->sh.max_num_merge_cand > 1)
1732             merge_idx = ff_hevc_merge_idx_decode(s);
1733         else
1734             merge_idx = 0;
1735
1736         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1737                                    partIdx, merge_idx, &current_mv);
1738     } else {
1739         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1740                               partIdx, merge_idx, &current_mv);
1741     }
1742
1743     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1744     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1745
1746     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1747         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1748             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1749
1750     if (current_mv.pred_flag[0]) {
1751         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1752         if (!ref0)
1753             return;
1754         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1755     }
1756     if (current_mv.pred_flag[1]) {
1757         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1758         if (!ref1)
1759             return;
1760         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1761     }
1762
1763     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1764         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1765         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1766
1767         luma_mc(s, tmp, tmpstride, ref0->frame,
1768                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1769
1770         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1771             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1772             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1773                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1774                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1775                                      dst0, s->frame->linesize[0], tmp,
1776                                      tmpstride, nPbW, nPbH);
1777         } else {
1778             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1779         }
1780         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1781                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1782
1783         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1784             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1785             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1786                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1787                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1788                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1789                                      nPbW / 2, nPbH / 2);
1790             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1791                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1792                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1793                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1794                                      nPbW / 2, nPbH / 2);
1795         } else {
1796             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1797             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1798         }
1799     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1800         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1801         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1802
1803         luma_mc(s, tmp, tmpstride, ref1->frame,
1804                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1805
1806         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1807             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1808             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1809                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1810                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1811                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1812                                       nPbW, nPbH);
1813         } else {
1814             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1815         }
1816
1817         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1818                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1819
1820         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1821             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1822             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1823                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1824                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1825                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1826             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1827                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1828                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1829                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1830         } else {
1831             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1832             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1833         }
1834     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1835         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1836         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1837         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1838         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1839
1840         luma_mc(s, tmp, tmpstride, ref0->frame,
1841                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1842         luma_mc(s, tmp2, tmpstride, ref1->frame,
1843                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1844
1845         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1846             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1847             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1848                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1849                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1850                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1851                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1852                                          dst0, s->frame->linesize[0],
1853                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1854         } else {
1855             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1856                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1857         }
1858
1859         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1860                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1861         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1862                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1863
1864         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1865             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1866             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1867                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1868                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1869                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1870                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1871                                          dst1, s->frame->linesize[1], tmp, tmp3,
1872                                          tmpstride, nPbW / 2, nPbH / 2);
1873             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1874                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1875                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1876                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1877                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1878                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1879                                          tmpstride, nPbW / 2, nPbH / 2);
1880         } else {
1881             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1882             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1883         }
1884     }
1885 }
1886
1887 /**
1888  * 8.4.1
1889  */
1890 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1891                                 int prev_intra_luma_pred_flag)
1892 {
1893     HEVCLocalContext *lc = &s->HEVClc;
1894     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1895     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1896     int min_pu_width     = s->ps.sps->min_pu_width;
1897     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1898     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1899     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1900
1901     int cand_up   = (lc->ctb_up_flag || y0b) ?
1902                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1903     int cand_left = (lc->ctb_left_flag || x0b) ?
1904                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1905
1906     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1907
1908     MvField *tab_mvf = s->ref->tab_mvf;
1909     int intra_pred_mode;
1910     int candidate[3];
1911     int i, j;
1912
1913     // intra_pred_mode prediction does not cross vertical CTB boundaries
1914     if ((y0 - 1) < y_ctb)
1915         cand_up = INTRA_DC;
1916
1917     if (cand_left == cand_up) {
1918         if (cand_left < 2) {
1919             candidate[0] = INTRA_PLANAR;
1920             candidate[1] = INTRA_DC;
1921             candidate[2] = INTRA_ANGULAR_26;
1922         } else {
1923             candidate[0] = cand_left;
1924             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1925             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1926         }
1927     } else {
1928         candidate[0] = cand_left;
1929         candidate[1] = cand_up;
1930         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1931             candidate[2] = INTRA_PLANAR;
1932         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1933             candidate[2] = INTRA_DC;
1934         } else {
1935             candidate[2] = INTRA_ANGULAR_26;
1936         }
1937     }
1938
1939     if (prev_intra_luma_pred_flag) {
1940         intra_pred_mode = candidate[lc->pu.mpm_idx];
1941     } else {
1942         if (candidate[0] > candidate[1])
1943             FFSWAP(uint8_t, candidate[0], candidate[1]);
1944         if (candidate[0] > candidate[2])
1945             FFSWAP(uint8_t, candidate[0], candidate[2]);
1946         if (candidate[1] > candidate[2])
1947             FFSWAP(uint8_t, candidate[1], candidate[2]);
1948
1949         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1950         for (i = 0; i < 3; i++)
1951             if (intra_pred_mode >= candidate[i])
1952                 intra_pred_mode++;
1953     }
1954
1955     /* write the intra prediction units into the mv array */
1956     if (!size_in_pus)
1957         size_in_pus = 1;
1958     for (i = 0; i < size_in_pus; i++) {
1959         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1960                intra_pred_mode, size_in_pus);
1961
1962         for (j = 0; j < size_in_pus; j++) {
1963             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1964             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1965             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1969             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1970             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1971             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1972         }
1973     }
1974
1975     return intra_pred_mode;
1976 }
1977
1978 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1979                                           int log2_cb_size, int ct_depth)
1980 {
1981     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1982     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1983     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1984     int y;
1985
1986     for (y = 0; y < length; y++)
1987         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1988                ct_depth, length);
1989 }
1990
1991 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1992                                   int log2_cb_size)
1993 {
1994     HEVCLocalContext *lc = &s->HEVClc;
1995     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1996     uint8_t prev_intra_luma_pred_flag[4];
1997     int split   = lc->cu.part_mode == PART_NxN;
1998     int pb_size = (1 << log2_cb_size) >> split;
1999     int side    = split + 1;
2000     int chroma_mode;
2001     int i, j;
2002
2003     for (i = 0; i < side; i++)
2004         for (j = 0; j < side; j++)
2005             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2006
2007     for (i = 0; i < side; i++) {
2008         for (j = 0; j < side; j++) {
2009             if (prev_intra_luma_pred_flag[2 * i + j])
2010                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2011             else
2012                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2013
2014             lc->pu.intra_pred_mode[2 * i + j] =
2015                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2016                                      prev_intra_luma_pred_flag[2 * i + j]);
2017         }
2018     }
2019
2020     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2021     if (chroma_mode != 4) {
2022         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2023             lc->pu.intra_pred_mode_c = 34;
2024         else
2025             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2026     } else {
2027         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2028     }
2029 }
2030
2031 static void intra_prediction_unit_default_value(HEVCContext *s,
2032                                                 int x0, int y0,
2033                                                 int log2_cb_size)
2034 {
2035     HEVCLocalContext *lc = &s->HEVClc;
2036     int pb_size          = 1 << log2_cb_size;
2037     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2038     int min_pu_width     = s->ps.sps->min_pu_width;
2039     MvField *tab_mvf     = s->ref->tab_mvf;
2040     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2041     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2042     int j, k;
2043
2044     if (size_in_pus == 0)
2045         size_in_pus = 1;
2046     for (j = 0; j < size_in_pus; j++) {
2047         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2048         for (k = 0; k < size_in_pus; k++)
2049             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2050     }
2051 }
2052
2053 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2054 {
2055     int cb_size          = 1 << log2_cb_size;
2056     HEVCLocalContext *lc = &s->HEVClc;
2057     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2058     int length           = cb_size >> log2_min_cb_size;
2059     int min_cb_width     = s->ps.sps->min_cb_width;
2060     int x_cb             = x0 >> log2_min_cb_size;
2061     int y_cb             = y0 >> log2_min_cb_size;
2062     int x, y, ret;
2063
2064     lc->cu.x                = x0;
2065     lc->cu.y                = y0;
2066     lc->cu.pred_mode        = MODE_INTRA;
2067     lc->cu.part_mode        = PART_2Nx2N;
2068     lc->cu.intra_split_flag = 0;
2069
2070     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2071     for (x = 0; x < 4; x++)
2072         lc->pu.intra_pred_mode[x] = 1;
2073     if (s->ps.pps->transquant_bypass_enable_flag) {
2074         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2075         if (lc->cu.cu_transquant_bypass_flag)
2076             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2077     } else
2078         lc->cu.cu_transquant_bypass_flag = 0;
2079
2080     if (s->sh.slice_type != I_SLICE) {
2081         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2082
2083         x = y_cb * min_cb_width + x_cb;
2084         for (y = 0; y < length; y++) {
2085             memset(&s->skip_flag[x], skip_flag, length);
2086             x += min_cb_width;
2087         }
2088         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2089     }
2090
2091     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2092         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2093         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2094
2095         if (!s->sh.disable_deblocking_filter_flag)
2096             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2097     } else {
2098         int pcm_flag = 0;
2099
2100         if (s->sh.slice_type != I_SLICE)
2101             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2102         if (lc->cu.pred_mode != MODE_INTRA ||
2103             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2104             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2105             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2106                                       lc->cu.pred_mode == MODE_INTRA;
2107         }
2108
2109         if (lc->cu.pred_mode == MODE_INTRA) {
2110             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2111                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2112                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2113                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2114             }
2115             if (pcm_flag) {
2116                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2117                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2118                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2119                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2120
2121                 if (ret < 0)
2122                     return ret;
2123             } else {
2124                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2125             }
2126         } else {
2127             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2128             switch (lc->cu.part_mode) {
2129             case PART_2Nx2N:
2130                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2131                 break;
2132             case PART_2NxN:
2133                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2134                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2135                 break;
2136             case PART_Nx2N:
2137                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2138                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2139                 break;
2140             case PART_2NxnU:
2141                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2142                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2143                 break;
2144             case PART_2NxnD:
2145                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2146                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2147                 break;
2148             case PART_nLx2N:
2149                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2150                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2151                 break;
2152             case PART_nRx2N:
2153                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2154                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2155                 break;
2156             case PART_NxN:
2157                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2158                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2159                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2160                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2161                 break;
2162             }
2163         }
2164
2165         if (!pcm_flag) {
2166             int rqt_root_cbf = 1;
2167
2168             if (lc->cu.pred_mode != MODE_INTRA &&
2169                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2170                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2171             }
2172             if (rqt_root_cbf) {
2173                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2174                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2175                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2176                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2177                                          log2_cb_size,
2178                                          log2_cb_size, 0, 0, 0, 0);
2179                 if (ret < 0)
2180                     return ret;
2181             } else {
2182                 if (!s->sh.disable_deblocking_filter_flag)
2183                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2184             }
2185         }
2186     }
2187
2188     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2189         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2190
2191     x = y_cb * min_cb_width + x_cb;
2192     for (y = 0; y < length; y++) {
2193         memset(&s->qp_y_tab[x], lc->qp_y, length);
2194         x += min_cb_width;
2195     }
2196
2197     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2198
2199     return 0;
2200 }
2201
2202 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2203                                int log2_cb_size, int cb_depth)
2204 {
2205     HEVCLocalContext *lc = &s->HEVClc;
2206     const int cb_size    = 1 << log2_cb_size;
2207     int split_cu;
2208
2209     lc->ct.depth = cb_depth;
2210     if (x0 + cb_size <= s->ps.sps->width  &&
2211         y0 + cb_size <= s->ps.sps->height &&
2212         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2213         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2214     } else {
2215         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2216     }
2217     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2218         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2219         lc->tu.is_cu_qp_delta_coded = 0;
2220         lc->tu.cu_qp_delta          = 0;
2221     }
2222
2223     if (split_cu) {
2224         const int cb_size_split = cb_size >> 1;
2225         const int x1 = x0 + cb_size_split;
2226         const int y1 = y0 + cb_size_split;
2227
2228         log2_cb_size--;
2229         cb_depth++;
2230
2231 #define SUBDIVIDE(x, y)                                                \
2232 do {                                                                   \
2233     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2234         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2235         if (ret < 0)                                                   \
2236             return ret;                                                \
2237     }                                                                  \
2238 } while (0)
2239
2240         SUBDIVIDE(x0, y0);
2241         SUBDIVIDE(x1, y0);
2242         SUBDIVIDE(x0, y1);
2243         SUBDIVIDE(x1, y1);
2244     } else {
2245         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2246         if (ret < 0)
2247             return ret;
2248     }
2249
2250     return 0;
2251 }
2252
2253 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2254                                  int ctb_addr_ts)
2255 {
2256     HEVCLocalContext *lc  = &s->HEVClc;
2257     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2258     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2259     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2260
2261     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2262
2263     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2264         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2265             lc->first_qp_group = 1;
2266         lc->end_of_tiles_x = s->ps.sps->width;
2267     } else if (s->ps.pps->tiles_enabled_flag) {
2268         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2269             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2270             lc->start_of_tiles_x = x_ctb;
2271             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2272             lc->first_qp_group   = 1;
2273         }
2274     } else {
2275         lc->end_of_tiles_x = s->ps.sps->width;
2276     }
2277
2278     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2279
2280     lc->boundary_flags = 0;
2281     if (s->ps.pps->tiles_enabled_flag) {
2282         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2283             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2284         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2285             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2286         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2287             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2288         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2289             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2290     } else {
2291         if (!ctb_addr_in_slice > 0)
2292             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2293         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2294             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2295     }
2296
2297     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2298     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2299     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2300     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2301 }
2302
2303 static int hls_slice_data(HEVCContext *s)
2304 {
2305     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2306     int more_data   = 1;
2307     int x_ctb       = 0;
2308     int y_ctb       = 0;
2309     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2310     int ret;
2311
2312     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2313         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2314
2315         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2316         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2317         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2318
2319         ff_hevc_cabac_init(s, ctb_addr_ts);
2320
2321         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2322
2323         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2324         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2325         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2326
2327         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2328         if (ret < 0)
2329             return ret;
2330         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2331
2332         ctb_addr_ts++;
2333         ff_hevc_save_states(s, ctb_addr_ts);
2334         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2335     }
2336
2337     if (x_ctb + ctb_size >= s->ps.sps->width &&
2338         y_ctb + ctb_size >= s->ps.sps->height)
2339         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2340
2341     return ctb_addr_ts;
2342 }
2343
2344 static void restore_tqb_pixels(HEVCContext *s)
2345 {
2346     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2347     int x, y, c_idx;
2348
2349     for (c_idx = 0; c_idx < 3; c_idx++) {
2350         ptrdiff_t stride = s->frame->linesize[c_idx];
2351         int hshift       = s->ps.sps->hshift[c_idx];
2352         int vshift       = s->ps.sps->vshift[c_idx];
2353         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2354             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2355                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2356                     int n;
2357                     int len      = min_pu_size >> hshift;
2358                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2359                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2360                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2361                         memcpy(dst, src, len);
2362                         src += stride;
2363                         dst += stride;
2364                     }
2365                 }
2366             }
2367         }
2368     }
2369 }
2370
2371 static int set_side_data(HEVCContext *s)
2372 {
2373     AVFrame *out = s->ref->frame;
2374
2375     if (s->sei_frame_packing_present &&
2376         s->frame_packing_arrangement_type >= 3 &&
2377         s->frame_packing_arrangement_type <= 5 &&
2378         s->content_interpretation_type > 0 &&
2379         s->content_interpretation_type < 3) {
2380         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2381         if (!stereo)
2382             return AVERROR(ENOMEM);
2383
2384         switch (s->frame_packing_arrangement_type) {
2385         case 3:
2386             if (s->quincunx_subsampling)
2387                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2388             else
2389                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2390             break;
2391         case 4:
2392             stereo->type = AV_STEREO3D_TOPBOTTOM;
2393             break;
2394         case 5:
2395             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2396             break;
2397         }
2398
2399         if (s->content_interpretation_type == 2)
2400             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2401     }
2402
2403     if (s->sei_display_orientation_present &&
2404         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2405         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2406         AVFrameSideData *rotation = av_frame_new_side_data(out,
2407                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2408                                                            sizeof(int32_t) * 9);
2409         if (!rotation)
2410             return AVERROR(ENOMEM);
2411
2412         av_display_rotation_set((int32_t *)rotation->data, angle);
2413         av_display_matrix_flip((int32_t *)rotation->data,
2414                                s->sei_hflip, s->sei_vflip);
2415     }
2416
2417     return 0;
2418 }
2419
2420 static int hevc_frame_start(HEVCContext *s)
2421 {
2422     HEVCLocalContext *lc = &s->HEVClc;
2423     int ret;
2424
2425     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2426     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2427     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2428     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2429
2430     lc->start_of_tiles_x = 0;
2431     s->is_decoded        = 0;
2432     s->first_nal_type    = s->nal_unit_type;
2433
2434     if (s->ps.pps->tiles_enabled_flag)
2435         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2436
2437     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2438                               s->poc);
2439     if (ret < 0)
2440         goto fail;
2441
2442     ret = ff_hevc_frame_rps(s);
2443     if (ret < 0) {
2444         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2445         goto fail;
2446     }
2447
2448     s->ref->frame->key_frame = IS_IRAP(s);
2449
2450     ret = set_side_data(s);
2451     if (ret < 0)
2452         goto fail;
2453
2454     av_frame_unref(s->output_frame);
2455     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2456     if (ret < 0)
2457         goto fail;
2458
2459     ff_thread_finish_setup(s->avctx);
2460
2461     return 0;
2462
2463 fail:
2464     if (s->ref)
2465         ff_hevc_unref_frame(s, s->ref, ~0);
2466     s->ref = NULL;
2467     return ret;
2468 }
2469
2470 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2471 {
2472     HEVCLocalContext *lc = &s->HEVClc;
2473     GetBitContext *gb    = &lc->gb;
2474     int ctb_addr_ts, ret;
2475
2476     *gb              = nal->gb;
2477     s->nal_unit_type = nal->type;
2478     s->temporal_id   = nal->temporal_id;
2479
2480     switch (s->nal_unit_type) {
2481     case NAL_VPS:
2482         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2483         if (ret < 0)
2484             goto fail;
2485         break;
2486     case NAL_SPS:
2487         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2488                                      s->apply_defdispwin);
2489         if (ret < 0)
2490             goto fail;
2491         break;
2492     case NAL_PPS:
2493         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2494         if (ret < 0)
2495             goto fail;
2496         break;
2497     case NAL_SEI_PREFIX:
2498     case NAL_SEI_SUFFIX:
2499         ret = ff_hevc_decode_nal_sei(s);
2500         if (ret < 0)
2501             goto fail;
2502         break;
2503     case NAL_TRAIL_R:
2504     case NAL_TRAIL_N:
2505     case NAL_TSA_N:
2506     case NAL_TSA_R:
2507     case NAL_STSA_N:
2508     case NAL_STSA_R:
2509     case NAL_BLA_W_LP:
2510     case NAL_BLA_W_RADL:
2511     case NAL_BLA_N_LP:
2512     case NAL_IDR_W_RADL:
2513     case NAL_IDR_N_LP:
2514     case NAL_CRA_NUT:
2515     case NAL_RADL_N:
2516     case NAL_RADL_R:
2517     case NAL_RASL_N:
2518     case NAL_RASL_R:
2519         ret = hls_slice_header(s);
2520         if (ret < 0)
2521             return ret;
2522
2523         if (s->max_ra == INT_MAX) {
2524             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2525                 s->max_ra = s->poc;
2526             } else {
2527                 if (IS_IDR(s))
2528                     s->max_ra = INT_MIN;
2529             }
2530         }
2531
2532         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2533             s->poc <= s->max_ra) {
2534             s->is_decoded = 0;
2535             break;
2536         } else {
2537             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2538                 s->max_ra = INT_MIN;
2539         }
2540
2541         if (s->sh.first_slice_in_pic_flag) {
2542             ret = hevc_frame_start(s);
2543             if (ret < 0)
2544                 return ret;
2545         } else if (!s->ref) {
2546             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2547             goto fail;
2548         }
2549
2550         if (s->nal_unit_type != s->first_nal_type) {
2551             av_log(s->avctx, AV_LOG_ERROR,
2552                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2553                    s->first_nal_type, s->nal_unit_type);
2554             return AVERROR_INVALIDDATA;
2555         }
2556
2557         if (!s->sh.dependent_slice_segment_flag &&
2558             s->sh.slice_type != I_SLICE) {
2559             ret = ff_hevc_slice_rpl(s);
2560             if (ret < 0) {
2561                 av_log(s->avctx, AV_LOG_WARNING,
2562                        "Error constructing the reference lists for the current slice.\n");
2563                 goto fail;
2564             }
2565         }
2566
2567         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2568             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2569             if (ret < 0)
2570                 goto fail;
2571         }
2572
2573         if (s->avctx->hwaccel) {
2574             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2575             if (ret < 0)
2576                 goto fail;
2577         } else {
2578             ctb_addr_ts = hls_slice_data(s);
2579             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2580                 s->is_decoded = 1;
2581                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2582                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2583                     s->ps.sps->sao_enabled)
2584                     restore_tqb_pixels(s);
2585             }
2586
2587             if (ctb_addr_ts < 0) {
2588                 ret = ctb_addr_ts;
2589                 goto fail;
2590             }
2591         }
2592         break;
2593     case NAL_EOS_NUT:
2594     case NAL_EOB_NUT:
2595         s->seq_decode = (s->seq_decode + 1) & 0xff;
2596         s->max_ra     = INT_MAX;
2597         break;
2598     case NAL_AUD:
2599     case NAL_FD_NUT:
2600         break;
2601     default:
2602         av_log(s->avctx, AV_LOG_INFO,
2603                "Skipping NAL unit %d\n", s->nal_unit_type);
2604     }
2605
2606     return 0;
2607 fail:
2608     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2609         return ret;
2610     return 0;
2611 }
2612
2613 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2614 {
2615     int i, ret = 0;
2616
2617     s->ref = NULL;
2618     s->eos = 0;
2619
2620     /* split the input packet into NAL units, so we know the upper bound on the
2621      * number of slices in the frame */
2622     ret = ff_hevc_split_packet(&s->pkt, buf, length, s->avctx, s->is_nalff,
2623                                s->nal_length_size);
2624     if (ret < 0) {
2625         av_log(s->avctx, AV_LOG_ERROR,
2626                "Error splitting the input into NAL units.\n");
2627         return ret;
2628     }
2629
2630     for (i = 0; i < s->pkt.nb_nals; i++) {
2631         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2632             s->pkt.nals[i].type == NAL_EOS_NUT)
2633             s->eos = 1;
2634     }
2635
2636     /* decode the NAL units */
2637     for (i = 0; i < s->pkt.nb_nals; i++) {
2638         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2639         if (ret < 0) {
2640             av_log(s->avctx, AV_LOG_WARNING,
2641                    "Error parsing NAL unit #%d.\n", i);
2642             goto fail;
2643         }
2644     }
2645
2646 fail:
2647     if (s->ref)
2648         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2649
2650     return ret;
2651 }
2652
2653 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2654 {
2655     int i;
2656     for (i = 0; i < 16; i++)
2657         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
2658 }
2659
2660 static int verify_md5(HEVCContext *s, AVFrame *frame)
2661 {
2662     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2663     int pixel_shift;
2664     int i, j;
2665
2666     if (!desc)
2667         return AVERROR(EINVAL);
2668
2669     pixel_shift = desc->comp[0].depth_minus1 > 7;
2670
2671     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2672            s->poc);
2673
2674     /* the checksums are LE, so we have to byteswap for >8bpp formats
2675      * on BE arches */
2676 #if HAVE_BIGENDIAN
2677     if (pixel_shift && !s->checksum_buf) {
2678         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2679                        FFMAX3(frame->linesize[0], frame->linesize[1],
2680                               frame->linesize[2]));
2681         if (!s->checksum_buf)
2682             return AVERROR(ENOMEM);
2683     }
2684 #endif
2685
2686     for (i = 0; frame->data[i]; i++) {
2687         int width  = s->avctx->coded_width;
2688         int height = s->avctx->coded_height;
2689         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2690         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2691         uint8_t md5[16];
2692
2693         av_md5_init(s->md5_ctx);
2694         for (j = 0; j < h; j++) {
2695             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2696 #if HAVE_BIGENDIAN
2697             if (pixel_shift) {
2698                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2699                                     (const uint16_t *) src, w);
2700                 src = s->checksum_buf;
2701             }
2702 #endif
2703             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2704         }
2705         av_md5_final(s->md5_ctx, md5);
2706
2707         if (!memcmp(md5, s->md5[i], 16)) {
2708             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2709             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2710             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2711         } else {
2712             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2713             print_md5(s->avctx, AV_LOG_ERROR, md5);
2714             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2715             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2716             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2717             return AVERROR_INVALIDDATA;
2718         }
2719     }
2720
2721     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2722
2723     return 0;
2724 }
2725
2726 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2727                              AVPacket *avpkt)
2728 {
2729     int ret;
2730     HEVCContext *s = avctx->priv_data;
2731
2732     if (!avpkt->size) {
2733         ret = ff_hevc_output_frame(s, data, 1);
2734         if (ret < 0)
2735             return ret;
2736
2737         *got_output = ret;
2738         return 0;
2739     }
2740
2741     s->ref = NULL;
2742     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2743     if (ret < 0)
2744         return ret;
2745
2746     if (avctx->hwaccel) {
2747         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2748             av_log(avctx, AV_LOG_ERROR,
2749                    "hardware accelerator failed to decode picture\n");
2750     } else {
2751         /* verify the SEI checksum */
2752         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2753             s->is_md5) {
2754             ret = verify_md5(s, s->ref->frame);
2755             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2756                 ff_hevc_unref_frame(s, s->ref, ~0);
2757                 return ret;
2758             }
2759         }
2760     }
2761     s->is_md5 = 0;
2762
2763     if (s->is_decoded) {
2764         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2765         s->is_decoded = 0;
2766     }
2767
2768     if (s->output_frame->buf[0]) {
2769         av_frame_move_ref(data, s->output_frame);
2770         *got_output = 1;
2771     }
2772
2773     return avpkt->size;
2774 }
2775
2776 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2777 {
2778     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2779     if (ret < 0)
2780         return ret;
2781
2782     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2783     if (!dst->tab_mvf_buf)
2784         goto fail;
2785     dst->tab_mvf = src->tab_mvf;
2786
2787     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2788     if (!dst->rpl_tab_buf)
2789         goto fail;
2790     dst->rpl_tab = src->rpl_tab;
2791
2792     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2793     if (!dst->rpl_buf)
2794         goto fail;
2795
2796     dst->poc        = src->poc;
2797     dst->ctb_count  = src->ctb_count;
2798     dst->window     = src->window;
2799     dst->flags      = src->flags;
2800     dst->sequence   = src->sequence;
2801
2802     if (src->hwaccel_picture_private) {
2803         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2804         if (!dst->hwaccel_priv_buf)
2805             goto fail;
2806         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2807     }
2808
2809     return 0;
2810 fail:
2811     ff_hevc_unref_frame(s, dst, ~0);
2812     return AVERROR(ENOMEM);
2813 }
2814
2815 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2816 {
2817     HEVCContext       *s = avctx->priv_data;
2818     int i;
2819
2820     pic_arrays_free(s);
2821
2822     av_freep(&s->md5_ctx);
2823
2824     av_frame_free(&s->tmp_frame);
2825     av_frame_free(&s->output_frame);
2826
2827     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2828         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2829         av_frame_free(&s->DPB[i].frame);
2830     }
2831
2832     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2833         av_buffer_unref(&s->ps.vps_list[i]);
2834     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2835         av_buffer_unref(&s->ps.sps_list[i]);
2836     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2837         av_buffer_unref(&s->ps.pps_list[i]);
2838
2839     for (i = 0; i < s->pkt.nals_allocated; i++)
2840         av_freep(&s->pkt.nals[i].rbsp_buffer);
2841     av_freep(&s->pkt.nals);
2842     s->pkt.nals_allocated = 0;
2843
2844     return 0;
2845 }
2846
2847 static av_cold int hevc_init_context(AVCodecContext *avctx)
2848 {
2849     HEVCContext *s = avctx->priv_data;
2850     int i;
2851
2852     s->avctx = avctx;
2853
2854     s->tmp_frame = av_frame_alloc();
2855     if (!s->tmp_frame)
2856         goto fail;
2857
2858     s->output_frame = av_frame_alloc();
2859     if (!s->output_frame)
2860         goto fail;
2861
2862     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2863         s->DPB[i].frame = av_frame_alloc();
2864         if (!s->DPB[i].frame)
2865             goto fail;
2866         s->DPB[i].tf.f = s->DPB[i].frame;
2867     }
2868
2869     s->max_ra = INT_MAX;
2870
2871     s->md5_ctx = av_md5_alloc();
2872     if (!s->md5_ctx)
2873         goto fail;
2874
2875     ff_bswapdsp_init(&s->bdsp);
2876
2877     s->context_initialized = 1;
2878
2879     return 0;
2880
2881 fail:
2882     hevc_decode_free(avctx);
2883     return AVERROR(ENOMEM);
2884 }
2885
2886 static int hevc_update_thread_context(AVCodecContext *dst,
2887                                       const AVCodecContext *src)
2888 {
2889     HEVCContext *s  = dst->priv_data;
2890     HEVCContext *s0 = src->priv_data;
2891     int i, ret;
2892
2893     if (!s->context_initialized) {
2894         ret = hevc_init_context(dst);
2895         if (ret < 0)
2896             return ret;
2897     }
2898
2899     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2900         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2901         if (s0->DPB[i].frame->buf[0]) {
2902             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2903             if (ret < 0)
2904                 return ret;
2905         }
2906     }
2907
2908     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2909         av_buffer_unref(&s->ps.vps_list[i]);
2910         if (s0->ps.vps_list[i]) {
2911             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2912             if (!s->ps.vps_list[i])
2913                 return AVERROR(ENOMEM);
2914         }
2915     }
2916
2917     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2918         av_buffer_unref(&s->ps.sps_list[i]);
2919         if (s0->ps.sps_list[i]) {
2920             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2921             if (!s->ps.sps_list[i])
2922                 return AVERROR(ENOMEM);
2923         }
2924     }
2925
2926     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2927         av_buffer_unref(&s->ps.pps_list[i]);
2928         if (s0->ps.pps_list[i]) {
2929             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2930             if (!s->ps.pps_list[i])
2931                 return AVERROR(ENOMEM);
2932         }
2933     }
2934
2935     if (s->ps.sps != s0->ps.sps)
2936         ret = set_sps(s, s0->ps.sps);
2937
2938     s->seq_decode = s0->seq_decode;
2939     s->seq_output = s0->seq_output;
2940     s->pocTid0    = s0->pocTid0;
2941     s->max_ra     = s0->max_ra;
2942
2943     s->is_nalff        = s0->is_nalff;
2944     s->nal_length_size = s0->nal_length_size;
2945
2946     if (s0->eos) {
2947         s->seq_decode = (s->seq_decode + 1) & 0xff;
2948         s->max_ra = INT_MAX;
2949     }
2950
2951     return 0;
2952 }
2953
2954 static int hevc_decode_extradata(HEVCContext *s)
2955 {
2956     AVCodecContext *avctx = s->avctx;
2957     GetByteContext gb;
2958     int ret, i;
2959
2960     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2961
2962     if (avctx->extradata_size > 3 &&
2963         (avctx->extradata[0] || avctx->extradata[1] ||
2964          avctx->extradata[2] > 1)) {
2965         /* It seems the extradata is encoded as hvcC format.
2966          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2967          * is finalized. When finalized, configurationVersion will be 1 and we
2968          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2969         int i, j, num_arrays, nal_len_size;
2970
2971         s->is_nalff = 1;
2972
2973         bytestream2_skip(&gb, 21);
2974         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2975         num_arrays   = bytestream2_get_byte(&gb);
2976
2977         /* nal units in the hvcC always have length coded with 2 bytes,
2978          * so put a fake nal_length_size = 2 while parsing them */
2979         s->nal_length_size = 2;
2980
2981         /* Decode nal units from hvcC. */
2982         for (i = 0; i < num_arrays; i++) {
2983             int type = bytestream2_get_byte(&gb) & 0x3f;
2984             int cnt  = bytestream2_get_be16(&gb);
2985
2986             for (j = 0; j < cnt; j++) {
2987                 // +2 for the nal size field
2988                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2989                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2990                     av_log(s->avctx, AV_LOG_ERROR,
2991                            "Invalid NAL unit size in extradata.\n");
2992                     return AVERROR_INVALIDDATA;
2993                 }
2994
2995                 ret = decode_nal_units(s, gb.buffer, nalsize);
2996                 if (ret < 0) {
2997                     av_log(avctx, AV_LOG_ERROR,
2998                            "Decoding nal unit %d %d from hvcC failed\n",
2999                            type, i);
3000                     return ret;
3001                 }
3002                 bytestream2_skip(&gb, nalsize);
3003             }
3004         }
3005
3006         /* Now store right nal length size, that will be used to parse
3007          * all other nals */
3008         s->nal_length_size = nal_len_size;
3009     } else {
3010         s->is_nalff = 0;
3011         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3012         if (ret < 0)
3013             return ret;
3014     }
3015
3016     /* export stream parameters from the first SPS */
3017     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3018         if (s->ps.sps_list[i]) {
3019             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3020             export_stream_params(s->avctx, s, sps);
3021             break;
3022         }
3023     }
3024
3025     return 0;
3026 }
3027
3028 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3029 {
3030     HEVCContext *s = avctx->priv_data;
3031     int ret;
3032
3033     ff_init_cabac_states();
3034
3035     avctx->internal->allocate_progress = 1;
3036
3037     ret = hevc_init_context(avctx);
3038     if (ret < 0)
3039         return ret;
3040
3041     if (avctx->extradata_size > 0 && avctx->extradata) {
3042         ret = hevc_decode_extradata(s);
3043         if (ret < 0) {
3044             hevc_decode_free(avctx);
3045             return ret;
3046         }
3047     }
3048
3049     return 0;
3050 }
3051
3052 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3053 {
3054     HEVCContext *s = avctx->priv_data;
3055     int ret;
3056
3057     memset(s, 0, sizeof(*s));
3058
3059     ret = hevc_init_context(avctx);
3060     if (ret < 0)
3061         return ret;
3062
3063     return 0;
3064 }
3065
3066 static void hevc_decode_flush(AVCodecContext *avctx)
3067 {
3068     HEVCContext *s = avctx->priv_data;
3069     ff_hevc_flush_dpb(s);
3070     s->max_ra = INT_MAX;
3071 }
3072
3073 #define OFFSET(x) offsetof(HEVCContext, x)
3074 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3075
3076 static const AVProfile profiles[] = {
3077     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3078     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3079     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3080     { FF_PROFILE_UNKNOWN },
3081 };
3082
3083 static const AVOption options[] = {
3084     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3085         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3086     { NULL },
3087 };
3088
3089 static const AVClass hevc_decoder_class = {
3090     .class_name = "HEVC decoder",
3091     .item_name  = av_default_item_name,
3092     .option     = options,
3093     .version    = LIBAVUTIL_VERSION_INT,
3094 };
3095
3096 AVCodec ff_hevc_decoder = {
3097     .name                  = "hevc",
3098     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3099     .type                  = AVMEDIA_TYPE_VIDEO,
3100     .id                    = AV_CODEC_ID_HEVC,
3101     .priv_data_size        = sizeof(HEVCContext),
3102     .priv_class            = &hevc_decoder_class,
3103     .init                  = hevc_decode_init,
3104     .close                 = hevc_decode_free,
3105     .decode                = hevc_decode_frame,
3106     .flush                 = hevc_decode_flush,
3107     .update_thread_context = hevc_update_thread_context,
3108     .init_thread_copy      = hevc_init_thread_copy,
3109     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3110                              CODEC_CAP_FRAME_THREADS,
3111     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3112 };