git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40 #include "profiles.h"
  41
  42 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
  43 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
  44 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };
  45
  46 static const uint8_t scan_1x1[1] = { 0 };
  47
  48 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  49
  50 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  51
  52 static const uint8_t horiz_scan4x4_x[16] = {
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56     0, 1, 2, 3,
  57 };
  58
  59 static const uint8_t horiz_scan4x4_y[16] = {
  60     0, 0, 0, 0,
  61     1, 1, 1, 1,
  62     2, 2, 2, 2,
  63     3, 3, 3, 3,
  64 };
  65
  66 static const uint8_t horiz_scan8x8_inv[8][8] = {
  67     {  0,  1,  2,  3, 16, 17, 18, 19, },
  68     {  4,  5,  6,  7, 20, 21, 22, 23, },
  69     {  8,  9, 10, 11, 24, 25, 26, 27, },
  70     { 12, 13, 14, 15, 28, 29, 30, 31, },
  71     { 32, 33, 34, 35, 48, 49, 50, 51, },
  72     { 36, 37, 38, 39, 52, 53, 54, 55, },
  73     { 40, 41, 42, 43, 56, 57, 58, 59, },
  74     { 44, 45, 46, 47, 60, 61, 62, 63, },
  75 };
  76
  77 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  78
  79 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  80
  81 static const uint8_t diag_scan2x2_inv[2][2] = {
  82     { 0, 2, },
  83     { 1, 3, },
  84 };
  85
  86 static const uint8_t diag_scan4x4_inv[4][4] = {
  87     { 0,  2,  5,  9, },
  88     { 1,  4,  8, 12, },
  89     { 3,  7, 11, 14, },
  90     { 6, 10, 13, 15, },
  91 };
  92
  93 static const uint8_t diag_scan8x8_inv[8][8] = {
  94     {  0,  2,  5,  9, 14, 20, 27, 35, },
  95     {  1,  4,  8, 13, 19, 26, 34, 42, },
  96     {  3,  7, 12, 18, 25, 33, 41, 48, },
  97     {  6, 11, 17, 24, 32, 40, 47, 53, },
  98     { 10, 16, 23, 31, 39, 46, 52, 57, },
  99     { 15, 22, 30, 38, 45, 51, 56, 60, },
 100     { 21, 29, 37, 44, 50, 55, 59, 62, },
 101     { 28, 36, 43, 49, 54, 58, 61, 63, },
 102 };
 103
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 108
 109 /**
 110  * Section 5.7
 111  */
 112
 113 /* free everything allocated  by pic_arrays_init() */
 114 static void pic_arrays_free(HEVCContext *s)
 115 {
 116     av_freep(&s->sao);
 117     av_freep(&s->deblock);
 118
 119     av_freep(&s->skip_flag);
 120     av_freep(&s->tab_ct_depth);
 121
 122     av_freep(&s->tab_ipm);
 123     av_freep(&s->cbf_luma);
 124     av_freep(&s->is_pcm);
 125
 126     av_freep(&s->qp_y_tab);
 127     av_freep(&s->tab_slice_address);
 128     av_freep(&s->filter_slice_edges);
 129
 130     av_freep(&s->horizontal_bs);
 131     av_freep(&s->vertical_bs);
 132
 133     av_buffer_pool_uninit(&s->tab_mvf_pool);
 134     av_buffer_pool_uninit(&s->rpl_tab_pool);
 135 }
 136
 137 /* allocate arrays that depend on frame dimensions */
 138 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 139 {
 140     int log2_min_cb_size = sps->log2_min_cb_size;
 141     int width            = sps->width;
 142     int height           = sps->height;
 143     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 144                            ((height >> log2_min_cb_size) + 1);
 145     int ctb_count        = sps->ctb_width * sps->ctb_height;
 146     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 147
 148     s->bs_width  = width  >> 3;
 149     s->bs_height = height >> 3;
 150
 151     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 152     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 153     if (!s->sao || !s->deblock)
 154         goto fail;
 155
 156     s->skip_flag    = av_malloc(pic_size_in_ctb);
 157     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 158     if (!s->skip_flag || !s->tab_ct_depth)
 159         goto fail;
 160
 161     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 162     s->tab_ipm  = av_mallocz(min_pu_size);
 163     s->is_pcm   = av_malloc(min_pu_size);
 164     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 165         goto fail;
 166
 167     s->filter_slice_edges = av_malloc(ctb_count);
 168     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 169                                       sizeof(*s->tab_slice_address));
 170     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 171                                       sizeof(*s->qp_y_tab));
 172     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 173         goto fail;
 174
 175     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 176     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 177     if (!s->horizontal_bs || !s->vertical_bs)
 178         goto fail;
 179
 180     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 181                                           av_buffer_alloc);
 182     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 183                                           av_buffer_allocz);
 184     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 185         goto fail;
 186
 187     return 0;
 188
 189 fail:
 190     pic_arrays_free(s);
 191     return AVERROR(ENOMEM);
 192 }
 193
 194 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 195 {
 196     int i = 0;
 197     int j = 0;
 198     uint8_t luma_weight_l0_flag[16];
 199     uint8_t chroma_weight_l0_flag[16];
 200     uint8_t luma_weight_l1_flag[16];
 201     uint8_t chroma_weight_l1_flag[16];
 202
 203     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 204     if (s->ps.sps->chroma_format_idc != 0) {
 205         int delta = get_se_golomb(gb);
 206         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 207     }
 208
 209     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 210         luma_weight_l0_flag[i] = get_bits1(gb);
 211         if (!luma_weight_l0_flag[i]) {
 212             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 213             s->sh.luma_offset_l0[i] = 0;
 214         }
 215     }
 216     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 217         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 218             chroma_weight_l0_flag[i] = get_bits1(gb);
 219     } else {
 220         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 221             chroma_weight_l0_flag[i] = 0;
 222     }
 223     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 224         if (luma_weight_l0_flag[i]) {
 225             int delta_luma_weight_l0 = get_se_golomb(gb);
 226             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 227             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 228         }
 229         if (chroma_weight_l0_flag[i]) {
 230             for (j = 0; j < 2; j++) {
 231                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 232                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 233                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 234                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 235                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 236             }
 237         } else {
 238             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 239             s->sh.chroma_offset_l0[i][0] = 0;
 240             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 241             s->sh.chroma_offset_l0[i][1] = 0;
 242         }
 243     }
 244     if (s->sh.slice_type == B_SLICE) {
 245         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 246             luma_weight_l1_flag[i] = get_bits1(gb);
 247             if (!luma_weight_l1_flag[i]) {
 248                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 249                 s->sh.luma_offset_l1[i] = 0;
 250             }
 251         }
 252         if (s->ps.sps->chroma_format_idc != 0) {
 253             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 254                 chroma_weight_l1_flag[i] = get_bits1(gb);
 255         } else {
 256             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 257                 chroma_weight_l1_flag[i] = 0;
 258         }
 259         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 260             if (luma_weight_l1_flag[i]) {
 261                 int delta_luma_weight_l1 = get_se_golomb(gb);
 262                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 263                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 264             }
 265             if (chroma_weight_l1_flag[i]) {
 266                 for (j = 0; j < 2; j++) {
 267                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 268                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 269                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 270                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 271                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 272                 }
 273             } else {
 274                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 275                 s->sh.chroma_offset_l1[i][0] = 0;
 276                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 277                 s->sh.chroma_offset_l1[i][1] = 0;
 278             }
 279         }
 280     }
 281 }
 282
 283 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 284 {
 285     const HEVCSPS *sps = s->ps.sps;
 286     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 287     int prev_delta_msb = 0;
 288     unsigned int nb_sps = 0, nb_sh;
 289     int i;
 290
 291     rps->nb_refs = 0;
 292     if (!sps->long_term_ref_pics_present_flag)
 293         return 0;
 294
 295     if (sps->num_long_term_ref_pics_sps > 0)
 296         nb_sps = get_ue_golomb_long(gb);
 297     nb_sh = get_ue_golomb_long(gb);
 298
 299     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 300         return AVERROR_INVALIDDATA;
 301
 302     rps->nb_refs = nb_sh + nb_sps;
 303
 304     for (i = 0; i < rps->nb_refs; i++) {
 305         uint8_t delta_poc_msb_present;
 306
 307         if (i < nb_sps) {
 308             uint8_t lt_idx_sps = 0;
 309
 310             if (sps->num_long_term_ref_pics_sps > 1)
 311                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 312
 313             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 314             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 315         } else {
 316             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 317             rps->used[i] = get_bits1(gb);
 318         }
 319
 320         delta_poc_msb_present = get_bits1(gb);
 321         if (delta_poc_msb_present) {
 322             int delta = get_ue_golomb_long(gb);
 323
 324             if (i && i != nb_sps)
 325                 delta += prev_delta_msb;
 326
 327             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 328             prev_delta_msb = delta;
 329         }
 330     }
 331
 332     return 0;
 333 }
 334
 335 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 336                                  const HEVCSPS *sps)
 337 {
 338     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 339     unsigned int num = 0, den = 0;
 340
 341     avctx->pix_fmt             = sps->pix_fmt;
 342     avctx->coded_width         = sps->width;
 343     avctx->coded_height        = sps->height;
 344     avctx->width               = sps->output_width;
 345     avctx->height              = sps->output_height;
 346     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 347     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 348     avctx->level               = sps->ptl.general_ptl.level_idc;
 349
 350     ff_set_sar(avctx, sps->vui.sar);
 351
 352     if (sps->vui.video_signal_type_present_flag)
 353         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 354                                                             : AVCOL_RANGE_MPEG;
 355     else
 356         avctx->color_range = AVCOL_RANGE_MPEG;
 357
 358     if (sps->vui.colour_description_present_flag) {
 359         avctx->color_primaries = sps->vui.colour_primaries;
 360         avctx->color_trc       = sps->vui.transfer_characteristic;
 361         avctx->colorspace      = sps->vui.matrix_coeffs;
 362     } else {
 363         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 364         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 365         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 366     }
 367
 368     if (vps->vps_timing_info_present_flag) {
 369         num = vps->vps_num_units_in_tick;
 370         den = vps->vps_time_scale;
 371     } else if (sps->vui.vui_timing_info_present_flag) {
 372         num = sps->vui.vui_num_units_in_tick;
 373         den = sps->vui.vui_time_scale;
 374     }
 375
 376     if (num != 0 && den != 0)
 377         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 378                   num, den, 1 << 30);
 379 }
 380
 381 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 382 {
 383     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
 384     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 385     int ret;
 386
 387     pic_arrays_free(s);
 388     s->ps.sps = NULL;
 389     s->ps.vps = NULL;
 390
 391     if (!sps)
 392         return 0;
 393
 394     ret = pic_arrays_init(s, sps);
 395     if (ret < 0)
 396         goto fail;
 397
 398     export_stream_params(s->avctx, &s->ps, sps);
 399
 400     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 401 #if CONFIG_HEVC_DXVA2_HWACCEL
 402         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 403 #endif
 404 #if CONFIG_HEVC_D3D11VA_HWACCEL
 405         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 406 #endif
 407     }
 408
 409     *fmt++ = sps->pix_fmt;
 410     *fmt = AV_PIX_FMT_NONE;
 411
 412     ret = ff_get_format(s->avctx, pix_fmts);
 413     if (ret < 0)
 414         goto fail;
 415     s->avctx->pix_fmt = ret;
 416
 417     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 418     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 419     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 420
 421     if (sps->sao_enabled && !s->avctx->hwaccel) {
 422         av_frame_unref(s->tmp_frame);
 423         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 424         if (ret < 0)
 425             goto fail;
 426         s->frame = s->tmp_frame;
 427     }
 428
 429     s->ps.sps = sps;
 430     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 431
 432     return 0;
 433
 434 fail:
 435     pic_arrays_free(s);
 436     s->ps.sps = NULL;
 437     return ret;
 438 }
 439
 440 static int hls_slice_header(HEVCContext *s)
 441 {
 442     GetBitContext *gb = &s->HEVClc.gb;
 443     SliceHeader *sh   = &s->sh;
 444     int i, ret;
 445
 446     // Coded parameters
 447     sh->first_slice_in_pic_flag = get_bits1(gb);
 448     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 449         s->seq_decode = (s->seq_decode + 1) & 0xff;
 450         s->max_ra     = INT_MAX;
 451         if (IS_IDR(s))
 452             ff_hevc_clear_refs(s);
 453     }
 454     if (IS_IRAP(s))
 455         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 456
 457     sh->pps_id = get_ue_golomb_long(gb);
 458     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 459         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 460         return AVERROR_INVALIDDATA;
 461     }
 462     if (!sh->first_slice_in_pic_flag &&
 463         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 464         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 465         return AVERROR_INVALIDDATA;
 466     }
 467     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 468
 469     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 470         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 471
 472         ff_hevc_clear_refs(s);
 473         ret = set_sps(s, s->ps.sps);
 474         if (ret < 0)
 475             return ret;
 476
 477         s->seq_decode = (s->seq_decode + 1) & 0xff;
 478         s->max_ra     = INT_MAX;
 479     }
 480
 481     sh->dependent_slice_segment_flag = 0;
 482     if (!sh->first_slice_in_pic_flag) {
 483         int slice_address_length;
 484
 485         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 486             sh->dependent_slice_segment_flag = get_bits1(gb);
 487
 488         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 489                                             s->ps.sps->ctb_height);
 490         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 491         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 492             av_log(s->avctx, AV_LOG_ERROR,
 493                    "Invalid slice segment address: %u.\n",
 494                    sh->slice_segment_addr);
 495             return AVERROR_INVALIDDATA;
 496         }
 497
 498         if (!sh->dependent_slice_segment_flag) {
 499             sh->slice_addr = sh->slice_segment_addr;
 500             s->slice_idx++;
 501         }
 502     } else {
 503         sh->slice_segment_addr = sh->slice_addr = 0;
 504         s->slice_idx           = 0;
 505         s->slice_initialized   = 0;
 506     }
 507
 508     if (!sh->dependent_slice_segment_flag) {
 509         s->slice_initialized = 0;
 510
 511         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 512             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 513
 514         sh->slice_type = get_ue_golomb_long(gb);
 515         if (!(sh->slice_type == I_SLICE ||
 516               sh->slice_type == P_SLICE ||
 517               sh->slice_type == B_SLICE)) {
 518             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 519                    sh->slice_type);
 520             return AVERROR_INVALIDDATA;
 521         }
 522         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 523             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 524             return AVERROR_INVALIDDATA;
 525         }
 526
 527         // when flag is not present, picture is inferred to be output
 528         sh->pic_output_flag = 1;
 529         if (s->ps.pps->output_flag_present_flag)
 530             sh->pic_output_flag = get_bits1(gb);
 531
 532         if (s->ps.sps->separate_colour_plane_flag)
 533             sh->colour_plane_id = get_bits(gb, 2);
 534
 535         if (!IS_IDR(s)) {
 536             int poc;
 537
 538             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 539             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 540             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 541                 av_log(s->avctx, AV_LOG_WARNING,
 542                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 543                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 544                     return AVERROR_INVALIDDATA;
 545                 poc = s->poc;
 546             }
 547             s->poc = poc;
 548
 549             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 550             if (!sh->short_term_ref_pic_set_sps_flag) {
 551                 int pos = get_bits_left(gb);
 552                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 553                 if (ret < 0)
 554                     return ret;
 555
 556                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 557                 sh->short_term_rps = &sh->slice_rps;
 558             } else {
 559                 int numbits, rps_idx;
 560
 561                 if (!s->ps.sps->nb_st_rps) {
 562                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 563                     return AVERROR_INVALIDDATA;
 564                 }
 565
 566                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 567                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 568                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 569             }
 570
 571             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 572             if (ret < 0) {
 573                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 574                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 575                     return AVERROR_INVALIDDATA;
 576             }
 577
 578             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 579                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 580             else
 581                 sh->slice_temporal_mvp_enabled_flag = 0;
 582         } else {
 583             s->sh.short_term_rps = NULL;
 584             s->poc               = 0;
 585         }
 586
 587         /* 8.3.1 */
 588         if (s->temporal_id == 0 &&
 589             s->nal_unit_type != NAL_TRAIL_N &&
 590             s->nal_unit_type != NAL_TSA_N   &&
 591             s->nal_unit_type != NAL_STSA_N  &&
 592             s->nal_unit_type != NAL_RADL_N  &&
 593             s->nal_unit_type != NAL_RADL_R  &&
 594             s->nal_unit_type != NAL_RASL_N  &&
 595             s->nal_unit_type != NAL_RASL_R)
 596             s->pocTid0 = s->poc;
 597
 598         if (s->ps.sps->sao_enabled) {
 599             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 600             sh->slice_sample_adaptive_offset_flag[1] =
 601             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 602         } else {
 603             sh->slice_sample_adaptive_offset_flag[0] = 0;
 604             sh->slice_sample_adaptive_offset_flag[1] = 0;
 605             sh->slice_sample_adaptive_offset_flag[2] = 0;
 606         }
 607
 608         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 609         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 610             int nb_refs;
 611
 612             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 613             if (sh->slice_type == B_SLICE)
 614                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 615
 616             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 617                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 618                 if (sh->slice_type == B_SLICE)
 619                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 620             }
 621             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 622                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 623                        sh->nb_refs[L0], sh->nb_refs[L1]);
 624                 return AVERROR_INVALIDDATA;
 625             }
 626
 627             sh->rpl_modification_flag[0] = 0;
 628             sh->rpl_modification_flag[1] = 0;
 629             nb_refs = ff_hevc_frame_nb_refs(s);
 630             if (!nb_refs) {
 631                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 632                 return AVERROR_INVALIDDATA;
 633             }
 634
 635             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 636                 sh->rpl_modification_flag[0] = get_bits1(gb);
 637                 if (sh->rpl_modification_flag[0]) {
 638                     for (i = 0; i < sh->nb_refs[L0]; i++)
 639                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 640                 }
 641
 642                 if (sh->slice_type == B_SLICE) {
 643                     sh->rpl_modification_flag[1] = get_bits1(gb);
 644                     if (sh->rpl_modification_flag[1] == 1)
 645                         for (i = 0; i < sh->nb_refs[L1]; i++)
 646                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 647                 }
 648             }
 649
 650             if (sh->slice_type == B_SLICE)
 651                 sh->mvd_l1_zero_flag = get_bits1(gb);
 652
 653             if (s->ps.pps->cabac_init_present_flag)
 654                 sh->cabac_init_flag = get_bits1(gb);
 655             else
 656                 sh->cabac_init_flag = 0;
 657
 658             sh->collocated_ref_idx = 0;
 659             if (sh->slice_temporal_mvp_enabled_flag) {
 660                 sh->collocated_list = L0;
 661                 if (sh->slice_type == B_SLICE)
 662                     sh->collocated_list = !get_bits1(gb);
 663
 664                 if (sh->nb_refs[sh->collocated_list] > 1) {
 665                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 666                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 667                         av_log(s->avctx, AV_LOG_ERROR,
 668                                "Invalid collocated_ref_idx: %d.\n",
 669                                sh->collocated_ref_idx);
 670                         return AVERROR_INVALIDDATA;
 671                     }
 672                 }
 673             }
 674
 675             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 676                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 677                 pred_weight_table(s, gb);
 678             }
 679
 680             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 681             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 682                 av_log(s->avctx, AV_LOG_ERROR,
 683                        "Invalid number of merging MVP candidates: %d.\n",
 684                        sh->max_num_merge_cand);
 685                 return AVERROR_INVALIDDATA;
 686             }
 687         }
 688
 689         sh->slice_qp_delta = get_se_golomb(gb);
 690
 691         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 692             sh->slice_cb_qp_offset = get_se_golomb(gb);
 693             sh->slice_cr_qp_offset = get_se_golomb(gb);
 694         } else {
 695             sh->slice_cb_qp_offset = 0;
 696             sh->slice_cr_qp_offset = 0;
 697         }
 698
 699         if (s->ps.pps->deblocking_filter_control_present_flag) {
 700             int deblocking_filter_override_flag = 0;
 701
 702             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 703                 deblocking_filter_override_flag = get_bits1(gb);
 704
 705             if (deblocking_filter_override_flag) {
 706                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 707                 if (!sh->disable_deblocking_filter_flag) {
 708                     sh->beta_offset = get_se_golomb(gb) * 2;
 709                     sh->tc_offset   = get_se_golomb(gb) * 2;
 710                 }
 711             } else {
 712                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 713                 sh->beta_offset                    = s->ps.pps->beta_offset;
 714                 sh->tc_offset                      = s->ps.pps->tc_offset;
 715             }
 716         } else {
 717             sh->disable_deblocking_filter_flag = 0;
 718             sh->beta_offset                    = 0;
 719             sh->tc_offset                      = 0;
 720         }
 721
 722         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 723             (sh->slice_sample_adaptive_offset_flag[0] ||
 724              sh->slice_sample_adaptive_offset_flag[1] ||
 725              !sh->disable_deblocking_filter_flag)) {
 726             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 727         } else {
 728             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 729         }
 730     } else if (!s->slice_initialized) {
 731         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 732         return AVERROR_INVALIDDATA;
 733     }
 734
 735     sh->num_entry_point_offsets = 0;
 736     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 737         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 738         if (sh->num_entry_point_offsets > 0) {
 739             int offset_len = get_ue_golomb_long(gb) + 1;
 740
 741             for (i = 0; i < sh->num_entry_point_offsets; i++)
 742                 skip_bits(gb, offset_len);
 743         }
 744     }
 745
 746     if (s->ps.pps->slice_header_extension_present_flag) {
 747         unsigned int length = get_ue_golomb_long(gb);
 748         for (i = 0; i < length; i++)
 749             skip_bits(gb, 8);  // slice_header_extension_data_byte
 750     }
 751
 752     // Inferred parameters
 753     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 754     if (sh->slice_qp > 51 ||
 755         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 756         av_log(s->avctx, AV_LOG_ERROR,
 757                "The slice_qp %d is outside the valid range "
 758                "[%d, 51].\n",
 759                sh->slice_qp,
 760                -s->ps.sps->qp_bd_offset);
 761         return AVERROR_INVALIDDATA;
 762     }
 763
 764     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 765
 766     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 767         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 768         return AVERROR_INVALIDDATA;
 769     }
 770
 771     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 772
 773     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 774         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 775                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 776
 777     s->slice_initialized = 1;
 778
 779     return 0;
 780 }
 781
 782 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 783
 784 #define SET_SAO(elem, value)                            \
 785 do {                                                    \
 786     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 787         sao->elem = value;                              \
 788     else if (sao_merge_left_flag)                       \
 789         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 790     else if (sao_merge_up_flag)                         \
 791         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 792     else                                                \
 793         sao->elem = 0;                                  \
 794 } while (0)
 795
 796 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 797 {
 798     HEVCLocalContext *lc    = &s->HEVClc;
 799     int sao_merge_left_flag = 0;
 800     int sao_merge_up_flag   = 0;
 801     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 802     SAOParams *sao          = &CTB(s->sao, rx, ry);
 803     int c_idx, i;
 804
 805     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 806         s->sh.slice_sample_adaptive_offset_flag[1]) {
 807         if (rx > 0) {
 808             if (lc->ctb_left_flag)
 809                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 810         }
 811         if (ry > 0 && !sao_merge_left_flag) {
 812             if (lc->ctb_up_flag)
 813                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 814         }
 815     }
 816
 817     for (c_idx = 0; c_idx < 3; c_idx++) {
 818         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 819             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 820             continue;
 821         }
 822
 823         if (c_idx == 2) {
 824             sao->type_idx[2] = sao->type_idx[1];
 825             sao->eo_class[2] = sao->eo_class[1];
 826         } else {
 827             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 828         }
 829
 830         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 831             continue;
 832
 833         for (i = 0; i < 4; i++)
 834             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 835
 836         if (sao->type_idx[c_idx] == SAO_BAND) {
 837             for (i = 0; i < 4; i++) {
 838                 if (sao->offset_abs[c_idx][i]) {
 839                     SET_SAO(offset_sign[c_idx][i],
 840                             ff_hevc_sao_offset_sign_decode(s));
 841                 } else {
 842                     sao->offset_sign[c_idx][i] = 0;
 843                 }
 844             }
 845             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 846         } else if (c_idx != 2) {
 847             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 848         }
 849
 850         // Inferred parameters
 851         sao->offset_val[c_idx][0] = 0;
 852         for (i = 0; i < 4; i++) {
 853             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 854             if (sao->type_idx[c_idx] == SAO_EDGE) {
 855                 if (i > 1)
 856                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 857             } else if (sao->offset_sign[c_idx][i]) {
 858                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 859             }
 860         }
 861     }
 862 }
 863
 864 #undef SET_SAO
 865 #undef CTB
 866
 867 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 868                                 int log2_trafo_size, enum ScanType scan_idx,
 869                                 int c_idx)
 870 {
 871 #define GET_COORD(offset, n)                                    \
 872     do {                                                        \
 873         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 874         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 875     } while (0)
 876     HEVCLocalContext *lc    = &s->HEVClc;
 877     int transform_skip_flag = 0;
 878
 879     int last_significant_coeff_x, last_significant_coeff_y;
 880     int last_scan_pos;
 881     int n_end;
 882     int num_coeff    = 0;
 883     int greater1_ctx = 1;
 884
 885     int num_last_subset;
 886     int x_cg_last_sig, y_cg_last_sig;
 887
 888     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 889
 890     ptrdiff_t stride = s->frame->linesize[c_idx];
 891     int hshift       = s->ps.sps->hshift[c_idx];
 892     int vshift       = s->ps.sps->vshift[c_idx];
 893     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 894                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 895     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 896     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 897
 898     int trafo_size = 1 << log2_trafo_size;
 899     int i, qp, shift, add, scale, scale_m;
 900     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 901     const uint8_t *scale_matrix;
 902     uint8_t dc_scale;
 903
 904     // Derive QP for dequant
 905     if (!lc->cu.cu_transquant_bypass_flag) {
 906         static const int qp_c[] = {
 907             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 908         };
 909
 910         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 911             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 912             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 913             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 914         };
 915
 916         static const uint8_t div6[51 + 2 * 6 + 1] = {
 917             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 918             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 919             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 920         };
 921         int qp_y = lc->qp_y;
 922
 923         if (c_idx == 0) {
 924             qp = qp_y + s->ps.sps->qp_bd_offset;
 925         } else {
 926             int qp_i, offset;
 927
 928             if (c_idx == 1)
 929                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 930             else
 931                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 932
 933             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 934             if (qp_i < 30)
 935                 qp = qp_i;
 936             else if (qp_i > 43)
 937                 qp = qp_i - 6;
 938             else
 939                 qp = qp_c[qp_i - 30];
 940
 941             qp += s->ps.sps->qp_bd_offset;
 942         }
 943
 944         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 945         add      = 1 << (shift - 1);
 946         scale    = level_scale[rem6[qp]] << (div6[qp]);
 947         scale_m  = 16; // default when no custom scaling lists.
 948         dc_scale = 16;
 949
 950         if (s->ps.sps->scaling_list_enable_flag) {
 951             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 952                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 953             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 954
 955             if (log2_trafo_size != 5)
 956                 matrix_id = 3 * matrix_id + c_idx;
 957
 958             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 959             if (log2_trafo_size >= 4)
 960                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 961         }
 962     }
 963
 964     if (s->ps.pps->transform_skip_enabled_flag &&
 965         !lc->cu.cu_transquant_bypass_flag   &&
 966         log2_trafo_size == 2) {
 967         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 968     }
 969
 970     last_significant_coeff_x =
 971         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 972     last_significant_coeff_y =
 973         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 974
 975     if (last_significant_coeff_x > 3) {
 976         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 977         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 978                                    (2 + (last_significant_coeff_x & 1)) +
 979                                    suffix;
 980     }
 981
 982     if (last_significant_coeff_y > 3) {
 983         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 984         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 985                                    (2 + (last_significant_coeff_y & 1)) +
 986                                    suffix;
 987     }
 988
 989     if (scan_idx == SCAN_VERT)
 990         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 991
 992     x_cg_last_sig = last_significant_coeff_x >> 2;
 993     y_cg_last_sig = last_significant_coeff_y >> 2;
 994
 995     switch (scan_idx) {
 996     case SCAN_DIAG: {
 997         int last_x_c = last_significant_coeff_x & 3;
 998         int last_y_c = last_significant_coeff_y & 3;
 999
1000         scan_x_off = ff_hevc_diag_scan4x4_x;
1001         scan_y_off = ff_hevc_diag_scan4x4_y;
1002         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1003         if (trafo_size == 4) {
1004             scan_x_cg = scan_1x1;
1005             scan_y_cg = scan_1x1;
1006         } else if (trafo_size == 8) {
1007             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1008             scan_x_cg  = diag_scan2x2_x;
1009             scan_y_cg  = diag_scan2x2_y;
1010         } else if (trafo_size == 16) {
1011             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1012             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1013             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1014         } else { // trafo_size == 32
1015             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1016             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1017             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1018         }
1019         break;
1020     }
1021     case SCAN_HORIZ:
1022         scan_x_cg  = horiz_scan2x2_x;
1023         scan_y_cg  = horiz_scan2x2_y;
1024         scan_x_off = horiz_scan4x4_x;
1025         scan_y_off = horiz_scan4x4_y;
1026         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1027         break;
1028     default: //SCAN_VERT
1029         scan_x_cg  = horiz_scan2x2_y;
1030         scan_y_cg  = horiz_scan2x2_x;
1031         scan_x_off = horiz_scan4x4_y;
1032         scan_y_off = horiz_scan4x4_x;
1033         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1034         break;
1035     }
1036     num_coeff++;
1037     num_last_subset = (num_coeff - 1) >> 4;
1038
1039     for (i = num_last_subset; i >= 0; i--) {
1040         int n, m;
1041         int x_cg, y_cg, x_c, y_c;
1042         int implicit_non_zero_coeff = 0;
1043         int64_t trans_coeff_level;
1044         int prev_sig = 0;
1045         int offset   = i << 4;
1046
1047         uint8_t significant_coeff_flag_idx[16];
1048         uint8_t nb_significant_coeff_flag = 0;
1049
1050         x_cg = scan_x_cg[i];
1051         y_cg = scan_y_cg[i];
1052
1053         if (i < num_last_subset && i > 0) {
1054             int ctx_cg = 0;
1055             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1056                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1057             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1058                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1059
1060             significant_coeff_group_flag[x_cg][y_cg] =
1061                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1062             implicit_non_zero_coeff = 1;
1063         } else {
1064             significant_coeff_group_flag[x_cg][y_cg] =
1065                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1066                  (x_cg == 0 && y_cg == 0));
1067         }
1068
1069         last_scan_pos = num_coeff - offset - 1;
1070
1071         if (i == num_last_subset) {
1072             n_end                         = last_scan_pos - 1;
1073             significant_coeff_flag_idx[0] = last_scan_pos;
1074             nb_significant_coeff_flag     = 1;
1075         } else {
1076             n_end = 15;
1077         }
1078
1079         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1080             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1081         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1082             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1083
1084         for (n = n_end; n >= 0; n--) {
1085             GET_COORD(offset, n);
1086
1087             if (significant_coeff_group_flag[x_cg][y_cg] &&
1088                 (n > 0 || implicit_non_zero_coeff == 0)) {
1089                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1090                                                           log2_trafo_size,
1091                                                           scan_idx,
1092                                                           prev_sig) == 1) {
1093                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1094                     nb_significant_coeff_flag++;
1095                     implicit_non_zero_coeff = 0;
1096                 }
1097             } else {
1098                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1099                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1100                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1101                     nb_significant_coeff_flag++;
1102                 }
1103             }
1104         }
1105
1106         n_end = nb_significant_coeff_flag;
1107
1108         if (n_end) {
1109             int first_nz_pos_in_cg = 16;
1110             int last_nz_pos_in_cg = -1;
1111             int c_rice_param = 0;
1112             int first_greater1_coeff_idx = -1;
1113             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1114             uint16_t coeff_sign_flag;
1115             int sum_abs = 0;
1116             int sign_hidden = 0;
1117
1118             // initialize first elem of coeff_bas_level_greater1_flag
1119             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1120
1121             if (!(i == num_last_subset) && greater1_ctx == 0)
1122                 ctx_set++;
1123             greater1_ctx      = 1;
1124             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1125
1126             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1127                 int n_idx = significant_coeff_flag_idx[m];
1128                 int inc   = (ctx_set << 2) + greater1_ctx;
1129                 coeff_abs_level_greater1_flag[n_idx] =
1130                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1131                 if (coeff_abs_level_greater1_flag[n_idx]) {
1132                     greater1_ctx = 0;
1133                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1134                     greater1_ctx++;
1135                 }
1136
1137                 if (coeff_abs_level_greater1_flag[n_idx] &&
1138                     first_greater1_coeff_idx == -1)
1139                     first_greater1_coeff_idx = n_idx;
1140             }
1141             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1142             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1143                                  !lc->cu.cu_transquant_bypass_flag;
1144
1145             if (first_greater1_coeff_idx != -1) {
1146                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1147             }
1148             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1149                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1150             } else {
1151                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1152             }
1153
1154             for (m = 0; m < n_end; m++) {
1155                 n = significant_coeff_flag_idx[m];
1156                 GET_COORD(offset, n);
1157                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1158                 if (trans_coeff_level == ((m < 8) ?
1159                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1160                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1161
1162                     trans_coeff_level += last_coeff_abs_level_remaining;
1163                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1164                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1165                 }
1166                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1167                     sum_abs += trans_coeff_level;
1168                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1169                         trans_coeff_level = -trans_coeff_level;
1170                 }
1171                 if (coeff_sign_flag >> 15)
1172                     trans_coeff_level = -trans_coeff_level;
1173                 coeff_sign_flag <<= 1;
1174                 if (!lc->cu.cu_transquant_bypass_flag) {
1175                     if (s->ps.sps->scaling_list_enable_flag) {
1176                         if (y_c || x_c || log2_trafo_size < 4) {
1177                             int pos;
1178                             switch (log2_trafo_size) {
1179                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1180                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1181                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1182                             default: pos = (y_c        << 2) +  x_c;
1183                             }
1184                             scale_m = scale_matrix[pos];
1185                         } else {
1186                             scale_m = dc_scale;
1187                         }
1188                     }
1189                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1190                     if(trans_coeff_level < 0) {
1191                         if((~trans_coeff_level) & 0xFffffffffff8000)
1192                             trans_coeff_level = -32768;
1193                     } else {
1194                         if (trans_coeff_level & 0xffffffffffff8000)
1195                             trans_coeff_level = 32767;
1196                     }
1197                 }
1198                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1199             }
1200         }
1201     }
1202
1203     if (lc->cu.cu_transquant_bypass_flag) {
1204         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1205     } else {
1206         if (transform_skip_flag)
1207             s->hevcdsp.transform_skip(dst, coeffs, stride);
1208         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1209                  log2_trafo_size == 2)
1210             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1211         else
1212             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1213     }
1214 }
1215
1216 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1217                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1218                               int log2_cb_size, int log2_trafo_size,
1219                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1220 {
1221     HEVCLocalContext *lc = &s->HEVClc;
1222
1223     if (lc->cu.pred_mode == MODE_INTRA) {
1224         int trafo_size = 1 << log2_trafo_size;
1225         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1226
1227         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1228         if (log2_trafo_size > 2) {
1229             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1230             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1231             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1232             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1233         } else if (blk_idx == 3) {
1234             trafo_size = trafo_size << s->ps.sps->hshift[1];
1235             ff_hevc_set_neighbour_available(s, xBase, yBase,
1236                                             trafo_size, trafo_size);
1237             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1238             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1239         }
1240     }
1241
1242     if (cbf_luma || cbf_cb || cbf_cr) {
1243         int scan_idx   = SCAN_DIAG;
1244         int scan_idx_c = SCAN_DIAG;
1245
1246         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1247             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1248             if (lc->tu.cu_qp_delta != 0)
1249                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1250                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1251             lc->tu.is_cu_qp_delta_coded = 1;
1252
1253             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1254                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1255                 av_log(s->avctx, AV_LOG_ERROR,
1256                        "The cu_qp_delta %d is outside the valid range "
1257                        "[%d, %d].\n",
1258                        lc->tu.cu_qp_delta,
1259                        -(26 + s->ps.sps->qp_bd_offset / 2),
1260                         (25 + s->ps.sps->qp_bd_offset / 2));
1261                 return AVERROR_INVALIDDATA;
1262             }
1263
1264             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1265         }
1266
1267         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1268             if (lc->tu.cur_intra_pred_mode >= 6 &&
1269                 lc->tu.cur_intra_pred_mode <= 14) {
1270                 scan_idx = SCAN_VERT;
1271             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1272                        lc->tu.cur_intra_pred_mode <= 30) {
1273                 scan_idx = SCAN_HORIZ;
1274             }
1275
1276             if (lc->pu.intra_pred_mode_c >=  6 &&
1277                 lc->pu.intra_pred_mode_c <= 14) {
1278                 scan_idx_c = SCAN_VERT;
1279             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1280                        lc->pu.intra_pred_mode_c <= 30) {
1281                 scan_idx_c = SCAN_HORIZ;
1282             }
1283         }
1284
1285         if (cbf_luma)
1286             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1287         if (log2_trafo_size > 2) {
1288             if (cbf_cb)
1289                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1290             if (cbf_cr)
1291                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1292         } else if (blk_idx == 3) {
1293             if (cbf_cb)
1294                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1295             if (cbf_cr)
1296                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1297         }
1298     }
1299     return 0;
1300 }
1301
1302 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1303 {
1304     int cb_size          = 1 << log2_cb_size;
1305     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1306
1307     int min_pu_width     = s->ps.sps->min_pu_width;
1308     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1309     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1310     int i, j;
1311
1312     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1313         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1314             s->is_pcm[i + j * min_pu_width] = 2;
1315 }
1316
1317 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1318                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1319                               int log2_cb_size, int log2_trafo_size,
1320                               int trafo_depth, int blk_idx,
1321                               int cbf_cb, int cbf_cr)
1322 {
1323     HEVCLocalContext *lc = &s->HEVClc;
1324     uint8_t split_transform_flag;
1325     int ret;
1326
1327     if (lc->cu.intra_split_flag) {
1328         if (trafo_depth == 1)
1329             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1330     } else {
1331         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1332     }
1333
1334     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1335         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1336         trafo_depth     < lc->cu.max_trafo_depth       &&
1337         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1338         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1339     } else {
1340         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1341                           lc->cu.pred_mode == MODE_INTER &&
1342                           lc->cu.part_mode != PART_2Nx2N &&
1343                           trafo_depth == 0;
1344
1345         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1346                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1347                                inter_split;
1348     }
1349
1350     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1351         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1352     else if (log2_trafo_size > 2 || trafo_depth == 0)
1353         cbf_cb = 0;
1354     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1355         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1356     else if (log2_trafo_size > 2 || trafo_depth == 0)
1357         cbf_cr = 0;
1358
1359     if (split_transform_flag) {
1360         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1361         const int x1 = x0 + trafo_size_split;
1362         const int y1 = y0 + trafo_size_split;
1363
1364 #define SUBDIVIDE(x, y, idx)                                                    \
1365 do {                                                                            \
1366     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1367                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1368                              cbf_cb, cbf_cr);                                   \
1369     if (ret < 0)                                                                \
1370         return ret;                                                             \
1371 } while (0)
1372
1373         SUBDIVIDE(x0, y0, 0);
1374         SUBDIVIDE(x1, y0, 1);
1375         SUBDIVIDE(x0, y1, 2);
1376         SUBDIVIDE(x1, y1, 3);
1377
1378 #undef SUBDIVIDE
1379     } else {
1380         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1381         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1382         int min_tu_width     = s->ps.sps->min_tb_width;
1383         int cbf_luma         = 1;
1384
1385         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1386             cbf_cb || cbf_cr)
1387             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1388
1389         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1390                                  log2_cb_size, log2_trafo_size,
1391                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1392         if (ret < 0)
1393             return ret;
1394         // TODO: store cbf_luma somewhere else
1395         if (cbf_luma) {
1396             int i, j;
1397             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1398                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1399                     int x_tu = (x0 + j) >> log2_min_tu_size;
1400                     int y_tu = (y0 + i) >> log2_min_tu_size;
1401                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1402                 }
1403         }
1404         if (!s->sh.disable_deblocking_filter_flag) {
1405             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1406             if (s->ps.pps->transquant_bypass_enable_flag &&
1407                 lc->cu.cu_transquant_bypass_flag)
1408                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1409         }
1410     }
1411     return 0;
1412 }
1413
1414 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1415 {
1416     //TODO: non-4:2:0 support
1417     HEVCLocalContext *lc = &s->HEVClc;
1418     GetBitContext gb;
1419     int cb_size   = 1 << log2_cb_size;
1420     int stride0   = s->frame->linesize[0];
1421     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1422     int   stride1 = s->frame->linesize[1];
1423     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1424     int   stride2 = s->frame->linesize[2];
1425     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1426
1427     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1428     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1429     int ret;
1430
1431     if (!s->sh.disable_deblocking_filter_flag)
1432         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1433
1434     ret = init_get_bits(&gb, pcm, length);
1435     if (ret < 0)
1436         return ret;
1437
1438     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1439     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1440     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1441     return 0;
1442 }
1443
1444 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1445 {
1446     HEVCLocalContext *lc = &s->HEVClc;
1447     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1448     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1449
1450     if (x)
1451         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1452     if (y)
1453         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1454
1455     switch (x) {
1456     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1457     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1458     case 0: lc->pu.mvd.x = 0;                               break;
1459     }
1460
1461     switch (y) {
1462     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1463     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1464     case 0: lc->pu.mvd.y = 0;                               break;
1465     }
1466 }
1467
1468 /**
1469  * 8.5.3.2.2.1 Luma sample interpolation process
1470  *
1471  * @param s HEVC decoding context
1472  * @param dst target buffer for block data at block position
1473  * @param dststride stride of the dst buffer
1474  * @param ref reference picture buffer at origin (0, 0)
1475  * @param mv motion vector (relative to block position) to get pixel data from
1476  * @param x_off horizontal position of block from origin (0, 0)
1477  * @param y_off vertical position of block from origin (0, 0)
1478  * @param block_w width of block
1479  * @param block_h height of block
1480  */
1481 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1482                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1483                     int block_w, int block_h, int pred_idx)
1484 {
1485     HEVCLocalContext *lc = &s->HEVClc;
1486     uint8_t *src         = ref->data[0];
1487     ptrdiff_t srcstride  = ref->linesize[0];
1488     int pic_width        = s->ps.sps->width;
1489     int pic_height       = s->ps.sps->height;
1490
1491     int mx         = mv->x & 3;
1492     int my         = mv->y & 3;
1493     int extra_left = ff_hevc_qpel_extra_before[mx];
1494     int extra_top  = ff_hevc_qpel_extra_before[my];
1495
1496     x_off += mv->x >> 2;
1497     y_off += mv->y >> 2;
1498     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1499
1500     if (x_off < extra_left || y_off < extra_top ||
1501         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1502         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1503         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1504         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1505         int buf_offset = extra_top *
1506                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1507
1508         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1509                                  edge_emu_stride, srcstride,
1510                                  block_w + ff_hevc_qpel_extra[mx],
1511                                  block_h + ff_hevc_qpel_extra[my],
1512                                  x_off - extra_left, y_off - extra_top,
1513                                  pic_width, pic_height);
1514         src = lc->edge_emu_buffer + buf_offset;
1515         srcstride = edge_emu_stride;
1516     }
1517     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1518                                                    block_h, mx, my, lc->mc_buffer);
1519 }
1520
1521 /**
1522  * 8.5.3.2.2.2 Chroma sample interpolation process
1523  *
1524  * @param s HEVC decoding context
1525  * @param dst1 target buffer for block data at block position (U plane)
1526  * @param dst2 target buffer for block data at block position (V plane)
1527  * @param dststride stride of the dst1 and dst2 buffers
1528  * @param ref reference picture buffer at origin (0, 0)
1529  * @param mv motion vector (relative to block position) to get pixel data from
1530  * @param x_off horizontal position of block from origin (0, 0)
1531  * @param y_off vertical position of block from origin (0, 0)
1532  * @param block_w width of block
1533  * @param block_h height of block
1534  */
1535 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1536                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1537                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1538 {
1539     HEVCLocalContext *lc = &s->HEVClc;
1540     uint8_t *src1        = ref->data[1];
1541     uint8_t *src2        = ref->data[2];
1542     ptrdiff_t src1stride = ref->linesize[1];
1543     ptrdiff_t src2stride = ref->linesize[2];
1544     int pic_width        = s->ps.sps->width >> 1;
1545     int pic_height       = s->ps.sps->height >> 1;
1546
1547     int mx = mv->x & 7;
1548     int my = mv->y & 7;
1549
1550     x_off += mv->x >> 3;
1551     y_off += mv->y >> 3;
1552     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1553     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1554
1555     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1556         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1557         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1558         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1559         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1560         int buf_offset1 = EPEL_EXTRA_BEFORE *
1561                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1562         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1563         int buf_offset2 = EPEL_EXTRA_BEFORE *
1564                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1565
1566         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1567                                  edge_emu_stride, src1stride,
1568                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1569                                  x_off - EPEL_EXTRA_BEFORE,
1570                                  y_off - EPEL_EXTRA_BEFORE,
1571                                  pic_width, pic_height);
1572
1573         src1 = lc->edge_emu_buffer + buf_offset1;
1574         src1stride = edge_emu_stride;
1575         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1576                                                        block_h, mx, my, lc->mc_buffer);
1577
1578         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1579                                  edge_emu_stride, src2stride,
1580                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1581                                  x_off - EPEL_EXTRA_BEFORE,
1582                                  y_off - EPEL_EXTRA_BEFORE,
1583                                  pic_width, pic_height);
1584         src2 = lc->edge_emu_buffer + buf_offset2;
1585         src2stride = edge_emu_stride;
1586
1587         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1588                                                        block_h, mx, my, lc->mc_buffer);
1589     } else {
1590         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1591                                                        block_h, mx, my, lc->mc_buffer);
1592         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1593                                                        block_h, mx, my, lc->mc_buffer);
1594     }
1595 }
1596
1597 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1598                                 const Mv *mv, int y0, int height)
1599 {
1600     int y = (mv->y >> 2) + y0 + height + 9;
1601     ff_thread_await_progress(&ref->tf, y, 0);
1602 }
1603
1604 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1605                                   int nPbH, int log2_cb_size, int part_idx,
1606                                   int merge_idx, MvField *mv)
1607 {
1608     HEVCLocalContext *lc             = &s->HEVClc;
1609     enum InterPredIdc inter_pred_idc = PRED_L0;
1610     int mvp_flag;
1611
1612     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1613     if (s->sh.slice_type == B_SLICE)
1614         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1615
1616     if (inter_pred_idc != PRED_L1) {
1617         if (s->sh.nb_refs[L0])
1618             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1619
1620         mv->pred_flag[0] = 1;
1621         hls_mvd_coding(s, x0, y0, 0);
1622         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1623         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1624                                  part_idx, merge_idx, mv, mvp_flag, 0);
1625         mv->mv[0].x += lc->pu.mvd.x;
1626         mv->mv[0].y += lc->pu.mvd.y;
1627     }
1628
1629     if (inter_pred_idc != PRED_L0) {
1630         if (s->sh.nb_refs[L1])
1631             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1632
1633         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1634             AV_ZERO32(&lc->pu.mvd);
1635         } else {
1636             hls_mvd_coding(s, x0, y0, 1);
1637         }
1638
1639         mv->pred_flag[1] = 1;
1640         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1641         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1642                                  part_idx, merge_idx, mv, mvp_flag, 1);
1643         mv->mv[1].x += lc->pu.mvd.x;
1644         mv->mv[1].y += lc->pu.mvd.y;
1645     }
1646 }
1647
1648 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1649                                 int nPbW, int nPbH,
1650                                 int log2_cb_size, int partIdx)
1651 {
1652     static const int pred_indices[] = {
1653         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1654     };
1655     const int pred_idx = pred_indices[nPbW];
1656
1657 #define POS(c_idx, x, y)                                                              \
1658     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1659                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1660     HEVCLocalContext *lc = &s->HEVClc;
1661     int merge_idx = 0;
1662     struct MvField current_mv = {{{ 0 }}};
1663
1664     int min_pu_width = s->ps.sps->min_pu_width;
1665
1666     MvField *tab_mvf = s->ref->tab_mvf;
1667     RefPicList  *refPicList = s->ref->refPicList;
1668     HEVCFrame *ref0, *ref1;
1669
1670     int tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1671
1672     uint8_t *dst0 = POS(0, x0, y0);
1673     uint8_t *dst1 = POS(1, x0, y0);
1674     uint8_t *dst2 = POS(2, x0, y0);
1675     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1676     int min_cb_width     = s->ps.sps->min_cb_width;
1677     int x_cb             = x0 >> log2_min_cb_size;
1678     int y_cb             = y0 >> log2_min_cb_size;
1679     int x_pu, y_pu;
1680     int i, j;
1681
1682     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1683
1684     if (!skip_flag)
1685         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1686
1687     if (skip_flag || lc->pu.merge_flag) {
1688         if (s->sh.max_num_merge_cand > 1)
1689             merge_idx = ff_hevc_merge_idx_decode(s);
1690         else
1691             merge_idx = 0;
1692
1693         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1694                                    partIdx, merge_idx, &current_mv);
1695     } else {
1696         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1697                               partIdx, merge_idx, &current_mv);
1698     }
1699
1700     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1701     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1702
1703     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1704         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1705             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1706
1707     if (current_mv.pred_flag[0]) {
1708         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1709         if (!ref0)
1710             return;
1711         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1712     }
1713     if (current_mv.pred_flag[1]) {
1714         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1715         if (!ref1)
1716             return;
1717         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1718     }
1719
1720     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1721         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1722         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1723
1724         luma_mc(s, tmp, tmpstride, ref0->frame,
1725                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1726
1727         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1728             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1729             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1730                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1731                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1732                                                dst0, s->frame->linesize[0], tmp,
1733                                                tmpstride, nPbH);
1734         } else {
1735             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1736         }
1737         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1738                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1739
1740         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1741             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1742             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1743                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1744                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1745                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1746                                                       nPbH / 2);
1747             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1748                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1749                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1750                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1751                                                       nPbH / 2);
1752         } else {
1753             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1754             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1755         }
1756     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1757         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1758         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1759
1760         luma_mc(s, tmp, tmpstride, ref1->frame,
1761                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1762
1763         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1764             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1765             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1766                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1767                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1768                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1769                                                nPbH);
1770         } else {
1771             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1772         }
1773
1774         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1775                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1776
1777         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1778             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1779             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1780                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1781                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1782                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1783             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1784                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1785                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1786                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1787         } else {
1788             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1789             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1790         }
1791     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1792         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1793         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1794         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1795         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1796
1797         luma_mc(s, tmp, tmpstride, ref0->frame,
1798                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1799         luma_mc(s, tmp2, tmpstride, ref1->frame,
1800                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1801
1802         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1803             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1804             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1805                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1806                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1807                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1808                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1809                                                    dst0, s->frame->linesize[0],
1810                                                    tmp, tmp2, tmpstride, nPbH);
1811         } else {
1812             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1813                                                          tmp, tmp2, tmpstride, nPbH);
1814         }
1815
1816         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1817                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1818         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1819                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1820
1821         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1822             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1823             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1824                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1825                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1826                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1827                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1828                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1829                                                           tmpstride, nPbH / 2);
1830             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1831                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1832                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1833                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1834                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1835                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1836                                                           tmpstride, nPbH / 2);
1837         } else {
1838             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1839             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1840         }
1841     }
1842 }
1843
1844 /**
1845  * 8.4.1
1846  */
1847 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1848                                 int prev_intra_luma_pred_flag)
1849 {
1850     HEVCLocalContext *lc = &s->HEVClc;
1851     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1852     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1853     int min_pu_width     = s->ps.sps->min_pu_width;
1854     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1855     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1856     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1857
1858     int cand_up   = (lc->ctb_up_flag || y0b) ?
1859                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1860     int cand_left = (lc->ctb_left_flag || x0b) ?
1861                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1862
1863     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1864
1865     MvField *tab_mvf = s->ref->tab_mvf;
1866     int intra_pred_mode;
1867     int candidate[3];
1868     int i, j;
1869
1870     // intra_pred_mode prediction does not cross vertical CTB boundaries
1871     if ((y0 - 1) < y_ctb)
1872         cand_up = INTRA_DC;
1873
1874     if (cand_left == cand_up) {
1875         if (cand_left < 2) {
1876             candidate[0] = INTRA_PLANAR;
1877             candidate[1] = INTRA_DC;
1878             candidate[2] = INTRA_ANGULAR_26;
1879         } else {
1880             candidate[0] = cand_left;
1881             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1882             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1883         }
1884     } else {
1885         candidate[0] = cand_left;
1886         candidate[1] = cand_up;
1887         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1888             candidate[2] = INTRA_PLANAR;
1889         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1890             candidate[2] = INTRA_DC;
1891         } else {
1892             candidate[2] = INTRA_ANGULAR_26;
1893         }
1894     }
1895
1896     if (prev_intra_luma_pred_flag) {
1897         intra_pred_mode = candidate[lc->pu.mpm_idx];
1898     } else {
1899         if (candidate[0] > candidate[1])
1900             FFSWAP(uint8_t, candidate[0], candidate[1]);
1901         if (candidate[0] > candidate[2])
1902             FFSWAP(uint8_t, candidate[0], candidate[2]);
1903         if (candidate[1] > candidate[2])
1904             FFSWAP(uint8_t, candidate[1], candidate[2]);
1905
1906         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1907         for (i = 0; i < 3; i++)
1908             if (intra_pred_mode >= candidate[i])
1909                 intra_pred_mode++;
1910     }
1911
1912     /* write the intra prediction units into the mv array */
1913     if (!size_in_pus)
1914         size_in_pus = 1;
1915     for (i = 0; i < size_in_pus; i++) {
1916         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1917                intra_pred_mode, size_in_pus);
1918
1919         for (j = 0; j < size_in_pus; j++) {
1920             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1921             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1922             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1923             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1924             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1925             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1926             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1927             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1928             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1929         }
1930     }
1931
1932     return intra_pred_mode;
1933 }
1934
1935 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1936                                           int log2_cb_size, int ct_depth)
1937 {
1938     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1939     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1940     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1941     int y;
1942
1943     for (y = 0; y < length; y++)
1944         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1945                ct_depth, length);
1946 }
1947
1948 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1949                                   int log2_cb_size)
1950 {
1951     HEVCLocalContext *lc = &s->HEVClc;
1952     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1953     uint8_t prev_intra_luma_pred_flag[4];
1954     int split   = lc->cu.part_mode == PART_NxN;
1955     int pb_size = (1 << log2_cb_size) >> split;
1956     int side    = split + 1;
1957     int chroma_mode;
1958     int i, j;
1959
1960     for (i = 0; i < side; i++)
1961         for (j = 0; j < side; j++)
1962             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1963
1964     for (i = 0; i < side; i++) {
1965         for (j = 0; j < side; j++) {
1966             if (prev_intra_luma_pred_flag[2 * i + j])
1967                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1968             else
1969                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1970
1971             lc->pu.intra_pred_mode[2 * i + j] =
1972                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1973                                      prev_intra_luma_pred_flag[2 * i + j]);
1974         }
1975     }
1976
1977     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1978     if (chroma_mode != 4) {
1979         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1980             lc->pu.intra_pred_mode_c = 34;
1981         else
1982             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1983     } else {
1984         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1985     }
1986 }
1987
1988 static void intra_prediction_unit_default_value(HEVCContext *s,
1989                                                 int x0, int y0,
1990                                                 int log2_cb_size)
1991 {
1992     HEVCLocalContext *lc = &s->HEVClc;
1993     int pb_size          = 1 << log2_cb_size;
1994     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
1995     int min_pu_width     = s->ps.sps->min_pu_width;
1996     MvField *tab_mvf     = s->ref->tab_mvf;
1997     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1998     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1999     int j, k;
2000
2001     if (size_in_pus == 0)
2002         size_in_pus = 1;
2003     for (j = 0; j < size_in_pus; j++) {
2004         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2005         for (k = 0; k < size_in_pus; k++)
2006             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2007     }
2008 }
2009
2010 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2011 {
2012     int cb_size          = 1 << log2_cb_size;
2013     HEVCLocalContext *lc = &s->HEVClc;
2014     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2015     int length           = cb_size >> log2_min_cb_size;
2016     int min_cb_width     = s->ps.sps->min_cb_width;
2017     int x_cb             = x0 >> log2_min_cb_size;
2018     int y_cb             = y0 >> log2_min_cb_size;
2019     int x, y, ret;
2020
2021     lc->cu.x                = x0;
2022     lc->cu.y                = y0;
2023     lc->cu.pred_mode        = MODE_INTRA;
2024     lc->cu.part_mode        = PART_2Nx2N;
2025     lc->cu.intra_split_flag = 0;
2026
2027     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2028     for (x = 0; x < 4; x++)
2029         lc->pu.intra_pred_mode[x] = 1;
2030     if (s->ps.pps->transquant_bypass_enable_flag) {
2031         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2032         if (lc->cu.cu_transquant_bypass_flag)
2033             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2034     } else
2035         lc->cu.cu_transquant_bypass_flag = 0;
2036
2037     if (s->sh.slice_type != I_SLICE) {
2038         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2039
2040         x = y_cb * min_cb_width + x_cb;
2041         for (y = 0; y < length; y++) {
2042             memset(&s->skip_flag[x], skip_flag, length);
2043             x += min_cb_width;
2044         }
2045         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2046     }
2047
2048     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2049         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2050         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2051
2052         if (!s->sh.disable_deblocking_filter_flag)
2053             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2054     } else {
2055         int pcm_flag = 0;
2056
2057         if (s->sh.slice_type != I_SLICE)
2058             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2059         if (lc->cu.pred_mode != MODE_INTRA ||
2060             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2061             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2062             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2063                                       lc->cu.pred_mode == MODE_INTRA;
2064         }
2065
2066         if (lc->cu.pred_mode == MODE_INTRA) {
2067             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2068                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2069                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2070                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2071             }
2072             if (pcm_flag) {
2073                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2074                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2075                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2076                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2077
2078                 if (ret < 0)
2079                     return ret;
2080             } else {
2081                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2082             }
2083         } else {
2084             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2085             switch (lc->cu.part_mode) {
2086             case PART_2Nx2N:
2087                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2088                 break;
2089             case PART_2NxN:
2090                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2091                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2092                 break;
2093             case PART_Nx2N:
2094                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2095                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2096                 break;
2097             case PART_2NxnU:
2098                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2099                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2100                 break;
2101             case PART_2NxnD:
2102                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2103                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2104                 break;
2105             case PART_nLx2N:
2106                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2107                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2108                 break;
2109             case PART_nRx2N:
2110                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2111                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2112                 break;
2113             case PART_NxN:
2114                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2115                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2116                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2117                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2118                 break;
2119             }
2120         }
2121
2122         if (!pcm_flag) {
2123             int rqt_root_cbf = 1;
2124
2125             if (lc->cu.pred_mode != MODE_INTRA &&
2126                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2127                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2128             }
2129             if (rqt_root_cbf) {
2130                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2131                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2132                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2133                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2134                                          log2_cb_size,
2135                                          log2_cb_size, 0, 0, 0, 0);
2136                 if (ret < 0)
2137                     return ret;
2138             } else {
2139                 if (!s->sh.disable_deblocking_filter_flag)
2140                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2141             }
2142         }
2143     }
2144
2145     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2146         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2147
2148     x = y_cb * min_cb_width + x_cb;
2149     for (y = 0; y < length; y++) {
2150         memset(&s->qp_y_tab[x], lc->qp_y, length);
2151         x += min_cb_width;
2152     }
2153
2154     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2155
2156     return 0;
2157 }
2158
2159 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2160                                int log2_cb_size, int cb_depth)
2161 {
2162     HEVCLocalContext *lc = &s->HEVClc;
2163     const int cb_size    = 1 << log2_cb_size;
2164     int split_cu;
2165
2166     lc->ct.depth = cb_depth;
2167     if (x0 + cb_size <= s->ps.sps->width  &&
2168         y0 + cb_size <= s->ps.sps->height &&
2169         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2170         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2171     } else {
2172         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2173     }
2174     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2175         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2176         lc->tu.is_cu_qp_delta_coded = 0;
2177         lc->tu.cu_qp_delta          = 0;
2178     }
2179
2180     if (split_cu) {
2181         const int cb_size_split = cb_size >> 1;
2182         const int x1 = x0 + cb_size_split;
2183         const int y1 = y0 + cb_size_split;
2184
2185         log2_cb_size--;
2186         cb_depth++;
2187
2188 #define SUBDIVIDE(x, y)                                                \
2189 do {                                                                   \
2190     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2191         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2192         if (ret < 0)                                                   \
2193             return ret;                                                \
2194     }                                                                  \
2195 } while (0)
2196
2197         SUBDIVIDE(x0, y0);
2198         SUBDIVIDE(x1, y0);
2199         SUBDIVIDE(x0, y1);
2200         SUBDIVIDE(x1, y1);
2201     } else {
2202         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2203         if (ret < 0)
2204             return ret;
2205     }
2206
2207     return 0;
2208 }
2209
2210 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2211                                  int ctb_addr_ts)
2212 {
2213     HEVCLocalContext *lc  = &s->HEVClc;
2214     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2215     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2216     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2217
2218     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2219
2220     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2221         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2222             lc->first_qp_group = 1;
2223         lc->end_of_tiles_x = s->ps.sps->width;
2224     } else if (s->ps.pps->tiles_enabled_flag) {
2225         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2226             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2227             lc->start_of_tiles_x = x_ctb;
2228             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2229             lc->first_qp_group   = 1;
2230         }
2231     } else {
2232         lc->end_of_tiles_x = s->ps.sps->width;
2233     }
2234
2235     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2236
2237     lc->boundary_flags = 0;
2238     if (s->ps.pps->tiles_enabled_flag) {
2239         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2240             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2241         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2242             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2243         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2244             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2245         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2246             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2247     } else {
2248         if (!ctb_addr_in_slice)
2249             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2250         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2251             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2252     }
2253
2254     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2255     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2256     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2257     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2258 }
2259
2260 static int hls_slice_data(HEVCContext *s)
2261 {
2262     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2263     int more_data   = 1;
2264     int x_ctb       = 0;
2265     int y_ctb       = 0;
2266     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2267     int ret;
2268
2269     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2270         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2271
2272         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2273         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2274         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2275
2276         ff_hevc_cabac_init(s, ctb_addr_ts);
2277
2278         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2279
2280         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2281         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2282         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2283
2284         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2285         if (ret < 0)
2286             return ret;
2287         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2288
2289         ctb_addr_ts++;
2290         ff_hevc_save_states(s, ctb_addr_ts);
2291         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2292     }
2293
2294     if (x_ctb + ctb_size >= s->ps.sps->width &&
2295         y_ctb + ctb_size >= s->ps.sps->height)
2296         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2297
2298     return ctb_addr_ts;
2299 }
2300
2301 static void restore_tqb_pixels(HEVCContext *s)
2302 {
2303     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2304     int x, y, c_idx;
2305
2306     for (c_idx = 0; c_idx < 3; c_idx++) {
2307         ptrdiff_t stride = s->frame->linesize[c_idx];
2308         int hshift       = s->ps.sps->hshift[c_idx];
2309         int vshift       = s->ps.sps->vshift[c_idx];
2310         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2311             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2312                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2313                     int n;
2314                     int len      = min_pu_size >> hshift;
2315                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2316                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2317                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2318                         memcpy(dst, src, len);
2319                         src += stride;
2320                         dst += stride;
2321                     }
2322                 }
2323             }
2324         }
2325     }
2326 }
2327
2328 static int set_side_data(HEVCContext *s)
2329 {
2330     AVFrame *out = s->ref->frame;
2331
2332     if (s->sei_frame_packing_present &&
2333         s->frame_packing_arrangement_type >= 3 &&
2334         s->frame_packing_arrangement_type <= 5 &&
2335         s->content_interpretation_type > 0 &&
2336         s->content_interpretation_type < 3) {
2337         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2338         if (!stereo)
2339             return AVERROR(ENOMEM);
2340
2341         switch (s->frame_packing_arrangement_type) {
2342         case 3:
2343             if (s->quincunx_subsampling)
2344                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2345             else
2346                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2347             break;
2348         case 4:
2349             stereo->type = AV_STEREO3D_TOPBOTTOM;
2350             break;
2351         case 5:
2352             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2353             break;
2354         }
2355
2356         if (s->content_interpretation_type == 2)
2357             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2358     }
2359
2360     if (s->sei_display_orientation_present &&
2361         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2362         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2363         AVFrameSideData *rotation = av_frame_new_side_data(out,
2364                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2365                                                            sizeof(int32_t) * 9);
2366         if (!rotation)
2367             return AVERROR(ENOMEM);
2368
2369         av_display_rotation_set((int32_t *)rotation->data, angle);
2370         av_display_matrix_flip((int32_t *)rotation->data,
2371                                s->sei_hflip, s->sei_vflip);
2372     }
2373
2374     return 0;
2375 }
2376
2377 static int hevc_frame_start(HEVCContext *s)
2378 {
2379     HEVCLocalContext *lc = &s->HEVClc;
2380     int ret;
2381
2382     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2383     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2384     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2385     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2386
2387     lc->start_of_tiles_x = 0;
2388     s->is_decoded        = 0;
2389     s->first_nal_type    = s->nal_unit_type;
2390
2391     if (s->ps.pps->tiles_enabled_flag)
2392         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2393
2394     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2395                               s->poc);
2396     if (ret < 0)
2397         goto fail;
2398
2399     ret = ff_hevc_frame_rps(s);
2400     if (ret < 0) {
2401         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2402         goto fail;
2403     }
2404
2405     s->ref->frame->key_frame = IS_IRAP(s);
2406
2407     ret = set_side_data(s);
2408     if (ret < 0)
2409         goto fail;
2410
2411     av_frame_unref(s->output_frame);
2412     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2413     if (ret < 0)
2414         goto fail;
2415
2416     ff_thread_finish_setup(s->avctx);
2417
2418     return 0;
2419
2420 fail:
2421     if (s->ref)
2422         ff_hevc_unref_frame(s, s->ref, ~0);
2423     s->ref = NULL;
2424     return ret;
2425 }
2426
2427 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2428 {
2429     HEVCLocalContext *lc = &s->HEVClc;
2430     GetBitContext *gb    = &lc->gb;
2431     int ctb_addr_ts, ret;
2432
2433     *gb              = nal->gb;
2434     s->nal_unit_type = nal->type;
2435     s->temporal_id   = nal->temporal_id;
2436
2437     switch (s->nal_unit_type) {
2438     case NAL_VPS:
2439         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2440         if (ret < 0)
2441             goto fail;
2442         break;
2443     case NAL_SPS:
2444         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2445                                      s->apply_defdispwin);
2446         if (ret < 0)
2447             goto fail;
2448         break;
2449     case NAL_PPS:
2450         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2451         if (ret < 0)
2452             goto fail;
2453         break;
2454     case NAL_SEI_PREFIX:
2455     case NAL_SEI_SUFFIX:
2456         ret = ff_hevc_decode_nal_sei(s);
2457         if (ret < 0)
2458             goto fail;
2459         break;
2460     case NAL_TRAIL_R:
2461     case NAL_TRAIL_N:
2462     case NAL_TSA_N:
2463     case NAL_TSA_R:
2464     case NAL_STSA_N:
2465     case NAL_STSA_R:
2466     case NAL_BLA_W_LP:
2467     case NAL_BLA_W_RADL:
2468     case NAL_BLA_N_LP:
2469     case NAL_IDR_W_RADL:
2470     case NAL_IDR_N_LP:
2471     case NAL_CRA_NUT:
2472     case NAL_RADL_N:
2473     case NAL_RADL_R:
2474     case NAL_RASL_N:
2475     case NAL_RASL_R:
2476         ret = hls_slice_header(s);
2477         if (ret < 0)
2478             return ret;
2479
2480         if (s->max_ra == INT_MAX) {
2481             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2482                 s->max_ra = s->poc;
2483             } else {
2484                 if (IS_IDR(s))
2485                     s->max_ra = INT_MIN;
2486             }
2487         }
2488
2489         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2490             s->poc <= s->max_ra) {
2491             s->is_decoded = 0;
2492             break;
2493         } else {
2494             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2495                 s->max_ra = INT_MIN;
2496         }
2497
2498         if (s->sh.first_slice_in_pic_flag) {
2499             ret = hevc_frame_start(s);
2500             if (ret < 0)
2501                 return ret;
2502         } else if (!s->ref) {
2503             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2504             goto fail;
2505         }
2506
2507         if (s->nal_unit_type != s->first_nal_type) {
2508             av_log(s->avctx, AV_LOG_ERROR,
2509                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2510                    s->first_nal_type, s->nal_unit_type);
2511             return AVERROR_INVALIDDATA;
2512         }
2513
2514         if (!s->sh.dependent_slice_segment_flag &&
2515             s->sh.slice_type != I_SLICE) {
2516             ret = ff_hevc_slice_rpl(s);
2517             if (ret < 0) {
2518                 av_log(s->avctx, AV_LOG_WARNING,
2519                        "Error constructing the reference lists for the current slice.\n");
2520                 goto fail;
2521             }
2522         }
2523
2524         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2525             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2526             if (ret < 0)
2527                 goto fail;
2528         }
2529
2530         if (s->avctx->hwaccel) {
2531             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2532             if (ret < 0)
2533                 goto fail;
2534         } else {
2535             ctb_addr_ts = hls_slice_data(s);
2536             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2537                 s->is_decoded = 1;
2538                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2539                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2540                     s->ps.sps->sao_enabled)
2541                     restore_tqb_pixels(s);
2542             }
2543
2544             if (ctb_addr_ts < 0) {
2545                 ret = ctb_addr_ts;
2546                 goto fail;
2547             }
2548         }
2549         break;
2550     case NAL_EOS_NUT:
2551     case NAL_EOB_NUT:
2552         s->seq_decode = (s->seq_decode + 1) & 0xff;
2553         s->max_ra     = INT_MAX;
2554         break;
2555     case NAL_AUD:
2556     case NAL_FD_NUT:
2557         break;
2558     default:
2559         av_log(s->avctx, AV_LOG_INFO,
2560                "Skipping NAL unit %d\n", s->nal_unit_type);
2561     }
2562
2563     return 0;
2564 fail:
2565     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2566         return ret;
2567     return 0;
2568 }
2569
2570 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2571 {
2572     int i, ret = 0;
2573
2574     s->ref = NULL;
2575     s->eos = 0;
2576
2577     /* split the input packet into NAL units, so we know the upper bound on the
2578      * number of slices in the frame */
2579     ret = ff_hevc_split_packet(&s->pkt, buf, length, s->avctx, s->is_nalff,
2580                                s->nal_length_size);
2581     if (ret < 0) {
2582         av_log(s->avctx, AV_LOG_ERROR,
2583                "Error splitting the input into NAL units.\n");
2584         return ret;
2585     }
2586
2587     for (i = 0; i < s->pkt.nb_nals; i++) {
2588         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2589             s->pkt.nals[i].type == NAL_EOS_NUT)
2590             s->eos = 1;
2591     }
2592
2593     /* decode the NAL units */
2594     for (i = 0; i < s->pkt.nb_nals; i++) {
2595         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2596         if (ret < 0) {
2597             av_log(s->avctx, AV_LOG_WARNING,
2598                    "Error parsing NAL unit #%d.\n", i);
2599             goto fail;
2600         }
2601     }
2602
2603 fail:
2604     if (s->ref)
2605         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2606
2607     return ret;
2608 }
2609
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx logging context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2616
2617 static int verify_md5(HEVCContext *s, AVFrame *frame)
2618 {
2619     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2620     int pixel_shift;
2621     int i, j;
2622
2623     if (!desc)
2624         return AVERROR(EINVAL);
2625
2626     pixel_shift = desc->comp[0].depth > 8;
2627
2628     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2629            s->poc);
2630
2631     /* the checksums are LE, so we have to byteswap for >8bpp formats
2632      * on BE arches */
2633 #if HAVE_BIGENDIAN
2634     if (pixel_shift && !s->checksum_buf) {
2635         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2636                        FFMAX3(frame->linesize[0], frame->linesize[1],
2637                               frame->linesize[2]));
2638         if (!s->checksum_buf)
2639             return AVERROR(ENOMEM);
2640     }
2641 #endif
2642
2643     for (i = 0; frame->data[i]; i++) {
2644         int width  = s->avctx->coded_width;
2645         int height = s->avctx->coded_height;
2646         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2647         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2648         uint8_t md5[16];
2649
2650         av_md5_init(s->md5_ctx);
2651         for (j = 0; j < h; j++) {
2652             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2653 #if HAVE_BIGENDIAN
2654             if (pixel_shift) {
2655                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2656                                     (const uint16_t *) src, w);
2657                 src = s->checksum_buf;
2658             }
2659 #endif
2660             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2661         }
2662         av_md5_final(s->md5_ctx, md5);
2663
2664         if (!memcmp(md5, s->md5[i], 16)) {
2665             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2666             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2667             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2668         } else {
2669             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2670             print_md5(s->avctx, AV_LOG_ERROR, md5);
2671             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2672             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2673             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2674             return AVERROR_INVALIDDATA;
2675         }
2676     }
2677
2678     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2679
2680     return 0;
2681 }
2682
2683 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2684                              AVPacket *avpkt)
2685 {
2686     int ret;
2687     HEVCContext *s = avctx->priv_data;
2688
2689     if (!avpkt->size) {
2690         ret = ff_hevc_output_frame(s, data, 1);
2691         if (ret < 0)
2692             return ret;
2693
2694         *got_output = ret;
2695         return 0;
2696     }
2697
2698     s->ref = NULL;
2699     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2700     if (ret < 0)
2701         return ret;
2702
2703     if (avctx->hwaccel) {
2704         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2705             av_log(avctx, AV_LOG_ERROR,
2706                    "hardware accelerator failed to decode picture\n");
2707     } else {
2708         /* verify the SEI checksum */
2709         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2710             s->is_md5) {
2711             ret = verify_md5(s, s->ref->frame);
2712             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2713                 ff_hevc_unref_frame(s, s->ref, ~0);
2714                 return ret;
2715             }
2716         }
2717     }
2718     s->is_md5 = 0;
2719
2720     if (s->is_decoded) {
2721         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2722         s->is_decoded = 0;
2723     }
2724
2725     if (s->output_frame->buf[0]) {
2726         av_frame_move_ref(data, s->output_frame);
2727         *got_output = 1;
2728     }
2729
2730     return avpkt->size;
2731 }
2732
2733 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2734 {
2735     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2736     if (ret < 0)
2737         return ret;
2738
2739     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2740     if (!dst->tab_mvf_buf)
2741         goto fail;
2742     dst->tab_mvf = src->tab_mvf;
2743
2744     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2745     if (!dst->rpl_tab_buf)
2746         goto fail;
2747     dst->rpl_tab = src->rpl_tab;
2748
2749     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2750     if (!dst->rpl_buf)
2751         goto fail;
2752
2753     dst->poc        = src->poc;
2754     dst->ctb_count  = src->ctb_count;
2755     dst->window     = src->window;
2756     dst->flags      = src->flags;
2757     dst->sequence   = src->sequence;
2758
2759     if (src->hwaccel_picture_private) {
2760         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2761         if (!dst->hwaccel_priv_buf)
2762             goto fail;
2763         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2764     }
2765
2766     return 0;
2767 fail:
2768     ff_hevc_unref_frame(s, dst, ~0);
2769     return AVERROR(ENOMEM);
2770 }
2771
2772 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2773 {
2774     HEVCContext       *s = avctx->priv_data;
2775     int i;
2776
2777     pic_arrays_free(s);
2778
2779     av_freep(&s->md5_ctx);
2780
2781     av_frame_free(&s->tmp_frame);
2782     av_frame_free(&s->output_frame);
2783
2784     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2785         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2786         av_frame_free(&s->DPB[i].frame);
2787     }
2788
2789     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2790         av_buffer_unref(&s->ps.vps_list[i]);
2791     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2792         av_buffer_unref(&s->ps.sps_list[i]);
2793     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2794         av_buffer_unref(&s->ps.pps_list[i]);
2795
2796     for (i = 0; i < s->pkt.nals_allocated; i++)
2797         av_freep(&s->pkt.nals[i].rbsp_buffer);
2798     av_freep(&s->pkt.nals);
2799     s->pkt.nals_allocated = 0;
2800
2801     return 0;
2802 }
2803
2804 static av_cold int hevc_init_context(AVCodecContext *avctx)
2805 {
2806     HEVCContext *s = avctx->priv_data;
2807     int i;
2808
2809     s->avctx = avctx;
2810
2811     s->tmp_frame = av_frame_alloc();
2812     if (!s->tmp_frame)
2813         goto fail;
2814
2815     s->output_frame = av_frame_alloc();
2816     if (!s->output_frame)
2817         goto fail;
2818
2819     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2820         s->DPB[i].frame = av_frame_alloc();
2821         if (!s->DPB[i].frame)
2822             goto fail;
2823         s->DPB[i].tf.f = s->DPB[i].frame;
2824     }
2825
2826     s->max_ra = INT_MAX;
2827
2828     s->md5_ctx = av_md5_alloc();
2829     if (!s->md5_ctx)
2830         goto fail;
2831
2832     ff_bswapdsp_init(&s->bdsp);
2833
2834     s->context_initialized = 1;
2835
2836     return 0;
2837
2838 fail:
2839     hevc_decode_free(avctx);
2840     return AVERROR(ENOMEM);
2841 }
2842
2843 static int hevc_update_thread_context(AVCodecContext *dst,
2844                                       const AVCodecContext *src)
2845 {
2846     HEVCContext *s  = dst->priv_data;
2847     HEVCContext *s0 = src->priv_data;
2848     int i, ret;
2849
2850     if (!s->context_initialized) {
2851         ret = hevc_init_context(dst);
2852         if (ret < 0)
2853             return ret;
2854     }
2855
2856     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2857         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2858         if (s0->DPB[i].frame->buf[0]) {
2859             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2860             if (ret < 0)
2861                 return ret;
2862         }
2863     }
2864
2865     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2866         av_buffer_unref(&s->ps.vps_list[i]);
2867         if (s0->ps.vps_list[i]) {
2868             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2869             if (!s->ps.vps_list[i])
2870                 return AVERROR(ENOMEM);
2871         }
2872     }
2873
2874     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2875         av_buffer_unref(&s->ps.sps_list[i]);
2876         if (s0->ps.sps_list[i]) {
2877             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2878             if (!s->ps.sps_list[i])
2879                 return AVERROR(ENOMEM);
2880         }
2881     }
2882
2883     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2884         av_buffer_unref(&s->ps.pps_list[i]);
2885         if (s0->ps.pps_list[i]) {
2886             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2887             if (!s->ps.pps_list[i])
2888                 return AVERROR(ENOMEM);
2889         }
2890     }
2891
2892     if (s->ps.sps != s0->ps.sps)
2893         ret = set_sps(s, s0->ps.sps);
2894
2895     s->seq_decode = s0->seq_decode;
2896     s->seq_output = s0->seq_output;
2897     s->pocTid0    = s0->pocTid0;
2898     s->max_ra     = s0->max_ra;
2899
2900     s->is_nalff        = s0->is_nalff;
2901     s->nal_length_size = s0->nal_length_size;
2902
2903     if (s0->eos) {
2904         s->seq_decode = (s->seq_decode + 1) & 0xff;
2905         s->max_ra = INT_MAX;
2906     }
2907
2908     return 0;
2909 }
2910
2911 static int hevc_decode_extradata(HEVCContext *s)
2912 {
2913     AVCodecContext *avctx = s->avctx;
2914     GetByteContext gb;
2915     int ret, i;
2916
2917     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2918
2919     if (avctx->extradata_size > 3 &&
2920         (avctx->extradata[0] || avctx->extradata[1] ||
2921          avctx->extradata[2] > 1)) {
2922         /* It seems the extradata is encoded as hvcC format.
2923          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2924          * is finalized. When finalized, configurationVersion will be 1 and we
2925          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2926         int i, j, num_arrays, nal_len_size;
2927
2928         s->is_nalff = 1;
2929
2930         bytestream2_skip(&gb, 21);
2931         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2932         num_arrays   = bytestream2_get_byte(&gb);
2933
2934         /* nal units in the hvcC always have length coded with 2 bytes,
2935          * so put a fake nal_length_size = 2 while parsing them */
2936         s->nal_length_size = 2;
2937
2938         /* Decode nal units from hvcC. */
2939         for (i = 0; i < num_arrays; i++) {
2940             int type = bytestream2_get_byte(&gb) & 0x3f;
2941             int cnt  = bytestream2_get_be16(&gb);
2942
2943             for (j = 0; j < cnt; j++) {
2944                 // +2 for the nal size field
2945                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2946                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2947                     av_log(s->avctx, AV_LOG_ERROR,
2948                            "Invalid NAL unit size in extradata.\n");
2949                     return AVERROR_INVALIDDATA;
2950                 }
2951
2952                 ret = decode_nal_units(s, gb.buffer, nalsize);
2953                 if (ret < 0) {
2954                     av_log(avctx, AV_LOG_ERROR,
2955                            "Decoding nal unit %d %d from hvcC failed\n",
2956                            type, i);
2957                     return ret;
2958                 }
2959                 bytestream2_skip(&gb, nalsize);
2960             }
2961         }
2962
2963         /* Now store right nal length size, that will be used to parse
2964          * all other nals */
2965         s->nal_length_size = nal_len_size;
2966     } else {
2967         s->is_nalff = 0;
2968         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2969         if (ret < 0)
2970             return ret;
2971     }
2972
2973     /* export stream parameters from the first SPS */
2974     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2975         if (s->ps.sps_list[i]) {
2976             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2977             export_stream_params(s->avctx, &s->ps, sps);
2978             break;
2979         }
2980     }
2981
2982     return 0;
2983 }
2984
2985 static av_cold int hevc_decode_init(AVCodecContext *avctx)
2986 {
2987     HEVCContext *s = avctx->priv_data;
2988     int ret;
2989
2990     avctx->internal->allocate_progress = 1;
2991
2992     ret = hevc_init_context(avctx);
2993     if (ret < 0)
2994         return ret;
2995
2996     if (avctx->extradata_size > 0 && avctx->extradata) {
2997         ret = hevc_decode_extradata(s);
2998         if (ret < 0) {
2999             hevc_decode_free(avctx);
3000             return ret;
3001         }
3002     }
3003
3004     return 0;
3005 }
3006
3007 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3008 {
3009     HEVCContext *s = avctx->priv_data;
3010     int ret;
3011
3012     memset(s, 0, sizeof(*s));
3013
3014     ret = hevc_init_context(avctx);
3015     if (ret < 0)
3016         return ret;
3017
3018     return 0;
3019 }
3020
3021 static void hevc_decode_flush(AVCodecContext *avctx)
3022 {
3023     HEVCContext *s = avctx->priv_data;
3024     ff_hevc_flush_dpb(s);
3025     s->max_ra = INT_MAX;
3026 }
3027
3028 #define OFFSET(x) offsetof(HEVCContext, x)
3029 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3030
3031 static const AVOption options[] = {
3032     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3033         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3034     { NULL },
3035 };
3036
3037 static const AVClass hevc_decoder_class = {
3038     .class_name = "HEVC decoder",
3039     .item_name  = av_default_item_name,
3040     .option     = options,
3041     .version    = LIBAVUTIL_VERSION_INT,
3042 };
3043
3044 AVCodec ff_hevc_decoder = {
3045     .name                  = "hevc",
3046     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3047     .type                  = AVMEDIA_TYPE_VIDEO,
3048     .id                    = AV_CODEC_ID_HEVC,
3049     .priv_data_size        = sizeof(HEVCContext),
3050     .priv_class            = &hevc_decoder_class,
3051     .init                  = hevc_decode_init,
3052     .close                 = hevc_decode_free,
3053     .decode                = hevc_decode_frame,
3054     .flush                 = hevc_decode_flush,
3055     .update_thread_context = hevc_update_thread_context,
3056     .init_thread_copy      = hevc_init_thread_copy,
3057     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3058                              AV_CODEC_CAP_FRAME_THREADS,
3059     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3060 };