git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  86     0, 0, 1, 0,
  87     1, 2, 0, 1,
  88     2, 3, 1, 2,
  89     3, 2, 3, 3,
  90 };
  91
  92 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  93     0, 1, 0, 2,
  94     1, 0, 3, 2,
  95     1, 0, 3, 2,
  96     1, 3, 2, 3,
  97 };
  98
  99 static const uint8_t diag_scan4x4_inv[4][4] = {
 100     { 0,  2,  5,  9, },
 101     { 1,  4,  8, 12, },
 102     { 3,  7, 11, 14, },
 103     { 6, 10, 13, 15, },
 104 };
 105
 106 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 107     0, 0, 1, 0,
 108     1, 2, 0, 1,
 109     2, 3, 0, 1,
 110     2, 3, 4, 0,
 111     1, 2, 3, 4,
 112     5, 0, 1, 2,
 113     3, 4, 5, 6,
 114     0, 1, 2, 3,
 115     4, 5, 6, 7,
 116     1, 2, 3, 4,
 117     5, 6, 7, 2,
 118     3, 4, 5, 6,
 119     7, 3, 4, 5,
 120     6, 7, 4, 5,
 121     6, 7, 5, 6,
 122     7, 6, 7, 7,
 123 };
 124
 125 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 126     0, 1, 0, 2,
 127     1, 0, 3, 2,
 128     1, 0, 4, 3,
 129     2, 1, 0, 5,
 130     4, 3, 2, 1,
 131     0, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 0,
 135     7, 6, 5, 4,
 136     3, 2, 1, 7,
 137     6, 5, 4, 3,
 138     2, 7, 6, 5,
 139     4, 3, 7, 6,
 140     5, 4, 7, 6,
 141     5, 7, 6, 7,
 142 };
 143
 144 static const uint8_t diag_scan8x8_inv[8][8] = {
 145     {  0,  2,  5,  9, 14, 20, 27, 35, },
 146     {  1,  4,  8, 13, 19, 26, 34, 42, },
 147     {  3,  7, 12, 18, 25, 33, 41, 48, },
 148     {  6, 11, 17, 24, 32, 40, 47, 53, },
 149     { 10, 16, 23, 31, 39, 46, 52, 57, },
 150     { 15, 22, 30, 38, 45, 51, 56, 60, },
 151     { 21, 29, 37, 44, 50, 55, 59, 62, },
 152     { 28, 36, 43, 49, 54, 58, 61, 63, },
 153 };
 154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169     av_freep(&s->split_cu_flag);
 170
 171     av_freep(&s->skip_flag);
 172     av_freep(&s->tab_ct_depth);
 173
 174     av_freep(&s->tab_ipm);
 175     av_freep(&s->cbf_luma);
 176     av_freep(&s->is_pcm);
 177
 178     av_freep(&s->qp_y_tab);
 179     av_freep(&s->tab_slice_address);
 180     av_freep(&s->filter_slice_edges);
 181
 182     av_freep(&s->horizontal_bs);
 183     av_freep(&s->vertical_bs);
 184
 185     av_buffer_pool_uninit(&s->tab_mvf_pool);
 186     av_buffer_pool_uninit(&s->rpl_tab_pool);
 187 }
 188
 189 /* allocate arrays that depend on frame dimensions */
 190 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 191 {
 192     int log2_min_cb_size = sps->log2_min_cb_size;
 193     int width            = sps->width;
 194     int height           = sps->height;
 195     int pic_size         = width * height;
 196     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 197                            ((height >> log2_min_cb_size) + 1);
 198     int ctb_count        = sps->ctb_width * sps->ctb_height;
 199     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 200
 201     s->bs_width  = width  >> 3;
 202     s->bs_height = height >> 3;
 203
 204     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 205     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 206     s->split_cu_flag = av_malloc(pic_size);
 207     if (!s->sao || !s->deblock || !s->split_cu_flag)
 208         goto fail;
 209
 210     s->skip_flag    = av_malloc(pic_size_in_ctb);
 211     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 212     if (!s->skip_flag || !s->tab_ct_depth)
 213         goto fail;
 214
 215     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 216     s->tab_ipm  = av_mallocz(min_pu_size);
 217     s->is_pcm   = av_malloc(min_pu_size);
 218     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 219         goto fail;
 220
 221     s->filter_slice_edges = av_malloc(ctb_count);
 222     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 223                                       sizeof(*s->tab_slice_address));
 224     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 225                                       sizeof(*s->qp_y_tab));
 226     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 227         goto fail;
 228
 229     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 230     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 231     if (!s->horizontal_bs || !s->vertical_bs)
 232         goto fail;
 233
 234     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 235                                           av_buffer_alloc);
 236     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 237                                           av_buffer_allocz);
 238     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 239         goto fail;
 240
 241     return 0;
 242
 243 fail:
 244     pic_arrays_free(s);
 245     return AVERROR(ENOMEM);
 246 }
 247
 248 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 249 {
 250     int i = 0;
 251     int j = 0;
 252     uint8_t luma_weight_l0_flag[16];
 253     uint8_t chroma_weight_l0_flag[16];
 254     uint8_t luma_weight_l1_flag[16];
 255     uint8_t chroma_weight_l1_flag[16];
 256
 257     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 258     if (s->sps->chroma_format_idc != 0) {
 259         int delta = get_se_golomb(gb);
 260         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 261     }
 262
 263     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 264         luma_weight_l0_flag[i] = get_bits1(gb);
 265         if (!luma_weight_l0_flag[i]) {
 266             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 267             s->sh.luma_offset_l0[i] = 0;
 268         }
 269     }
 270     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = get_bits1(gb);
 273     } else {
 274         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 275             chroma_weight_l0_flag[i] = 0;
 276     }
 277     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 278         if (luma_weight_l0_flag[i]) {
 279             int delta_luma_weight_l0 = get_se_golomb(gb);
 280             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 281             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 282         }
 283         if (chroma_weight_l0_flag[i]) {
 284             for (j = 0; j < 2; j++) {
 285                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 286                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 287                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 288                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 289                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 290             }
 291         } else {
 292             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 293             s->sh.chroma_offset_l0[i][0] = 0;
 294             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 295             s->sh.chroma_offset_l0[i][1] = 0;
 296         }
 297     }
 298     if (s->sh.slice_type == B_SLICE) {
 299         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 300             luma_weight_l1_flag[i] = get_bits1(gb);
 301             if (!luma_weight_l1_flag[i]) {
 302                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 303                 s->sh.luma_offset_l1[i] = 0;
 304             }
 305         }
 306         if (s->sps->chroma_format_idc != 0) {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = get_bits1(gb);
 309         } else {
 310             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 311                 chroma_weight_l1_flag[i] = 0;
 312         }
 313         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 314             if (luma_weight_l1_flag[i]) {
 315                 int delta_luma_weight_l1 = get_se_golomb(gb);
 316                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 317                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 318             }
 319             if (chroma_weight_l1_flag[i]) {
 320                 for (j = 0; j < 2; j++) {
 321                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 322                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 323                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 324                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 325                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 326                 }
 327             } else {
 328                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 329                 s->sh.chroma_offset_l1[i][0] = 0;
 330                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 331                 s->sh.chroma_offset_l1[i][1] = 0;
 332             }
 333         }
 334     }
 335 }
 336
 337 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 338 {
 339     const HEVCSPS *sps = s->sps;
 340     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 341     int prev_delta_msb = 0;
 342     unsigned int nb_sps = 0, nb_sh;
 343     int i;
 344
 345     rps->nb_refs = 0;
 346     if (!sps->long_term_ref_pics_present_flag)
 347         return 0;
 348
 349     if (sps->num_long_term_ref_pics_sps > 0)
 350         nb_sps = get_ue_golomb_long(gb);
 351     nb_sh = get_ue_golomb_long(gb);
 352
 353     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 354         return AVERROR_INVALIDDATA;
 355
 356     rps->nb_refs = nb_sh + nb_sps;
 357
 358     for (i = 0; i < rps->nb_refs; i++) {
 359         uint8_t delta_poc_msb_present;
 360
 361         if (i < nb_sps) {
 362             uint8_t lt_idx_sps = 0;
 363
 364             if (sps->num_long_term_ref_pics_sps > 1)
 365                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 366
 367             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 368             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 369         } else {
 370             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 371             rps->used[i] = get_bits1(gb);
 372         }
 373
 374         delta_poc_msb_present = get_bits1(gb);
 375         if (delta_poc_msb_present) {
 376             int delta = get_ue_golomb_long(gb);
 377
 378             if (i && i != nb_sps)
 379                 delta += prev_delta_msb;
 380
 381             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 382             prev_delta_msb = delta;
 383         }
 384     }
 385
 386     return 0;
 387 }
 388
 389 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 390 {
 391     int ret;
 392     unsigned int num = 0, den = 0;
 393
 394     pic_arrays_free(s);
 395     ret = pic_arrays_init(s, sps);
 396     if (ret < 0)
 397         goto fail;
 398
 399     s->avctx->coded_width         = sps->width;
 400     s->avctx->coded_height        = sps->height;
 401     s->avctx->width               = sps->output_width;
 402     s->avctx->height              = sps->output_height;
 403     s->avctx->pix_fmt             = sps->pix_fmt;
 404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 405
 406     ff_set_sar(s->avctx, sps->vui.sar);
 407
 408     if (sps->vui.video_signal_type_present_flag)
 409         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 410                                                                : AVCOL_RANGE_MPEG;
 411     else
 412         s->avctx->color_range = AVCOL_RANGE_MPEG;
 413
 414     if (sps->vui.colour_description_present_flag) {
 415         s->avctx->color_primaries = sps->vui.colour_primaries;
 416         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 417         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 418     } else {
 419         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 420         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 421         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 422     }
 423
 424     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 425     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 426     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 427
 428     if (sps->sao_enabled) {
 429         av_frame_unref(s->tmp_frame);
 430         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 431         if (ret < 0)
 432             goto fail;
 433         s->frame = s->tmp_frame;
 434     }
 435
 436     s->sps = sps;
 437     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 438
 439     if (s->vps->vps_timing_info_present_flag) {
 440         num = s->vps->vps_num_units_in_tick;
 441         den = s->vps->vps_time_scale;
 442     } else if (sps->vui.vui_timing_info_present_flag) {
 443         num = sps->vui.vui_num_units_in_tick;
 444         den = sps->vui.vui_time_scale;
 445     }
 446
 447     if (num != 0 && den != 0)
 448         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 449                   num, den, 1 << 30);
 450
 451     return 0;
 452
 453 fail:
 454     pic_arrays_free(s);
 455     s->sps = NULL;
 456     return ret;
 457 }
 458
 459 static int hls_slice_header(HEVCContext *s)
 460 {
 461     GetBitContext *gb = &s->HEVClc.gb;
 462     SliceHeader *sh   = &s->sh;
 463     int i, ret;
 464
 465     // Coded parameters
 466     sh->first_slice_in_pic_flag = get_bits1(gb);
 467     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 468         s->seq_decode = (s->seq_decode + 1) & 0xff;
 469         s->max_ra     = INT_MAX;
 470         if (IS_IDR(s))
 471             ff_hevc_clear_refs(s);
 472     }
 473     if (IS_IRAP(s))
 474         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 475
 476     sh->pps_id = get_ue_golomb_long(gb);
 477     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 478         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 479         return AVERROR_INVALIDDATA;
 480     }
 481     if (!sh->first_slice_in_pic_flag &&
 482         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 483         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 484         return AVERROR_INVALIDDATA;
 485     }
 486     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 487
 488     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 489         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 490
 491         ff_hevc_clear_refs(s);
 492         ret = set_sps(s, s->sps);
 493         if (ret < 0)
 494             return ret;
 495
 496         s->seq_decode = (s->seq_decode + 1) & 0xff;
 497         s->max_ra     = INT_MAX;
 498     }
 499
 500     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 501     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 502
 503     sh->dependent_slice_segment_flag = 0;
 504     if (!sh->first_slice_in_pic_flag) {
 505         int slice_address_length;
 506
 507         if (s->pps->dependent_slice_segments_enabled_flag)
 508             sh->dependent_slice_segment_flag = get_bits1(gb);
 509
 510         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 511                                             s->sps->ctb_height);
 512         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 513         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 514             av_log(s->avctx, AV_LOG_ERROR,
 515                    "Invalid slice segment address: %u.\n",
 516                    sh->slice_segment_addr);
 517             return AVERROR_INVALIDDATA;
 518         }
 519
 520         if (!sh->dependent_slice_segment_flag) {
 521             sh->slice_addr = sh->slice_segment_addr;
 522             s->slice_idx++;
 523         }
 524     } else {
 525         sh->slice_segment_addr = sh->slice_addr = 0;
 526         s->slice_idx           = 0;
 527         s->slice_initialized   = 0;
 528     }
 529
 530     if (!sh->dependent_slice_segment_flag) {
 531         s->slice_initialized = 0;
 532
 533         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 534             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 535
 536         sh->slice_type = get_ue_golomb_long(gb);
 537         if (!(sh->slice_type == I_SLICE ||
 538               sh->slice_type == P_SLICE ||
 539               sh->slice_type == B_SLICE)) {
 540             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 541                    sh->slice_type);
 542             return AVERROR_INVALIDDATA;
 543         }
 544         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 545             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 546             return AVERROR_INVALIDDATA;
 547         }
 548
 549         if (s->pps->output_flag_present_flag)
 550             sh->pic_output_flag = get_bits1(gb);
 551
 552         if (s->sps->separate_colour_plane_flag)
 553             sh->colour_plane_id = get_bits(gb, 2);
 554
 555         if (!IS_IDR(s)) {
 556             int short_term_ref_pic_set_sps_flag, poc;
 557
 558             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 559             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 560             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 561                 av_log(s->avctx, AV_LOG_WARNING,
 562                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 563                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 564                     return AVERROR_INVALIDDATA;
 565                 poc = s->poc;
 566             }
 567             s->poc = poc;
 568
 569             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 570             if (!short_term_ref_pic_set_sps_flag) {
 571                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 572                 if (ret < 0)
 573                     return ret;
 574
 575                 sh->short_term_rps = &sh->slice_rps;
 576             } else {
 577                 int numbits, rps_idx;
 578
 579                 if (!s->sps->nb_st_rps) {
 580                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 581                     return AVERROR_INVALIDDATA;
 582                 }
 583
 584                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 585                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 586                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 587             }
 588
 589             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 590             if (ret < 0) {
 591                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 592                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 593                     return AVERROR_INVALIDDATA;
 594             }
 595
 596             if (s->sps->sps_temporal_mvp_enabled_flag)
 597                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 598             else
 599                 sh->slice_temporal_mvp_enabled_flag = 0;
 600         } else {
 601             s->sh.short_term_rps = NULL;
 602             s->poc               = 0;
 603         }
 604
 605         /* 8.3.1 */
 606         if (s->temporal_id == 0 &&
 607             s->nal_unit_type != NAL_TRAIL_N &&
 608             s->nal_unit_type != NAL_TSA_N   &&
 609             s->nal_unit_type != NAL_STSA_N  &&
 610             s->nal_unit_type != NAL_RADL_N  &&
 611             s->nal_unit_type != NAL_RADL_R  &&
 612             s->nal_unit_type != NAL_RASL_N  &&
 613             s->nal_unit_type != NAL_RASL_R)
 614             s->pocTid0 = s->poc;
 615
 616         if (s->sps->sao_enabled) {
 617             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 618             sh->slice_sample_adaptive_offset_flag[1] =
 619             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 620         } else {
 621             sh->slice_sample_adaptive_offset_flag[0] = 0;
 622             sh->slice_sample_adaptive_offset_flag[1] = 0;
 623             sh->slice_sample_adaptive_offset_flag[2] = 0;
 624         }
 625
 626         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 627         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 628             int nb_refs;
 629
 630             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 631             if (sh->slice_type == B_SLICE)
 632                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 633
 634             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 635                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 636                 if (sh->slice_type == B_SLICE)
 637                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 638             }
 639             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 640                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 641                        sh->nb_refs[L0], sh->nb_refs[L1]);
 642                 return AVERROR_INVALIDDATA;
 643             }
 644
 645             sh->rpl_modification_flag[0] = 0;
 646             sh->rpl_modification_flag[1] = 0;
 647             nb_refs = ff_hevc_frame_nb_refs(s);
 648             if (!nb_refs) {
 649                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 650                 return AVERROR_INVALIDDATA;
 651             }
 652
 653             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 654                 sh->rpl_modification_flag[0] = get_bits1(gb);
 655                 if (sh->rpl_modification_flag[0]) {
 656                     for (i = 0; i < sh->nb_refs[L0]; i++)
 657                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 658                 }
 659
 660                 if (sh->slice_type == B_SLICE) {
 661                     sh->rpl_modification_flag[1] = get_bits1(gb);
 662                     if (sh->rpl_modification_flag[1] == 1)
 663                         for (i = 0; i < sh->nb_refs[L1]; i++)
 664                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 665                 }
 666             }
 667
 668             if (sh->slice_type == B_SLICE)
 669                 sh->mvd_l1_zero_flag = get_bits1(gb);
 670
 671             if (s->pps->cabac_init_present_flag)
 672                 sh->cabac_init_flag = get_bits1(gb);
 673             else
 674                 sh->cabac_init_flag = 0;
 675
 676             sh->collocated_ref_idx = 0;
 677             if (sh->slice_temporal_mvp_enabled_flag) {
 678                 sh->collocated_list = L0;
 679                 if (sh->slice_type == B_SLICE)
 680                     sh->collocated_list = !get_bits1(gb);
 681
 682                 if (sh->nb_refs[sh->collocated_list] > 1) {
 683                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 684                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 685                         av_log(s->avctx, AV_LOG_ERROR,
 686                                "Invalid collocated_ref_idx: %d.\n",
 687                                sh->collocated_ref_idx);
 688                         return AVERROR_INVALIDDATA;
 689                     }
 690                 }
 691             }
 692
 693             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 694                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 695                 pred_weight_table(s, gb);
 696             }
 697
 698             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 699             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 700                 av_log(s->avctx, AV_LOG_ERROR,
 701                        "Invalid number of merging MVP candidates: %d.\n",
 702                        sh->max_num_merge_cand);
 703                 return AVERROR_INVALIDDATA;
 704             }
 705         }
 706
 707         sh->slice_qp_delta = get_se_golomb(gb);
 708
 709         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 710             sh->slice_cb_qp_offset = get_se_golomb(gb);
 711             sh->slice_cr_qp_offset = get_se_golomb(gb);
 712         } else {
 713             sh->slice_cb_qp_offset = 0;
 714             sh->slice_cr_qp_offset = 0;
 715         }
 716
 717         if (s->pps->deblocking_filter_control_present_flag) {
 718             int deblocking_filter_override_flag = 0;
 719
 720             if (s->pps->deblocking_filter_override_enabled_flag)
 721                 deblocking_filter_override_flag = get_bits1(gb);
 722
 723             if (deblocking_filter_override_flag) {
 724                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 725                 if (!sh->disable_deblocking_filter_flag) {
 726                     sh->beta_offset = get_se_golomb(gb) * 2;
 727                     sh->tc_offset   = get_se_golomb(gb) * 2;
 728                 }
 729             } else {
 730                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 731                 sh->beta_offset                    = s->pps->beta_offset;
 732                 sh->tc_offset                      = s->pps->tc_offset;
 733             }
 734         } else {
 735             sh->disable_deblocking_filter_flag = 0;
 736             sh->beta_offset                    = 0;
 737             sh->tc_offset                      = 0;
 738         }
 739
 740         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 741             (sh->slice_sample_adaptive_offset_flag[0] ||
 742              sh->slice_sample_adaptive_offset_flag[1] ||
 743              !sh->disable_deblocking_filter_flag)) {
 744             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 745         } else {
 746             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 747         }
 748     } else if (!s->slice_initialized) {
 749         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 750         return AVERROR_INVALIDDATA;
 751     }
 752
 753     sh->num_entry_point_offsets = 0;
 754     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 755         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 756         if (sh->num_entry_point_offsets > 0) {
 757             int offset_len = get_ue_golomb_long(gb) + 1;
 758
 759             for (i = 0; i < sh->num_entry_point_offsets; i++)
 760                 skip_bits(gb, offset_len);
 761         }
 762     }
 763
 764     if (s->pps->slice_header_extension_present_flag) {
 765         unsigned int length = get_ue_golomb_long(gb);
 766         for (i = 0; i < length; i++)
 767             skip_bits(gb, 8);  // slice_header_extension_data_byte
 768     }
 769
 770     // Inferred parameters
 771     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 772     if (sh->slice_qp > 51 ||
 773         sh->slice_qp < -s->sps->qp_bd_offset) {
 774         av_log(s->avctx, AV_LOG_ERROR,
 775                "The slice_qp %d is outside the valid range "
 776                "[%d, 51].\n",
 777                sh->slice_qp,
 778                -s->sps->qp_bd_offset);
 779         return AVERROR_INVALIDDATA;
 780     }
 781
 782     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 783
 784     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 785         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 786         return AVERROR_INVALIDDATA;
 787     }
 788
 789     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 790
 791     if (!s->pps->cu_qp_delta_enabled_flag)
 792         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 793                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 794
 795     s->slice_initialized = 1;
 796
 797     return 0;
 798 }
 799
 800 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 801
 802 #define SET_SAO(elem, value)                            \
 803 do {                                                    \
 804     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 805         sao->elem = value;                              \
 806     else if (sao_merge_left_flag)                       \
 807         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 808     else if (sao_merge_up_flag)                         \
 809         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 810     else                                                \
 811         sao->elem = 0;                                  \
 812 } while (0)
 813
 814 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 815 {
 816     HEVCLocalContext *lc    = &s->HEVClc;
 817     int sao_merge_left_flag = 0;
 818     int sao_merge_up_flag   = 0;
 819     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 820     SAOParams *sao          = &CTB(s->sao, rx, ry);
 821     int c_idx, i;
 822
 823     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 824         s->sh.slice_sample_adaptive_offset_flag[1]) {
 825         if (rx > 0) {
 826             if (lc->ctb_left_flag)
 827                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 828         }
 829         if (ry > 0 && !sao_merge_left_flag) {
 830             if (lc->ctb_up_flag)
 831                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 832         }
 833     }
 834
 835     for (c_idx = 0; c_idx < 3; c_idx++) {
 836         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 837             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 838             continue;
 839         }
 840
 841         if (c_idx == 2) {
 842             sao->type_idx[2] = sao->type_idx[1];
 843             sao->eo_class[2] = sao->eo_class[1];
 844         } else {
 845             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 846         }
 847
 848         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 849             continue;
 850
 851         for (i = 0; i < 4; i++)
 852             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 853
 854         if (sao->type_idx[c_idx] == SAO_BAND) {
 855             for (i = 0; i < 4; i++) {
 856                 if (sao->offset_abs[c_idx][i]) {
 857                     SET_SAO(offset_sign[c_idx][i],
 858                             ff_hevc_sao_offset_sign_decode(s));
 859                 } else {
 860                     sao->offset_sign[c_idx][i] = 0;
 861                 }
 862             }
 863             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 864         } else if (c_idx != 2) {
 865             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 866         }
 867
 868         // Inferred parameters
 869         sao->offset_val[c_idx][0] = 0;
 870         for (i = 0; i < 4; i++) {
 871             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 872             if (sao->type_idx[c_idx] == SAO_EDGE) {
 873                 if (i > 1)
 874                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 875             } else if (sao->offset_sign[c_idx][i]) {
 876                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 877             }
 878         }
 879     }
 880 }
 881
 882 #undef SET_SAO
 883 #undef CTB
 884
 885 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 886                                 int log2_trafo_size, enum ScanType scan_idx,
 887                                 int c_idx)
 888 {
 889 #define GET_COORD(offset, n)                                    \
 890     do {                                                        \
 891         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 892         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 893     } while (0)
 894     HEVCLocalContext *lc    = &s->HEVClc;
 895     int transform_skip_flag = 0;
 896
 897     int last_significant_coeff_x, last_significant_coeff_y;
 898     int last_scan_pos;
 899     int n_end;
 900     int num_coeff    = 0;
 901     int greater1_ctx = 1;
 902
 903     int num_last_subset;
 904     int x_cg_last_sig, y_cg_last_sig;
 905
 906     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 907
 908     ptrdiff_t stride = s->frame->linesize[c_idx];
 909     int hshift       = s->sps->hshift[c_idx];
 910     int vshift       = s->sps->vshift[c_idx];
 911     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 912                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 913     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 914     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 915
 916     int trafo_size = 1 << log2_trafo_size;
 917     int i, qp, shift, add, scale, scale_m;
 918     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 919     const uint8_t *scale_matrix;
 920     uint8_t dc_scale;
 921
 922     // Derive QP for dequant
 923     if (!lc->cu.cu_transquant_bypass_flag) {
 924         static const int qp_c[] = {
 925             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 926         };
 927
 928         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 929             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 930             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 931             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 932         };
 933
 934         static const uint8_t div6[51 + 2 * 6 + 1] = {
 935             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 936             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 937             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 938         };
 939         int qp_y = lc->qp_y;
 940
 941         if (c_idx == 0) {
 942             qp = qp_y + s->sps->qp_bd_offset;
 943         } else {
 944             int qp_i, offset;
 945
 946             if (c_idx == 1)
 947                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 948             else
 949                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 950
 951             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 952             if (qp_i < 30)
 953                 qp = qp_i;
 954             else if (qp_i > 43)
 955                 qp = qp_i - 6;
 956             else
 957                 qp = qp_c[qp_i - 30];
 958
 959             qp += s->sps->qp_bd_offset;
 960         }
 961
 962         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 963         add      = 1 << (shift - 1);
 964         scale    = level_scale[rem6[qp]] << (div6[qp]);
 965         scale_m  = 16; // default when no custom scaling lists.
 966         dc_scale = 16;
 967
 968         if (s->sps->scaling_list_enable_flag) {
 969             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 970                                     &s->pps->scaling_list : &s->sps->scaling_list;
 971             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 972
 973             if (log2_trafo_size != 5)
 974                 matrix_id = 3 * matrix_id + c_idx;
 975
 976             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 977             if (log2_trafo_size >= 4)
 978                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 979         }
 980     }
 981
 982     if (s->pps->transform_skip_enabled_flag &&
 983         !lc->cu.cu_transquant_bypass_flag   &&
 984         log2_trafo_size == 2) {
 985         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 986     }
 987
 988     last_significant_coeff_x =
 989         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 990     last_significant_coeff_y =
 991         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 992
 993     if (last_significant_coeff_x > 3) {
 994         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 995         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 996                                    (2 + (last_significant_coeff_x & 1)) +
 997                                    suffix;
 998     }
 999
1000     if (last_significant_coeff_y > 3) {
1001         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1002         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1003                                    (2 + (last_significant_coeff_y & 1)) +
1004                                    suffix;
1005     }
1006
1007     if (scan_idx == SCAN_VERT)
1008         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1009
1010     x_cg_last_sig = last_significant_coeff_x >> 2;
1011     y_cg_last_sig = last_significant_coeff_y >> 2;
1012
1013     switch (scan_idx) {
1014     case SCAN_DIAG: {
1015         int last_x_c = last_significant_coeff_x & 3;
1016         int last_y_c = last_significant_coeff_y & 3;
1017
1018         scan_x_off = ff_hevc_diag_scan4x4_x;
1019         scan_y_off = ff_hevc_diag_scan4x4_y;
1020         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1021         if (trafo_size == 4) {
1022             scan_x_cg = scan_1x1;
1023             scan_y_cg = scan_1x1;
1024         } else if (trafo_size == 8) {
1025             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1026             scan_x_cg  = diag_scan2x2_x;
1027             scan_y_cg  = diag_scan2x2_y;
1028         } else if (trafo_size == 16) {
1029             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1030             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1031             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1032         } else { // trafo_size == 32
1033             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1034             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1035             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1036         }
1037         break;
1038     }
1039     case SCAN_HORIZ:
1040         scan_x_cg  = horiz_scan2x2_x;
1041         scan_y_cg  = horiz_scan2x2_y;
1042         scan_x_off = horiz_scan4x4_x;
1043         scan_y_off = horiz_scan4x4_y;
1044         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1045         break;
1046     default: //SCAN_VERT
1047         scan_x_cg  = horiz_scan2x2_y;
1048         scan_y_cg  = horiz_scan2x2_x;
1049         scan_x_off = horiz_scan4x4_y;
1050         scan_y_off = horiz_scan4x4_x;
1051         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1052         break;
1053     }
1054     num_coeff++;
1055     num_last_subset = (num_coeff - 1) >> 4;
1056
1057     for (i = num_last_subset; i >= 0; i--) {
1058         int n, m;
1059         int x_cg, y_cg, x_c, y_c;
1060         int implicit_non_zero_coeff = 0;
1061         int64_t trans_coeff_level;
1062         int prev_sig = 0;
1063         int offset   = i << 4;
1064
1065         uint8_t significant_coeff_flag_idx[16];
1066         uint8_t nb_significant_coeff_flag = 0;
1067
1068         x_cg = scan_x_cg[i];
1069         y_cg = scan_y_cg[i];
1070
1071         if (i < num_last_subset && i > 0) {
1072             int ctx_cg = 0;
1073             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1074                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1075             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1076                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1077
1078             significant_coeff_group_flag[x_cg][y_cg] =
1079                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1080             implicit_non_zero_coeff = 1;
1081         } else {
1082             significant_coeff_group_flag[x_cg][y_cg] =
1083                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1084                  (x_cg == 0 && y_cg == 0));
1085         }
1086
1087         last_scan_pos = num_coeff - offset - 1;
1088
1089         if (i == num_last_subset) {
1090             n_end                         = last_scan_pos - 1;
1091             significant_coeff_flag_idx[0] = last_scan_pos;
1092             nb_significant_coeff_flag     = 1;
1093         } else {
1094             n_end = 15;
1095         }
1096
1097         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1098             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1099         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1100             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1101
1102         for (n = n_end; n >= 0; n--) {
1103             GET_COORD(offset, n);
1104
1105             if (significant_coeff_group_flag[x_cg][y_cg] &&
1106                 (n > 0 || implicit_non_zero_coeff == 0)) {
1107                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1108                                                           log2_trafo_size,
1109                                                           scan_idx,
1110                                                           prev_sig) == 1) {
1111                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1112                     nb_significant_coeff_flag++;
1113                     implicit_non_zero_coeff = 0;
1114                 }
1115             } else {
1116                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1117                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1118                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1119                     nb_significant_coeff_flag++;
1120                 }
1121             }
1122         }
1123
1124         n_end = nb_significant_coeff_flag;
1125
1126         if (n_end) {
1127             int first_nz_pos_in_cg = 16;
1128             int last_nz_pos_in_cg = -1;
1129             int c_rice_param = 0;
1130             int first_greater1_coeff_idx = -1;
1131             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1132             uint16_t coeff_sign_flag;
1133             int sum_abs = 0;
1134             int sign_hidden = 0;
1135
1136             // initialize first elem of coeff_bas_level_greater1_flag
1137             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1138
1139             if (!(i == num_last_subset) && greater1_ctx == 0)
1140                 ctx_set++;
1141             greater1_ctx      = 1;
1142             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1143
1144             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1145                 int n_idx = significant_coeff_flag_idx[m];
1146                 int inc   = (ctx_set << 2) + greater1_ctx;
1147                 coeff_abs_level_greater1_flag[n_idx] =
1148                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1149                 if (coeff_abs_level_greater1_flag[n_idx]) {
1150                     greater1_ctx = 0;
1151                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1152                     greater1_ctx++;
1153                 }
1154
1155                 if (coeff_abs_level_greater1_flag[n_idx] &&
1156                     first_greater1_coeff_idx == -1)
1157                     first_greater1_coeff_idx = n_idx;
1158             }
1159             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1160             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1161                                  !lc->cu.cu_transquant_bypass_flag;
1162
1163             if (first_greater1_coeff_idx != -1) {
1164                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1165             }
1166             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1167                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1168             } else {
1169                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1170             }
1171
1172             for (m = 0; m < n_end; m++) {
1173                 n = significant_coeff_flag_idx[m];
1174                 GET_COORD(offset, n);
1175                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1176                 if (trans_coeff_level == ((m < 8) ?
1177                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1178                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1179
1180                     trans_coeff_level += last_coeff_abs_level_remaining;
1181                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1182                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1183                 }
1184                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1185                     sum_abs += trans_coeff_level;
1186                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1187                         trans_coeff_level = -trans_coeff_level;
1188                 }
1189                 if (coeff_sign_flag >> 15)
1190                     trans_coeff_level = -trans_coeff_level;
1191                 coeff_sign_flag <<= 1;
1192                 if (!lc->cu.cu_transquant_bypass_flag) {
1193                     if (s->sps->scaling_list_enable_flag) {
1194                         if (y_c || x_c || log2_trafo_size < 4) {
1195                             int pos;
1196                             switch (log2_trafo_size) {
1197                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1198                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1199                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1200                             default: pos = (y_c        << 2) +  x_c;
1201                             }
1202                             scale_m = scale_matrix[pos];
1203                         } else {
1204                             scale_m = dc_scale;
1205                         }
1206                     }
1207                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1208                     if(trans_coeff_level < 0) {
1209                         if((~trans_coeff_level) & 0xFffffffffff8000)
1210                             trans_coeff_level = -32768;
1211                     } else {
1212                         if (trans_coeff_level & 0xffffffffffff8000)
1213                             trans_coeff_level = 32767;
1214                     }
1215                 }
1216                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1217             }
1218         }
1219     }
1220
1221     if (lc->cu.cu_transquant_bypass_flag) {
1222         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1223     } else {
1224         if (transform_skip_flag)
1225             s->hevcdsp.transform_skip(dst, coeffs, stride);
1226         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1227                  log2_trafo_size == 2)
1228             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1229         else
1230             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1231     }
1232 }
1233
1234 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1235                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1236                               int log2_cb_size, int log2_trafo_size,
1237                               int trafo_depth, int blk_idx)
1238 {
1239     HEVCLocalContext *lc = &s->HEVClc;
1240
1241     if (lc->cu.pred_mode == MODE_INTRA) {
1242         int trafo_size = 1 << log2_trafo_size;
1243         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1244
1245         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1246         if (log2_trafo_size > 2) {
1247             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1248             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1250             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1251         } else if (blk_idx == 3) {
1252             trafo_size = trafo_size << s->sps->hshift[1];
1253             ff_hevc_set_neighbour_available(s, xBase, yBase,
1254                                             trafo_size, trafo_size);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1256             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1257         }
1258     }
1259
1260     if (lc->tt.cbf_luma ||
1261         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1262         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1263         int scan_idx   = SCAN_DIAG;
1264         int scan_idx_c = SCAN_DIAG;
1265
1266         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1267             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1268             if (lc->tu.cu_qp_delta != 0)
1269                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1270                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1271             lc->tu.is_cu_qp_delta_coded = 1;
1272
1273             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1274                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1275                 av_log(s->avctx, AV_LOG_ERROR,
1276                        "The cu_qp_delta %d is outside the valid range "
1277                        "[%d, %d].\n",
1278                        lc->tu.cu_qp_delta,
1279                        -(26 + s->sps->qp_bd_offset / 2),
1280                         (25 + s->sps->qp_bd_offset / 2));
1281                 return AVERROR_INVALIDDATA;
1282             }
1283
1284             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1285         }
1286
1287         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1288             if (lc->tu.cur_intra_pred_mode >= 6 &&
1289                 lc->tu.cur_intra_pred_mode <= 14) {
1290                 scan_idx = SCAN_VERT;
1291             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1292                        lc->tu.cur_intra_pred_mode <= 30) {
1293                 scan_idx = SCAN_HORIZ;
1294             }
1295
1296             if (lc->pu.intra_pred_mode_c >=  6 &&
1297                 lc->pu.intra_pred_mode_c <= 14) {
1298                 scan_idx_c = SCAN_VERT;
1299             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1300                        lc->pu.intra_pred_mode_c <= 30) {
1301                 scan_idx_c = SCAN_HORIZ;
1302             }
1303         }
1304
1305         if (lc->tt.cbf_luma)
1306             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1307         if (log2_trafo_size > 2) {
1308             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1310             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1311                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1312         } else if (blk_idx == 3) {
1313             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1315             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1316                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1317         }
1318     }
1319     return 0;
1320 }
1321
1322 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1323 {
1324     int cb_size          = 1 << log2_cb_size;
1325     int log2_min_pu_size = s->sps->log2_min_pu_size;
1326
1327     int min_pu_width     = s->sps->min_pu_width;
1328     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1329     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1330     int i, j;
1331
1332     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1333         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1334             s->is_pcm[i + j * min_pu_width] = 2;
1335 }
1336
1337 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1338                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1339                               int log2_cb_size, int log2_trafo_size,
1340                               int trafo_depth, int blk_idx)
1341 {
1342     HEVCLocalContext *lc = &s->HEVClc;
1343     uint8_t split_transform_flag;
1344     int ret;
1345
1346     if (trafo_depth > 0 && log2_trafo_size == 2) {
1347         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1348             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1349         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1350             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1351     } else {
1352         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1353         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1354     }
1355
1356     if (lc->cu.intra_split_flag) {
1357         if (trafo_depth == 1)
1358             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1359     } else {
1360         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1361     }
1362
1363     lc->tt.cbf_luma = 1;
1364
1365     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1366                               lc->cu.pred_mode == MODE_INTER &&
1367                               lc->cu.part_mode != PART_2Nx2N &&
1368                               trafo_depth == 0;
1369
1370     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1371         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1372         trafo_depth     < lc->cu.max_trafo_depth       &&
1373         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1374         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1375     } else {
1376         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1377                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1378                                lc->tt.inter_split_flag;
1379     }
1380
1381     if (log2_trafo_size > 2) {
1382         if (trafo_depth == 0 ||
1383             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1384             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1385                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1386         }
1387
1388         if (trafo_depth == 0 ||
1389             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1390             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1391                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1392         }
1393     }
1394
1395     if (split_transform_flag) {
1396         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1397         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1398
1399         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1400                                  log2_cb_size, log2_trafo_size - 1,
1401                                  trafo_depth + 1, 0);
1402         if (ret < 0)
1403             return ret;
1404         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1405                                  log2_cb_size, log2_trafo_size - 1,
1406                                  trafo_depth + 1, 1);
1407         if (ret < 0)
1408             return ret;
1409         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1410                                  log2_cb_size, log2_trafo_size - 1,
1411                                  trafo_depth + 1, 2);
1412         if (ret < 0)
1413             return ret;
1414         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1415                                  log2_cb_size, log2_trafo_size - 1,
1416                                  trafo_depth + 1, 3);
1417         if (ret < 0)
1418             return ret;
1419     } else {
1420         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1421         int log2_min_tu_size = s->sps->log2_min_tb_size;
1422         int min_tu_width     = s->sps->min_tb_width;
1423
1424         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1425             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1426             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1427             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1428         }
1429
1430         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1431                                  log2_cb_size, log2_trafo_size, trafo_depth,
1432                                  blk_idx);
1433         if (ret < 0)
1434             return ret;
1435         // TODO: store cbf_luma somewhere else
1436         if (lc->tt.cbf_luma) {
1437             int i, j;
1438             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1439                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1440                     int x_tu = (x0 + j) >> log2_min_tu_size;
1441                     int y_tu = (y0 + i) >> log2_min_tu_size;
1442                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1443                 }
1444         }
1445         if (!s->sh.disable_deblocking_filter_flag) {
1446             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1447                                                   lc->slice_or_tiles_up_boundary,
1448                                                   lc->slice_or_tiles_left_boundary);
1449             if (s->pps->transquant_bypass_enable_flag &&
1450                 lc->cu.cu_transquant_bypass_flag)
1451                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1452         }
1453     }
1454     return 0;
1455 }
1456
1457 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1458 {
1459     //TODO: non-4:2:0 support
1460     HEVCLocalContext *lc = &s->HEVClc;
1461     GetBitContext gb;
1462     int cb_size   = 1 << log2_cb_size;
1463     int stride0   = s->frame->linesize[0];
1464     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1465     int   stride1 = s->frame->linesize[1];
1466     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1467     int   stride2 = s->frame->linesize[2];
1468     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1469
1470     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1471     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1472     int ret;
1473
1474     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1475                                           lc->slice_or_tiles_up_boundary,
1476                                           lc->slice_or_tiles_left_boundary);
1477
1478     ret = init_get_bits(&gb, pcm, length);
1479     if (ret < 0)
1480         return ret;
1481
1482     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1483     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1484     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1485     return 0;
1486 }
1487
1488 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1489 {
1490     HEVCLocalContext *lc = &s->HEVClc;
1491     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1492     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1493
1494     if (x)
1495         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1496     if (y)
1497         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1498
1499     switch (x) {
1500     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1501     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1502     case 0: lc->pu.mvd.x = 0;                               break;
1503     }
1504
1505     switch (y) {
1506     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1507     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1508     case 0: lc->pu.mvd.y = 0;                               break;
1509     }
1510 }
1511
1512 /**
1513  * 8.5.3.2.2.1 Luma sample interpolation process
1514  *
1515  * @param s HEVC decoding context
1516  * @param dst target buffer for block data at block position
1517  * @param dststride stride of the dst buffer
1518  * @param ref reference picture buffer at origin (0, 0)
1519  * @param mv motion vector (relative to block position) to get pixel data from
1520  * @param x_off horizontal position of block from origin (0, 0)
1521  * @param y_off vertical position of block from origin (0, 0)
1522  * @param block_w width of block
1523  * @param block_h height of block
1524  */
1525 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1526                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1527                     int block_w, int block_h)
1528 {
1529     HEVCLocalContext *lc = &s->HEVClc;
1530     uint8_t *src         = ref->data[0];
1531     ptrdiff_t srcstride  = ref->linesize[0];
1532     int pic_width        = s->sps->width;
1533     int pic_height       = s->sps->height;
1534
1535     int mx         = mv->x & 3;
1536     int my         = mv->y & 3;
1537     int extra_left = ff_hevc_qpel_extra_before[mx];
1538     int extra_top  = ff_hevc_qpel_extra_before[my];
1539
1540     x_off += mv->x >> 2;
1541     y_off += mv->y >> 2;
1542     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1543
1544     if (x_off < extra_left || y_off < extra_top ||
1545         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1546         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1547         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1548         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1549         int buf_offset = extra_top *
1550                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1551
1552         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1553                                  edge_emu_stride, srcstride,
1554                                  block_w + ff_hevc_qpel_extra[mx],
1555                                  block_h + ff_hevc_qpel_extra[my],
1556                                  x_off - extra_left, y_off - extra_top,
1557                                  pic_width, pic_height);
1558         src = lc->edge_emu_buffer + buf_offset;
1559         srcstride = edge_emu_stride;
1560     }
1561     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1562                                      block_h, lc->mc_buffer);
1563 }
1564
1565 /**
1566  * 8.5.3.2.2.2 Chroma sample interpolation process
1567  *
1568  * @param s HEVC decoding context
1569  * @param dst1 target buffer for block data at block position (U plane)
1570  * @param dst2 target buffer for block data at block position (V plane)
1571  * @param dststride stride of the dst1 and dst2 buffers
1572  * @param ref reference picture buffer at origin (0, 0)
1573  * @param mv motion vector (relative to block position) to get pixel data from
1574  * @param x_off horizontal position of block from origin (0, 0)
1575  * @param y_off vertical position of block from origin (0, 0)
1576  * @param block_w width of block
1577  * @param block_h height of block
1578  */
1579 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1580                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1581                       int x_off, int y_off, int block_w, int block_h)
1582 {
1583     HEVCLocalContext *lc = &s->HEVClc;
1584     uint8_t *src1        = ref->data[1];
1585     uint8_t *src2        = ref->data[2];
1586     ptrdiff_t src1stride = ref->linesize[1];
1587     ptrdiff_t src2stride = ref->linesize[2];
1588     int pic_width        = s->sps->width >> 1;
1589     int pic_height       = s->sps->height >> 1;
1590
1591     int mx = mv->x & 7;
1592     int my = mv->y & 7;
1593
1594     x_off += mv->x >> 3;
1595     y_off += mv->y >> 3;
1596     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1597     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1598
1599     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1600         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1601         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1602         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1603         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1604         int buf_offset1 = EPEL_EXTRA_BEFORE *
1605                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1606         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1607         int buf_offset2 = EPEL_EXTRA_BEFORE *
1608                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1609
1610         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1611                                  edge_emu_stride, src1stride,
1612                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1613                                  x_off - EPEL_EXTRA_BEFORE,
1614                                  y_off - EPEL_EXTRA_BEFORE,
1615                                  pic_width, pic_height);
1616
1617         src1 = lc->edge_emu_buffer + buf_offset1;
1618         src1stride = edge_emu_stride;
1619         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1620                                              block_w, block_h, mx, my, lc->mc_buffer);
1621
1622         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1623                                  edge_emu_stride, src2stride,
1624                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1625                                  x_off - EPEL_EXTRA_BEFORE,
1626                                  y_off - EPEL_EXTRA_BEFORE,
1627                                  pic_width, pic_height);
1628         src2 = lc->edge_emu_buffer + buf_offset2;
1629         src2stride = edge_emu_stride;
1630
1631         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1632                                              block_w, block_h, mx, my,
1633                                              lc->mc_buffer);
1634     } else {
1635         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1636                                              block_w, block_h, mx, my,
1637                                              lc->mc_buffer);
1638         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1639                                              block_w, block_h, mx, my,
1640                                              lc->mc_buffer);
1641     }
1642 }
1643
1644 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1645                                 const Mv *mv, int y0, int height)
1646 {
1647     int y = (mv->y >> 2) + y0 + height + 9;
1648     ff_thread_await_progress(&ref->tf, y, 0);
1649 }
1650
1651 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1652                                 int nPbW, int nPbH,
1653                                 int log2_cb_size, int partIdx)
1654 {
1655 #define POS(c_idx, x, y)                                                              \
1656     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1657                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1658     HEVCLocalContext *lc = &s->HEVClc;
1659     int merge_idx = 0;
1660     struct MvField current_mv = {{{ 0 }}};
1661
1662     int min_pu_width = s->sps->min_pu_width;
1663
1664     MvField *tab_mvf = s->ref->tab_mvf;
1665     RefPicList  *refPicList = s->ref->refPicList;
1666     HEVCFrame *ref0, *ref1;
1667
1668     int tmpstride = MAX_PB_SIZE;
1669
1670     uint8_t *dst0 = POS(0, x0, y0);
1671     uint8_t *dst1 = POS(1, x0, y0);
1672     uint8_t *dst2 = POS(2, x0, y0);
1673     int log2_min_cb_size = s->sps->log2_min_cb_size;
1674     int min_cb_width     = s->sps->min_cb_width;
1675     int x_cb             = x0 >> log2_min_cb_size;
1676     int y_cb             = y0 >> log2_min_cb_size;
1677     int ref_idx[2];
1678     int mvp_flag[2];
1679     int x_pu, y_pu;
1680     int i, j;
1681
1682     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1683         if (s->sh.max_num_merge_cand > 1)
1684             merge_idx = ff_hevc_merge_idx_decode(s);
1685         else
1686             merge_idx = 0;
1687
1688         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1689                                    1 << log2_cb_size,
1690                                    1 << log2_cb_size,
1691                                    log2_cb_size, partIdx,
1692                                    merge_idx, &current_mv);
1693         x_pu = x0 >> s->sps->log2_min_pu_size;
1694         y_pu = y0 >> s->sps->log2_min_pu_size;
1695
1696         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1697             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1698                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1699     } else { /* MODE_INTER */
1700         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1701         if (lc->pu.merge_flag) {
1702             if (s->sh.max_num_merge_cand > 1)
1703                 merge_idx = ff_hevc_merge_idx_decode(s);
1704             else
1705                 merge_idx = 0;
1706
1707             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1708                                        partIdx, merge_idx, &current_mv);
1709             x_pu = x0 >> s->sps->log2_min_pu_size;
1710             y_pu = y0 >> s->sps->log2_min_pu_size;
1711
1712             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1713                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1714                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1715         } else {
1716             enum InterPredIdc inter_pred_idc = PRED_L0;
1717             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1718             if (s->sh.slice_type == B_SLICE)
1719                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1720
1721             if (inter_pred_idc != PRED_L1) {
1722                 if (s->sh.nb_refs[L0]) {
1723                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1724                     current_mv.ref_idx[0] = ref_idx[0];
1725                 }
1726                 current_mv.pred_flag[0] = 1;
1727                 hls_mvd_coding(s, x0, y0, 0);
1728                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1729                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1730                                          partIdx, merge_idx, &current_mv,
1731                                          mvp_flag[0], 0);
1732                 current_mv.mv[0].x += lc->pu.mvd.x;
1733                 current_mv.mv[0].y += lc->pu.mvd.y;
1734             }
1735
1736             if (inter_pred_idc != PRED_L0) {
1737                 if (s->sh.nb_refs[L1]) {
1738                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1739                     current_mv.ref_idx[1] = ref_idx[1];
1740                 }
1741
1742                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1743                     lc->pu.mvd.x = 0;
1744                     lc->pu.mvd.y = 0;
1745                 } else {
1746                     hls_mvd_coding(s, x0, y0, 1);
1747                 }
1748
1749                 current_mv.pred_flag[1] = 1;
1750                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1751                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1752                                          partIdx, merge_idx, &current_mv,
1753                                          mvp_flag[1], 1);
1754                 current_mv.mv[1].x += lc->pu.mvd.x;
1755                 current_mv.mv[1].y += lc->pu.mvd.y;
1756             }
1757
1758             x_pu = x0 >> s->sps->log2_min_pu_size;
1759             y_pu = y0 >> s->sps->log2_min_pu_size;
1760
1761             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1762                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1763                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1764         }
1765     }
1766
1767     if (current_mv.pred_flag[0]) {
1768         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1769         if (!ref0)
1770             return;
1771         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1772     }
1773     if (current_mv.pred_flag[1]) {
1774         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1775         if (!ref1)
1776             return;
1777         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1778     }
1779
1780     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1781         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1782         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1783
1784         luma_mc(s, tmp, tmpstride, ref0->frame,
1785                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1786
1787         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1788             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1789             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1790                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1791                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1792                                      dst0, s->frame->linesize[0], tmp,
1793                                      tmpstride, nPbW, nPbH);
1794         } else {
1795             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1796         }
1797         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1798                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1799
1800         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1801             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1802             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1803                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1804                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1805                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1806                                      nPbW / 2, nPbH / 2);
1807             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1808                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1809                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1810                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1811                                      nPbW / 2, nPbH / 2);
1812         } else {
1813             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1814             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1815         }
1816     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1817         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1818         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1819
1820         if (!ref1)
1821             return;
1822
1823         luma_mc(s, tmp, tmpstride, ref1->frame,
1824                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1825
1826         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1827             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1828             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1829                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1830                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1831                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1832                                       nPbW, nPbH);
1833         } else {
1834             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1835         }
1836
1837         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1838                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1839
1840         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1841             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1842             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1843                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1844                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1845                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1846             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1847                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1848                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1849                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1850         } else {
1851             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1852             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1853         }
1854     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1855         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1856         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1857         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1858         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1859         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1860         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1861
1862         if (!ref0 || !ref1)
1863             return;
1864
1865         luma_mc(s, tmp, tmpstride, ref0->frame,
1866                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1867         luma_mc(s, tmp2, tmpstride, ref1->frame,
1868                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1869
1870         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1871             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1872             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1873                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1874                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1875                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1876                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1877                                          dst0, s->frame->linesize[0],
1878                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1879         } else {
1880             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1881                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1882         }
1883
1884         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1885                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1886         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1887                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1888
1889         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1890             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1891             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1892                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1893                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1894                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1895                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1896                                          dst1, s->frame->linesize[1], tmp, tmp3,
1897                                          tmpstride, nPbW / 2, nPbH / 2);
1898             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1899                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1900                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1901                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1902                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1903                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1904                                          tmpstride, nPbW / 2, nPbH / 2);
1905         } else {
1906             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1907             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1908         }
1909     }
1910 }
1911
1912 /**
1913  * 8.4.1
1914  */
1915 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1916                                 int prev_intra_luma_pred_flag)
1917 {
1918     HEVCLocalContext *lc = &s->HEVClc;
1919     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1920     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1921     int min_pu_width     = s->sps->min_pu_width;
1922     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1923     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1924     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1925
1926     int cand_up   = (lc->ctb_up_flag || y0b) ?
1927                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1928     int cand_left = (lc->ctb_left_flag || x0b) ?
1929                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1930
1931     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1932
1933     MvField *tab_mvf = s->ref->tab_mvf;
1934     int intra_pred_mode;
1935     int candidate[3];
1936     int i, j;
1937
1938     // intra_pred_mode prediction does not cross vertical CTB boundaries
1939     if ((y0 - 1) < y_ctb)
1940         cand_up = INTRA_DC;
1941
1942     if (cand_left == cand_up) {
1943         if (cand_left < 2) {
1944             candidate[0] = INTRA_PLANAR;
1945             candidate[1] = INTRA_DC;
1946             candidate[2] = INTRA_ANGULAR_26;
1947         } else {
1948             candidate[0] = cand_left;
1949             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1950             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1951         }
1952     } else {
1953         candidate[0] = cand_left;
1954         candidate[1] = cand_up;
1955         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1956             candidate[2] = INTRA_PLANAR;
1957         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1958             candidate[2] = INTRA_DC;
1959         } else {
1960             candidate[2] = INTRA_ANGULAR_26;
1961         }
1962     }
1963
1964     if (prev_intra_luma_pred_flag) {
1965         intra_pred_mode = candidate[lc->pu.mpm_idx];
1966     } else {
1967         if (candidate[0] > candidate[1])
1968             FFSWAP(uint8_t, candidate[0], candidate[1]);
1969         if (candidate[0] > candidate[2])
1970             FFSWAP(uint8_t, candidate[0], candidate[2]);
1971         if (candidate[1] > candidate[2])
1972             FFSWAP(uint8_t, candidate[1], candidate[2]);
1973
1974         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1975         for (i = 0; i < 3; i++)
1976             if (intra_pred_mode >= candidate[i])
1977                 intra_pred_mode++;
1978     }
1979
1980     /* write the intra prediction units into the mv array */
1981     if (!size_in_pus)
1982         size_in_pus = 1;
1983     for (i = 0; i < size_in_pus; i++) {
1984         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1985                intra_pred_mode, size_in_pus);
1986
1987         for (j = 0; j < size_in_pus; j++) {
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1991             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1992             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1993             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1995             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1996             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1997         }
1998     }
1999
2000     return intra_pred_mode;
2001 }
2002
2003 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2004                                           int log2_cb_size, int ct_depth)
2005 {
2006     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2007     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2008     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2009     int y;
2010
2011     for (y = 0; y < length; y++)
2012         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2013                ct_depth, length);
2014 }
2015
2016 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2017                                   int log2_cb_size)
2018 {
2019     HEVCLocalContext *lc = &s->HEVClc;
2020     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2021     uint8_t prev_intra_luma_pred_flag[4];
2022     int split   = lc->cu.part_mode == PART_NxN;
2023     int pb_size = (1 << log2_cb_size) >> split;
2024     int side    = split + 1;
2025     int chroma_mode;
2026     int i, j;
2027
2028     for (i = 0; i < side; i++)
2029         for (j = 0; j < side; j++)
2030             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2031
2032     for (i = 0; i < side; i++) {
2033         for (j = 0; j < side; j++) {
2034             if (prev_intra_luma_pred_flag[2 * i + j])
2035                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2036             else
2037                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2038
2039             lc->pu.intra_pred_mode[2 * i + j] =
2040                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2041                                      prev_intra_luma_pred_flag[2 * i + j]);
2042         }
2043     }
2044
2045     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2046     if (chroma_mode != 4) {
2047         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2048             lc->pu.intra_pred_mode_c = 34;
2049         else
2050             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2051     } else {
2052         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2053     }
2054 }
2055
2056 static void intra_prediction_unit_default_value(HEVCContext *s,
2057                                                 int x0, int y0,
2058                                                 int log2_cb_size)
2059 {
2060     HEVCLocalContext *lc = &s->HEVClc;
2061     int pb_size          = 1 << log2_cb_size;
2062     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2063     int min_pu_width     = s->sps->min_pu_width;
2064     MvField *tab_mvf     = s->ref->tab_mvf;
2065     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2066     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2067     int j, k;
2068
2069     if (size_in_pus == 0)
2070         size_in_pus = 1;
2071     for (j = 0; j < size_in_pus; j++) {
2072         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2073         for (k = 0; k < size_in_pus; k++)
2074             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2075     }
2076 }
2077
2078 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2079 {
2080     int cb_size          = 1 << log2_cb_size;
2081     HEVCLocalContext *lc = &s->HEVClc;
2082     int log2_min_cb_size = s->sps->log2_min_cb_size;
2083     int length           = cb_size >> log2_min_cb_size;
2084     int min_cb_width     = s->sps->min_cb_width;
2085     int x_cb             = x0 >> log2_min_cb_size;
2086     int y_cb             = y0 >> log2_min_cb_size;
2087     int x, y, ret;
2088
2089     lc->cu.x                = x0;
2090     lc->cu.y                = y0;
2091     lc->cu.rqt_root_cbf     = 1;
2092     lc->cu.pred_mode        = MODE_INTRA;
2093     lc->cu.part_mode        = PART_2Nx2N;
2094     lc->cu.intra_split_flag = 0;
2095     lc->cu.pcm_flag         = 0;
2096
2097     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2098     for (x = 0; x < 4; x++)
2099         lc->pu.intra_pred_mode[x] = 1;
2100     if (s->pps->transquant_bypass_enable_flag) {
2101         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2102         if (lc->cu.cu_transquant_bypass_flag)
2103             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2104     } else
2105         lc->cu.cu_transquant_bypass_flag = 0;
2106
2107     if (s->sh.slice_type != I_SLICE) {
2108         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2109
2110         lc->cu.pred_mode = MODE_SKIP;
2111         x = y_cb * min_cb_width + x_cb;
2112         for (y = 0; y < length; y++) {
2113             memset(&s->skip_flag[x], skip_flag, length);
2114             x += min_cb_width;
2115         }
2116         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2117     }
2118
2119     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2120         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2121         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2122
2123         if (!s->sh.disable_deblocking_filter_flag)
2124             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2125                                                   lc->slice_or_tiles_up_boundary,
2126                                                   lc->slice_or_tiles_left_boundary);
2127     } else {
2128         if (s->sh.slice_type != I_SLICE)
2129             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2130         if (lc->cu.pred_mode != MODE_INTRA ||
2131             log2_cb_size == s->sps->log2_min_cb_size) {
2132             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2133             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2134                                       lc->cu.pred_mode == MODE_INTRA;
2135         }
2136
2137         if (lc->cu.pred_mode == MODE_INTRA) {
2138             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2139                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2140                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2141                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2142             }
2143             if (lc->cu.pcm_flag) {
2144                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2145                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2146                 if (s->sps->pcm.loop_filter_disable_flag)
2147                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2148
2149                 if (ret < 0)
2150                     return ret;
2151             } else {
2152                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2153             }
2154         } else {
2155             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2156             switch (lc->cu.part_mode) {
2157             case PART_2Nx2N:
2158                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2159                 break;
2160             case PART_2NxN:
2161                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2162                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2163                 break;
2164             case PART_Nx2N:
2165                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2166                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2167                 break;
2168             case PART_2NxnU:
2169                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2170                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2171                 break;
2172             case PART_2NxnD:
2173                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2174                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2175                 break;
2176             case PART_nLx2N:
2177                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2178                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2179                 break;
2180             case PART_nRx2N:
2181                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2182                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2183                 break;
2184             case PART_NxN:
2185                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2186                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2187                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2188                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2189                 break;
2190             }
2191         }
2192
2193         if (!lc->cu.pcm_flag) {
2194             if (lc->cu.pred_mode != MODE_INTRA &&
2195                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2196                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2197             }
2198             if (lc->cu.rqt_root_cbf) {
2199                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2200                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2201                                          s->sps->max_transform_hierarchy_depth_inter;
2202                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2203                                          log2_cb_size,
2204                                          log2_cb_size, 0, 0);
2205                 if (ret < 0)
2206                     return ret;
2207             } else {
2208                 if (!s->sh.disable_deblocking_filter_flag)
2209                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2210                                                           lc->slice_or_tiles_up_boundary,
2211                                                           lc->slice_or_tiles_left_boundary);
2212             }
2213         }
2214     }
2215
2216     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2217         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2218
2219     x = y_cb * min_cb_width + x_cb;
2220     for (y = 0; y < length; y++) {
2221         memset(&s->qp_y_tab[x], lc->qp_y, length);
2222         x += min_cb_width;
2223     }
2224
2225     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2226
2227     return 0;
2228 }
2229
2230 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2231                                int log2_cb_size, int cb_depth)
2232 {
2233     HEVCLocalContext *lc = &s->HEVClc;
2234     const int cb_size    = 1 << log2_cb_size;
2235
2236     lc->ct.depth = cb_depth;
2237     if (x0 + cb_size <= s->sps->width  &&
2238         y0 + cb_size <= s->sps->height &&
2239         log2_cb_size > s->sps->log2_min_cb_size) {
2240         SAMPLE(s->split_cu_flag, x0, y0) =
2241             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2242     } else {
2243         SAMPLE(s->split_cu_flag, x0, y0) =
2244             (log2_cb_size > s->sps->log2_min_cb_size);
2245     }
2246     if (s->pps->cu_qp_delta_enabled_flag &&
2247         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2248         lc->tu.is_cu_qp_delta_coded = 0;
2249         lc->tu.cu_qp_delta          = 0;
2250     }
2251
2252     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2253         const int cb_size_split = cb_size >> 1;
2254         const int x1 = x0 + cb_size_split;
2255         const int y1 = y0 + cb_size_split;
2256
2257         log2_cb_size--;
2258         cb_depth++;
2259
2260 #define SUBDIVIDE(x, y)                                                \
2261 do {                                                                   \
2262     if (x < s->sps->width && y < s->sps->height) {                     \
2263         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2264         if (ret < 0)                                                   \
2265             return ret;                                                \
2266     }                                                                  \
2267 } while (0)
2268
2269         SUBDIVIDE(x0, y0);
2270         SUBDIVIDE(x1, y0);
2271         SUBDIVIDE(x0, y1);
2272         SUBDIVIDE(x1, y1);
2273     } else {
2274         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2275         if (ret < 0)
2276             return ret;
2277     }
2278
2279     return 0;
2280 }
2281
2282 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2283                                  int ctb_addr_ts)
2284 {
2285     HEVCLocalContext *lc  = &s->HEVClc;
2286     int ctb_size          = 1 << s->sps->log2_ctb_size;
2287     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2288     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2289
2290     int tile_left_boundary, tile_up_boundary;
2291     int slice_left_boundary, slice_up_boundary;
2292
2293     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2294
2295     if (s->pps->entropy_coding_sync_enabled_flag) {
2296         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2297             lc->first_qp_group = 1;
2298         lc->end_of_tiles_x = s->sps->width;
2299     } else if (s->pps->tiles_enabled_flag) {
2300         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2301             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2302             lc->start_of_tiles_x = x_ctb;
2303             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2304             lc->first_qp_group   = 1;
2305         }
2306     } else {
2307         lc->end_of_tiles_x = s->sps->width;
2308     }
2309
2310     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2311
2312     if (s->pps->tiles_enabled_flag) {
2313         tile_left_boundary  = x_ctb > 0 &&
2314                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2315         slice_left_boundary = x_ctb > 0 &&
2316                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2317         tile_up_boundary  = y_ctb > 0 &&
2318                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2319         slice_up_boundary = y_ctb > 0 &&
2320                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2321     } else {
2322         tile_left_boundary  =
2323         tile_up_boundary    = 1;
2324         slice_left_boundary = ctb_addr_in_slice > 0;
2325         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2326     }
2327     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2328     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2329     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2330     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2331     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2332     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2333 }
2334
2335 static int hls_slice_data(HEVCContext *s)
2336 {
2337     int ctb_size    = 1 << s->sps->log2_ctb_size;
2338     int more_data   = 1;
2339     int x_ctb       = 0;
2340     int y_ctb       = 0;
2341     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2342     int ret;
2343
2344     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2345         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2346
2347         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2348         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2349         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2350
2351         ff_hevc_cabac_init(s, ctb_addr_ts);
2352
2353         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2354
2355         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2356         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2357         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2358
2359         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2360         if (ret < 0)
2361             return ret;
2362         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2363
2364         ctb_addr_ts++;
2365         ff_hevc_save_states(s, ctb_addr_ts);
2366         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2367     }
2368
2369     if (x_ctb + ctb_size >= s->sps->width &&
2370         y_ctb + ctb_size >= s->sps->height)
2371         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2372
2373     return ctb_addr_ts;
2374 }
2375
2376 /**
2377  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2378  * 0 if the unit should be skipped, 1 otherwise
2379  */
2380 static int hls_nal_unit(HEVCContext *s)
2381 {
2382     GetBitContext *gb = &s->HEVClc.gb;
2383     int nuh_layer_id;
2384
2385     if (get_bits1(gb) != 0)
2386         return AVERROR_INVALIDDATA;
2387
2388     s->nal_unit_type = get_bits(gb, 6);
2389
2390     nuh_layer_id   = get_bits(gb, 6);
2391     s->temporal_id = get_bits(gb, 3) - 1;
2392     if (s->temporal_id < 0)
2393         return AVERROR_INVALIDDATA;
2394
2395     av_log(s->avctx, AV_LOG_DEBUG,
2396            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2397            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2398
2399     return nuh_layer_id == 0;
2400 }
2401
2402 static void restore_tqb_pixels(HEVCContext *s)
2403 {
2404     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2405     int x, y, c_idx;
2406
2407     for (c_idx = 0; c_idx < 3; c_idx++) {
2408         ptrdiff_t stride = s->frame->linesize[c_idx];
2409         int hshift       = s->sps->hshift[c_idx];
2410         int vshift       = s->sps->vshift[c_idx];
2411         for (y = 0; y < s->sps->min_pu_height; y++) {
2412             for (x = 0; x < s->sps->min_pu_width; x++) {
2413                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2414                     int n;
2415                     int len      = min_pu_size >> hshift;
2416                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2417                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2418                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2419                         memcpy(dst, src, len);
2420                         src += stride;
2421                         dst += stride;
2422                     }
2423                 }
2424             }
2425         }
2426     }
2427 }
2428
2429 static int set_side_data(HEVCContext *s)
2430 {
2431     AVFrame *out = s->ref->frame;
2432
2433     if (s->sei_frame_packing_present &&
2434         s->frame_packing_arrangement_type >= 3 &&
2435         s->frame_packing_arrangement_type <= 5 &&
2436         s->content_interpretation_type > 0 &&
2437         s->content_interpretation_type < 3) {
2438         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2439         if (!stereo)
2440             return AVERROR(ENOMEM);
2441
2442         switch (s->frame_packing_arrangement_type) {
2443         case 3:
2444             if (s->quincunx_subsampling)
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2446             else
2447                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2448             break;
2449         case 4:
2450             stereo->type = AV_STEREO3D_TOPBOTTOM;
2451             break;
2452         case 5:
2453             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2454             break;
2455         }
2456
2457         if (s->content_interpretation_type == 2)
2458             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2459     }
2460
2461     if (s->sei_display_orientation_present &&
2462         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2463         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2464         AVFrameSideData *rotation = av_frame_new_side_data(out,
2465                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2466                                                            sizeof(int32_t) * 9);
2467         if (!rotation)
2468             return AVERROR(ENOMEM);
2469
2470         av_display_rotation_set((int32_t *)rotation->data, angle);
2471         av_display_matrix_flip((int32_t *)rotation->data,
2472                                s->sei_vflip, s->sei_hflip);
2473     }
2474
2475     return 0;
2476 }
2477
2478 static int hevc_frame_start(HEVCContext *s)
2479 {
2480     HEVCLocalContext *lc = &s->HEVClc;
2481     int ret;
2482
2483     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2484     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2485     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2486     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2487
2488     lc->start_of_tiles_x = 0;
2489     s->is_decoded        = 0;
2490     s->first_nal_type    = s->nal_unit_type;
2491
2492     if (s->pps->tiles_enabled_flag)
2493         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2494
2495     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2496                               s->poc);
2497     if (ret < 0)
2498         goto fail;
2499
2500     ret = ff_hevc_frame_rps(s);
2501     if (ret < 0) {
2502         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2503         goto fail;
2504     }
2505
2506     s->ref->frame->key_frame = IS_IRAP(s);
2507
2508     ret = set_side_data(s);
2509     if (ret < 0)
2510         goto fail;
2511
2512     av_frame_unref(s->output_frame);
2513     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2514     if (ret < 0)
2515         goto fail;
2516
2517     ff_thread_finish_setup(s->avctx);
2518
2519     return 0;
2520
2521 fail:
2522     if (s->ref)
2523         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2524     s->ref = NULL;
2525     return ret;
2526 }
2527
2528 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2529 {
2530     HEVCLocalContext *lc = &s->HEVClc;
2531     GetBitContext *gb    = &lc->gb;
2532     int ctb_addr_ts, ret;
2533
2534     ret = init_get_bits8(gb, nal, length);
2535     if (ret < 0)
2536         return ret;
2537
2538     ret = hls_nal_unit(s);
2539     if (ret < 0) {
2540         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2541                s->nal_unit_type);
2542         goto fail;
2543     } else if (!ret)
2544         return 0;
2545
2546     switch (s->nal_unit_type) {
2547     case NAL_VPS:
2548         ret = ff_hevc_decode_nal_vps(s);
2549         if (ret < 0)
2550             goto fail;
2551         break;
2552     case NAL_SPS:
2553         ret = ff_hevc_decode_nal_sps(s);
2554         if (ret < 0)
2555             goto fail;
2556         break;
2557     case NAL_PPS:
2558         ret = ff_hevc_decode_nal_pps(s);
2559         if (ret < 0)
2560             goto fail;
2561         break;
2562     case NAL_SEI_PREFIX:
2563     case NAL_SEI_SUFFIX:
2564         ret = ff_hevc_decode_nal_sei(s);
2565         if (ret < 0)
2566             goto fail;
2567         break;
2568     case NAL_TRAIL_R:
2569     case NAL_TRAIL_N:
2570     case NAL_TSA_N:
2571     case NAL_TSA_R:
2572     case NAL_STSA_N:
2573     case NAL_STSA_R:
2574     case NAL_BLA_W_LP:
2575     case NAL_BLA_W_RADL:
2576     case NAL_BLA_N_LP:
2577     case NAL_IDR_W_RADL:
2578     case NAL_IDR_N_LP:
2579     case NAL_CRA_NUT:
2580     case NAL_RADL_N:
2581     case NAL_RADL_R:
2582     case NAL_RASL_N:
2583     case NAL_RASL_R:
2584         ret = hls_slice_header(s);
2585         if (ret < 0)
2586             return ret;
2587
2588         if (s->max_ra == INT_MAX) {
2589             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2590                 s->max_ra = s->poc;
2591             } else {
2592                 if (IS_IDR(s))
2593                     s->max_ra = INT_MIN;
2594             }
2595         }
2596
2597         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2598             s->poc <= s->max_ra) {
2599             s->is_decoded = 0;
2600             break;
2601         } else {
2602             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2603                 s->max_ra = INT_MIN;
2604         }
2605
2606         if (s->sh.first_slice_in_pic_flag) {
2607             ret = hevc_frame_start(s);
2608             if (ret < 0)
2609                 return ret;
2610         } else if (!s->ref) {
2611             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2612             goto fail;
2613         }
2614
2615         if (s->nal_unit_type != s->first_nal_type) {
2616             av_log(s->avctx, AV_LOG_ERROR,
2617                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2618                    s->first_nal_type, s->nal_unit_type);
2619             return AVERROR_INVALIDDATA;
2620         }
2621
2622         if (!s->sh.dependent_slice_segment_flag &&
2623             s->sh.slice_type != I_SLICE) {
2624             ret = ff_hevc_slice_rpl(s);
2625             if (ret < 0) {
2626                 av_log(s->avctx, AV_LOG_WARNING,
2627                        "Error constructing the reference lists for the current slice.\n");
2628                 goto fail;
2629             }
2630         }
2631
2632         ctb_addr_ts = hls_slice_data(s);
2633         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2634             s->is_decoded = 1;
2635             if ((s->pps->transquant_bypass_enable_flag ||
2636                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2637                 s->sps->sao_enabled)
2638                 restore_tqb_pixels(s);
2639         }
2640
2641         if (ctb_addr_ts < 0) {
2642             ret = ctb_addr_ts;
2643             goto fail;
2644         }
2645         break;
2646     case NAL_EOS_NUT:
2647     case NAL_EOB_NUT:
2648         s->seq_decode = (s->seq_decode + 1) & 0xff;
2649         s->max_ra     = INT_MAX;
2650         break;
2651     case NAL_AUD:
2652     case NAL_FD_NUT:
2653         break;
2654     default:
2655         av_log(s->avctx, AV_LOG_INFO,
2656                "Skipping NAL unit %d\n", s->nal_unit_type);
2657     }
2658
2659     return 0;
2660 fail:
2661     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2662         return ret;
2663     return 0;
2664 }
2665
2666 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2667  * between these functions would be nice. */
2668 static int extract_rbsp(const uint8_t *src, int length,
2669                         HEVCNAL *nal)
2670 {
2671     int i, si, di;
2672     uint8_t *dst;
2673
2674 #define STARTCODE_TEST                                                  \
2675         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2676             if (src[i + 2] != 3) {                                      \
2677                 /* startcode, so we must be past the end */             \
2678                 length = i;                                             \
2679             }                                                           \
2680             break;                                                      \
2681         }
2682 #if HAVE_FAST_UNALIGNED
2683 #define FIND_FIRST_ZERO                                                 \
2684         if (i > 0 && !src[i])                                           \
2685             i--;                                                        \
2686         while (src[i])                                                  \
2687             i++
2688 #if HAVE_FAST_64BIT
2689     for (i = 0; i + 1 < length; i += 9) {
2690         if (!((~AV_RN64A(src + i) &
2691                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2692               0x8000800080008080ULL))
2693             continue;
2694         FIND_FIRST_ZERO;
2695         STARTCODE_TEST;
2696         i -= 7;
2697     }
2698 #else
2699     for (i = 0; i + 1 < length; i += 5) {
2700         if (!((~AV_RN32A(src + i) &
2701                (AV_RN32A(src + i) - 0x01000101U)) &
2702               0x80008080U))
2703             continue;
2704         FIND_FIRST_ZERO;
2705         STARTCODE_TEST;
2706         i -= 3;
2707     }
2708 #endif /* HAVE_FAST_64BIT */
2709 #else
2710     for (i = 0; i + 1 < length; i += 2) {
2711         if (src[i])
2712             continue;
2713         if (i > 0 && src[i - 1] == 0)
2714             i--;
2715         STARTCODE_TEST;
2716     }
2717 #endif /* HAVE_FAST_UNALIGNED */
2718
2719     if (i >= length - 1) { // no escaped 0
2720         nal->data = src;
2721         nal->size = length;
2722         return length;
2723     }
2724
2725     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2726                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2727     if (!nal->rbsp_buffer)
2728         return AVERROR(ENOMEM);
2729
2730     dst = nal->rbsp_buffer;
2731
2732     memcpy(dst, src, i);
2733     si = di = i;
2734     while (si + 2 < length) {
2735         // remove escapes (very rare 1:2^22)
2736         if (src[si + 2] > 3) {
2737             dst[di++] = src[si++];
2738             dst[di++] = src[si++];
2739         } else if (src[si] == 0 && src[si + 1] == 0) {
2740             if (src[si + 2] == 3) { // escape
2741                 dst[di++] = 0;
2742                 dst[di++] = 0;
2743                 si       += 3;
2744
2745                 continue;
2746             } else // next start code
2747                 goto nsc;
2748         }
2749
2750         dst[di++] = src[si++];
2751     }
2752     while (si < length)
2753         dst[di++] = src[si++];
2754
2755 nsc:
2756     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2757
2758     nal->data = dst;
2759     nal->size = di;
2760     return si;
2761 }
2762
2763 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2764 {
2765     int i, consumed, ret = 0;
2766
2767     s->ref = NULL;
2768     s->eos = 0;
2769
2770     /* split the input packet into NAL units, so we know the upper bound on the
2771      * number of slices in the frame */
2772     s->nb_nals = 0;
2773     while (length >= 4) {
2774         HEVCNAL *nal;
2775         int extract_length = 0;
2776
2777         if (s->is_nalff) {
2778             int i;
2779             for (i = 0; i < s->nal_length_size; i++)
2780                 extract_length = (extract_length << 8) | buf[i];
2781             buf    += s->nal_length_size;
2782             length -= s->nal_length_size;
2783
2784             if (extract_length > length) {
2785                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2786                 ret = AVERROR_INVALIDDATA;
2787                 goto fail;
2788             }
2789         } else {
2790             if (buf[2] == 0) {
2791                 length--;
2792                 buf++;
2793                 continue;
2794             }
2795             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2796                 ret = AVERROR_INVALIDDATA;
2797                 goto fail;
2798             }
2799
2800             buf           += 3;
2801             length        -= 3;
2802             extract_length = length;
2803         }
2804
2805         if (s->nals_allocated < s->nb_nals + 1) {
2806             int new_size = s->nals_allocated + 1;
2807             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2808             if (!tmp) {
2809                 ret = AVERROR(ENOMEM);
2810                 goto fail;
2811             }
2812             s->nals = tmp;
2813             memset(s->nals + s->nals_allocated, 0,
2814                    (new_size - s->nals_allocated) * sizeof(*tmp));
2815             s->nals_allocated = new_size;
2816         }
2817         nal = &s->nals[s->nb_nals++];
2818
2819         consumed = extract_rbsp(buf, extract_length, nal);
2820         if (consumed < 0) {
2821             ret = consumed;
2822             goto fail;
2823         }
2824
2825         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2826         if (ret < 0)
2827             goto fail;
2828         hls_nal_unit(s);
2829
2830         if (s->nal_unit_type == NAL_EOB_NUT ||
2831             s->nal_unit_type == NAL_EOS_NUT)
2832             s->eos = 1;
2833
2834         buf    += consumed;
2835         length -= consumed;
2836     }
2837
2838     /* parse the NAL units */
2839     for (i = 0; i < s->nb_nals; i++) {
2840         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2841         if (ret < 0) {
2842             av_log(s->avctx, AV_LOG_WARNING,
2843                    "Error parsing NAL unit #%d.\n", i);
2844             goto fail;
2845         }
2846     }
2847
2848 fail:
2849     if (s->ref)
2850         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2851
2852     return ret;
2853 }
2854
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without
 * any separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2861
2862 static int verify_md5(HEVCContext *s, AVFrame *frame)
2863 {
2864     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2865     int pixel_shift;
2866     int i, j;
2867
2868     if (!desc)
2869         return AVERROR(EINVAL);
2870
2871     pixel_shift = desc->comp[0].depth_minus1 > 7;
2872
2873     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2874            s->poc);
2875
2876     /* the checksums are LE, so we have to byteswap for >8bpp formats
2877      * on BE arches */
2878 #if HAVE_BIGENDIAN
2879     if (pixel_shift && !s->checksum_buf) {
2880         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2881                        FFMAX3(frame->linesize[0], frame->linesize[1],
2882                               frame->linesize[2]));
2883         if (!s->checksum_buf)
2884             return AVERROR(ENOMEM);
2885     }
2886 #endif
2887
2888     for (i = 0; frame->data[i]; i++) {
2889         int width  = s->avctx->coded_width;
2890         int height = s->avctx->coded_height;
2891         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2892         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2893         uint8_t md5[16];
2894
2895         av_md5_init(s->md5_ctx);
2896         for (j = 0; j < h; j++) {
2897             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2898 #if HAVE_BIGENDIAN
2899             if (pixel_shift) {
2900                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2901                                     (const uint16_t *) src, w);
2902                 src = s->checksum_buf;
2903             }
2904 #endif
2905             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2906         }
2907         av_md5_final(s->md5_ctx, md5);
2908
2909         if (!memcmp(md5, s->md5[i], 16)) {
2910             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2911             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2912             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2913         } else {
2914             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2915             print_md5(s->avctx, AV_LOG_ERROR, md5);
2916             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2917             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2918             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2919             return AVERROR_INVALIDDATA;
2920         }
2921     }
2922
2923     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2924
2925     return 0;
2926 }
2927
2928 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2929                              AVPacket *avpkt)
2930 {
2931     int ret;
2932     HEVCContext *s = avctx->priv_data;
2933
2934     if (!avpkt->size) {
2935         ret = ff_hevc_output_frame(s, data, 1);
2936         if (ret < 0)
2937             return ret;
2938
2939         *got_output = ret;
2940         return 0;
2941     }
2942
2943     s->ref = NULL;
2944     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2945     if (ret < 0)
2946         return ret;
2947
2948     /* verify the SEI checksum */
2949     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2950         s->is_md5) {
2951         ret = verify_md5(s, s->ref->frame);
2952         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2953             ff_hevc_unref_frame(s, s->ref, ~0);
2954             return ret;
2955         }
2956     }
2957     s->is_md5 = 0;
2958
2959     if (s->is_decoded) {
2960         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2961         s->is_decoded = 0;
2962     }
2963
2964     if (s->output_frame->buf[0]) {
2965         av_frame_move_ref(data, s->output_frame);
2966         *got_output = 1;
2967     }
2968
2969     return avpkt->size;
2970 }
2971
2972 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2973 {
2974     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2975     if (ret < 0)
2976         return ret;
2977
2978     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2979     if (!dst->tab_mvf_buf)
2980         goto fail;
2981     dst->tab_mvf = src->tab_mvf;
2982
2983     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2984     if (!dst->rpl_tab_buf)
2985         goto fail;
2986     dst->rpl_tab = src->rpl_tab;
2987
2988     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2989     if (!dst->rpl_buf)
2990         goto fail;
2991
2992     dst->poc        = src->poc;
2993     dst->ctb_count  = src->ctb_count;
2994     dst->window     = src->window;
2995     dst->flags      = src->flags;
2996     dst->sequence   = src->sequence;
2997
2998     return 0;
2999 fail:
3000     ff_hevc_unref_frame(s, dst, ~0);
3001     return AVERROR(ENOMEM);
3002 }
3003
3004 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3005 {
3006     HEVCContext       *s = avctx->priv_data;
3007     int i;
3008
3009     pic_arrays_free(s);
3010
3011     av_freep(&s->md5_ctx);
3012
3013     av_frame_free(&s->tmp_frame);
3014     av_frame_free(&s->output_frame);
3015
3016     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3017         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3018         av_frame_free(&s->DPB[i].frame);
3019     }
3020
3021     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3022         av_buffer_unref(&s->vps_list[i]);
3023     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3024         av_buffer_unref(&s->sps_list[i]);
3025     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3026         av_buffer_unref(&s->pps_list[i]);
3027
3028     for (i = 0; i < s->nals_allocated; i++)
3029         av_freep(&s->nals[i].rbsp_buffer);
3030     av_freep(&s->nals);
3031     s->nals_allocated = 0;
3032
3033     return 0;
3034 }
3035
3036 static av_cold int hevc_init_context(AVCodecContext *avctx)
3037 {
3038     HEVCContext *s = avctx->priv_data;
3039     int i;
3040
3041     s->avctx = avctx;
3042
3043     s->tmp_frame = av_frame_alloc();
3044     if (!s->tmp_frame)
3045         goto fail;
3046
3047     s->output_frame = av_frame_alloc();
3048     if (!s->output_frame)
3049         goto fail;
3050
3051     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3052         s->DPB[i].frame = av_frame_alloc();
3053         if (!s->DPB[i].frame)
3054             goto fail;
3055         s->DPB[i].tf.f = s->DPB[i].frame;
3056     }
3057
3058     s->max_ra = INT_MAX;
3059
3060     s->md5_ctx = av_md5_alloc();
3061     if (!s->md5_ctx)
3062         goto fail;
3063
3064     ff_bswapdsp_init(&s->bdsp);
3065
3066     s->context_initialized = 1;
3067
3068     return 0;
3069
3070 fail:
3071     hevc_decode_free(avctx);
3072     return AVERROR(ENOMEM);
3073 }
3074
3075 static int hevc_update_thread_context(AVCodecContext *dst,
3076                                       const AVCodecContext *src)
3077 {
3078     HEVCContext *s  = dst->priv_data;
3079     HEVCContext *s0 = src->priv_data;
3080     int i, ret;
3081
3082     if (!s->context_initialized) {
3083         ret = hevc_init_context(dst);
3084         if (ret < 0)
3085             return ret;
3086     }
3087
3088     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3089         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3090         if (s0->DPB[i].frame->buf[0]) {
3091             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3092             if (ret < 0)
3093                 return ret;
3094         }
3095     }
3096
3097     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3098         av_buffer_unref(&s->vps_list[i]);
3099         if (s0->vps_list[i]) {
3100             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3101             if (!s->vps_list[i])
3102                 return AVERROR(ENOMEM);
3103         }
3104     }
3105
3106     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3107         av_buffer_unref(&s->sps_list[i]);
3108         if (s0->sps_list[i]) {
3109             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3110             if (!s->sps_list[i])
3111                 return AVERROR(ENOMEM);
3112         }
3113     }
3114
3115     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3116         av_buffer_unref(&s->pps_list[i]);
3117         if (s0->pps_list[i]) {
3118             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3119             if (!s->pps_list[i])
3120                 return AVERROR(ENOMEM);
3121         }
3122     }
3123
3124     if (s->sps != s0->sps)
3125         ret = set_sps(s, s0->sps);
3126
3127     s->seq_decode = s0->seq_decode;
3128     s->seq_output = s0->seq_output;
3129     s->pocTid0    = s0->pocTid0;
3130     s->max_ra     = s0->max_ra;
3131
3132     s->is_nalff        = s0->is_nalff;
3133     s->nal_length_size = s0->nal_length_size;
3134
3135     if (s0->eos) {
3136         s->seq_decode = (s->seq_decode + 1) & 0xff;
3137         s->max_ra = INT_MAX;
3138     }
3139
3140     return 0;
3141 }
3142
3143 static int hevc_decode_extradata(HEVCContext *s)
3144 {
3145     AVCodecContext *avctx = s->avctx;
3146     GetByteContext gb;
3147     int ret;
3148
3149     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3150
3151     if (avctx->extradata_size > 3 &&
3152         (avctx->extradata[0] || avctx->extradata[1] ||
3153          avctx->extradata[2] > 1)) {
3154         /* It seems the extradata is encoded as hvcC format.
3155          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3156          * is finalized. When finalized, configurationVersion will be 1 and we
3157          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3158         int i, j, num_arrays, nal_len_size;
3159
3160         s->is_nalff = 1;
3161
3162         bytestream2_skip(&gb, 21);
3163         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3164         num_arrays   = bytestream2_get_byte(&gb);
3165
3166         /* nal units in the hvcC always have length coded with 2 bytes,
3167          * so put a fake nal_length_size = 2 while parsing them */
3168         s->nal_length_size = 2;
3169
3170         /* Decode nal units from hvcC. */
3171         for (i = 0; i < num_arrays; i++) {
3172             int type = bytestream2_get_byte(&gb) & 0x3f;
3173             int cnt  = bytestream2_get_be16(&gb);
3174
3175             for (j = 0; j < cnt; j++) {
3176                 // +2 for the nal size field
3177                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3178                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3179                     av_log(s->avctx, AV_LOG_ERROR,
3180                            "Invalid NAL unit size in extradata.\n");
3181                     return AVERROR_INVALIDDATA;
3182                 }
3183
3184                 ret = decode_nal_units(s, gb.buffer, nalsize);
3185                 if (ret < 0) {
3186                     av_log(avctx, AV_LOG_ERROR,
3187                            "Decoding nal unit %d %d from hvcC failed\n",
3188                            type, i);
3189                     return ret;
3190                 }
3191                 bytestream2_skip(&gb, nalsize);
3192             }
3193         }
3194
3195         /* Now store right nal length size, that will be used to parse
3196          * all other nals */
3197         s->nal_length_size = nal_len_size;
3198     } else {
3199         s->is_nalff = 0;
3200         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3201         if (ret < 0)
3202             return ret;
3203     }
3204     return 0;
3205 }
3206
3207 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3208 {
3209     HEVCContext *s = avctx->priv_data;
3210     int ret;
3211
3212     ff_init_cabac_states();
3213
3214     avctx->internal->allocate_progress = 1;
3215
3216     ret = hevc_init_context(avctx);
3217     if (ret < 0)
3218         return ret;
3219
3220     if (avctx->extradata_size > 0 && avctx->extradata) {
3221         ret = hevc_decode_extradata(s);
3222         if (ret < 0) {
3223             hevc_decode_free(avctx);
3224             return ret;
3225         }
3226     }
3227
3228     return 0;
3229 }
3230
3231 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3232 {
3233     HEVCContext *s = avctx->priv_data;
3234     int ret;
3235
3236     memset(s, 0, sizeof(*s));
3237
3238     ret = hevc_init_context(avctx);
3239     if (ret < 0)
3240         return ret;
3241
3242     return 0;
3243 }
3244
3245 static void hevc_decode_flush(AVCodecContext *avctx)
3246 {
3247     HEVCContext *s = avctx->priv_data;
3248     ff_hevc_flush_dpb(s);
3249     s->max_ra = INT_MAX;
3250 }
3251
/* Byte offset of member x inside HEVCContext; used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Flag set for the option table: a decoding parameter that applies to video. */
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3254
3255 static const AVProfile profiles[] = {
3256     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3257     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3258     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3259     { FF_PROFILE_UNKNOWN },
3260 };
3261
3262 static const AVOption options[] = {
3263     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3264         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3265     { NULL },
3266 };
3267
3268 static const AVClass hevc_decoder_class = {
3269     .class_name = "HEVC decoder",
3270     .item_name  = av_default_item_name,
3271     .option     = options,
3272     .version    = LIBAVUTIL_VERSION_INT,
3273 };
3274
3275 AVCodec ff_hevc_decoder = {
3276     .name                  = "hevc",
3277     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3278     .type                  = AVMEDIA_TYPE_VIDEO,
3279     .id                    = AV_CODEC_ID_HEVC,
3280     .priv_data_size        = sizeof(HEVCContext),
3281     .priv_class            = &hevc_decoder_class,
3282     .init                  = hevc_decode_init,
3283     .close                 = hevc_decode_free,
3284     .decode                = hevc_decode_frame,
3285     .flush                 = hevc_decode_flush,
3286     .update_thread_context = hevc_update_thread_context,
3287     .init_thread_copy      = hevc_init_thread_copy,
3288     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3289                              CODEC_CAP_FRAME_THREADS,
3290     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3291 };