git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb_legacy.h"
  39 #include "hevc.h"
  40 #include "hevc_data.h"
  41 #include "hevcdec.h"
  42 #include "profiles.h"
  43
  44 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
  45 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
  46 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };
  47
  48 static const uint8_t scan_1x1[1] = { 0 };
  49
  50 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  51
  52 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  53
  54 static const uint8_t horiz_scan4x4_x[16] = {
  55     0, 1, 2, 3,
  56     0, 1, 2, 3,
  57     0, 1, 2, 3,
  58     0, 1, 2, 3,
  59 };
  60
  61 static const uint8_t horiz_scan4x4_y[16] = {
  62     0, 0, 0, 0,
  63     1, 1, 1, 1,
  64     2, 2, 2, 2,
  65     3, 3, 3, 3,
  66 };
  67
  68 static const uint8_t horiz_scan8x8_inv[8][8] = {
  69     {  0,  1,  2,  3, 16, 17, 18, 19, },
  70     {  4,  5,  6,  7, 20, 21, 22, 23, },
  71     {  8,  9, 10, 11, 24, 25, 26, 27, },
  72     { 12, 13, 14, 15, 28, 29, 30, 31, },
  73     { 32, 33, 34, 35, 48, 49, 50, 51, },
  74     { 36, 37, 38, 39, 52, 53, 54, 55, },
  75     { 40, 41, 42, 43, 56, 57, 58, 59, },
  76     { 44, 45, 46, 47, 60, 61, 62, 63, },
  77 };
  78
  79 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  80
  81 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  82
  83 static const uint8_t diag_scan2x2_inv[2][2] = {
  84     { 0, 2, },
  85     { 1, 3, },
  86 };
  87
  88 static const uint8_t diag_scan4x4_inv[4][4] = {
  89     { 0,  2,  5,  9, },
  90     { 1,  4,  8, 12, },
  91     { 3,  7, 11, 14, },
  92     { 6, 10, 13, 15, },
  93 };
  94
  95 static const uint8_t diag_scan8x8_inv[8][8] = {
  96     {  0,  2,  5,  9, 14, 20, 27, 35, },
  97     {  1,  4,  8, 13, 19, 26, 34, 42, },
  98     {  3,  7, 12, 18, 25, 33, 41, 48, },
  99     {  6, 11, 17, 24, 32, 40, 47, 53, },
 100     { 10, 16, 23, 31, 39, 46, 52, 57, },
 101     { 15, 22, 30, 38, 45, 51, 56, 60, },
 102     { 21, 29, 37, 44, 50, 55, 59, 62, },
 103     { 28, 36, 43, 49, 54, 58, 61, 63, },
 104 };
 105
 106 /**
 107  * NOTE: Each function hls_foo corresponds to the function foo in the
 108  * specification (HLS stands for High Level Syntax).
 109  */
 110
 111 /**
 112  * Section 5.7
 113  */
 114
 115 /* free everything allocated  by pic_arrays_init() */
 116 static void pic_arrays_free(HEVCContext *s)
 117 {
 118     av_freep(&s->sao);
 119     av_freep(&s->deblock);
 120
 121     av_freep(&s->skip_flag);
 122     av_freep(&s->tab_ct_depth);
 123
 124     av_freep(&s->tab_ipm);
 125     av_freep(&s->cbf_luma);
 126     av_freep(&s->is_pcm);
 127
 128     av_freep(&s->qp_y_tab);
 129     av_freep(&s->tab_slice_address);
 130     av_freep(&s->filter_slice_edges);
 131
 132     av_freep(&s->horizontal_bs);
 133     av_freep(&s->vertical_bs);
 134
 135     av_buffer_pool_uninit(&s->tab_mvf_pool);
 136     av_buffer_pool_uninit(&s->rpl_tab_pool);
 137 }
 138
 139 /* allocate arrays that depend on frame dimensions */
 140 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 141 {
 142     int log2_min_cb_size = sps->log2_min_cb_size;
 143     int width            = sps->width;
 144     int height           = sps->height;
 145     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 146                            ((height >> log2_min_cb_size) + 1);
 147     int ctb_count        = sps->ctb_width * sps->ctb_height;
 148     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 149
 150     s->bs_width  = width  >> 3;
 151     s->bs_height = height >> 3;
 152
 153     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 154     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 155     if (!s->sao || !s->deblock)
 156         goto fail;
 157
 158     s->skip_flag    = av_malloc(pic_size_in_ctb);
 159     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 160     if (!s->skip_flag || !s->tab_ct_depth)
 161         goto fail;
 162
 163     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 164     s->tab_ipm  = av_mallocz(min_pu_size);
 165     s->is_pcm   = av_malloc(min_pu_size);
 166     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 167         goto fail;
 168
 169     s->filter_slice_edges = av_malloc(ctb_count);
 170     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 171                                       sizeof(*s->tab_slice_address));
 172     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 173                                       sizeof(*s->qp_y_tab));
 174     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 175         goto fail;
 176
 177     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 178     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 179     if (!s->horizontal_bs || !s->vertical_bs)
 180         goto fail;
 181
 182     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 183                                           av_buffer_alloc);
 184     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 185                                           av_buffer_allocz);
 186     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 187         goto fail;
 188
 189     return 0;
 190
 191 fail:
 192     pic_arrays_free(s);
 193     return AVERROR(ENOMEM);
 194 }
 195
 196 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 197 {
 198     int i = 0;
 199     int j = 0;
 200     uint8_t luma_weight_l0_flag[16];
 201     uint8_t chroma_weight_l0_flag[16];
 202     uint8_t luma_weight_l1_flag[16];
 203     uint8_t chroma_weight_l1_flag[16];
 204
 205     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 206     if (s->ps.sps->chroma_format_idc != 0) {
 207         int delta = get_se_golomb(gb);
 208         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 209     }
 210
 211     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 212         luma_weight_l0_flag[i] = get_bits1(gb);
 213         if (!luma_weight_l0_flag[i]) {
 214             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 215             s->sh.luma_offset_l0[i] = 0;
 216         }
 217     }
 218     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 219         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 220             chroma_weight_l0_flag[i] = get_bits1(gb);
 221     } else {
 222         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 223             chroma_weight_l0_flag[i] = 0;
 224     }
 225     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 226         if (luma_weight_l0_flag[i]) {
 227             int delta_luma_weight_l0 = get_se_golomb(gb);
 228             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 229             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 230         }
 231         if (chroma_weight_l0_flag[i]) {
 232             for (j = 0; j < 2; j++) {
 233                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 234                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 235                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 236                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 237                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 238             }
 239         } else {
 240             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 241             s->sh.chroma_offset_l0[i][0] = 0;
 242             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 243             s->sh.chroma_offset_l0[i][1] = 0;
 244         }
 245     }
 246     if (s->sh.slice_type == HEVC_SLICE_B) {
 247         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 248             luma_weight_l1_flag[i] = get_bits1(gb);
 249             if (!luma_weight_l1_flag[i]) {
 250                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 251                 s->sh.luma_offset_l1[i] = 0;
 252             }
 253         }
 254         if (s->ps.sps->chroma_format_idc != 0) {
 255             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 256                 chroma_weight_l1_flag[i] = get_bits1(gb);
 257         } else {
 258             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 259                 chroma_weight_l1_flag[i] = 0;
 260         }
 261         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 262             if (luma_weight_l1_flag[i]) {
 263                 int delta_luma_weight_l1 = get_se_golomb(gb);
 264                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 265                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 266             }
 267             if (chroma_weight_l1_flag[i]) {
 268                 for (j = 0; j < 2; j++) {
 269                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 270                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 271                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 272                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 273                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 274                 }
 275             } else {
 276                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 277                 s->sh.chroma_offset_l1[i][0] = 0;
 278                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 279                 s->sh.chroma_offset_l1[i][1] = 0;
 280             }
 281         }
 282     }
 283 }
 284
 285 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 286 {
 287     const HEVCSPS *sps = s->ps.sps;
 288     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 289     int prev_delta_msb = 0;
 290     unsigned int nb_sps = 0, nb_sh;
 291     int i;
 292
 293     rps->nb_refs = 0;
 294     if (!sps->long_term_ref_pics_present_flag)
 295         return 0;
 296
 297     if (sps->num_long_term_ref_pics_sps > 0)
 298         nb_sps = get_ue_golomb_long(gb);
 299     nb_sh = get_ue_golomb_long(gb);
 300
 301     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 302         return AVERROR_INVALIDDATA;
 303
 304     rps->nb_refs = nb_sh + nb_sps;
 305
 306     for (i = 0; i < rps->nb_refs; i++) {
 307         uint8_t delta_poc_msb_present;
 308
 309         if (i < nb_sps) {
 310             uint8_t lt_idx_sps = 0;
 311
 312             if (sps->num_long_term_ref_pics_sps > 1)
 313                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 314
 315             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 316             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 317         } else {
 318             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 319             rps->used[i] = get_bits1(gb);
 320         }
 321
 322         delta_poc_msb_present = get_bits1(gb);
 323         if (delta_poc_msb_present) {
 324             int delta = get_ue_golomb_long(gb);
 325
 326             if (i && i != nb_sps)
 327                 delta += prev_delta_msb;
 328
 329             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 330             prev_delta_msb = delta;
 331         }
 332     }
 333
 334     return 0;
 335 }
 336
 337 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 338                                  const HEVCSPS *sps)
 339 {
 340     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 341     const HEVCWindow *ow = &sps->output_window;
 342     unsigned int num = 0, den = 0;
 343
 344     avctx->pix_fmt             = sps->pix_fmt;
 345     avctx->coded_width         = sps->width;
 346     avctx->coded_height        = sps->height;
 347     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
 348     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
 349     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 350     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 351     avctx->level               = sps->ptl.general_ptl.level_idc;
 352
 353     ff_set_sar(avctx, sps->vui.sar);
 354
 355     if (sps->vui.video_signal_type_present_flag)
 356         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 357                                                             : AVCOL_RANGE_MPEG;
 358     else
 359         avctx->color_range = AVCOL_RANGE_MPEG;
 360
 361     if (sps->vui.colour_description_present_flag) {
 362         avctx->color_primaries = sps->vui.colour_primaries;
 363         avctx->color_trc       = sps->vui.transfer_characteristic;
 364         avctx->colorspace      = sps->vui.matrix_coeffs;
 365     } else {
 366         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 367         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 368         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 369     }
 370
 371     if (vps->vps_timing_info_present_flag) {
 372         num = vps->vps_num_units_in_tick;
 373         den = vps->vps_time_scale;
 374     } else if (sps->vui.vui_timing_info_present_flag) {
 375         num = sps->vui.vui_num_units_in_tick;
 376         den = sps->vui.vui_time_scale;
 377     }
 378
 379     if (num != 0 && den != 0)
 380         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 381                   num, den, 1 << 30);
 382 }
 383
 384 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 385 {
 386     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
 387                          CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
 388     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 389
 390     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 391         sps->pix_fmt == AV_PIX_FMT_YUV420P10) {
 392 #if CONFIG_HEVC_D3D11VA_HWACCEL
 393         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 394         *fmt++ = AV_PIX_FMT_D3D11;
 395 #endif
 396 #if CONFIG_HEVC_DXVA2_HWACCEL
 397         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 398 #endif
 399 #if CONFIG_HEVC_VAAPI_HWACCEL
 400         *fmt++ = AV_PIX_FMT_VAAPI;
 401 #endif
 402     }
 403     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 404 #if CONFIG_HEVC_VDPAU_HWACCEL
 405         *fmt++ = AV_PIX_FMT_VDPAU;
 406 #endif
 407     }
 408
 409     *fmt++ = sps->pix_fmt;
 410     *fmt = AV_PIX_FMT_NONE;
 411
 412     return ff_get_format(s->avctx, pix_fmts);
 413 }
 414
 415 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
 416                    enum AVPixelFormat pix_fmt)
 417 {
 418     int ret;
 419
 420     pic_arrays_free(s);
 421     s->ps.sps = NULL;
 422     s->ps.vps = NULL;
 423
 424     if (!sps)
 425         return 0;
 426
 427     ret = pic_arrays_init(s, sps);
 428     if (ret < 0)
 429         goto fail;
 430
 431     export_stream_params(s->avctx, &s->ps, sps);
 432
 433     s->avctx->pix_fmt = pix_fmt;
 434
 435     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 436     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 437     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 438
 439     if (sps->sao_enabled && !s->avctx->hwaccel) {
 440         av_frame_unref(s->tmp_frame);
 441         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 442         if (ret < 0)
 443             goto fail;
 444         s->frame = s->tmp_frame;
 445     }
 446
 447     s->ps.sps = sps;
 448     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 449
 450     return 0;
 451
 452 fail:
 453     pic_arrays_free(s);
 454     s->ps.sps = NULL;
 455     return ret;
 456 }
 457
 458 static int hls_slice_header(HEVCContext *s)
 459 {
 460     GetBitContext *gb = &s->HEVClc.gb;
 461     SliceHeader *sh   = &s->sh;
 462     int i, ret;
 463
 464     // Coded parameters
 465     sh->first_slice_in_pic_flag = get_bits1(gb);
 466     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 467         s->seq_decode = (s->seq_decode + 1) & 0xff;
 468         s->max_ra     = INT_MAX;
 469         if (IS_IDR(s))
 470             ff_hevc_clear_refs(s);
 471     }
 472     if (IS_IRAP(s))
 473         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 474
 475     sh->pps_id = get_ue_golomb_long(gb);
 476     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 477         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 478         return AVERROR_INVALIDDATA;
 479     }
 480     if (!sh->first_slice_in_pic_flag &&
 481         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 482         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 483         return AVERROR_INVALIDDATA;
 484     }
 485     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 486
 487     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 488         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 489         enum AVPixelFormat pix_fmt;
 490
 491         ff_hevc_clear_refs(s);
 492
 493         ret = set_sps(s, sps, sps->pix_fmt);
 494         if (ret < 0)
 495             return ret;
 496
 497         pix_fmt = get_format(s, sps);
 498         if (pix_fmt < 0)
 499             return pix_fmt;
 500         s->avctx->pix_fmt = pix_fmt;
 501
 502         s->seq_decode = (s->seq_decode + 1) & 0xff;
 503         s->max_ra     = INT_MAX;
 504     }
 505
 506     sh->dependent_slice_segment_flag = 0;
 507     if (!sh->first_slice_in_pic_flag) {
 508         int slice_address_length;
 509
 510         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 511             sh->dependent_slice_segment_flag = get_bits1(gb);
 512
 513         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 514                                             s->ps.sps->ctb_height);
 515         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 516         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 517             av_log(s->avctx, AV_LOG_ERROR,
 518                    "Invalid slice segment address: %u.\n",
 519                    sh->slice_segment_addr);
 520             return AVERROR_INVALIDDATA;
 521         }
 522
 523         if (!sh->dependent_slice_segment_flag) {
 524             sh->slice_addr = sh->slice_segment_addr;
 525             s->slice_idx++;
 526         }
 527     } else {
 528         sh->slice_segment_addr = sh->slice_addr = 0;
 529         s->slice_idx           = 0;
 530         s->slice_initialized   = 0;
 531     }
 532
 533     if (!sh->dependent_slice_segment_flag) {
 534         s->slice_initialized = 0;
 535
 536         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 537             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 538
 539         sh->slice_type = get_ue_golomb_long(gb);
 540         if (!(sh->slice_type == HEVC_SLICE_I ||
 541               sh->slice_type == HEVC_SLICE_P ||
 542               sh->slice_type == HEVC_SLICE_B)) {
 543             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 544                    sh->slice_type);
 545             return AVERROR_INVALIDDATA;
 546         }
 547         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
 548             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 549             return AVERROR_INVALIDDATA;
 550         }
 551
 552         // when flag is not present, picture is inferred to be output
 553         sh->pic_output_flag = 1;
 554         if (s->ps.pps->output_flag_present_flag)
 555             sh->pic_output_flag = get_bits1(gb);
 556
 557         if (s->ps.sps->separate_colour_plane_flag)
 558             sh->colour_plane_id = get_bits(gb, 2);
 559
 560         if (!IS_IDR(s)) {
 561             int poc, pos;
 562
 563             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 564             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 565             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 566                 av_log(s->avctx, AV_LOG_WARNING,
 567                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 568                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 569                     return AVERROR_INVALIDDATA;
 570                 poc = s->poc;
 571             }
 572             s->poc = poc;
 573
 574             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 575             pos = get_bits_left(gb);
 576             if (!sh->short_term_ref_pic_set_sps_flag) {
 577                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 578                 if (ret < 0)
 579                     return ret;
 580
 581                 sh->short_term_rps = &sh->slice_rps;
 582             } else {
 583                 int numbits, rps_idx;
 584
 585                 if (!s->ps.sps->nb_st_rps) {
 586                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 587                     return AVERROR_INVALIDDATA;
 588                 }
 589
 590                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 591                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 592                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 593             }
 594             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 595
 596             pos = get_bits_left(gb);
 597             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 598             if (ret < 0) {
 599                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 600                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 601                     return AVERROR_INVALIDDATA;
 602             }
 603             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 604
 605             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 606                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 607             else
 608                 sh->slice_temporal_mvp_enabled_flag = 0;
 609         } else {
 610             s->sh.short_term_rps = NULL;
 611             s->poc               = 0;
 612         }
 613
 614         /* 8.3.1 */
 615         if (s->temporal_id == 0 &&
 616             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 617             s->nal_unit_type != HEVC_NAL_TSA_N   &&
 618             s->nal_unit_type != HEVC_NAL_STSA_N  &&
 619             s->nal_unit_type != HEVC_NAL_RADL_N  &&
 620             s->nal_unit_type != HEVC_NAL_RADL_R  &&
 621             s->nal_unit_type != HEVC_NAL_RASL_N  &&
 622             s->nal_unit_type != HEVC_NAL_RASL_R)
 623             s->pocTid0 = s->poc;
 624
 625         if (s->ps.sps->sao_enabled) {
 626             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 627             sh->slice_sample_adaptive_offset_flag[1] =
 628             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 629         } else {
 630             sh->slice_sample_adaptive_offset_flag[0] = 0;
 631             sh->slice_sample_adaptive_offset_flag[1] = 0;
 632             sh->slice_sample_adaptive_offset_flag[2] = 0;
 633         }
 634
 635         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 636         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
 637             int nb_refs;
 638
 639             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 640             if (sh->slice_type == HEVC_SLICE_B)
 641                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 642
 643             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 644                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 645                 if (sh->slice_type == HEVC_SLICE_B)
 646                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 647             }
 648             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
 649                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 650                        sh->nb_refs[L0], sh->nb_refs[L1]);
 651                 return AVERROR_INVALIDDATA;
 652             }
 653
 654             sh->rpl_modification_flag[0] = 0;
 655             sh->rpl_modification_flag[1] = 0;
 656             nb_refs = ff_hevc_frame_nb_refs(s);
 657             if (!nb_refs) {
 658                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 659                 return AVERROR_INVALIDDATA;
 660             }
 661
 662             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 663                 sh->rpl_modification_flag[0] = get_bits1(gb);
 664                 if (sh->rpl_modification_flag[0]) {
 665                     for (i = 0; i < sh->nb_refs[L0]; i++)
 666                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 667                 }
 668
 669                 if (sh->slice_type == HEVC_SLICE_B) {
 670                     sh->rpl_modification_flag[1] = get_bits1(gb);
 671                     if (sh->rpl_modification_flag[1] == 1)
 672                         for (i = 0; i < sh->nb_refs[L1]; i++)
 673                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 674                 }
 675             }
 676
 677             if (sh->slice_type == HEVC_SLICE_B)
 678                 sh->mvd_l1_zero_flag = get_bits1(gb);
 679
 680             if (s->ps.pps->cabac_init_present_flag)
 681                 sh->cabac_init_flag = get_bits1(gb);
 682             else
 683                 sh->cabac_init_flag = 0;
 684
 685             sh->collocated_ref_idx = 0;
 686             if (sh->slice_temporal_mvp_enabled_flag) {
 687                 sh->collocated_list = L0;
 688                 if (sh->slice_type == HEVC_SLICE_B)
 689                     sh->collocated_list = !get_bits1(gb);
 690
 691                 if (sh->nb_refs[sh->collocated_list] > 1) {
 692                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 693                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 694                         av_log(s->avctx, AV_LOG_ERROR,
 695                                "Invalid collocated_ref_idx: %d.\n",
 696                                sh->collocated_ref_idx);
 697                         return AVERROR_INVALIDDATA;
 698                     }
 699                 }
 700             }
 701
 702             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
 703                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
 704                 pred_weight_table(s, gb);
 705             }
 706
 707             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 708             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 709                 av_log(s->avctx, AV_LOG_ERROR,
 710                        "Invalid number of merging MVP candidates: %d.\n",
 711                        sh->max_num_merge_cand);
 712                 return AVERROR_INVALIDDATA;
 713             }
 714         }
 715
 716         sh->slice_qp_delta = get_se_golomb(gb);
 717
 718         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 719             sh->slice_cb_qp_offset = get_se_golomb(gb);
 720             sh->slice_cr_qp_offset = get_se_golomb(gb);
 721         } else {
 722             sh->slice_cb_qp_offset = 0;
 723             sh->slice_cr_qp_offset = 0;
 724         }
 725
 726         if (s->ps.pps->deblocking_filter_control_present_flag) {
 727             int deblocking_filter_override_flag = 0;
 728
 729             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 730                 deblocking_filter_override_flag = get_bits1(gb);
 731
 732             if (deblocking_filter_override_flag) {
 733                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 734                 if (!sh->disable_deblocking_filter_flag) {
 735                     sh->beta_offset = get_se_golomb(gb) * 2;
 736                     sh->tc_offset   = get_se_golomb(gb) * 2;
 737                 }
 738             } else {
 739                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 740                 sh->beta_offset                    = s->ps.pps->beta_offset;
 741                 sh->tc_offset                      = s->ps.pps->tc_offset;
 742             }
 743         } else {
 744             sh->disable_deblocking_filter_flag = 0;
 745             sh->beta_offset                    = 0;
 746             sh->tc_offset                      = 0;
 747         }
 748
 749         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 750             (sh->slice_sample_adaptive_offset_flag[0] ||
 751              sh->slice_sample_adaptive_offset_flag[1] ||
 752              !sh->disable_deblocking_filter_flag)) {
 753             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 754         } else {
 755             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 756         }
 757     } else if (!s->slice_initialized) {
 758         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 759         return AVERROR_INVALIDDATA;
 760     }
 761
 762     sh->num_entry_point_offsets = 0;
 763     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 764         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 765         if (sh->num_entry_point_offsets > 0) {
 766             int offset_len = get_ue_golomb_long(gb) + 1;
 767
 768             for (i = 0; i < sh->num_entry_point_offsets; i++)
 769                 skip_bits(gb, offset_len);
 770         }
 771     }
 772
 773     if (s->ps.pps->slice_header_extension_present_flag) {
 774         unsigned int length = get_ue_golomb_long(gb);
 775         for (i = 0; i < length; i++)
 776             skip_bits(gb, 8);  // slice_header_extension_data_byte
 777     }
 778
 779     // Inferred parameters
 780     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 781     if (sh->slice_qp > 51 ||
 782         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 783         av_log(s->avctx, AV_LOG_ERROR,
 784                "The slice_qp %d is outside the valid range "
 785                "[%d, 51].\n",
 786                sh->slice_qp,
 787                -s->ps.sps->qp_bd_offset);
 788         return AVERROR_INVALIDDATA;
 789     }
 790
 791     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 792
 793     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 794         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 795         return AVERROR_INVALIDDATA;
 796     }
 797
 798     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 799
 800     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 801         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 802                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 803
 804     s->slice_initialized = 1;
 805
 806     return 0;
 807 }
 808
 809 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 810
 811 #define SET_SAO(elem, value)                            \
 812 do {                                                    \
 813     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 814         sao->elem = value;                              \
 815     else if (sao_merge_left_flag)                       \
 816         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 817     else if (sao_merge_up_flag)                         \
 818         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 819     else                                                \
 820         sao->elem = 0;                                  \
 821 } while (0)
 822
 823 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 824 {
 825     HEVCLocalContext *lc    = &s->HEVClc;
 826     int sao_merge_left_flag = 0;
 827     int sao_merge_up_flag   = 0;
 828     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 829     SAOParams *sao          = &CTB(s->sao, rx, ry);
 830     int c_idx, i;
 831
 832     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 833         s->sh.slice_sample_adaptive_offset_flag[1]) {
 834         if (rx > 0) {
 835             if (lc->ctb_left_flag)
 836                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 837         }
 838         if (ry > 0 && !sao_merge_left_flag) {
 839             if (lc->ctb_up_flag)
 840                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 841         }
 842     }
 843
 844     for (c_idx = 0; c_idx < 3; c_idx++) {
 845         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 846             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 847             continue;
 848         }
 849
 850         if (c_idx == 2) {
 851             sao->type_idx[2] = sao->type_idx[1];
 852             sao->eo_class[2] = sao->eo_class[1];
 853         } else {
 854             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 855         }
 856
 857         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 858             continue;
 859
 860         for (i = 0; i < 4; i++)
 861             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 862
 863         if (sao->type_idx[c_idx] == SAO_BAND) {
 864             for (i = 0; i < 4; i++) {
 865                 if (sao->offset_abs[c_idx][i]) {
 866                     SET_SAO(offset_sign[c_idx][i],
 867                             ff_hevc_sao_offset_sign_decode(s));
 868                 } else {
 869                     sao->offset_sign[c_idx][i] = 0;
 870                 }
 871             }
 872             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 873         } else if (c_idx != 2) {
 874             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 875         }
 876
 877         // Inferred parameters
 878         sao->offset_val[c_idx][0] = 0;
 879         for (i = 0; i < 4; i++) {
 880             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 881             if (sao->type_idx[c_idx] == SAO_EDGE) {
 882                 if (i > 1)
 883                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 884             } else if (sao->offset_sign[c_idx][i]) {
 885                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 886             }
 887         }
 888     }
 889 }
 890
 891 #undef SET_SAO
 892 #undef CTB
 893
 894 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 895                                 int log2_trafo_size, enum ScanType scan_idx,
 896                                 int c_idx)
 897 {
 898 #define GET_COORD(offset, n)                                    \
 899     do {                                                        \
 900         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 901         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 902     } while (0)
 903     HEVCLocalContext *lc    = &s->HEVClc;
 904     int transform_skip_flag = 0;
 905
 906     int last_significant_coeff_x, last_significant_coeff_y;
 907     int last_scan_pos;
 908     int n_end;
 909     int num_coeff    = 0;
 910     int greater1_ctx = 1;
 911
 912     int num_last_subset;
 913     int x_cg_last_sig, y_cg_last_sig;
 914
 915     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 916
 917     ptrdiff_t stride = s->frame->linesize[c_idx];
 918     int hshift       = s->ps.sps->hshift[c_idx];
 919     int vshift       = s->ps.sps->vshift[c_idx];
 920     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 921                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 922     LOCAL_ALIGNED_32(int16_t, coeffs, [MAX_TB_SIZE * MAX_TB_SIZE]);
 923     LOCAL_ALIGNED_8(uint8_t, significant_coeff_group_flag, [8], [8]);
 924
 925     int trafo_size = 1 << log2_trafo_size;
 926     int i, qp, shift, add, scale, scale_m;
 927     static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 928     const uint8_t *scale_matrix;
 929     uint8_t dc_scale;
 930
 931     memset(coeffs, 0, sizeof(int16_t) * MAX_TB_SIZE * MAX_TB_SIZE);
 932     memset(significant_coeff_group_flag, 0, sizeof(uint8_t) * 8 * 8);
 933     // Derive QP for dequant
 934     if (!lc->cu.cu_transquant_bypass_flag) {
 935         static const int qp_c[] = {
 936             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 937         };
 938
 939         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 940             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 941             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 942             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 943         };
 944
 945         static const uint8_t div6[51 + 2 * 6 + 1] = {
 946             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 947             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 948             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 949         };
 950         int qp_y = lc->qp_y;
 951
 952         if (c_idx == 0) {
 953             qp = qp_y + s->ps.sps->qp_bd_offset;
 954         } else {
 955             int qp_i, offset;
 956
 957             if (c_idx == 1)
 958                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 959             else
 960                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 961
 962             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 963             if (qp_i < 30)
 964                 qp = qp_i;
 965             else if (qp_i > 43)
 966                 qp = qp_i - 6;
 967             else
 968                 qp = qp_c[qp_i - 30];
 969
 970             qp += s->ps.sps->qp_bd_offset;
 971         }
 972
 973         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 974         add      = 1 << (shift - 1);
 975         scale    = level_scale[rem6[qp]] << (div6[qp]);
 976         scale_m  = 16; // default when no custom scaling lists.
 977         dc_scale = 16;
 978
 979         if (s->ps.sps->scaling_list_enable_flag) {
 980             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 981                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 982             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 983
 984             if (log2_trafo_size != 5)
 985                 matrix_id = 3 * matrix_id + c_idx;
 986
 987             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 988             if (log2_trafo_size >= 4)
 989                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 990         }
 991     }
 992
 993     if (s->ps.pps->transform_skip_enabled_flag &&
 994         !lc->cu.cu_transquant_bypass_flag   &&
 995         log2_trafo_size == 2) {
 996         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 997     }
 998
 999     last_significant_coeff_x =
1000         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
1001     last_significant_coeff_y =
1002         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
1003
1004     if (last_significant_coeff_x > 3) {
1005         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
1006         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
1007                                    (2 + (last_significant_coeff_x & 1)) +
1008                                    suffix;
1009     }
1010
1011     if (last_significant_coeff_y > 3) {
1012         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1013         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1014                                    (2 + (last_significant_coeff_y & 1)) +
1015                                    suffix;
1016     }
1017
1018     if (scan_idx == SCAN_VERT)
1019         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1020
1021     x_cg_last_sig = last_significant_coeff_x >> 2;
1022     y_cg_last_sig = last_significant_coeff_y >> 2;
1023
1024     switch (scan_idx) {
1025     case SCAN_DIAG: {
1026         int last_x_c = last_significant_coeff_x & 3;
1027         int last_y_c = last_significant_coeff_y & 3;
1028
1029         scan_x_off = ff_hevc_diag_scan4x4_x;
1030         scan_y_off = ff_hevc_diag_scan4x4_y;
1031         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1032         if (trafo_size == 4) {
1033             scan_x_cg = scan_1x1;
1034             scan_y_cg = scan_1x1;
1035         } else if (trafo_size == 8) {
1036             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1037             scan_x_cg  = diag_scan2x2_x;
1038             scan_y_cg  = diag_scan2x2_y;
1039         } else if (trafo_size == 16) {
1040             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1041             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1042             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1043         } else { // trafo_size == 32
1044             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1045             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1046             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1047         }
1048         break;
1049     }
1050     case SCAN_HORIZ:
1051         scan_x_cg  = horiz_scan2x2_x;
1052         scan_y_cg  = horiz_scan2x2_y;
1053         scan_x_off = horiz_scan4x4_x;
1054         scan_y_off = horiz_scan4x4_y;
1055         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1056         break;
1057     default: //SCAN_VERT
1058         scan_x_cg  = horiz_scan2x2_y;
1059         scan_y_cg  = horiz_scan2x2_x;
1060         scan_x_off = horiz_scan4x4_y;
1061         scan_y_off = horiz_scan4x4_x;
1062         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1063         break;
1064     }
1065     num_coeff++;
1066     num_last_subset = (num_coeff - 1) >> 4;
1067
1068     for (i = num_last_subset; i >= 0; i--) {
1069         int n, m;
1070         int x_cg, y_cg, x_c, y_c;
1071         int implicit_non_zero_coeff = 0;
1072         int64_t trans_coeff_level;
1073         int prev_sig = 0;
1074         int offset   = i << 4;
1075
1076         uint8_t significant_coeff_flag_idx[16];
1077         uint8_t nb_significant_coeff_flag = 0;
1078
1079         x_cg = scan_x_cg[i];
1080         y_cg = scan_y_cg[i];
1081
1082         if (i < num_last_subset && i > 0) {
1083             int ctx_cg = 0;
1084             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1085                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1086             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1087                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1088
1089             significant_coeff_group_flag[x_cg][y_cg] =
1090                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1091             implicit_non_zero_coeff = 1;
1092         } else {
1093             significant_coeff_group_flag[x_cg][y_cg] =
1094                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1095                  (x_cg == 0 && y_cg == 0));
1096         }
1097
1098         last_scan_pos = num_coeff - offset - 1;
1099
1100         if (i == num_last_subset) {
1101             n_end                         = last_scan_pos - 1;
1102             significant_coeff_flag_idx[0] = last_scan_pos;
1103             nb_significant_coeff_flag     = 1;
1104         } else {
1105             n_end = 15;
1106         }
1107
1108         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1109             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1110         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1111             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1112
1113         for (n = n_end; n >= 0; n--) {
1114             GET_COORD(offset, n);
1115
1116             if (significant_coeff_group_flag[x_cg][y_cg] &&
1117                 (n > 0 || implicit_non_zero_coeff == 0)) {
1118                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1119                                                           log2_trafo_size,
1120                                                           scan_idx,
1121                                                           prev_sig) == 1) {
1122                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1123                     nb_significant_coeff_flag++;
1124                     implicit_non_zero_coeff = 0;
1125                 }
1126             } else {
1127                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1128                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1129                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1130                     nb_significant_coeff_flag++;
1131                 }
1132             }
1133         }
1134
1135         n_end = nb_significant_coeff_flag;
1136
1137         if (n_end) {
1138             int first_nz_pos_in_cg = 16;
1139             int last_nz_pos_in_cg = -1;
1140             int c_rice_param = 0;
1141             int first_greater1_coeff_idx = -1;
1142             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1143             uint16_t coeff_sign_flag;
1144             int sum_abs = 0;
1145             int sign_hidden = 0;
1146
1147             // initialize first elem of coeff_bas_level_greater1_flag
1148             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1149
1150             if (!(i == num_last_subset) && greater1_ctx == 0)
1151                 ctx_set++;
1152             greater1_ctx      = 1;
1153             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1154
1155             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1156                 int n_idx = significant_coeff_flag_idx[m];
1157                 int inc   = (ctx_set << 2) + greater1_ctx;
1158                 coeff_abs_level_greater1_flag[n_idx] =
1159                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1160                 if (coeff_abs_level_greater1_flag[n_idx]) {
1161                     greater1_ctx = 0;
1162                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1163                     greater1_ctx++;
1164                 }
1165
1166                 if (coeff_abs_level_greater1_flag[n_idx] &&
1167                     first_greater1_coeff_idx == -1)
1168                     first_greater1_coeff_idx = n_idx;
1169             }
1170             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1171             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1172                                  !lc->cu.cu_transquant_bypass_flag;
1173
1174             if (first_greater1_coeff_idx != -1) {
1175                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1176             }
1177             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1178                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1179             } else {
1180                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1181             }
1182
1183             for (m = 0; m < n_end; m++) {
1184                 n = significant_coeff_flag_idx[m];
1185                 GET_COORD(offset, n);
1186                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1187                 if (trans_coeff_level == ((m < 8) ?
1188                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1189                     trans_coeff_level += ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1190                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1191                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1192                 }
1193                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1194                     sum_abs += trans_coeff_level;
1195                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1196                         trans_coeff_level = -trans_coeff_level;
1197                 }
1198                 if (coeff_sign_flag >> 15)
1199                     trans_coeff_level = -trans_coeff_level;
1200                 coeff_sign_flag <<= 1;
1201                 if (!lc->cu.cu_transquant_bypass_flag) {
1202                     if (s->ps.sps->scaling_list_enable_flag) {
1203                         if (y_c || x_c || log2_trafo_size < 4) {
1204                             int pos;
1205                             switch (log2_trafo_size) {
1206                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1207                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1208                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1209                             default: pos = (y_c        << 2) +  x_c;
1210                             }
1211                             scale_m = scale_matrix[pos];
1212                         } else {
1213                             scale_m = dc_scale;
1214                         }
1215                     }
1216                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1217                     if(trans_coeff_level < 0) {
1218                         if((~trans_coeff_level) & 0xFffffffffff8000)
1219                             trans_coeff_level = -32768;
1220                     } else {
1221                         if (trans_coeff_level & 0xffffffffffff8000)
1222                             trans_coeff_level = 32767;
1223                     }
1224                 }
1225                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1226             }
1227         }
1228     }
1229
1230     if (!lc->cu.cu_transquant_bypass_flag) {
1231         if (transform_skip_flag)
1232             s->hevcdsp.dequant(coeffs);
1233         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1234                  log2_trafo_size == 2)
1235             s->hevcdsp.transform_4x4_luma(coeffs);
1236         else {
1237             int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
1238             if (max_xy == 0)
1239                 s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
1240             else {
1241                 int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
1242                 if (max_xy < 4)
1243                     col_limit = FFMIN(4, col_limit);
1244                 else if (max_xy < 8)
1245                     col_limit = FFMIN(8, col_limit);
1246                 else if (max_xy < 12)
1247                     col_limit = FFMIN(24, col_limit);
1248                 s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
1249             }
1250         }
1251     }
1252     s->hevcdsp.add_residual[log2_trafo_size - 2](dst, coeffs, stride);
1253 }
1254
1255 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1256                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1257                               int log2_cb_size, int log2_trafo_size,
1258                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1259 {
1260     HEVCLocalContext *lc = &s->HEVClc;
1261
1262     if (lc->cu.pred_mode == MODE_INTRA) {
1263         int trafo_size = 1 << log2_trafo_size;
1264         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1265
1266         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1267         if (log2_trafo_size > 2) {
1268             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1269             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1270             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1271             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1272         } else if (blk_idx == 3) {
1273             trafo_size = trafo_size << s->ps.sps->hshift[1];
1274             ff_hevc_set_neighbour_available(s, xBase, yBase,
1275                                             trafo_size, trafo_size);
1276             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1277             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1278         }
1279     }
1280
1281     if (cbf_luma || cbf_cb || cbf_cr) {
1282         int scan_idx   = SCAN_DIAG;
1283         int scan_idx_c = SCAN_DIAG;
1284
1285         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1286             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1287             if (lc->tu.cu_qp_delta != 0)
1288                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1289                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1290             lc->tu.is_cu_qp_delta_coded = 1;
1291
1292             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1293                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1294                 av_log(s->avctx, AV_LOG_ERROR,
1295                        "The cu_qp_delta %d is outside the valid range "
1296                        "[%d, %d].\n",
1297                        lc->tu.cu_qp_delta,
1298                        -(26 + s->ps.sps->qp_bd_offset / 2),
1299                         (25 + s->ps.sps->qp_bd_offset / 2));
1300                 return AVERROR_INVALIDDATA;
1301             }
1302
1303             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1304         }
1305
1306         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1307             if (lc->tu.cur_intra_pred_mode >= 6 &&
1308                 lc->tu.cur_intra_pred_mode <= 14) {
1309                 scan_idx = SCAN_VERT;
1310             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1311                        lc->tu.cur_intra_pred_mode <= 30) {
1312                 scan_idx = SCAN_HORIZ;
1313             }
1314
1315             if (lc->pu.intra_pred_mode_c >=  6 &&
1316                 lc->pu.intra_pred_mode_c <= 14) {
1317                 scan_idx_c = SCAN_VERT;
1318             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1319                        lc->pu.intra_pred_mode_c <= 30) {
1320                 scan_idx_c = SCAN_HORIZ;
1321             }
1322         }
1323
1324         if (cbf_luma)
1325             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1326         if (log2_trafo_size > 2) {
1327             if (cbf_cb)
1328                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1329             if (cbf_cr)
1330                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1331         } else if (blk_idx == 3) {
1332             if (cbf_cb)
1333                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1334             if (cbf_cr)
1335                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1336         }
1337     }
1338     return 0;
1339 }
1340
1341 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1342 {
1343     int cb_size          = 1 << log2_cb_size;
1344     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1345
1346     int min_pu_width     = s->ps.sps->min_pu_width;
1347     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1348     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1349     int i, j;
1350
1351     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1352         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1353             s->is_pcm[i + j * min_pu_width] = 2;
1354 }
1355
1356 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1357                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1358                               int log2_cb_size, int log2_trafo_size,
1359                               int trafo_depth, int blk_idx,
1360                               int cbf_cb, int cbf_cr)
1361 {
1362     HEVCLocalContext *lc = &s->HEVClc;
1363     uint8_t split_transform_flag;
1364     int ret;
1365
1366     if (lc->cu.intra_split_flag) {
1367         if (trafo_depth == 1)
1368             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1369     } else {
1370         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1371     }
1372
1373     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1374         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1375         trafo_depth     < lc->cu.max_trafo_depth       &&
1376         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1377         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1378     } else {
1379         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1380                           lc->cu.pred_mode == MODE_INTER &&
1381                           lc->cu.part_mode != PART_2Nx2N &&
1382                           trafo_depth == 0;
1383
1384         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1385                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1386                                inter_split;
1387     }
1388
1389     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1390         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1391     else if (log2_trafo_size > 2 || trafo_depth == 0)
1392         cbf_cb = 0;
1393     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1394         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1395     else if (log2_trafo_size > 2 || trafo_depth == 0)
1396         cbf_cr = 0;
1397
1398     if (split_transform_flag) {
1399         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1400         const int x1 = x0 + trafo_size_split;
1401         const int y1 = y0 + trafo_size_split;
1402
1403 #define SUBDIVIDE(x, y, idx)                                                    \
1404 do {                                                                            \
1405     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1406                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1407                              cbf_cb, cbf_cr);                                   \
1408     if (ret < 0)                                                                \
1409         return ret;                                                             \
1410 } while (0)
1411
1412         SUBDIVIDE(x0, y0, 0);
1413         SUBDIVIDE(x1, y0, 1);
1414         SUBDIVIDE(x0, y1, 2);
1415         SUBDIVIDE(x1, y1, 3);
1416
1417 #undef SUBDIVIDE
1418     } else {
1419         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1420         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1421         int min_tu_width     = s->ps.sps->min_tb_width;
1422         int cbf_luma         = 1;
1423
1424         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1425             cbf_cb || cbf_cr)
1426             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1427
1428         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1429                                  log2_cb_size, log2_trafo_size,
1430                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1431         if (ret < 0)
1432             return ret;
1433         // TODO: store cbf_luma somewhere else
1434         if (cbf_luma) {
1435             int i, j;
1436             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1437                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1438                     int x_tu = (x0 + j) >> log2_min_tu_size;
1439                     int y_tu = (y0 + i) >> log2_min_tu_size;
1440                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1441                 }
1442         }
1443         if (!s->sh.disable_deblocking_filter_flag) {
1444             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1445             if (s->ps.pps->transquant_bypass_enable_flag &&
1446                 lc->cu.cu_transquant_bypass_flag)
1447                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1448         }
1449     }
1450     return 0;
1451 }
1452
1453 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1454 {
1455     //TODO: non-4:2:0 support
1456     HEVCLocalContext *lc = &s->HEVClc;
1457     GetBitContext gb;
1458     int cb_size   = 1 << log2_cb_size;
1459     ptrdiff_t stride0 = s->frame->linesize[0];
1460     ptrdiff_t stride1 = s->frame->linesize[1];
1461     ptrdiff_t stride2 = s->frame->linesize[2];
1462     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1463     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1464     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1465
1466     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1467     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1468     int ret;
1469
1470     if (!s->sh.disable_deblocking_filter_flag)
1471         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1472
1473     ret = init_get_bits(&gb, pcm, length);
1474     if (ret < 0)
1475         return ret;
1476
1477     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1478     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1479     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1480     return 0;
1481 }
1482
1483 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1484 {
1485     HEVCLocalContext *lc = &s->HEVClc;
1486     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1487     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1488
1489     if (x)
1490         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1491     if (y)
1492         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1493
1494     switch (x) {
1495     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1496     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1497     case 0: lc->pu.mvd.x = 0;                               break;
1498     }
1499
1500     switch (y) {
1501     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1502     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1503     case 0: lc->pu.mvd.y = 0;                               break;
1504     }
1505 }
1506
1507 /**
1508  * 8.5.3.2.2.1 Luma sample interpolation process
1509  *
1510  * @param s HEVC decoding context
1511  * @param dst target buffer for block data at block position
1512  * @param dststride stride of the dst buffer
1513  * @param ref reference picture buffer at origin (0, 0)
1514  * @param mv motion vector (relative to block position) to get pixel data from
1515  * @param x_off horizontal position of block from origin (0, 0)
1516  * @param y_off vertical position of block from origin (0, 0)
1517  * @param block_w width of block
1518  * @param block_h height of block
1519  */
1520 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1521                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1522                     int block_w, int block_h, int pred_idx)
1523 {
1524     HEVCLocalContext *lc = &s->HEVClc;
1525     uint8_t *src         = ref->data[0];
1526     ptrdiff_t srcstride  = ref->linesize[0];
1527     int pic_width        = s->ps.sps->width;
1528     int pic_height       = s->ps.sps->height;
1529
1530     int mx         = mv->x & 3;
1531     int my         = mv->y & 3;
1532     int extra_left = ff_hevc_qpel_extra_before[mx];
1533     int extra_top  = ff_hevc_qpel_extra_before[my];
1534
1535     x_off += mv->x >> 2;
1536     y_off += mv->y >> 2;
1537     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1538
1539     if (x_off < extra_left || y_off < extra_top ||
1540         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1541         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1542         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1543         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1544         int buf_offset = extra_top *
1545                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1546
1547         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1548                                  edge_emu_stride, srcstride,
1549                                  block_w + ff_hevc_qpel_extra[mx],
1550                                  block_h + ff_hevc_qpel_extra[my],
1551                                  x_off - extra_left, y_off - extra_top,
1552                                  pic_width, pic_height);
1553         src = lc->edge_emu_buffer + buf_offset;
1554         srcstride = edge_emu_stride;
1555     }
1556     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1557                                                    block_h, mx, my, lc->mc_buffer);
1558 }
1559
1560 /**
1561  * 8.5.3.2.2.2 Chroma sample interpolation process
1562  *
1563  * @param s HEVC decoding context
1564  * @param dst1 target buffer for block data at block position (U plane)
1565  * @param dst2 target buffer for block data at block position (V plane)
1566  * @param dststride stride of the dst1 and dst2 buffers
1567  * @param ref reference picture buffer at origin (0, 0)
1568  * @param mv motion vector (relative to block position) to get pixel data from
1569  * @param x_off horizontal position of block from origin (0, 0)
1570  * @param y_off vertical position of block from origin (0, 0)
1571  * @param block_w width of block
1572  * @param block_h height of block
1573  */
1574 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1575                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1576                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1577 {
1578     HEVCLocalContext *lc = &s->HEVClc;
1579     uint8_t *src1        = ref->data[1];
1580     uint8_t *src2        = ref->data[2];
1581     ptrdiff_t src1stride = ref->linesize[1];
1582     ptrdiff_t src2stride = ref->linesize[2];
1583     int pic_width        = s->ps.sps->width >> 1;
1584     int pic_height       = s->ps.sps->height >> 1;
1585
1586     int mx = mv->x & 7;
1587     int my = mv->y & 7;
1588
1589     x_off += mv->x >> 3;
1590     y_off += mv->y >> 3;
1591     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1592     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1593
1594     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1595         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1596         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1597         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1598         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1599         int buf_offset1 = EPEL_EXTRA_BEFORE *
1600                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1601         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1602         int buf_offset2 = EPEL_EXTRA_BEFORE *
1603                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1604
1605         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1606                                  edge_emu_stride, src1stride,
1607                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1608                                  x_off - EPEL_EXTRA_BEFORE,
1609                                  y_off - EPEL_EXTRA_BEFORE,
1610                                  pic_width, pic_height);
1611
1612         src1 = lc->edge_emu_buffer + buf_offset1;
1613         src1stride = edge_emu_stride;
1614         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1615                                                        block_h, mx, my, lc->mc_buffer);
1616
1617         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1618                                  edge_emu_stride, src2stride,
1619                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1620                                  x_off - EPEL_EXTRA_BEFORE,
1621                                  y_off - EPEL_EXTRA_BEFORE,
1622                                  pic_width, pic_height);
1623         src2 = lc->edge_emu_buffer + buf_offset2;
1624         src2stride = edge_emu_stride;
1625
1626         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1627                                                        block_h, mx, my, lc->mc_buffer);
1628     } else {
1629         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1630                                                        block_h, mx, my, lc->mc_buffer);
1631         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1632                                                        block_h, mx, my, lc->mc_buffer);
1633     }
1634 }
1635
1636 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1637                                 const Mv *mv, int y0, int height)
1638 {
1639     int y = (mv->y >> 2) + y0 + height + 9;
1640     ff_thread_await_progress(&ref->tf, y, 0);
1641 }
1642
1643 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1644                                   int nPbH, int log2_cb_size, int part_idx,
1645                                   int merge_idx, MvField *mv)
1646 {
1647     HEVCLocalContext *lc             = &s->HEVClc;
1648     enum InterPredIdc inter_pred_idc = PRED_L0;
1649     int mvp_flag;
1650
1651     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1652     if (s->sh.slice_type == HEVC_SLICE_B)
1653         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1654
1655     if (inter_pred_idc != PRED_L1) {
1656         if (s->sh.nb_refs[L0])
1657             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1658
1659         mv->pred_flag[0] = 1;
1660         hls_mvd_coding(s, x0, y0, 0);
1661         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1662         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1663                                  part_idx, merge_idx, mv, mvp_flag, 0);
1664         mv->mv[0].x += lc->pu.mvd.x;
1665         mv->mv[0].y += lc->pu.mvd.y;
1666     }
1667
1668     if (inter_pred_idc != PRED_L0) {
1669         if (s->sh.nb_refs[L1])
1670             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1671
1672         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1673             AV_ZERO32(&lc->pu.mvd);
1674         } else {
1675             hls_mvd_coding(s, x0, y0, 1);
1676         }
1677
1678         mv->pred_flag[1] = 1;
1679         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1680         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1681                                  part_idx, merge_idx, mv, mvp_flag, 1);
1682         mv->mv[1].x += lc->pu.mvd.x;
1683         mv->mv[1].y += lc->pu.mvd.y;
1684     }
1685 }
1686
1687 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1688                                 int nPbW, int nPbH,
1689                                 int log2_cb_size, int partIdx)
1690 {
1691     static const int pred_indices[] = {
1692         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1693     };
1694     const int pred_idx = pred_indices[nPbW];
1695
1696 #define POS(c_idx, x, y)                                                              \
1697     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1698                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1699     HEVCLocalContext *lc = &s->HEVClc;
1700     int merge_idx = 0;
1701     struct MvField current_mv = {{{ 0 }}};
1702
1703     int min_pu_width = s->ps.sps->min_pu_width;
1704     int weighted_pred = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1705                         (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1706
1707     MvField *tab_mvf = s->ref->tab_mvf;
1708     RefPicList  *refPicList = s->ref->refPicList;
1709     HEVCFrame *ref0, *ref1;
1710
1711     ptrdiff_t tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1712
1713     uint8_t *dst0 = POS(0, x0, y0);
1714     uint8_t *dst1 = POS(1, x0, y0);
1715     uint8_t *dst2 = POS(2, x0, y0);
1716     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1717     int min_cb_width     = s->ps.sps->min_cb_width;
1718     int x_cb             = x0 >> log2_min_cb_size;
1719     int y_cb             = y0 >> log2_min_cb_size;
1720     int x_pu, y_pu;
1721     int i, j;
1722
1723     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1724
1725     if (!skip_flag)
1726         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1727
1728     if (skip_flag || lc->pu.merge_flag) {
1729         if (s->sh.max_num_merge_cand > 1)
1730             merge_idx = ff_hevc_merge_idx_decode(s);
1731         else
1732             merge_idx = 0;
1733
1734         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1735                                    partIdx, merge_idx, &current_mv);
1736     } else {
1737         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1738                               partIdx, merge_idx, &current_mv);
1739     }
1740
1741     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1742     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1743
1744     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1745         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1746             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1747
1748     if (current_mv.pred_flag[0]) {
1749         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1750         if (!ref0)
1751             return;
1752         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1753     }
1754     if (current_mv.pred_flag[1]) {
1755         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1756         if (!ref1)
1757             return;
1758         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1759     }
1760
1761     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1762         LOCAL_ALIGNED_16(int16_t,  tmp, [MAX_PB_SIZE * MAX_PB_SIZE]);
1763         LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]);
1764
1765         luma_mc(s, tmp, tmpstride, ref0->frame,
1766                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1767
1768         if (weighted_pred) {
1769             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1770                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1771                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1772                                                dst0, s->frame->linesize[0], tmp,
1773                                                tmpstride, nPbH);
1774         } else {
1775             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1776         }
1777         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1778                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1779
1780         if (weighted_pred) {
1781             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1782                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1783                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1784                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1785                                                       nPbH / 2);
1786             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1787                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1788                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1789                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1790                                                       nPbH / 2);
1791         } else {
1792             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1793             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1794         }
1795     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1796         LOCAL_ALIGNED_16(int16_t, tmp,  [MAX_PB_SIZE * MAX_PB_SIZE]);
1797         LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]);
1798
1799         luma_mc(s, tmp, tmpstride, ref1->frame,
1800                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1801
1802         if (weighted_pred) {
1803             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1804                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1805                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1806                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1807                                                nPbH);
1808         } else {
1809             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1810         }
1811
1812         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1813                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1814
1815         if (weighted_pred) {
1816             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1817                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1818                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1819                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1820             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1821                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1822                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1823                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1824         } else {
1825             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1826             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1827         }
1828     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1829         LOCAL_ALIGNED_16(int16_t, tmp,  [MAX_PB_SIZE * MAX_PB_SIZE]);
1830         LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]);
1831         LOCAL_ALIGNED_16(int16_t, tmp3, [MAX_PB_SIZE * MAX_PB_SIZE]);
1832         LOCAL_ALIGNED_16(int16_t, tmp4, [MAX_PB_SIZE * MAX_PB_SIZE]);
1833
1834         luma_mc(s, tmp, tmpstride, ref0->frame,
1835                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1836         luma_mc(s, tmp2, tmpstride, ref1->frame,
1837                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1838
1839         if (weighted_pred) {
1840             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1841                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1842                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1843                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1844                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1845                                                    dst0, s->frame->linesize[0],
1846                                                    tmp, tmp2, tmpstride, nPbH);
1847         } else {
1848             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1849                                                          tmp, tmp2, tmpstride, nPbH);
1850         }
1851
1852         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1853                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1854         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1855                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1856
1857         if (weighted_pred) {
1858             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1859                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1860                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1861                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1862                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1863                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1864                                                           tmpstride, nPbH / 2);
1865             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1866                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1867                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1868                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1869                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1870                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1871                                                           tmpstride, nPbH / 2);
1872         } else {
1873             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1874             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1875         }
1876     }
1877 }
1878
1879 /**
1880  * 8.4.1
1881  */
1882 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1883                                 int prev_intra_luma_pred_flag)
1884 {
1885     HEVCLocalContext *lc = &s->HEVClc;
1886     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1887     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1888     int min_pu_width     = s->ps.sps->min_pu_width;
1889     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1890     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1891     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1892
1893     int cand_up   = (lc->ctb_up_flag || y0b) ?
1894                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1895     int cand_left = (lc->ctb_left_flag || x0b) ?
1896                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1897
1898     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1899
1900     MvField *tab_mvf = s->ref->tab_mvf;
1901     int intra_pred_mode;
1902     int candidate[3];
1903     int i, j;
1904
1905     // intra_pred_mode prediction does not cross vertical CTB boundaries
1906     if ((y0 - 1) < y_ctb)
1907         cand_up = INTRA_DC;
1908
1909     if (cand_left == cand_up) {
1910         if (cand_left < 2) {
1911             candidate[0] = INTRA_PLANAR;
1912             candidate[1] = INTRA_DC;
1913             candidate[2] = INTRA_ANGULAR_26;
1914         } else {
1915             candidate[0] = cand_left;
1916             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1917             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1918         }
1919     } else {
1920         candidate[0] = cand_left;
1921         candidate[1] = cand_up;
1922         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1923             candidate[2] = INTRA_PLANAR;
1924         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1925             candidate[2] = INTRA_DC;
1926         } else {
1927             candidate[2] = INTRA_ANGULAR_26;
1928         }
1929     }
1930
1931     if (prev_intra_luma_pred_flag) {
1932         intra_pred_mode = candidate[lc->pu.mpm_idx];
1933     } else {
1934         if (candidate[0] > candidate[1])
1935             FFSWAP(uint8_t, candidate[0], candidate[1]);
1936         if (candidate[0] > candidate[2])
1937             FFSWAP(uint8_t, candidate[0], candidate[2]);
1938         if (candidate[1] > candidate[2])
1939             FFSWAP(uint8_t, candidate[1], candidate[2]);
1940
1941         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1942         for (i = 0; i < 3; i++)
1943             if (intra_pred_mode >= candidate[i])
1944                 intra_pred_mode++;
1945     }
1946
1947     /* write the intra prediction units into the mv array */
1948     if (!size_in_pus)
1949         size_in_pus = 1;
1950     for (i = 0; i < size_in_pus; i++) {
1951         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1952                intra_pred_mode, size_in_pus);
1953
1954         for (j = 0; j < size_in_pus; j++) {
1955             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1956             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1957             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1958             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1959             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1960             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1961             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1962             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1963             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1964         }
1965     }
1966
1967     return intra_pred_mode;
1968 }
1969
1970 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1971                                           int log2_cb_size, int ct_depth)
1972 {
1973     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1974     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1975     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1976     int y;
1977
1978     for (y = 0; y < length; y++)
1979         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1980                ct_depth, length);
1981 }
1982
1983 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1984                                   int log2_cb_size)
1985 {
1986     HEVCLocalContext *lc = &s->HEVClc;
1987     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1988     uint8_t prev_intra_luma_pred_flag[4];
1989     int split   = lc->cu.part_mode == PART_NxN;
1990     int pb_size = (1 << log2_cb_size) >> split;
1991     int side    = split + 1;
1992     int chroma_mode;
1993     int i, j;
1994
1995     for (i = 0; i < side; i++)
1996         for (j = 0; j < side; j++)
1997             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1998
1999     for (i = 0; i < side; i++) {
2000         for (j = 0; j < side; j++) {
2001             if (prev_intra_luma_pred_flag[2 * i + j])
2002                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2003             else
2004                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2005
2006             lc->pu.intra_pred_mode[2 * i + j] =
2007                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2008                                      prev_intra_luma_pred_flag[2 * i + j]);
2009         }
2010     }
2011
2012     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2013     if (chroma_mode != 4) {
2014         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2015             lc->pu.intra_pred_mode_c = 34;
2016         else
2017             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2018     } else {
2019         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2020     }
2021 }
2022
2023 static void intra_prediction_unit_default_value(HEVCContext *s,
2024                                                 int x0, int y0,
2025                                                 int log2_cb_size)
2026 {
2027     HEVCLocalContext *lc = &s->HEVClc;
2028     int pb_size          = 1 << log2_cb_size;
2029     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2030     int min_pu_width     = s->ps.sps->min_pu_width;
2031     MvField *tab_mvf     = s->ref->tab_mvf;
2032     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2033     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2034     int j, k;
2035
2036     if (size_in_pus == 0)
2037         size_in_pus = 1;
2038     for (j = 0; j < size_in_pus; j++) {
2039         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2040         for (k = 0; k < size_in_pus; k++)
2041             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2042     }
2043 }
2044
2045 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2046 {
2047     int cb_size          = 1 << log2_cb_size;
2048     HEVCLocalContext *lc = &s->HEVClc;
2049     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2050     int length           = cb_size >> log2_min_cb_size;
2051     int min_cb_width     = s->ps.sps->min_cb_width;
2052     int x_cb             = x0 >> log2_min_cb_size;
2053     int y_cb             = y0 >> log2_min_cb_size;
2054     int x, y, ret;
2055
2056     lc->cu.x                = x0;
2057     lc->cu.y                = y0;
2058     lc->cu.pred_mode        = MODE_INTRA;
2059     lc->cu.part_mode        = PART_2Nx2N;
2060     lc->cu.intra_split_flag = 0;
2061
2062     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2063     for (x = 0; x < 4; x++)
2064         lc->pu.intra_pred_mode[x] = 1;
2065     if (s->ps.pps->transquant_bypass_enable_flag) {
2066         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2067         if (lc->cu.cu_transquant_bypass_flag)
2068             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2069     } else
2070         lc->cu.cu_transquant_bypass_flag = 0;
2071
2072     if (s->sh.slice_type != HEVC_SLICE_I) {
2073         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2074
2075         x = y_cb * min_cb_width + x_cb;
2076         for (y = 0; y < length; y++) {
2077             memset(&s->skip_flag[x], skip_flag, length);
2078             x += min_cb_width;
2079         }
2080         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2081     }
2082
2083     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2084         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2085         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2086
2087         if (!s->sh.disable_deblocking_filter_flag)
2088             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2089     } else {
2090         int pcm_flag = 0;
2091
2092         if (s->sh.slice_type != HEVC_SLICE_I)
2093             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2094         if (lc->cu.pred_mode != MODE_INTRA ||
2095             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2096             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2097             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2098                                       lc->cu.pred_mode == MODE_INTRA;
2099         }
2100
2101         if (lc->cu.pred_mode == MODE_INTRA) {
2102             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2103                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2104                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2105                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2106             }
2107             if (pcm_flag) {
2108                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2109                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2110                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2111                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2112
2113                 if (ret < 0)
2114                     return ret;
2115             } else {
2116                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2117             }
2118         } else {
2119             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2120             switch (lc->cu.part_mode) {
2121             case PART_2Nx2N:
2122                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2123                 break;
2124             case PART_2NxN:
2125                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2126                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2127                 break;
2128             case PART_Nx2N:
2129                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2130                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2131                 break;
2132             case PART_2NxnU:
2133                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2134                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2135                 break;
2136             case PART_2NxnD:
2137                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2138                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2139                 break;
2140             case PART_nLx2N:
2141                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2142                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2143                 break;
2144             case PART_nRx2N:
2145                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2146                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2147                 break;
2148             case PART_NxN:
2149                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2150                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2151                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2152                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2153                 break;
2154             }
2155         }
2156
2157         if (!pcm_flag) {
2158             int rqt_root_cbf = 1;
2159
2160             if (lc->cu.pred_mode != MODE_INTRA &&
2161                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2162                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2163             }
2164             if (rqt_root_cbf) {
2165                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2166                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2167                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2168                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2169                                          log2_cb_size,
2170                                          log2_cb_size, 0, 0, 0, 0);
2171                 if (ret < 0)
2172                     return ret;
2173             } else {
2174                 if (!s->sh.disable_deblocking_filter_flag)
2175                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2176             }
2177         }
2178     }
2179
2180     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2181         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2182
2183     x = y_cb * min_cb_width + x_cb;
2184     for (y = 0; y < length; y++) {
2185         memset(&s->qp_y_tab[x], lc->qp_y, length);
2186         x += min_cb_width;
2187     }
2188
2189     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2190
2191     return 0;
2192 }
2193
2194 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2195                                int log2_cb_size, int cb_depth)
2196 {
2197     HEVCLocalContext *lc = &s->HEVClc;
2198     const int cb_size    = 1 << log2_cb_size;
2199     int split_cu;
2200
2201     lc->ct.depth = cb_depth;
2202     if (x0 + cb_size <= s->ps.sps->width  &&
2203         y0 + cb_size <= s->ps.sps->height &&
2204         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2205         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2206     } else {
2207         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2208     }
2209     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2210         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2211         lc->tu.is_cu_qp_delta_coded = 0;
2212         lc->tu.cu_qp_delta          = 0;
2213     }
2214
2215     if (split_cu) {
2216         const int cb_size_split = cb_size >> 1;
2217         const int x1 = x0 + cb_size_split;
2218         const int y1 = y0 + cb_size_split;
2219
2220         log2_cb_size--;
2221         cb_depth++;
2222
2223 #define SUBDIVIDE(x, y)                                                \
2224 do {                                                                   \
2225     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2226         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2227         if (ret < 0)                                                   \
2228             return ret;                                                \
2229     }                                                                  \
2230 } while (0)
2231
2232         SUBDIVIDE(x0, y0);
2233         SUBDIVIDE(x1, y0);
2234         SUBDIVIDE(x0, y1);
2235         SUBDIVIDE(x1, y1);
2236     } else {
2237         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2238         if (ret < 0)
2239             return ret;
2240     }
2241
2242     return 0;
2243 }
2244
2245 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2246                                  int ctb_addr_ts)
2247 {
2248     HEVCLocalContext *lc  = &s->HEVClc;
2249     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2250     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2251     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2252
2253     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2254
2255     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2256         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2257             lc->first_qp_group = 1;
2258         lc->end_of_tiles_x = s->ps.sps->width;
2259     } else if (s->ps.pps->tiles_enabled_flag) {
2260         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2261             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2262             lc->start_of_tiles_x = x_ctb;
2263             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2264             lc->first_qp_group   = 1;
2265         }
2266     } else {
2267         lc->end_of_tiles_x = s->ps.sps->width;
2268     }
2269
2270     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2271
2272     lc->boundary_flags = 0;
2273     if (s->ps.pps->tiles_enabled_flag) {
2274         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2275             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2276         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2277             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2278         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2279             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2280         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2281             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2282     } else {
2283         if (!ctb_addr_in_slice)
2284             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2285         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2286             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2287     }
2288
2289     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2290     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2291     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2292     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2293 }
2294
2295 static int hls_slice_data(HEVCContext *s)
2296 {
2297     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2298     int more_data   = 1;
2299     int x_ctb       = 0;
2300     int y_ctb       = 0;
2301     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2302     int ret;
2303
2304     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2305         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2306
2307         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2308         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2309         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2310
2311         ff_hevc_cabac_init(s, ctb_addr_ts);
2312
2313         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2314
2315         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2316         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2317         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2318
2319         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2320         if (ret < 0)
2321             return ret;
2322         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2323
2324         ctb_addr_ts++;
2325         ff_hevc_save_states(s, ctb_addr_ts);
2326         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2327     }
2328
2329     if (x_ctb + ctb_size >= s->ps.sps->width &&
2330         y_ctb + ctb_size >= s->ps.sps->height)
2331         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2332
2333     return ctb_addr_ts;
2334 }
2335
2336 static void restore_tqb_pixels(HEVCContext *s)
2337 {
2338     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2339     int x, y, c_idx;
2340
2341     for (c_idx = 0; c_idx < 3; c_idx++) {
2342         ptrdiff_t stride = s->frame->linesize[c_idx];
2343         int hshift       = s->ps.sps->hshift[c_idx];
2344         int vshift       = s->ps.sps->vshift[c_idx];
2345         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2346             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2347                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2348                     int n;
2349                     int len      = min_pu_size >> hshift;
2350                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2351                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2352                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2353                         memcpy(dst, src, len);
2354                         src += stride;
2355                         dst += stride;
2356                     }
2357                 }
2358             }
2359         }
2360     }
2361 }
2362
2363 static int set_side_data(HEVCContext *s)
2364 {
2365     AVFrame *out = s->ref->frame;
2366
2367     if (s->sei.frame_packing.present &&
2368         s->sei.frame_packing.arrangement_type >= 3 &&
2369         s->sei.frame_packing.arrangement_type <= 5 &&
2370         s->sei.frame_packing.content_interpretation_type > 0 &&
2371         s->sei.frame_packing.content_interpretation_type < 3) {
2372         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2373         if (!stereo)
2374             return AVERROR(ENOMEM);
2375
2376         switch (s->sei.frame_packing.arrangement_type) {
2377         case 3:
2378             if (s->sei.frame_packing.quincunx_subsampling)
2379                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2380             else
2381                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2382             break;
2383         case 4:
2384             stereo->type = AV_STEREO3D_TOPBOTTOM;
2385             break;
2386         case 5:
2387             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2388             break;
2389         }
2390
2391         if (s->sei.frame_packing.content_interpretation_type == 2)
2392             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2393     }
2394
2395     if (s->sei.display_orientation.present &&
2396         (s->sei.display_orientation.anticlockwise_rotation ||
2397          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2398         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2399         AVFrameSideData *rotation = av_frame_new_side_data(out,
2400                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2401                                                            sizeof(int32_t) * 9);
2402         if (!rotation)
2403             return AVERROR(ENOMEM);
2404
2405         av_display_rotation_set((int32_t *)rotation->data, angle);
2406         av_display_matrix_flip((int32_t *)rotation->data,
2407                                s->sei.display_orientation.hflip,
2408                                s->sei.display_orientation.vflip);
2409     }
2410
2411     if (s->sei.alternative_transfer.present &&
2412         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2413         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2414         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2415     }
2416
2417     return 0;
2418 }
2419
2420 static int hevc_frame_start(HEVCContext *s)
2421 {
2422     HEVCLocalContext *lc = &s->HEVClc;
2423     int ret;
2424
2425     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2426     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2427     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2428     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2429
2430     lc->start_of_tiles_x = 0;
2431     s->is_decoded        = 0;
2432     s->first_nal_type    = s->nal_unit_type;
2433
2434     if (s->ps.pps->tiles_enabled_flag)
2435         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2436
2437     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2438                               s->poc);
2439     if (ret < 0)
2440         goto fail;
2441
2442     ret = ff_hevc_frame_rps(s);
2443     if (ret < 0) {
2444         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2445         goto fail;
2446     }
2447
2448     s->ref->frame->key_frame = IS_IRAP(s);
2449
2450     ret = set_side_data(s);
2451     if (ret < 0)
2452         goto fail;
2453
2454     av_frame_unref(s->output_frame);
2455     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2456     if (ret < 0)
2457         goto fail;
2458
2459     ff_thread_finish_setup(s->avctx);
2460
2461     return 0;
2462
2463 fail:
2464     if (s->ref)
2465         ff_hevc_unref_frame(s, s->ref, ~0);
2466     s->ref = NULL;
2467     return ret;
2468 }
2469
2470 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2471 {
2472     HEVCLocalContext *lc = &s->HEVClc;
2473     GetBitContext *gb    = &lc->gb;
2474     int ctb_addr_ts, ret;
2475
2476     *gb              = nal->gb;
2477     s->nal_unit_type = nal->type;
2478     s->temporal_id   = nal->temporal_id;
2479
2480     switch (s->nal_unit_type) {
2481     case HEVC_NAL_VPS:
2482         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2483         if (ret < 0)
2484             goto fail;
2485         break;
2486     case HEVC_NAL_SPS:
2487         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2488                                      s->apply_defdispwin);
2489         if (ret < 0)
2490             goto fail;
2491         break;
2492     case HEVC_NAL_PPS:
2493         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2494         if (ret < 0)
2495             goto fail;
2496         break;
2497     case HEVC_NAL_SEI_PREFIX:
2498     case HEVC_NAL_SEI_SUFFIX:
2499         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei,
2500                                      s->nal_unit_type);
2501         if (ret < 0)
2502             goto fail;
2503         break;
2504     case HEVC_NAL_TRAIL_R:
2505     case HEVC_NAL_TRAIL_N:
2506     case HEVC_NAL_TSA_N:
2507     case HEVC_NAL_TSA_R:
2508     case HEVC_NAL_STSA_N:
2509     case HEVC_NAL_STSA_R:
2510     case HEVC_NAL_BLA_W_LP:
2511     case HEVC_NAL_BLA_W_RADL:
2512     case HEVC_NAL_BLA_N_LP:
2513     case HEVC_NAL_IDR_W_RADL:
2514     case HEVC_NAL_IDR_N_LP:
2515     case HEVC_NAL_CRA_NUT:
2516     case HEVC_NAL_RADL_N:
2517     case HEVC_NAL_RADL_R:
2518     case HEVC_NAL_RASL_N:
2519     case HEVC_NAL_RASL_R:
2520         ret = hls_slice_header(s);
2521         if (ret < 0)
2522             return ret;
2523
2524         if (s->max_ra == INT_MAX) {
2525             if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2526                 s->max_ra = s->poc;
2527             } else {
2528                 if (IS_IDR(s))
2529                     s->max_ra = INT_MIN;
2530             }
2531         }
2532
2533         if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2534             s->poc <= s->max_ra) {
2535             s->is_decoded = 0;
2536             break;
2537         } else {
2538             if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2539                 s->max_ra = INT_MIN;
2540         }
2541
2542         if (s->sh.first_slice_in_pic_flag) {
2543             ret = hevc_frame_start(s);
2544             if (ret < 0)
2545                 return ret;
2546         } else if (!s->ref) {
2547             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2548             goto fail;
2549         }
2550
2551         if (s->nal_unit_type != s->first_nal_type) {
2552             av_log(s->avctx, AV_LOG_ERROR,
2553                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2554                    s->first_nal_type, s->nal_unit_type);
2555             return AVERROR_INVALIDDATA;
2556         }
2557
2558         if (!s->sh.dependent_slice_segment_flag &&
2559             s->sh.slice_type != HEVC_SLICE_I) {
2560             ret = ff_hevc_slice_rpl(s);
2561             if (ret < 0) {
2562                 av_log(s->avctx, AV_LOG_WARNING,
2563                        "Error constructing the reference lists for the current slice.\n");
2564                 goto fail;
2565             }
2566         }
2567
2568         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2569             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2570             if (ret < 0)
2571                 goto fail;
2572         }
2573
2574         if (s->avctx->hwaccel) {
2575             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2576             if (ret < 0)
2577                 goto fail;
2578         } else {
2579             ctb_addr_ts = hls_slice_data(s);
2580             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2581                 s->is_decoded = 1;
2582                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2583                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2584                     s->ps.sps->sao_enabled)
2585                     restore_tqb_pixels(s);
2586             }
2587
2588             if (ctb_addr_ts < 0) {
2589                 ret = ctb_addr_ts;
2590                 goto fail;
2591             }
2592         }
2593         break;
2594     case HEVC_NAL_EOS_NUT:
2595     case HEVC_NAL_EOB_NUT:
2596         s->seq_decode = (s->seq_decode + 1) & 0xff;
2597         s->max_ra     = INT_MAX;
2598         break;
2599     case HEVC_NAL_AUD:
2600     case HEVC_NAL_FD_NUT:
2601         break;
2602     default:
2603         av_log(s->avctx, AV_LOG_INFO,
2604                "Skipping NAL unit %d\n", s->nal_unit_type);
2605     }
2606
2607     return 0;
2608 fail:
2609     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2610         return ret;
2611     return 0;
2612 }
2613
2614 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2615 {
2616     int i, ret = 0;
2617
2618     s->ref = NULL;
2619     s->eos = 0;
2620
2621     /* split the input packet into NAL units, so we know the upper bound on the
2622      * number of slices in the frame */
2623     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2624                                 s->nal_length_size, s->avctx->codec_id);
2625     if (ret < 0) {
2626         av_log(s->avctx, AV_LOG_ERROR,
2627                "Error splitting the input into NAL units.\n");
2628         return ret;
2629     }
2630
2631     for (i = 0; i < s->pkt.nb_nals; i++) {
2632         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
2633             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT)
2634             s->eos = 1;
2635     }
2636
2637     /* decode the NAL units */
2638     for (i = 0; i < s->pkt.nb_nals; i++) {
2639         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2640         if (ret < 0) {
2641             av_log(s->avctx, AV_LOG_WARNING,
2642                    "Error parsing NAL unit #%d.\n", i);
2643             goto fail;
2644         }
2645     }
2646
2647 fail:
2648     if (s->ref)
2649         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2650
2651     return ret;
2652 }
2653
/** Log the 16 bytes of an MD5 digest as lowercase hex at the given level. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2660
2661 static int verify_md5(HEVCContext *s, AVFrame *frame)
2662 {
2663     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2664     int pixel_shift;
2665     int i, j;
2666
2667     if (!desc)
2668         return AVERROR(EINVAL);
2669
2670     pixel_shift = desc->comp[0].depth > 8;
2671
2672     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2673            s->poc);
2674
2675     /* the checksums are LE, so we have to byteswap for >8bpp formats
2676      * on BE arches */
2677 #if HAVE_BIGENDIAN
2678     if (pixel_shift && !s->checksum_buf) {
2679         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2680                        FFMAX3(frame->linesize[0], frame->linesize[1],
2681                               frame->linesize[2]));
2682         if (!s->checksum_buf)
2683             return AVERROR(ENOMEM);
2684     }
2685 #endif
2686
2687     for (i = 0; frame->data[i]; i++) {
2688         int width  = s->avctx->coded_width;
2689         int height = s->avctx->coded_height;
2690         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2691         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2692         uint8_t md5[16];
2693
2694         av_md5_init(s->md5_ctx);
2695         for (j = 0; j < h; j++) {
2696             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2697 #if HAVE_BIGENDIAN
2698             if (pixel_shift) {
2699                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2700                                     (const uint16_t *) src, w);
2701                 src = s->checksum_buf;
2702             }
2703 #endif
2704             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2705         }
2706         av_md5_final(s->md5_ctx, md5);
2707
2708         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
2709             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2710             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2711             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2712         } else {
2713             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2714             print_md5(s->avctx, AV_LOG_ERROR, md5);
2715             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2716             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
2717             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2718             return AVERROR_INVALIDDATA;
2719         }
2720     }
2721
2722     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2723
2724     return 0;
2725 }
2726
2727 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length)
2728 {
2729     AVCodecContext *avctx = s->avctx;
2730     GetByteContext gb;
2731     int ret, i;
2732
2733     bytestream2_init(&gb, buf, length);
2734
2735     if (length > 3 && (buf[0] || buf[1] || buf[2] > 1)) {
2736         /* It seems the extradata is encoded as hvcC format.
2737          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2738          * is finalized. When finalized, configurationVersion will be 1 and we
2739          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2740         int i, j, num_arrays, nal_len_size;
2741
2742         s->is_nalff = 1;
2743
2744         bytestream2_skip(&gb, 21);
2745         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2746         num_arrays   = bytestream2_get_byte(&gb);
2747
2748         /* nal units in the hvcC always have length coded with 2 bytes,
2749          * so put a fake nal_length_size = 2 while parsing them */
2750         s->nal_length_size = 2;
2751
2752         /* Decode nal units from hvcC. */
2753         for (i = 0; i < num_arrays; i++) {
2754             int type = bytestream2_get_byte(&gb) & 0x3f;
2755             int cnt  = bytestream2_get_be16(&gb);
2756
2757             for (j = 0; j < cnt; j++) {
2758                 // +2 for the nal size field
2759                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2760                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2761                     av_log(s->avctx, AV_LOG_ERROR,
2762                            "Invalid NAL unit size in extradata.\n");
2763                     return AVERROR_INVALIDDATA;
2764                 }
2765
2766                 ret = decode_nal_units(s, gb.buffer, nalsize);
2767                 if (ret < 0) {
2768                     av_log(avctx, AV_LOG_ERROR,
2769                            "Decoding nal unit %d %d from hvcC failed\n",
2770                            type, i);
2771                     return ret;
2772                 }
2773                 bytestream2_skip(&gb, nalsize);
2774             }
2775         }
2776
2777         /* Now store right nal length size, that will be used to parse
2778          * all other nals */
2779         s->nal_length_size = nal_len_size;
2780     } else {
2781         s->is_nalff = 0;
2782         ret = decode_nal_units(s, buf, length);
2783         if (ret < 0)
2784             return ret;
2785     }
2786
2787     /* export stream parameters from the first SPS */
2788     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2789         if (s->ps.sps_list[i]) {
2790             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2791             export_stream_params(s->avctx, &s->ps, sps);
2792             break;
2793         }
2794     }
2795
2796     return 0;
2797 }
2798
2799 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2800                              AVPacket *avpkt)
2801 {
2802     int ret;
2803     int new_extradata_size;
2804     uint8_t *new_extradata;
2805     HEVCContext *s = avctx->priv_data;
2806
2807     if (!avpkt->size) {
2808         ret = ff_hevc_output_frame(s, data, 1);
2809         if (ret < 0)
2810             return ret;
2811
2812         *got_output = ret;
2813         return 0;
2814     }
2815
2816     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
2817                                             &new_extradata_size);
2818     if (new_extradata && new_extradata_size > 0) {
2819         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size);
2820         if (ret < 0)
2821             return ret;
2822     }
2823
2824     s->ref = NULL;
2825     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2826     if (ret < 0)
2827         return ret;
2828
2829     if (avctx->hwaccel) {
2830         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2831             av_log(avctx, AV_LOG_ERROR,
2832                    "hardware accelerator failed to decode picture\n");
2833     } else {
2834         /* verify the SEI checksum */
2835         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2836             s->sei.picture_hash.is_md5) {
2837             ret = verify_md5(s, s->ref->frame);
2838             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2839                 ff_hevc_unref_frame(s, s->ref, ~0);
2840                 return ret;
2841             }
2842         }
2843     }
2844     s->sei.picture_hash.is_md5 = 0;
2845
2846     if (s->is_decoded) {
2847         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2848         s->is_decoded = 0;
2849     }
2850
2851     if (s->output_frame->buf[0]) {
2852         av_frame_move_ref(data, s->output_frame);
2853         *got_output = 1;
2854     }
2855
2856     return avpkt->size;
2857 }
2858
2859 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2860 {
2861     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2862     if (ret < 0)
2863         return ret;
2864
2865     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2866     if (!dst->tab_mvf_buf)
2867         goto fail;
2868     dst->tab_mvf = src->tab_mvf;
2869
2870     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2871     if (!dst->rpl_tab_buf)
2872         goto fail;
2873     dst->rpl_tab = src->rpl_tab;
2874
2875     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2876     if (!dst->rpl_buf)
2877         goto fail;
2878
2879     dst->poc        = src->poc;
2880     dst->ctb_count  = src->ctb_count;
2881     dst->flags      = src->flags;
2882     dst->sequence   = src->sequence;
2883
2884     if (src->hwaccel_picture_private) {
2885         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2886         if (!dst->hwaccel_priv_buf)
2887             goto fail;
2888         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2889     }
2890
2891     return 0;
2892 fail:
2893     ff_hevc_unref_frame(s, dst, ~0);
2894     return AVERROR(ENOMEM);
2895 }
2896
2897 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2898 {
2899     HEVCContext       *s = avctx->priv_data;
2900     int i;
2901
2902     pic_arrays_free(s);
2903
2904     av_freep(&s->md5_ctx);
2905
2906     av_frame_free(&s->tmp_frame);
2907     av_frame_free(&s->output_frame);
2908
2909     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2910         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2911         av_frame_free(&s->DPB[i].frame);
2912     }
2913
2914     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2915         av_buffer_unref(&s->ps.vps_list[i]);
2916     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2917         av_buffer_unref(&s->ps.sps_list[i]);
2918     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2919         av_buffer_unref(&s->ps.pps_list[i]);
2920
2921     ff_h2645_packet_uninit(&s->pkt);
2922
2923     return 0;
2924 }
2925
2926 static av_cold int hevc_init_context(AVCodecContext *avctx)
2927 {
2928     HEVCContext *s = avctx->priv_data;
2929     int i;
2930
2931     s->avctx = avctx;
2932
2933     s->tmp_frame = av_frame_alloc();
2934     if (!s->tmp_frame)
2935         goto fail;
2936
2937     s->output_frame = av_frame_alloc();
2938     if (!s->output_frame)
2939         goto fail;
2940
2941     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2942         s->DPB[i].frame = av_frame_alloc();
2943         if (!s->DPB[i].frame)
2944             goto fail;
2945         s->DPB[i].tf.f = s->DPB[i].frame;
2946     }
2947
2948     s->max_ra = INT_MAX;
2949
2950     s->md5_ctx = av_md5_alloc();
2951     if (!s->md5_ctx)
2952         goto fail;
2953
2954     ff_bswapdsp_init(&s->bdsp);
2955
2956     s->context_initialized = 1;
2957
2958     return 0;
2959
2960 fail:
2961     hevc_decode_free(avctx);
2962     return AVERROR(ENOMEM);
2963 }
2964
2965 static int hevc_update_thread_context(AVCodecContext *dst,
2966                                       const AVCodecContext *src)
2967 {
2968     HEVCContext *s  = dst->priv_data;
2969     HEVCContext *s0 = src->priv_data;
2970     int i, ret;
2971
2972     if (!s->context_initialized) {
2973         ret = hevc_init_context(dst);
2974         if (ret < 0)
2975             return ret;
2976     }
2977
2978     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2979         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2980         if (s0->DPB[i].frame->buf[0]) {
2981             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2982             if (ret < 0)
2983                 return ret;
2984         }
2985     }
2986
2987     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2988         av_buffer_unref(&s->ps.vps_list[i]);
2989         if (s0->ps.vps_list[i]) {
2990             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2991             if (!s->ps.vps_list[i])
2992                 return AVERROR(ENOMEM);
2993         }
2994     }
2995
2996     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2997         av_buffer_unref(&s->ps.sps_list[i]);
2998         if (s0->ps.sps_list[i]) {
2999             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3000             if (!s->ps.sps_list[i])
3001                 return AVERROR(ENOMEM);
3002         }
3003     }
3004
3005     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3006         av_buffer_unref(&s->ps.pps_list[i]);
3007         if (s0->ps.pps_list[i]) {
3008             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3009             if (!s->ps.pps_list[i])
3010                 return AVERROR(ENOMEM);
3011         }
3012     }
3013
3014     if (s->ps.sps != s0->ps.sps)
3015         ret = set_sps(s, s0->ps.sps, src->pix_fmt);
3016
3017     s->seq_decode = s0->seq_decode;
3018     s->seq_output = s0->seq_output;
3019     s->pocTid0    = s0->pocTid0;
3020     s->max_ra     = s0->max_ra;
3021
3022     s->is_nalff        = s0->is_nalff;
3023     s->nal_length_size = s0->nal_length_size;
3024
3025     if (s0->eos) {
3026         s->seq_decode = (s->seq_decode + 1) & 0xff;
3027         s->max_ra = INT_MAX;
3028     }
3029
3030     return 0;
3031 }
3032
3033 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3034 {
3035     HEVCContext *s = avctx->priv_data;
3036     int ret;
3037
3038     avctx->internal->allocate_progress = 1;
3039
3040     ret = hevc_init_context(avctx);
3041     if (ret < 0)
3042         return ret;
3043
3044     if (avctx->extradata_size > 0 && avctx->extradata) {
3045         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size);
3046         if (ret < 0) {
3047             hevc_decode_free(avctx);
3048             return ret;
3049         }
3050     }
3051
3052     return 0;
3053 }
3054
3055 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3056 {
3057     HEVCContext *s = avctx->priv_data;
3058     int ret;
3059
3060     memset(s, 0, sizeof(*s));
3061
3062     ret = hevc_init_context(avctx);
3063     if (ret < 0)
3064         return ret;
3065
3066     return 0;
3067 }
3068
3069 static void hevc_decode_flush(AVCodecContext *avctx)
3070 {
3071     HEVCContext *s = avctx->priv_data;
3072     ff_hevc_flush_dpb(s);
3073     s->max_ra = INT_MAX;
3074 }
3075
3076 #define OFFSET(x) offsetof(HEVCContext, x)
3077 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3078
3079 static const AVOption options[] = {
3080     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3081         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3082     { NULL },
3083 };
3084
3085 static const AVClass hevc_decoder_class = {
3086     .class_name = "HEVC decoder",
3087     .item_name  = av_default_item_name,
3088     .option     = options,
3089     .version    = LIBAVUTIL_VERSION_INT,
3090 };
3091
3092 AVCodec ff_hevc_decoder = {
3093     .name                  = "hevc",
3094     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3095     .type                  = AVMEDIA_TYPE_VIDEO,
3096     .id                    = AV_CODEC_ID_HEVC,
3097     .priv_data_size        = sizeof(HEVCContext),
3098     .priv_class            = &hevc_decoder_class,
3099     .init                  = hevc_decode_init,
3100     .close                 = hevc_decode_free,
3101     .decode                = hevc_decode_frame,
3102     .flush                 = hevc_decode_flush,
3103     .update_thread_context = hevc_update_thread_context,
3104     .init_thread_copy      = hevc_init_thread_copy,
3105     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3106                              AV_CODEC_CAP_FRAME_THREADS,
3107     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3108     .caps_internal         = FF_CODEC_CAP_EXPORTS_CROPPING | FF_CODEC_CAP_INIT_THREADSAFE,
3109 };