4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Lookup table mapping a prediction-block width (index 2..64) to a compact
 * 0..9 index.  Only the widths listed below are meaningful; all other
 * entries default to 0.  Presumably used to select per-width DSP routines
 * for weighted prediction — confirm against the callers. */
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
/* Per-CTB filter-parameter arrays. */
57 av_freep(&s->deblock);
/* Per-CB coding-tree tables. */
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
/* Per-PU / per-TU tables (intra modes, luma CBF). */
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
/* Tables indexed by CTB address. */
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
/* Deblocking boundary-strength maps. */
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
/* Slice-header entry-point bookkeeping (WPP/tiles). */
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
/* av_freep()/av_buffer_pool_uninit() NULL their argument, so this
 * function is safe to call repeatedly and on partially-initialized state. */
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
/* Number of minimum-CB positions, with one extra row/column of padding. */
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* Boundary-strength grid has 4-pixel granularity. */
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
/* NOTE(review): the bodies of the failure checks below are not visible in
 * this view; presumably each jumps to a shared cleanup path that frees the
 * partial allocations and ends in the AVERROR(ENOMEM) return at the
 * bottom — confirm against the full source. */
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
/* Pools for per-frame MV fields and reference-picture-list tables; these
 * are recycled across frames instead of being reallocated each time. */
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
/* Parse the weighted-prediction table from the slice header
 * (pred_weight_table() syntax, H.265 section 7.3.6.3) into s->sh.
 * Fills luma/chroma weights and offsets for list L0, and for L1 when the
 * slice is a B slice. */
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
/* Out-of-range denominators are reported but then clamped rather than
 * rejected, so parsing continues. */
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
/* Chroma denominator is coded as a delta against the luma one. */
152 if (s->ps.sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* L0: per-reference luma weight flags; absent weights get the neutral
 * value (1 << denom) and zero offset. */
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
/* L0: chroma weight flags (only coded when the stream has chroma). */
164 if (s->ps.sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
/* L0: explicit weights/offsets where flagged, neutral values otherwise. */
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
/* Chroma offset is reconstructed per the spec formula and clipped to
 * the valid [-128, 127] range. */
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
/* B slices: repeat the whole procedure for reference list L1. */
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->ps.sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header into
 * *rps.  Entries may come from the SPS-level LT-RPS list (nb_sps of them)
 * or be coded explicitly in the slice header (nb_sh of them).
 * Returns 0 on success or AVERROR_INVALIDDATA if the entry count exceeds
 * the rps->poc array. */
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->ps.sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
/* Nothing to do when the SPS does not signal long-term pictures. */
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
/* uint64_t addition avoids overflow when validating the combined count. */
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
256 uint8_t lt_idx_sps = 0;
/* SPS-sourced entry: an index into the SPS LT-RPS tables (only coded
 * when there is more than one candidate). */
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* Slice-coded entry: POC LSBs and used-by-current flag read directly. */
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
/* Optional MSB cycle: deltas are cumulative within each of the two
 * groups (SPS entries and slice entries), hence the i != nb_sps reset. */
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
/* Copy stream-level parameters (dimensions, profile/level, color info,
 * frame rate) from the active SPS/VPS into the AVCodecContext so they are
 * visible to the API user. */
283 static void export_stream_params(AVCodecContext *avctx,
284 const HEVCContext *s, const HEVCSPS *sps)
286 const HEVCVPS *vps = (const HEVCVPS*)s->ps.vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
/* Output (cropped) size may be smaller than the coded size. */
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
/* VUI color signalling, with conservative defaults when absent. */
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* Timing info: VPS takes precedence over SPS VUI. */
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
/* num/den are tick duration, so they land swapped in framerate. */
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* Activate an SPS: export stream parameters, (re)allocate the per-frame
 * arrays, negotiate the output pixel format (including hwaccel formats),
 * initialize the DSP contexts and the SAO line buffers.
 * pix_fmt == AV_PIX_FMT_NONE means the format has not been chosen yet and
 * ff_thread_get_format() is consulted. */
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
/* +2: one slot for the software format, one for the AV_PIX_FMT_NONE end marker. */
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
335 export_stream_params(s->avctx, s, sps);
338 ret = pic_arrays_init(s, sps);
/* Hardware acceleration is only offered for 8-bit 4:2:0 content. */
342 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
343 #if CONFIG_HEVC_DXVA2_HWACCEL
344 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
346 #if CONFIG_HEVC_D3D11VA_HWACCEL
347 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
349 #if CONFIG_HEVC_VDPAU_HWACCEL
350 *fmt++ = AV_PIX_FMT_VDPAU;
354 if (pix_fmt == AV_PIX_FMT_NONE) {
355 *fmt++ = sps->pix_fmt;
356 *fmt = AV_PIX_FMT_NONE;
358 ret = ff_thread_get_format(s->avctx, pix_fmts);
361 s->avctx->pix_fmt = ret;
/* Caller already decided on a format; just adopt it. */
364 s->avctx->pix_fmt = pix_fmt;
/* Bit-depth-dependent DSP/prediction function tables. */
367 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
368 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
369 ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* Drop any SAO line buffers from a previous SPS before reallocating. */
371 for (i = 0; i < 3; i++) {
372 av_freep(&s->sao_pixel_buffer_h[i]);
373 av_freep(&s->sao_pixel_buffer_v[i]);
376 if (sps->sao_enabled && !s->avctx->hwaccel) {
377 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
380 for(c_idx = 0; c_idx < c_count; c_idx++) {
381 int w = sps->width >> sps->hshift[c_idx];
382 int h = sps->height >> sps->vshift[c_idx];
/* Two saved lines/columns per CTB row/column, scaled by pixel size. */
383 s->sao_pixel_buffer_h[c_idx] =
384 av_malloc((w * 2 * sps->ctb_height) <<
386 s->sao_pixel_buffer_v[c_idx] =
387 av_malloc((h * 2 * sps->ctb_width) <<
393 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse the slice segment header (H.265 section 7.3.6.1) from the current
 * NAL unit into s->sh, activating the referenced PPS/SPS as needed.
 * Returns 0 on success or AVERROR_INVALIDDATA on malformed input. */
403 static int hls_slice_header(HEVCContext *s)
405 GetBitContext *gb = &s->HEVClc->gb;
406 SliceHeader *sh = &s->sh;
/* --- Coded picture boundary / parameter set activation --- */
410 sh->first_slice_in_pic_flag = get_bits1(gb);
/* First slice of an IDR/BLA picture starts a new decode sequence and
 * flushes the reference picture buffer. */
411 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
412 s->seq_decode = (s->seq_decode + 1) & 0xff;
415 ff_hevc_clear_refs(s);
417 sh->no_output_of_prior_pics_flag = 0;
419 sh->no_output_of_prior_pics_flag = get_bits1(gb);
421 sh->pps_id = get_ue_golomb_long(gb);
422 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
423 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
424 return AVERROR_INVALIDDATA;
/* All slices of one picture must reference the same PPS. */
426 if (!sh->first_slice_in_pic_flag &&
427 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
428 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
429 return AVERROR_INVALIDDATA;
431 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
/* CRA after end-of-stream behaves like a broken-link access point. */
432 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
433 sh->no_output_of_prior_pics_flag = 1;
/* SPS change: re-run set_sps() and bump the decode sequence counter. */
435 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
436 const HEVCSPS* last_sps = s->ps.sps;
437 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
438 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
439 if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
440 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
441 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
442 sh->no_output_of_prior_pics_flag = 0;
444 ff_hevc_clear_refs(s);
445 ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
449 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* --- Slice segment address --- */
453 sh->dependent_slice_segment_flag = 0;
454 if (!sh->first_slice_in_pic_flag) {
455 int slice_address_length;
457 if (s->ps.pps->dependent_slice_segments_enabled_flag)
458 sh->dependent_slice_segment_flag = get_bits1(gb);
/* Address is coded with just enough bits to cover all CTBs. */
460 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
461 s->ps.sps->ctb_height);
462 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
463 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
464 av_log(s->avctx, AV_LOG_ERROR,
465 "Invalid slice segment address: %u.\n",
466 sh->slice_segment_addr);
467 return AVERROR_INVALIDDATA;
470 if (!sh->dependent_slice_segment_flag) {
471 sh->slice_addr = sh->slice_segment_addr;
475 sh->slice_segment_addr = sh->slice_addr = 0;
477 s->slice_initialized = 0;
/* --- Independent slice segment: full header follows --- */
480 if (!sh->dependent_slice_segment_flag) {
481 s->slice_initialized = 0;
483 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
484 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
486 sh->slice_type = get_ue_golomb_long(gb);
487 if (!(sh->slice_type == I_SLICE ||
488 sh->slice_type == P_SLICE ||
489 sh->slice_type == B_SLICE)) {
490 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
492 return AVERROR_INVALIDDATA;
/* IRAP pictures may only contain intra slices. */
494 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
495 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
496 return AVERROR_INVALIDDATA;
499 // when flag is not present, picture is inferred to be output
500 sh->pic_output_flag = 1;
501 if (s->ps.pps->output_flag_present_flag)
502 sh->pic_output_flag = get_bits1(gb);
504 if (s->ps.sps->separate_colour_plane_flag)
505 sh->colour_plane_id = get_bits(gb, 2);
/* --- POC and reference picture sets --- */
510 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
511 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
/* POC changing mid-picture is only fatal with AV_EF_EXPLODE. */
512 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
513 av_log(s->avctx, AV_LOG_WARNING,
514 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
515 if (s->avctx->err_recognition & AV_EF_EXPLODE)
516 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded inline in the slice header or selected
 * by index from the SPS list; pos bookkeeping records its bit size. */
521 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
522 pos = get_bits_left(gb);
523 if (!sh->short_term_ref_pic_set_sps_flag) {
524 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
528 sh->short_term_rps = &sh->slice_rps;
530 int numbits, rps_idx;
532 if (!s->ps.sps->nb_st_rps) {
533 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
534 return AVERROR_INVALIDDATA;
537 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
538 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
539 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
541 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
/* Long-term RPS; again only fatal with AV_EF_EXPLODE. */
543 pos = get_bits_left(gb);
544 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
546 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
547 if (s->avctx->err_recognition & AV_EF_EXPLODE)
548 return AVERROR_INVALIDDATA;
550 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
552 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
553 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
555 sh->slice_temporal_mvp_enabled_flag = 0;
557 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types are excluded from this check. */
562 if (s->temporal_id == 0 &&
563 s->nal_unit_type != NAL_TRAIL_N &&
564 s->nal_unit_type != NAL_TSA_N &&
565 s->nal_unit_type != NAL_STSA_N &&
566 s->nal_unit_type != NAL_RADL_N &&
567 s->nal_unit_type != NAL_RADL_R &&
568 s->nal_unit_type != NAL_RASL_N &&
569 s->nal_unit_type != NAL_RASL_R)
/* --- SAO flags --- */
572 if (s->ps.sps->sao_enabled) {
573 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
/* One flag covers both chroma planes. */
574 if (s->ps.sps->chroma_format_idc) {
575 sh->slice_sample_adaptive_offset_flag[1] =
576 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
579 sh->slice_sample_adaptive_offset_flag[0] = 0;
580 sh->slice_sample_adaptive_offset_flag[1] = 0;
581 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* --- Reference list sizes and modification --- */
584 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
585 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
588 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
589 if (sh->slice_type == B_SLICE)
590 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
592 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
593 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
594 if (sh->slice_type == B_SLICE)
595 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
597 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
598 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
599 sh->nb_refs[L0], sh->nb_refs[L1]);
600 return AVERROR_INVALIDDATA;
603 sh->rpl_modification_flag[0] = 0;
604 sh->rpl_modification_flag[1] = 0;
605 nb_refs = ff_hevc_frame_nb_refs(s);
607 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
608 return AVERROR_INVALIDDATA;
/* Explicit reference-list reordering. */
611 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
612 sh->rpl_modification_flag[0] = get_bits1(gb);
613 if (sh->rpl_modification_flag[0]) {
614 for (i = 0; i < sh->nb_refs[L0]; i++)
615 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
618 if (sh->slice_type == B_SLICE) {
619 sh->rpl_modification_flag[1] = get_bits1(gb);
620 if (sh->rpl_modification_flag[1] == 1)
621 for (i = 0; i < sh->nb_refs[L1]; i++)
622 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
626 if (sh->slice_type == B_SLICE)
627 sh->mvd_l1_zero_flag = get_bits1(gb);
629 if (s->ps.pps->cabac_init_present_flag)
630 sh->cabac_init_flag = get_bits1(gb);
632 sh->cabac_init_flag = 0;
/* --- Temporal MVP collocated picture --- */
634 sh->collocated_ref_idx = 0;
635 if (sh->slice_temporal_mvp_enabled_flag) {
636 sh->collocated_list = L0;
637 if (sh->slice_type == B_SLICE)
638 sh->collocated_list = !get_bits1(gb);
640 if (sh->nb_refs[sh->collocated_list] > 1) {
641 sh->collocated_ref_idx = get_ue_golomb_long(gb);
642 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
643 av_log(s->avctx, AV_LOG_ERROR,
644 "Invalid collocated_ref_idx: %d.\n",
645 sh->collocated_ref_idx);
646 return AVERROR_INVALIDDATA;
/* Weighted prediction tables, when enabled by the PPS for this slice type. */
651 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
652 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
653 pred_weight_table(s, gb);
/* Merge candidate count is coded as 5 minus the value. */
656 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
657 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
658 av_log(s->avctx, AV_LOG_ERROR,
659 "Invalid number of merging MVP candidates: %d.\n",
660 sh->max_num_merge_cand);
661 return AVERROR_INVALIDDATA;
/* --- QP deltas and deblocking control --- */
665 sh->slice_qp_delta = get_se_golomb(gb);
667 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
668 sh->slice_cb_qp_offset = get_se_golomb(gb);
669 sh->slice_cr_qp_offset = get_se_golomb(gb);
671 sh->slice_cb_qp_offset = 0;
672 sh->slice_cr_qp_offset = 0;
675 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
676 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
678 sh->cu_chroma_qp_offset_enabled_flag = 0;
680 if (s->ps.pps->deblocking_filter_control_present_flag) {
681 int deblocking_filter_override_flag = 0;
683 if (s->ps.pps->deblocking_filter_override_enabled_flag)
684 deblocking_filter_override_flag = get_bits1(gb);
686 if (deblocking_filter_override_flag) {
687 sh->disable_deblocking_filter_flag = get_bits1(gb);
688 if (!sh->disable_deblocking_filter_flag) {
/* Offsets are signalled divided by two. */
689 sh->beta_offset = get_se_golomb(gb) * 2;
690 sh->tc_offset = get_se_golomb(gb) * 2;
693 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
694 sh->beta_offset = s->ps.pps->beta_offset;
695 sh->tc_offset = s->ps.pps->tc_offset;
698 sh->disable_deblocking_filter_flag = 0;
703 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
704 (sh->slice_sample_adaptive_offset_flag[0] ||
705 sh->slice_sample_adaptive_offset_flag[1] ||
706 !sh->disable_deblocking_filter_flag)) {
707 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
709 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
/* Dependent slice segment without a preceding independent one is invalid. */
711 } else if (!s->slice_initialized) {
712 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
713 return AVERROR_INVALIDDATA;
/* --- Entry points (tiles / wavefront parallel processing) --- */
716 sh->num_entry_point_offsets = 0;
717 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
718 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
719 // It would be possible to bound this tighter but this here is simpler
720 if (num_entry_point_offsets > get_bits_left(gb)) {
721 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
722 return AVERROR_INVALIDDATA;
725 sh->num_entry_point_offsets = num_entry_point_offsets;
726 if (sh->num_entry_point_offsets > 0) {
727 int offset_len = get_ue_golomb_long(gb) + 1;
729 if (offset_len < 1 || offset_len > 32) {
730 sh->num_entry_point_offsets = 0;
731 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
732 return AVERROR_INVALIDDATA;
/* Free any arrays from a previous slice before reallocating. */
735 av_freep(&sh->entry_point_offset);
736 av_freep(&sh->offset);
738 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
739 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
740 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
741 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
742 sh->num_entry_point_offsets = 0;
743 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
744 return AVERROR(ENOMEM);
746 for (i = 0; i < sh->num_entry_point_offsets; i++) {
747 unsigned val = get_bits_long(gb, offset_len);
748 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
/* Parallel tile decoding is not implemented; fall back to one thread. */
750 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
751 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
752 s->threads_number = 1;
754 s->enable_parallel_tiles = 0;
756 s->enable_parallel_tiles = 0;
/* Slice header extension: length-checked, then skipped. */
759 if (s->ps.pps->slice_header_extension_present_flag) {
760 unsigned int length = get_ue_golomb_long(gb);
761 if (length*8LL > get_bits_left(gb)) {
762 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
763 return AVERROR_INVALIDDATA;
765 for (i = 0; i < length; i++)
766 skip_bits(gb, 8); // slice_header_extension_data_byte
769 // Inferred parameters
/* 26U forces unsigned arithmetic here; the result is range-checked below. */
770 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771 if (sh->slice_qp > 51 ||
772 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
773 av_log(s->avctx, AV_LOG_ERROR,
774 "The slice_qp %d is outside the valid range "
777 -s->ps.sps->qp_bd_offset);
778 return AVERROR_INVALIDDATA;
781 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
/* The first segment of a picture cannot be a dependent segment. */
783 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785 return AVERROR_INVALIDDATA;
788 if (get_bits_left(gb) < 0) {
789 av_log(s->avctx, AV_LOG_ERROR,
790 "Overread slice header by %d bits\n", -get_bits_left(gb));
791 return AVERROR_INVALIDDATA;
794 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
796 if (!s->ps.pps->cu_qp_delta_enabled_flag)
797 s->HEVClc->qp_y = s->sh.slice_qp;
799 s->slice_initialized = 1;
800 s->HEVClc->tu.cu_qp_offset_cb = 0;
801 s->HEVClc->tu.cu_qp_offset_cr = 0;
/* Address one CTB's entry in a raster-ordered per-CTB table; relies on a
 * variable 's' (the HEVCContext) being in scope at the expansion site. */
806 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Set one SAO syntax element for the current CTB, honoring the SAO merge
 * flags: copy from the left or above neighbour when merged, otherwise use
 * the freshly decoded 'value'.  Relies on sao, s, rx, ry and the two
 * sao_merge_*_flag locals being in scope at the expansion site. */
808 #define SET_SAO(elem, value) \
810 if (!sao_merge_up_flag && !sao_merge_left_flag) \
812 else if (sao_merge_left_flag) \
813 sao->elem = CTB(s->sao, rx-1, ry).elem; \
814 else if (sao_merge_up_flag) \
815 sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters for the CTB at (rx, ry) into s->sao, handling
 * merge-left/merge-up inheritance and computing the final signed, scaled
 * offset values. */
820 static void hls_sao_param(HEVCContext *s, int rx, int ry)
822 HEVCLocalContext *lc = s->HEVClc;
823 int sao_merge_left_flag = 0;
824 int sao_merge_up_flag = 0;
825 SAOParams *sao = &CTB(s->sao, rx, ry);
/* Merge flags are only coded when SAO is active for luma or chroma. */
828 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
829 s->sh.slice_sample_adaptive_offset_flag[1]) {
831 if (lc->ctb_left_flag)
832 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
/* merge-up is mutually exclusive with merge-left. */
834 if (ry > 0 && !sao_merge_left_flag) {
836 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* One pass per color component (luma only for monochrome). */
840 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
841 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
842 s->ps.pps->log2_sao_offset_scale_chroma;
844 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
845 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr (c_idx == 2) reuses Cb's type and EO class rather than decoding. */
850 sao->type_idx[2] = sao->type_idx[1];
851 sao->eo_class[2] = sao->eo_class[1];
853 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
856 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
/* Four offset magnitudes per component. */
859 for (i = 0; i < 4; i++)
860 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* Band offset: explicit signs + band position; edge offset: EO class. */
862 if (sao->type_idx[c_idx] == SAO_BAND) {
863 for (i = 0; i < 4; i++) {
864 if (sao->offset_abs[c_idx][i]) {
865 SET_SAO(offset_sign[c_idx][i],
866 ff_hevc_sao_offset_sign_decode(s));
868 sao->offset_sign[c_idx][i] = 0;
871 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
872 } else if (c_idx != 2) {
873 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
876 // Inferred parameters
/* offset_val[0] is the fixed zero entry; the remaining entries get their
 * sign applied and are scaled by the PPS offset scale. */
877 sao->offset_val[c_idx][0] = 0;
878 for (i = 0; i < 4; i++) {
879 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
880 if (sao->type_idx[c_idx] == SAO_EDGE) {
882 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
883 } else if (sao->offset_sign[c_idx][i]) {
884 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
886 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Decode the cross-component prediction parameters for chroma component
 * idx (0 = Cb, 1 = Cr) and store the resulting signed residual scale
 * factor in lc->tu.res_scale_val (0 disables the prediction). */
894 static int hls_cross_component_pred(HEVCContext *s, int idx) {
895 HEVCLocalContext *lc = s->HEVClc;
896 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
898 if (log2_res_scale_abs_plus1 != 0) {
899 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* scale = +/- 2^(log2_res_scale_abs_plus1 - 1). */
900 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
901 (1 - 2 * res_scale_sign_flag);
903 lc->tu.res_scale_val = 0;
910 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
911 int xBase, int yBase, int cb_xBase, int cb_yBase,
912 int log2_cb_size, int log2_trafo_size,
913 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
915 HEVCLocalContext *lc = s->HEVClc;
916 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
919 if (lc->cu.pred_mode == MODE_INTRA) {
920 int trafo_size = 1 << log2_trafo_size;
921 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
923 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
926 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
927 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
928 int scan_idx = SCAN_DIAG;
929 int scan_idx_c = SCAN_DIAG;
930 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
931 (s->ps.sps->chroma_format_idc == 2 &&
932 (cbf_cb[1] || cbf_cr[1]));
934 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
935 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
936 if (lc->tu.cu_qp_delta != 0)
937 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
938 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
939 lc->tu.is_cu_qp_delta_coded = 1;
941 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
942 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
943 av_log(s->avctx, AV_LOG_ERROR,
944 "The cu_qp_delta %d is outside the valid range "
947 -(26 + s->ps.sps->qp_bd_offset / 2),
948 (25 + s->ps.sps->qp_bd_offset / 2));
949 return AVERROR_INVALIDDATA;
952 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
955 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
956 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
957 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
958 if (cu_chroma_qp_offset_flag) {
959 int cu_chroma_qp_offset_idx = 0;
960 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
961 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
962 av_log(s->avctx, AV_LOG_ERROR,
963 "cu_chroma_qp_offset_idx not yet tested.\n");
965 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
966 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
968 lc->tu.cu_qp_offset_cb = 0;
969 lc->tu.cu_qp_offset_cr = 0;
971 lc->tu.is_cu_chroma_qp_offset_coded = 1;
974 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
975 if (lc->tu.intra_pred_mode >= 6 &&
976 lc->tu.intra_pred_mode <= 14) {
977 scan_idx = SCAN_VERT;
978 } else if (lc->tu.intra_pred_mode >= 22 &&
979 lc->tu.intra_pred_mode <= 30) {
980 scan_idx = SCAN_HORIZ;
983 if (lc->tu.intra_pred_mode_c >= 6 &&
984 lc->tu.intra_pred_mode_c <= 14) {
985 scan_idx_c = SCAN_VERT;
986 } else if (lc->tu.intra_pred_mode_c >= 22 &&
987 lc->tu.intra_pred_mode_c <= 30) {
988 scan_idx_c = SCAN_HORIZ;
995 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
996 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
997 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
998 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
999 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1000 (lc->cu.pred_mode == MODE_INTER ||
1001 (lc->tu.chroma_mode_c == 4)));
1003 if (lc->tu.cross_pf) {
1004 hls_cross_component_pred(s, 0);
1006 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1007 if (lc->cu.pred_mode == MODE_INTRA) {
1008 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1009 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1012 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1013 log2_trafo_size_c, scan_idx_c, 1);
1015 if (lc->tu.cross_pf) {
1016 ptrdiff_t stride = s->frame->linesize[1];
1017 int hshift = s->ps.sps->hshift[1];
1018 int vshift = s->ps.sps->vshift[1];
1019 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1020 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1021 int size = 1 << log2_trafo_size_c;
1023 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1024 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1025 for (i = 0; i < (size * size); i++) {
1026 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1028 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1032 if (lc->tu.cross_pf) {
1033 hls_cross_component_pred(s, 1);
1035 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1036 if (lc->cu.pred_mode == MODE_INTRA) {
1037 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1038 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1041 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1042 log2_trafo_size_c, scan_idx_c, 2);
1044 if (lc->tu.cross_pf) {
1045 ptrdiff_t stride = s->frame->linesize[2];
1046 int hshift = s->ps.sps->hshift[2];
1047 int vshift = s->ps.sps->vshift[2];
1048 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1049 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1050 int size = 1 << log2_trafo_size_c;
1052 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1053 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1054 for (i = 0; i < (size * size); i++) {
1055 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1057 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1060 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1061 int trafo_size_h = 1 << (log2_trafo_size + 1);
1062 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1063 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1064 if (lc->cu.pred_mode == MODE_INTRA) {
1065 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1066 trafo_size_h, trafo_size_v);
1067 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1070 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1071 log2_trafo_size, scan_idx_c, 1);
1073 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1074 if (lc->cu.pred_mode == MODE_INTRA) {
1075 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1076 trafo_size_h, trafo_size_v);
1077 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1080 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1081 log2_trafo_size, scan_idx_c, 2);
1084 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1085 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1086 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1087 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1088 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1089 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1090 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1091 if (s->ps.sps->chroma_format_idc == 2) {
1092 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1093 trafo_size_h, trafo_size_v);
1094 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1095 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1097 } else if (blk_idx == 3) {
1098 int trafo_size_h = 1 << (log2_trafo_size + 1);
1099 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1100 ff_hevc_set_neighbour_available(s, xBase, yBase,
1101 trafo_size_h, trafo_size_v);
1102 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1103 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1104 if (s->ps.sps->chroma_format_idc == 2) {
1105 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1106 trafo_size_h, trafo_size_v);
1107 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1108 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
/* Mark every minimum-size PU covered by the coding block at (x0, y0) as
 * transquant-bypassed (is_pcm value 2) so the deblocking filter treats it
 * like PCM and leaves its samples untouched. */
static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
    int cb_size          = 1 << log2_cb_size;
    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;

    int min_pu_width     = s->ps.sps->min_pu_width;
    /* clip to the picture edges so partially out-of-frame CBs do not
     * index past the end of the is_pcm array */
    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);

    for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
        for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
            s->is_pcm[i + j * min_pu_width] = 2;
/* transform_tree() (spec 7.3.8.8): recursively parse the residual
 * quad-tree below one coding block.  Decodes — or infers — the split
 * flag and the chroma cbf flags at this depth, then either recurses
 * into the four child transform blocks or parses the leaf transform
 * unit via hls_transform_unit().
 *
 * Returns 0 on success, a negative error code on failure.
 *
 * NOTE(review): this excerpt is missing several structural lines
 * (braces / else keywords / local declarations); the visible
 * statements are kept exactly as-is below. */
static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
                              int trafo_depth, int blk_idx,
                              const int *base_cbf_cb, const int *base_cbf_cr)
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t split_transform_flag;

    /* chroma cbf flags inherited from the parent node; index [1] is the
     * second (lower) chroma block used only for 4:2:2 content */
    cbf_cb[0] = base_cbf_cb[0];
    cbf_cb[1] = base_cbf_cb[1];
    cbf_cr[0] = base_cbf_cr[0];
    cbf_cr[1] = base_cbf_cr[1];

    /* select the intra prediction modes for this node: with an intra
     * NxN split, depth 1 picks the per-quadrant (blk_idx) modes */
    if (lc->cu.intra_split_flag) {
        if (trafo_depth == 1) {
            lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
            if (s->ps.sps->chroma_format_idc == 3) {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
                /* (other chroma formats: single chroma mode of block 0) */
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
                lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
        /* (no intra split: all blocks share the mode of block 0) */
        lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
        lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];

    /* split_transform_flag is coded only when the size is within the
     * legal TB range, the depth budget allows it and an intra NxN split
     * does not already force a split; otherwise it is inferred */
    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
        log2_trafo_size > s->ps.sps->log2_min_tb_size &&
        trafo_depth < lc->cu.max_trafo_depth &&
        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
        split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
        /* inferred case: inter CUs with non-2Nx2N partitions must split
         * at depth 0 when the inter hierarchy depth is 0 */
        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
                          lc->cu.pred_mode == MODE_INTER &&
                          lc->cu.part_mode != PART_2Nx2N &&
        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
                               (lc->cu.intra_split_flag && trafo_depth == 0) ||

    /* chroma cbf flags: present unless chroma is absent or the chroma TB
     * would fall below the minimum size (log2_trafo_size == 2 for
     * subsampled formats) */
    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
        if (trafo_depth == 0 || cbf_cb[0]) {
            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            /* 4:2:2: a second cbf for the lower chroma block */
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);

        if (trafo_depth == 0 || cbf_cr[0]) {
            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);

    if (split_transform_flag) {
        /* recurse into the four half-size children */
        const int trafo_size_split = 1 << (log2_trafo_size - 1);
        const int x1 = x0 + trafo_size_split;
        const int y1 = y0 + trafo_size_split;

#define SUBDIVIDE(x, y, idx) \
    ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
                             log2_trafo_size - 1, trafo_depth + 1, idx, \

        SUBDIVIDE(x0, y0, 0);
        SUBDIVIDE(x1, y0, 1);
        SUBDIVIDE(x0, y1, 2);
        SUBDIVIDE(x1, y1, 3);

        /* leaf: decode cbf_luma (inferred as 1 when no chroma cbf forces
         * it to be coded) and then the transform unit itself */
        int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
        int min_tu_width = s->ps.sps->min_tb_width;

        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
            cbf_cb[0] || cbf_cr[0] ||
            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
            cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);

        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size,
                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);

        // TODO: store cbf_luma somewhere else
        /* record cbf_luma per min-TU for the deblocking filter */
        for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
            for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
                int x_tu = (x0 + j) >> log2_min_tu_size;
                int y_tu = (y0 + i) >> log2_min_tu_size;
                s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;

        if (!s->sh.disable_deblocking_filter_flag) {
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
            if (s->ps.pps->transquant_bypass_enable_flag &&
                lc->cu.cu_transquant_bypass_flag)
                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* pcm_sample() (spec 7.3.8.7): an IPCM coding unit carries raw sample
 * values that bypass prediction and transform.  The byte-aligned PCM
 * payload is skipped in the CABAC bytestream and re-read with a plain
 * bit reader, then written directly into the frame planes.
 * Returns 0 on success or a negative error code from init_get_bits(). */
static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
    HEVCLocalContext *lc = s->HEVClc;
    int cb_size = 1 << log2_cb_size;
    int stride0 = s->frame->linesize[0];
    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
    int stride1 = s->frame->linesize[1];
    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
    int stride2 = s->frame->linesize[2];
    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];

    /* payload size in bits: full-resolution luma block plus both
     * chroma blocks at their subsampled sizes */
    int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
                 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
                  ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
                 s->ps.sps->pcm.bit_depth_chroma;
    const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);

    if (!s->sh.disable_deblocking_filter_flag)
        ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);

    ret = init_get_bits(&gb, pcm, length);

    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
    /* chroma planes only exist for chroma_format_idc != 0 (not 4:0:0) */
    if (s->ps.sps->chroma_format_idc) {
        s->hevcdsp.put_pcm(dst1, stride1,
                           cb_size >> s->ps.sps->hshift[1],
                           cb_size >> s->ps.sps->vshift[1],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
        s->hevcdsp.put_pcm(dst2, stride2,
                           cb_size >> s->ps.sps->hshift[2],
                           cb_size >> s->ps.sps->vshift[2],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
1296 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1298 * @param s HEVC decoding context
1299 * @param dst target buffer for block data at block position
1300 * @param dststride stride of the dst buffer
1301 * @param ref reference picture buffer at origin (0, 0)
1302 * @param mv motion vector (relative to block position) to get pixel data from
1303 * @param x_off horizontal position of block from origin (0, 0)
1304 * @param y_off vertical position of block from origin (0, 0)
1305 * @param block_w width of block
1306 * @param block_h height of block
1307 * @param luma_weight weighting factor applied to the luma prediction
1308 * @param luma_offset additive offset applied to the luma prediction value
static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                        AVFrame *ref, const Mv *mv, int x_off, int y_off,
                        int block_w, int block_h, int luma_weight, int luma_offset)
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t *src = ref->data[0];
    ptrdiff_t srcstride = ref->linesize[0];
    int pic_width = s->ps.sps->width;
    int pic_height = s->ps.sps->height;
    /* explicit weighted prediction applies to P slices via
     * weighted_pred_flag and to B slices via weighted_bipred_flag */
    int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                      (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    /* DSP function index for this block width */
    int idx = ff_hevc_pel_weight[block_w];

    /* add the integer part of the quarter-pel motion vector */
    x_off += mv->x >> 2;
    y_off += mv->y >> 2;
    src += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);

    /* if the interpolation window (block plus QPEL filter taps) reaches
     * outside the picture, build a padded copy in the edge-emulation
     * buffer and interpolate from that instead */
    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                 edge_emu_stride, srcstride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src = lc->edge_emu_buffer + buf_offset;
        srcstride = edge_emu_stride;

    /* unweighted vs. weighted qpel interpolation; the fractional MV part
     * mx/my selects the filter (presumably an if/else on weight_flag —
     * the selecting lines are not visible in this excerpt, confirm) */
    s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                  block_h, mx, my, block_w);
    s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                    block_h, s->sh.luma_log2_weight_denom,
                                                    luma_weight, luma_offset, mx, my, block_w);
1357 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1359 * @param s HEVC decoding context
1360 * @param dst target buffer for block data at block position
1361 * @param dststride stride of the dst buffer
1362 * @param ref0 reference picture0 buffer at origin (0, 0)
1363 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1364 * @param x_off horizontal position of block from origin (0, 0)
1365 * @param y_off vertical position of block from origin (0, 0)
1366 * @param block_w width of block
1367 * @param block_h height of block
1368 * @param ref1 reference picture1 buffer at origin (0, 0)
1369 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1370 * @param current_mv current motion vector structure
static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
    HEVCLocalContext *lc = s->HEVClc;
    ptrdiff_t src0stride = ref0->linesize[0];
    ptrdiff_t src1stride = ref1->linesize[0];
    int pic_width = s->ps.sps->width;
    int pic_height = s->ps.sps->height;
    /* fractional (quarter-pel) parts of both motion vectors */
    int mx0 = mv0->x & 3;
    int my0 = mv0->y & 3;
    int mx1 = mv1->x & 3;
    int my1 = mv1->y & 3;
    /* explicit weighted prediction enabled for this slice type? */
    int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                      (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    /* block positions displaced by the integer parts of the MVs */
    int x_off0 = x_off + (mv0->x >> 2);
    int y_off0 = y_off + (mv0->y >> 2);
    int x_off1 = x_off + (mv1->x >> 2);
    int y_off1 = y_off + (mv1->y >> 2);
    int idx = ff_hevc_pel_weight[block_w];

    /* unsigned cast avoids UB on intermediate overflow of the shift */
    uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
    uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);

    /* pad the L0 source through the edge-emulation buffer if the
     * interpolation window leaves the picture */
    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
                                 edge_emu_stride, src0stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src0 = lc->edge_emu_buffer + buf_offset;
        src0stride = edge_emu_stride;

    /* same for the L1 source, using the second emulation buffer so both
     * padded blocks can coexist */
    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
                                 edge_emu_stride, src1stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src1 = lc->edge_emu_buffer2 + buf_offset;
        src1stride = edge_emu_stride;

    /* L0 prediction lands in lc->tmp, then the *_bi/_bi_w call averages
     * (or weights) it with the L1 prediction into dst (presumably an
     * if/else on weight_flag selects between the two calls below — the
     * selecting lines are not visible in this excerpt, confirm) */
    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
                                                block_h, mx0, my0, block_w);
    s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                   block_h, mx1, my1, block_w);
    s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                     block_h, s->sh.luma_log2_weight_denom,
                                                     s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                     s->sh.luma_weight_l1[current_mv->ref_idx[1]],
                                                     s->sh.luma_offset_l0[current_mv->ref_idx[0]],
                                                     s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1447 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1449 * @param s HEVC decoding context
1450 * @param dst1 target buffer for block data at block position (U plane)
1451 * @param dst2 target buffer for block data at block position (V plane)
1452 * @param dststride stride of the dst1 and dst2 buffers
1453 * @param ref reference picture buffer at origin (0, 0)
1454 * @param mv motion vector (relative to block position) to get pixel data from
1455 * @param x_off horizontal position of block from origin (0, 0)
1456 * @param y_off vertical position of block from origin (0, 0)
1457 * @param block_w width of block
1458 * @param block_h height of block
1459 * @param chroma_weight weighting factor applied to the chroma prediction
1460 * @param chroma_offset additive offset applied to the chroma prediction value
1463 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1464 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1465 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1467 HEVCLocalContext *lc = s->HEVClc;
1468 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1469 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1470 const Mv *mv = ¤t_mv->mv[reflist];
1471 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1472 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1473 int idx = ff_hevc_pel_weight[block_w];
1474 int hshift = s->ps.sps->hshift[1];
1475 int vshift = s->ps.sps->vshift[1];
1476 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1477 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1478 intptr_t _mx = mx << (1 - hshift);
1479 intptr_t _my = my << (1 - vshift);
1481 x_off += mv->x >> (2 + hshift);
1482 y_off += mv->y >> (2 + vshift);
1483 src0 += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
1485 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1486 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1487 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1488 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1489 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1490 int buf_offset0 = EPEL_EXTRA_BEFORE *
1491 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1492 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1493 edge_emu_stride, srcstride,
1494 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1495 x_off - EPEL_EXTRA_BEFORE,
1496 y_off - EPEL_EXTRA_BEFORE,
1497 pic_width, pic_height);
1499 src0 = lc->edge_emu_buffer + buf_offset0;
1500 srcstride = edge_emu_stride;
1503 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1504 block_h, _mx, _my, block_w);
1506 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1507 block_h, s->sh.chroma_log2_weight_denom,
1508 chroma_weight, chroma_offset, _mx, _my, block_w);
1512 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1514 * @param s HEVC decoding context
1515 * @param dst target buffer for block data at block position
1516 * @param dststride stride of the dst buffer
1517 * @param ref0 reference picture0 buffer at origin (0, 0)
1518 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1519 * @param x_off horizontal position of block from origin (0, 0)
1520 * @param y_off vertical position of block from origin (0, 0)
1521 * @param block_w width of block
1522 * @param block_h height of block
1523 * @param ref1 reference picture1 buffer at origin (0, 0)
1524 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1525 * @param current_mv current motion vector structure
1526 * @param cidx chroma component(cb, cr)
1528 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1529 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1531 HEVCLocalContext *lc = s->HEVClc;
1532 uint8_t *src1 = ref0->data[cidx+1];
1533 uint8_t *src2 = ref1->data[cidx+1];
1534 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1535 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1536 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1537 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1538 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1539 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1540 Mv *mv0 = ¤t_mv->mv[0];
1541 Mv *mv1 = ¤t_mv->mv[1];
1542 int hshift = s->ps.sps->hshift[1];
1543 int vshift = s->ps.sps->vshift[1];
1545 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1546 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1547 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1548 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1549 intptr_t _mx0 = mx0 << (1 - hshift);
1550 intptr_t _my0 = my0 << (1 - vshift);
1551 intptr_t _mx1 = mx1 << (1 - hshift);
1552 intptr_t _my1 = my1 << (1 - vshift);
1554 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1555 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1556 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1557 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1558 int idx = ff_hevc_pel_weight[block_w];
1559 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1560 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1562 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1563 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1564 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1565 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1566 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1567 int buf_offset1 = EPEL_EXTRA_BEFORE *
1568 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1570 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1571 edge_emu_stride, src1stride,
1572 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1573 x_off0 - EPEL_EXTRA_BEFORE,
1574 y_off0 - EPEL_EXTRA_BEFORE,
1575 pic_width, pic_height);
1577 src1 = lc->edge_emu_buffer + buf_offset1;
1578 src1stride = edge_emu_stride;
1581 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1582 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1583 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1584 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1585 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1586 int buf_offset1 = EPEL_EXTRA_BEFORE *
1587 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1589 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1590 edge_emu_stride, src2stride,
1591 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1592 x_off1 - EPEL_EXTRA_BEFORE,
1593 y_off1 - EPEL_EXTRA_BEFORE,
1594 pic_width, pic_height);
1596 src2 = lc->edge_emu_buffer2 + buf_offset1;
1597 src2stride = edge_emu_stride;
1600 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1601 block_h, _mx0, _my0, block_w);
1603 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1604 src2, src2stride, lc->tmp,
1605 block_h, _mx1, _my1, block_w);
1607 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1608 src2, src2stride, lc->tmp,
1610 s->sh.chroma_log2_weight_denom,
1611 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1612 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1613 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1614 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1615 _mx1, _my1, block_w);
1618 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1619 const Mv *mv, int y0, int height)
1621 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1623 if (s->threads_type == FF_THREAD_FRAME )
1624 ff_thread_await_progress(&ref->tf, y, 0);
/* Parse AMVP (non-merge) motion data for one PU: inter_pred_idc, the
 * per-list reference index, the motion vector difference and the MVP
 * flag, then form each final motion vector by adding the decoded mvd to
 * the selected predictor. */
static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
                                  int nPbH, int log2_cb_size, int part_idx,
                                  int merge_idx, MvField *mv)
    HEVCLocalContext *lc = s->HEVClc;
    enum InterPredIdc inter_pred_idc = PRED_L0;

    ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
    /* P slices are always PRED_L0; only B slices code inter_pred_idc */
    if (s->sh.slice_type == B_SLICE)
        inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);

    /* list 0 is used for PRED_L0 and PRED_BI */
    if (inter_pred_idc != PRED_L1) {
        if (s->sh.nb_refs[L0])
            mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);

        mv->pred_flag = PF_L0;
        ff_hevc_hls_mvd_coding(s, x0, y0, 0);
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 0);
        /* final MV = predictor + decoded mvd */
        mv->mv[0].x += lc->pu.mvd.x;
        mv->mv[0].y += lc->pu.mvd.y;

    /* list 1 is used for PRED_L1 and PRED_BI */
    if (inter_pred_idc != PRED_L0) {
        if (s->sh.nb_refs[L1])
            mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);

        /* mvd_l1_zero_flag: for bi-prediction the L1 mvd is not coded
         * and is inferred to be zero */
        if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
            AV_ZERO32(&lc->pu.mvd);
            ff_hevc_hls_mvd_coding(s, x0, y0, 1);

        mv->pred_flag += PF_L1;
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 1);
        mv->mv[1].x += lc->pu.mvd.x;
        mv->mv[1].y += lc->pu.mvd.y;
1672 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1674 int log2_cb_size, int partIdx, int idx)
1676 #define POS(c_idx, x, y) \
1677 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1678 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1679 HEVCLocalContext *lc = s->HEVClc;
1681 struct MvField current_mv = {{{ 0 }}};
1683 int min_pu_width = s->ps.sps->min_pu_width;
1685 MvField *tab_mvf = s->ref->tab_mvf;
1686 RefPicList *refPicList = s->ref->refPicList;
1687 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1688 uint8_t *dst0 = POS(0, x0, y0);
1689 uint8_t *dst1 = POS(1, x0, y0);
1690 uint8_t *dst2 = POS(2, x0, y0);
1691 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1692 int min_cb_width = s->ps.sps->min_cb_width;
1693 int x_cb = x0 >> log2_min_cb_size;
1694 int y_cb = y0 >> log2_min_cb_size;
1698 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1701 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1703 if (skip_flag || lc->pu.merge_flag) {
1704 if (s->sh.max_num_merge_cand > 1)
1705 merge_idx = ff_hevc_merge_idx_decode(s);
1709 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710 partIdx, merge_idx, ¤t_mv);
1712 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1713 partIdx, merge_idx, ¤t_mv);
1716 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1717 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1719 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1720 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1721 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1723 if (current_mv.pred_flag & PF_L0) {
1724 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1727 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1729 if (current_mv.pred_flag & PF_L1) {
1730 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1733 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1736 if (current_mv.pred_flag == PF_L0) {
1737 int x0_c = x0 >> s->ps.sps->hshift[1];
1738 int y0_c = y0 >> s->ps.sps->vshift[1];
1739 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1740 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1742 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1743 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1744 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1745 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1747 if (s->ps.sps->chroma_format_idc) {
1748 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1749 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1750 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1751 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1752 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1753 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1755 } else if (current_mv.pred_flag == PF_L1) {
1756 int x0_c = x0 >> s->ps.sps->hshift[1];
1757 int y0_c = y0 >> s->ps.sps->vshift[1];
1758 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1759 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1761 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1762 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1763 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1764 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1766 if (s->ps.sps->chroma_format_idc) {
1767 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1768 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1769 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1771 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1772 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1773 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1775 } else if (current_mv.pred_flag == PF_BI) {
1776 int x0_c = x0 >> s->ps.sps->hshift[1];
1777 int y0_c = y0 >> s->ps.sps->vshift[1];
1778 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1779 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1781 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1782 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1783 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1785 if (s->ps.sps->chroma_format_idc) {
1786 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1787 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1789 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1790 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/* Derive the luma intra prediction mode of one PU (spec 8.4.2): build
 * the 3-entry most-probable-mode candidate list from the left and up
 * neighbours, pick candidate[mpm_idx] when prev_intra_luma_pred_flag is
 * set, otherwise reconstruct the mode from rem_intra_luma_pred_mode.
 * The result (and PF_INTRA) is written into the per-PU tables.
 * Returns the derived intra prediction mode. */
static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                int prev_intra_luma_pred_flag)
    HEVCLocalContext *lc = s->HEVClc;
    int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
    int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
    int min_pu_width = s->ps.sps->min_pu_width;
    int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
    /* position within the CTB, to test neighbour availability */
    int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
    int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);

    /* neighbour modes default to INTRA_DC when unavailable */
    int cand_up = (lc->ctb_up_flag || y0b) ?
                  s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
    int cand_left = (lc->ctb_left_flag || x0b) ?
                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;

    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);

    MvField *tab_mvf = s->ref->tab_mvf;
    int intra_pred_mode;

    // intra_pred_mode prediction does not cross vertical CTB boundaries
    if ((y0 - 1) < y_ctb)

    if (cand_left == cand_up) {
        if (cand_left < 2) {
            /* both neighbours non-angular: use the default MPM list */
            candidate[0] = INTRA_PLANAR;
            candidate[1] = INTRA_DC;
            candidate[2] = INTRA_ANGULAR_26;
            /* equal angular neighbours: the mode plus its two adjacent
             * angular modes, wrapping over the 32 angular modes */
            candidate[0] = cand_left;
            candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
            candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
        /* distinct neighbours: both, plus the first of
         * PLANAR/DC/ANGULAR_26 not already present */
        candidate[0] = cand_left;
        candidate[1] = cand_up;
        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
            candidate[2] = INTRA_PLANAR;
        } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
            candidate[2] = INTRA_DC;
            candidate[2] = INTRA_ANGULAR_26;

    if (prev_intra_luma_pred_flag) {
        intra_pred_mode = candidate[lc->pu.mpm_idx];
        /* rem_intra_luma_pred_mode indexes the non-MPM modes: sort the
         * candidates, then bump the index past each candidate it
         * reaches */
        if (candidate[0] > candidate[1])
            FFSWAP(uint8_t, candidate[0], candidate[1]);
        if (candidate[0] > candidate[2])
            FFSWAP(uint8_t, candidate[0], candidate[2]);
        if (candidate[1] > candidate[2])
            FFSWAP(uint8_t, candidate[1], candidate[2]);

        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
        for (i = 0; i < 3; i++)
            if (intra_pred_mode >= candidate[i])

    /* write the intra prediction units into the mv array */
    for (i = 0; i < size_in_pus; i++) {
        memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
               intra_pred_mode, size_in_pus);
        for (j = 0; j < size_in_pus; j++) {
            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;

    return intra_pred_mode;
/* Record the coding-tree depth ct_depth for the coding block at (x0, y0)
 * into s->tab_ct_depth, one row of `length` min-CB entries per min-CB row
 * covered by the block.
 * NOTE(review): this listing is elided (gaps in embedded line numbers);
 * the memset value/size arguments and the closing braces are not visible. */
1878 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1879 int log2_cb_size, int ct_depth)
/* block width expressed in minimum coding blocks */
1881 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1882 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1883 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1886 for (y = 0; y < length; y++)
1887 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Lookup table used by intra_prediction_unit() for 4:2:2 chroma
 * (chroma_format_idc == 2): maps a luma intra prediction mode index to the
 * chroma intra prediction mode actually used (see use via mode_idx below). */
1891 static const uint8_t tab_mode_idx[] = {
1892 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1893 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Parse the intra prediction modes for a coding unit: first the luma mode of
 * each prediction block (1 PB for PART_2Nx2N, 4 PBs of half size for
 * PART_NxN), then the chroma mode(s) depending on chroma_format_idc
 * (3 = 4:4:4 -> one chroma mode per PB; 2 = 4:2:2 -> one mode remapped
 * through tab_mode_idx; other non-zero -> one 4:2:0-style mode).
 * NOTE(review): elided listing — several else branches and closing braces
 * are missing from this view. */
1895 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1898 HEVCLocalContext *lc = s->HEVClc;
/* chroma_mode values 0..3 map to planar(0)/angular26/angular10/DC(1);
 * value 4 means "derived from the co-located luma mode" (DM mode) */
1899 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1900 uint8_t prev_intra_luma_pred_flag[4];
1901 int split = lc->cu.part_mode == PART_NxN;
1902 int pb_size = (1 << log2_cb_size) >> split;
1903 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits for the 1 or 4 PBs */
1907 for (i = 0; i < side; i++)
1908 for (j = 0; j < side; j++)
1909 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: read mpm_idx or rem_intra_luma_pred_mode and derive the
 * final luma mode per PB */
1911 for (i = 0; i < side; i++) {
1912 for (j = 0; j < side; j++) {
1913 if (prev_intra_luma_pred_flag[2 * i + j])
1914 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1916 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1918 lc->pu.intra_pred_mode[2 * i + j] =
1919 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1920 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: an independent chroma mode is signalled for every PB */
1924 if (s->ps.sps->chroma_format_idc == 3) {
1925 for (i = 0; i < side; i++) {
1926 for (j = 0; j < side; j++) {
1927 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1928 if (chroma_mode != 4) {
/* when the table entry collides with the luma mode, angular 34 is used */
1929 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1930 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1932 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1934 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1938 } else if (s->ps.sps->chroma_format_idc == 2) {
1940 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1941 if (chroma_mode != 4) {
1942 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1945 mode_idx = intra_chroma_table[chroma_mode];
1947 mode_idx = lc->pu.intra_pred_mode[0];
1949 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* any other chroma format except monochrome (idc == 0): single mode */
1950 } else if (s->ps.sps->chroma_format_idc != 0) {
1951 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952 if (chroma_mode != 4) {
1953 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954 lc->pu.intra_pred_mode_c[0] = 34;
1956 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1958 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default intra state for a CU that carries no explicit intra syntax
 * (e.g. skipped/PCM CUs): mark the covered min-PU area of tab_ipm as
 * INTRA_DC, and — only if the CU is actually intra — flag the corresponding
 * MvField entries as PF_INTRA.
 * NOTE(review): elided listing — a `return`/braces around the
 * size_in_pus == 0 guard are not visible here. */
1963 static void intra_prediction_unit_default_value(HEVCContext *s,
1967 HEVCLocalContext *lc = s->HEVClc;
1968 int pb_size = 1 << log2_cb_size;
1969 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1970 int min_pu_width = s->ps.sps->min_pu_width;
1971 MvField *tab_mvf = s->ref->tab_mvf;
1972 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1973 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* block smaller than one min PU: nothing to mark */
1976 if (size_in_pus == 0)
1978 for (j = 0; j < size_in_pus; j++)
1979 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1980 if (lc->cu.pred_mode == MODE_INTRA)
1981 for (j = 0; j < size_in_pus; j++)
1982 for (k = 0; k < size_in_pus; k++)
1983 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Decode one coding unit at (x0, y0): skip flag, prediction mode, partition
 * mode, PCM or intra/inter prediction units, residual transform tree,
 * deblocking boundary strengths, and the per-CU bookkeeping tables
 * (skip_flag, qp_y_tab, ct_depth). Mirrors coding_unit() of the HEVC spec.
 * NOTE(review): elided listing — case labels, else branches, error checks
 * and closing braces are missing from this view. */
1986 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1988 int cb_size = 1 << log2_cb_size;
1989 HEVCLocalContext *lc = s->HEVClc;
1990 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1991 int length = cb_size >> log2_min_cb_size;
1992 int min_cb_width = s->ps.sps->min_cb_width;
1993 int x_cb = x0 >> log2_min_cb_size;
1994 int y_cb = y0 >> log2_min_cb_size;
/* idx selects the block-size-dependent entry for hls_prediction_unit() */
1995 int idx = log2_cb_size - 2;
1996 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before any syntax is parsed */
2001 lc->cu.pred_mode = MODE_INTRA;
2002 lc->cu.part_mode = PART_2Nx2N;
2003 lc->cu.intra_split_flag = 0;
2005 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2006 for (x = 0; x < 4; x++)
2007 lc->pu.intra_pred_mode[x] = 1;
2008 if (s->ps.pps->transquant_bypass_enable_flag) {
2009 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2010 if (lc->cu.cu_transquant_bypass_flag)
2011 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2013 lc->cu.cu_transquant_bypass_flag = 0;
/* non-I slices signal cu_skip_flag; propagate it over the covered min CBs */
2015 if (s->sh.slice_type != I_SLICE) {
2016 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2018 x = y_cb * min_cb_width + x_cb;
2019 for (y = 0; y < length; y++) {
2020 memset(&s->skip_flag[x], skip_flag, length);
2023 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2025 x = y_cb * min_cb_width + x_cb;
2026 for (y = 0; y < length; y++) {
2027 memset(&s->skip_flag[x], 0, length);
/* skipped CU: single merge PU, no residual, defaults for intra tables */
2032 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2033 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2034 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2036 if (!s->sh.disable_deblocking_filter_flag)
2037 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2041 if (s->sh.slice_type != I_SLICE)
2042 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or at the minimum CB size */
2043 if (lc->cu.pred_mode != MODE_INTRA ||
2044 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2045 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2046 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2047 lc->cu.pred_mode == MODE_INTRA;
2050 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only possible for 2Nx2N intra CUs within the SPS size bounds */
2051 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2052 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2053 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2054 pcm_flag = ff_hevc_pcm_flag_decode(s);
2057 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2058 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2059 if (s->ps.sps->pcm.loop_filter_disable_flag)
2060 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2065 intra_prediction_unit(s, x0, y0, log2_cb_size);
2068 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: emit one hls_prediction_unit() call per partition; the
 * geometry of each call follows the PART_* shapes of the spec */
2069 switch (lc->cu.part_mode) {
2071 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2074 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2075 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2078 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2079 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2082 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2083 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2086 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2087 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2090 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2091 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2094 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2095 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2098 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2099 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2100 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2101 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* rqt_root_cbf: whether any residual is present; only coded for
 * non-intra, non-(2Nx2N merge) CUs */
2107 int rqt_root_cbf = 1;
2109 if (lc->cu.pred_mode != MODE_INTRA &&
2110 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2111 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* style nit: `static const` is the conventional ordering (`const static`
 * is legal but unidiomatic) — left untouched in this doc-only pass */
2114 const static int cbf[2] = { 0 };
2115 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2116 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2117 s->ps.sps->max_transform_hierarchy_depth_inter;
2118 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2120 log2_cb_size, 0, 0, cbf, cbf);
2124 if (!s->sh.disable_deblocking_filter_flag)
2125 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* derive qp if no cu_qp_delta was coded for this quantization group */
2130 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2131 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2133 x = y_cb * min_cb_width + x_cb;
2134 for (y = 0; y < length; y++) {
2135 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* at the end of a quantization group, update the qp predictor */
2139 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2140 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2141 lc->qPy_pred = lc->qp_y;
2144 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively decode the coding quadtree rooted at (x0, y0): read (or infer)
 * split_cu_flag, reset per-quantization-group state, and either recurse into
 * the four sub-blocks or decode a leaf coding unit followed by the
 * end_of_slice_flag check. Returns >0 while more CTB data follows, 0 at the
 * end of the slice segment, <0 on error (error paths elided from this view).
 * NOTE(review): elided listing — else branches, error checks and closing
 * braces are missing. */
2149 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2150 int log2_cb_size, int cb_depth)
2152 HEVCLocalContext *lc = s->HEVClc;
2153 const int cb_size = 1 << log2_cb_size;
2157 lc->ct_depth = cb_depth;
/* split_cu_flag is only coded when the block fits inside the picture and
 * is larger than the minimum CB; otherwise the split is inferred */
2158 if (x0 + cb_size <= s->ps.sps->width &&
2159 y0 + cb_size <= s->ps.sps->height &&
2160 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2161 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2163 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* entering a new quantization group: reset cu_qp_delta state */
2165 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2166 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2167 lc->tu.is_cu_qp_delta_coded = 0;
2168 lc->tu.cu_qp_delta = 0;
2171 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2172 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2173 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2177 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2178 const int cb_size_split = cb_size >> 1;
2179 const int x1 = x0 + cb_size_split;
2180 const int y1 = y0 + cb_size_split;
/* recurse into the four quadrants; quadrants outside the picture are
 * skipped, and recursion stops once a sub-call signals end of data */
2184 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2188 if (more_data && x1 < s->ps.sps->width) {
2189 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2193 if (more_data && y1 < s->ps.sps->height) {
2194 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2198 if (more_data && x1 < s->ps.sps->width &&
2199 y1 < s->ps.sps->height) {
2200 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* leaving a quantization group: refresh the qp predictor */
2205 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2206 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2207 lc->qPy_pred = lc->qp_y;
2210 return ((x1 + cb_size_split) < s->ps.sps->width ||
2211 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the coding unit itself */
2215 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* end_of_slice_flag is present when this CU completes a CTB (or reaches
 * the right/bottom picture border) */
2218 if ((!((x0 + cb_size) %
2219 (1 << (s->ps.sps->log2_ctb_size))) ||
2220 (x0 + cb_size >= s->ps.sps->width)) &&
2222 (1 << (s->ps.sps->log2_ctb_size))) ||
2223 (y0 + cb_size >= s->ps.sps->height))) {
2224 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2225 return !end_of_slice_flag;
/* Set up per-CTB neighbourhood state before decoding the CTB at
 * (x_ctb, y_ctb): tile/WPP horizontal end position, slice/tile boundary
 * flags, and availability of the left/up/up-left/up-right neighbour CTBs
 * used by intra prediction and CABAC context derivation.
 * NOTE(review): elided listing — some else branches/braces are missing. */
2234 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2237 HEVCLocalContext *lc = s->HEVClc;
2238 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2239 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* offset of this CTB from the start of the current slice (raster order) */
2240 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2242 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2244 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row starts a new qp group */
2245 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2246 lc->first_qp_group = 1;
2247 lc->end_of_tiles_x = s->ps.sps->width;
2248 } else if (s->ps.pps->tiles_enabled_flag) {
/* entering a new tile: recompute its right edge in samples */
2249 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2250 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2251 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2252 lc->first_qp_group = 1;
2255 lc->end_of_tiles_x = s->ps.sps->width;
2258 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2260 lc->boundary_flags = 0;
/* with tiles, both tile and slice boundaries must be checked explicitly */
2261 if (s->ps.pps->tiles_enabled_flag) {
2262 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2263 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2264 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2265 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2266 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2267 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2268 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2269 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: slice boundaries follow directly from the in-slice offset */
2271 if (ctb_addr_in_slice <= 0)
2272 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2273 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2274 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2277 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2278 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2279 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2280 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decode entry point (run through avctx->execute):
 * walks the CTBs of the slice in tile-scan order, initializing CABAC and
 * per-CTB deblock parameters, decoding each coding quadtree, then applying
 * in-loop filters. Return value (elided here) is the number of decoded CTBs
 * or a negative error code, judging by the callers' checks in
 * decode_nal_unit().
 * NOTE(review): elided listing — loop increments, error returns and braces
 * are missing from this view. */
2283 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2285 HEVCContext *s = avctxt->priv_data;
2286 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2290 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of the picture */
2292 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2293 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2294 return AVERROR_INVALIDDATA;
/* dependent segment: its predecessor must belong to the same slice */
2297 if (s->sh.dependent_slice_segment_flag) {
2298 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2299 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2300 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2301 return AVERROR_INVALIDDATA;
2305 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2306 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2308 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2309 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2310 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2312 ff_hevc_cabac_init(s, ctb_addr_ts);
2314 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblock parameters come from the current slice header */
2316 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2317 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2318 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2320 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2321 if (more_data < 0) {
/* mark the CTB as broken so later consistency checks fail cleanly */
2322 s->tab_slice_address[ctb_addr_rs] = -1;
2328 ff_hevc_save_states(s, ctb_addr_ts);
2329 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the remaining filter pass */
2332 if (x_ctb + ctb_size >= s->ps.sps->width &&
2333 y_ctb + ctb_size >= s->ps.sps->height)
2334 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the slice data without wavefront parallelism: dispatch a single
 * hls_decode_entry job through avctx->execute. (Setup of `arg`/`ret` and the
 * return statement are elided from this view.) */
2339 static int hls_slice_data(HEVCContext *s)
2347 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decodes one CTB row using the per-thread context
 * s->sList[self_id]. Synchronizes with the row above via
 * ff_thread_await_progress2 / ff_thread_report_progress2 and aborts the
 * whole wavefront by setting s1->wpp_err when a row ends prematurely.
 * NOTE(review): elided listing — the ctb_row == 0 branch, some braces and
 * the return statements are not visible here. */
2350 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2352 HEVCContext *s1 = avctxt->priv_data, *s;
2353 HEVCLocalContext *lc;
2354 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2356 int *ctb_row_p = input_ctb_row;
2357 int ctb_row = ctb_row_p[job];
2358 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2359 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2360 int thread = ctb_row % s1->threads_number;
/* each job uses its own cloned HEVCContext/HEVCLocalContext */
2363 s = s1->sList[self_id];
/* rows > 0 start at the byte offset recorded from the entry points */
2367 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2371 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2374 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2375 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2376 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2378 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is SHIFT_CTB_WPP CTBs ahead */
2380 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* another row failed: report progress so nobody deadlocks, then bail */
2382 if (avpriv_atomic_int_get(&s1->wpp_err)){
2383 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2387 ff_hevc_cabac_init(s, ctb_addr_ts);
2388 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2389 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2391 if (more_data < 0) {
2392 s->tab_slice_address[ctb_addr_rs] = -1;
2398 ff_hevc_save_states(s, ctb_addr_ts);
2399 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2400 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* row ended before the picture edge on a non-final row: flag wpp_err */
2402 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2403 avpriv_atomic_int_set(&s1->wpp_err, 1);
2404 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2408 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2409 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2410 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2413 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* stop at the end of the CTB row */
2416 if(x_ctb >= s->ps.sps->width) {
2420 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Decode the slice data with wavefront parallel processing: clone one
 * HEVCContext per worker thread, translate the slice-header entry-point
 * offsets into per-row (offset, size) pairs — compensating for emulation-
 * prevention bytes recorded in skipped_bytes_pos — then dispatch one
 * hls_decode_entry_wpp job per CTB row via avctx->execute2.
 * NOTE(review): elided listing; also note the av_malloc/av_mallocz results
 * for sList/HEVClcList are memcpy'd/used without visible NULL checks —
 * verify error handling in the elided lines. */
2425 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2427 const uint8_t *data = nal->data;
2428 int length = nal->size;
2429 HEVCLocalContext *lc = s->HEVClc;
2430 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2431 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2433 int startheader, cmpt = 0;
2439 return AVERROR(ENOMEM);
2444 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* lazily create the per-thread context clones */
2447 for (i = 1; i < s->threads_number; i++) {
2448 s->sList[i] = av_malloc(sizeof(HEVCContext));
2449 memcpy(s->sList[i], s, sizeof(HEVCContext));
2450 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2451 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte offset of the first entry point, past the slice header */
2455 offset = (lc->gb.index >> 3);
/* cmpt counts emulation-prevention bytes inside each entry-point span */
2457 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2458 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2464 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2465 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2466 for (j = 0, cmpt = 0, startheader = offset
2467 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2468 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2473 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2474 s->sh.offset[i - 1] = offset;
/* last segment runs to the end of the NAL unit */
2477 if (s->sh.num_entry_point_offsets != 0) {
2478 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2479 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2480 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh clones with the now-complete slice state */
2485 for (i = 1; i < s->threads_number; i++) {
2486 s->sList[i]->HEVClc->first_qp_group = 1;
2487 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2488 memcpy(s->sList[i], s, sizeof(HEVCContext));
2489 s->sList[i]->HEVClc = s->HEVClcList[i];
2492 avpriv_atomic_int_set(&s->wpp_err, 0);
2493 ff_reset_entries(s->avctx);
2495 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2500 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2501 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2503 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Export SEI-derived side data on the output frame: stereo 3D information
 * from the frame-packing-arrangement SEI, and a display matrix from the
 * display-orientation SEI. Returns 0 or AVERROR(ENOMEM).
 * NOTE(review): elided listing — case labels and closing braces are
 * missing from this view. */
2510 static int set_side_data(HEVCContext *s)
2512 AVFrame *out = s->ref->frame;
/* only arrangement types 3..5 and interpretation types 1..2 map onto
 * AVStereo3D */
2514 if (s->sei_frame_packing_present &&
2515 s->frame_packing_arrangement_type >= 3 &&
2516 s->frame_packing_arrangement_type <= 5 &&
2517 s->content_interpretation_type > 0 &&
2518 s->content_interpretation_type < 3) {
2519 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2521 return AVERROR(ENOMEM);
2523 switch (s->frame_packing_arrangement_type) {
2525 if (s->quincunx_subsampling)
2526 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2528 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2531 stereo->type = AV_STEREO3D_TOPBOTTOM;
2534 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation type 2 means right view first */
2538 if (s->content_interpretation_type == 2)
2539 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2542 if (s->sei_display_orientation_present &&
2543 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536-turn units; convert to degrees */
2544 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2545 AVFrameSideData *rotation = av_frame_new_side_data(out,
2546 AV_FRAME_DATA_DISPLAYMATRIX,
2547 sizeof(int32_t) * 9);
2549 return AVERROR(ENOMEM);
2551 av_display_rotation_set((int32_t *)rotation->data, angle);
2552 av_display_matrix_flip((int32_t *)rotation->data,
2553 s->sei_hflip, s->sei_vflip);
/* Start decoding a new frame: clear the per-picture tables (boundary
 * strengths, cbf_luma, is_pcm, slice-address map), acquire a new reference
 * frame, build the frame RPS, attach side data, and immediately try to
 * output an already-finished frame from the DPB. On error the new reference
 * is released (fail path partially elided).
 * NOTE(review): elided listing — error-check lines between the calls are
 * missing from this view. */
2559 static int hevc_frame_start(HEVCContext *s)
2561 HEVCLocalContext *lc = s->HEVClc;
2562 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2563 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2566 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2567 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2568 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2569 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2570 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2573 s->first_nal_type = s->nal_unit_type;
2575 if (s->ps.pps->tiles_enabled_flag)
2576 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2578 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2582 ret = ff_hevc_frame_rps(s);
2584 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2588 s->ref->frame->key_frame = IS_IRAP(s);
2590 ret = set_side_data(s);
/* slice_type: 2=I,1=P,0=B -> AV_PICTURE_TYPE_{I,P,B} */
2594 s->frame->pict_type = 3 - s->sh.slice_type;
2597 ff_hevc_bump_frame(s);
2599 av_frame_unref(s->output_frame);
2600 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* frame-threading setup may be finished now unless a hwaccel is active */
2604 if (!s->avctx->hwaccel)
2605 ff_thread_finish_setup(s->avctx);
/* fail path: drop the newly acquired reference */
2611 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode a single NAL unit: dispatch parameter-set/SEI NALs to their
 * parsers, and for VCL (slice) NALs parse the slice header, handle random-
 * access (RASL skipping, max_ra), start the frame on the first slice, build
 * reference lists, then decode the slice data (hwaccel, WPP or single
 * threaded).
 * NOTE(review): elided listing — case labels, gotos and error checks are
 * missing from this view. */
2616 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2618 HEVCLocalContext *lc = s->HEVClc;
2619 GetBitContext *gb = &lc->gb;
2620 int ctb_addr_ts, ret;
2623 s->nal_unit_type = nal->type;
2624 s->temporal_id = nal->temporal_id;
2626 switch (s->nal_unit_type) {
2628 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2633 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2634 s->apply_defdispwin);
2639 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2643 case NAL_SEI_PREFIX:
2644 case NAL_SEI_SUFFIX:
2645 ret = ff_hevc_decode_nal_sei(s);
/* VCL NAL unit types (slice segments) */
2656 case NAL_BLA_W_RADL:
2658 case NAL_IDR_W_RADL:
2665 ret = hls_slice_header(s);
/* first random-access point handling */
2669 if (s->max_ra == INT_MAX) {
2670 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2674 s->max_ra = INT_MIN;
/* RASL pictures preceding the recovery point are skipped */
2678 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2679 s->poc <= s->max_ra) {
2683 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2684 s->max_ra = INT_MIN;
2687 if (s->sh.first_slice_in_pic_flag) {
2688 ret = hevc_frame_start(s);
2691 } else if (!s->ref) {
2692 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALs of one picture must share the same type */
2696 if (s->nal_unit_type != s->first_nal_type) {
2697 av_log(s->avctx, AV_LOG_ERROR,
2698 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2699 s->first_nal_type, s->nal_unit_type);
2700 return AVERROR_INVALIDDATA;
2703 if (!s->sh.dependent_slice_segment_flag &&
2704 s->sh.slice_type != I_SLICE) {
2705 ret = ff_hevc_slice_rpl(s);
2707 av_log(s->avctx, AV_LOG_WARNING,
2708 "Error constructing the reference lists for the current slice.\n");
2713 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2714 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2719 if (s->avctx->hwaccel) {
2720 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when threads and entry points are available */
2724 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2725 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2727 ctb_addr_ts = hls_slice_data(s);
2728 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2732 if (ctb_addr_ts < 0) {
/* EOS/EOB: next sequence, reset random-access state */
2740 s->seq_decode = (s->seq_decode + 1) & 0xff;
2741 s->max_ra = INT_MAX;
2747 av_log(s->avctx, AV_LOG_INFO,
2748 "Skipping NAL unit %d\n", s->nal_unit_type);
2753 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Split a packet into NAL units, detect EOS/EOB (to reset the sequence on
 * the next packet), then decode each NAL in order. On a per-NAL failure the
 * loop continues (warning only) unless elided error-recognition checks say
 * otherwise; finally report full progress on the current frame for frame
 * threading.
 * NOTE(review): elided listing — braces and some error handling are
 * missing from this view. */
2758 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2763 s->last_eos = s->eos;
2766 /* split the input packet into NAL units, so we know the upper bound on the
2767 * number of slices in the frame */
2768 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2769 s->nal_length_size);
2771 av_log(s->avctx, AV_LOG_ERROR,
2772 "Error splitting the input into NAL units.\n");
/* pre-scan for end-of-sequence/bitstream NALs */
2776 for (i = 0; i < s->pkt.nb_nals; i++) {
2777 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2778 s->pkt.nals[i].type == NAL_EOS_NUT)
2782 /* decode the NAL units */
2783 for (i = 0; i < s->pkt.nb_nals; i++) {
2784 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2786 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2788 av_log(s->avctx, AV_LOG_WARNING,
2789 "Error parsing NAL unit #%d.\n", i);
2795 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2796 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters at the given
 * log level. (Braces and the `int i;` declaration are elided here.) */
2801 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2804 for (i = 0; i < 16; i++)
2805 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums carried in
 * the picture-hash SEI (s->md5). For >8-bit formats the samples are
 * byteswapped into checksum_buf first, since the SEI checksums are computed
 * over little-endian data. Returns 0 on match, AVERROR_INVALIDDATA on
 * mismatch, AVERROR(EINVAL)/ENOMEM on setup failure.
 * NOTE(review): elided listing — some braces/locals are missing. */
2808 static int verify_md5(HEVCContext *s, AVFrame *frame)
2810 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2815 return AVERROR(EINVAL);
/* 1 when samples are wider than 8 bits (2 bytes per sample) */
2817 pixel_shift = desc->comp[0].depth_minus1 > 7;
2819 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2822 /* the checksums are LE, so we have to byteswap for >8bpp formats
2825 if (pixel_shift && !s->checksum_buf) {
2826 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2827 FFMAX3(frame->linesize[0], frame->linesize[1],
2828 frame->linesize[2]));
2829 if (!s->checksum_buf)
2830 return AVERROR(ENOMEM);
/* one MD5 per plane, over coded (not cropped) dimensions */
2834 for (i = 0; frame->data[i]; i++) {
2835 int width = s->avctx->coded_width;
2836 int height = s->avctx->coded_height;
2837 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2838 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2841 av_md5_init(s->md5_ctx);
2842 for (j = 0; j < h; j++) {
2843 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2846 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2847 (const uint16_t *) src, w);
2848 src = s->checksum_buf;
2851 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2853 av_md5_final(s->md5_ctx, md5);
2855 if (!memcmp(md5, s->md5[i], 16)) {
2856 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2857 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2858 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2860 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2861 print_md5(s->avctx, AV_LOG_ERROR, md5);
2862 av_log (s->avctx, AV_LOG_ERROR, " != ");
2863 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2864 av_log (s->avctx, AV_LOG_ERROR, "\n");
2865 return AVERROR_INVALIDDATA;
2869 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Top-level decode callback: an empty packet flushes a buffered frame from
 * the DPB; otherwise decode the packet's NAL units, finish any hwaccel
 * frame, optionally verify the SEI MD5 checksum, and hand out the pending
 * output frame via *got_output (got_output assignments are elided here).
 * NOTE(review): elided listing — braces and some statements are missing. */
2874 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2878 HEVCContext *s = avctx->priv_data;
/* flush path: output a buffered frame even without new input */
2881 ret = ff_hevc_output_frame(s, data, 1);
2890 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2894 if (avctx->hwaccel) {
/* hwaccel failure is logged but decoding of later frames continues */
2895 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2896 av_log(avctx, AV_LOG_ERROR,
2897 "hardware accelerator failed to decode picture\n");
2899 /* verify the SEI checksum */
2900 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2902 ret = verify_md5(s, s->ref->frame);
2903 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2904 ff_hevc_unref_frame(s, s->ref, ~0);
2911 if (s->is_decoded) {
2912 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2916 if (s->output_frame->buf[0]) {
2917 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to src in dst for frame threading: ref the frame
 * buffers and the tab_mvf/rpl side buffers, copy the scalar metadata, and
 * ref the hwaccel private buffer if present. On any allocation failure the
 * partially-built dst is fully unreffed and ENOMEM returned.
 * NOTE(review): elided listing — `goto fail` lines and braces are missing
 * from this view. */
2924 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2928 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2932 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2933 if (!dst->tab_mvf_buf)
2935 dst->tab_mvf = src->tab_mvf;
2937 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2938 if (!dst->rpl_tab_buf)
2940 dst->rpl_tab = src->rpl_tab;
2942 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2946 dst->poc = src->poc;
2947 dst->ctb_count = src->ctb_count;
2948 dst->window = src->window;
2949 dst->flags = src->flags;
2950 dst->sequence = src->sequence;
2952 if (src->hwaccel_picture_private) {
2953 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2954 if (!dst->hwaccel_priv_buf)
2956 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* failure path: release everything acquired so far */
2961 ff_hevc_unref_frame(s, dst, ~0);
2962 return AVERROR(ENOMEM);
/* Free all decoder state: MD5 context, skipped-bytes tracking, CABAC state,
 * SAO buffers, output frame, DPB frames, parameter-set lists, slice-header
 * arrays, per-thread contexts and the NAL packet buffers. Also used as the
 * error path of hevc_init_context(), so every field must be safe to free
 * when only partially initialized.
 * NOTE(review): elided listing — some braces/statements are missing. */
2965 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2967 HEVCContext *s = avctx->priv_data;
2972 av_freep(&s->md5_ctx);
2974 for(i=0; i < s->pkt.nals_allocated; i++) {
2975 av_freep(&s->skipped_bytes_pos_nal[i]);
2977 av_freep(&s->skipped_bytes_pos_nal);
2979 av_freep(&s->cabac_state);
2981 for (i = 0; i < 3; i++) {
2982 av_freep(&s->sao_pixel_buffer_h[i]);
2983 av_freep(&s->sao_pixel_buffer_v[i]);
2985 av_frame_free(&s->output_frame);
2987 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2988 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2989 av_frame_free(&s->DPB[i].frame);
2992 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2993 av_buffer_unref(&s->ps.vps_list[i]);
2994 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2995 av_buffer_unref(&s->ps.sps_list[i]);
2996 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2997 av_buffer_unref(&s->ps.pps_list[i]);
3002 av_freep(&s->sh.entry_point_offset);
3003 av_freep(&s->sh.offset);
3004 av_freep(&s->sh.size);
/* per-thread WPP contexts (index 0 is the main context, freed below) */
3006 for (i = 1; i < s->threads_number; i++) {
3007 HEVCLocalContext *lc = s->HEVClcList[i];
3009 av_freep(&s->HEVClcList[i]);
3010 av_freep(&s->sList[i]);
3013 if (s->HEVClc == s->HEVClcList[0])
3015 av_freep(&s->HEVClcList[0]);
3017 for (i = 0; i < s->pkt.nals_allocated; i++)
3018 av_freep(&s->pkt.nals[i].rbsp_buffer);
3019 av_freep(&s->pkt.nals);
3020 s->pkt.nals_allocated = 0;
/* Allocate the long-lived decoder state: main HEVCLocalContext, CABAC state
 * array, output frame, DPB frame structs, MD5 context; initialize bswap DSP
 * and mark the context initialized. On any failure jumps to a cleanup label
 * (elided) that calls hevc_decode_free() and returns ENOMEM. */
3025 static av_cold int hevc_init_context(AVCodecContext *avctx)
3027 HEVCContext *s = avctx->priv_data;
3032 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3035 s->HEVClcList[0] = s->HEVClc;
3038 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3039 if (!s->cabac_state)
3042 s->output_frame = av_frame_alloc();
3043 if (!s->output_frame)
3046 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3047 s->DPB[i].frame = av_frame_alloc();
3048 if (!s->DPB[i].frame)
3050 s->DPB[i].tf.f = s->DPB[i].frame;
/* no random-access point seen yet */
3053 s->max_ra = INT_MAX;
3055 s->md5_ctx = av_md5_alloc();
3059 ff_bswapdsp_init(&s->bdsp);
3061 s->context_initialized = 1;
3067 hevc_decode_free(avctx);
3068 return AVERROR(ENOMEM);
/* Frame-threading context update: copy decoder state from the source thread
 * context s0 into s — DPB references, VPS/SPS/PPS buffer refs, sequence/
 * POC/random-access bookkeeping and threading parameters — re-applying the
 * SPS when it changed.
 * NOTE(review): elided listing — some statements (e.g. the eos handling
 * suggested by the seq_decode reset near the end) are missing from view. */
3071 static int hevc_update_thread_context(AVCodecContext *dst,
3072 const AVCodecContext *src)
3074 HEVCContext *s = dst->priv_data;
3075 HEVCContext *s0 = src->priv_data;
3078 if (!s->context_initialized) {
3079 ret = hevc_init_context(dst);
3084 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3085 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3086 if (s0->DPB[i].frame->buf[0]) {
3087 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3093 if (s->ps.sps != s0->ps.sps)
/* re-ref all parameter set buffers from the source context */
3095 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3096 av_buffer_unref(&s->ps.vps_list[i]);
3097 if (s0->ps.vps_list[i]) {
3098 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3099 if (!s->ps.vps_list[i])
3100 return AVERROR(ENOMEM);
3104 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3105 av_buffer_unref(&s->ps.sps_list[i]);
3106 if (s0->ps.sps_list[i]) {
3107 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3108 if (!s->ps.sps_list[i])
3109 return AVERROR(ENOMEM);
3113 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3114 av_buffer_unref(&s->ps.pps_list[i]);
3115 if (s0->ps.pps_list[i]) {
3116 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3117 if (!s->ps.pps_list[i])
3118 return AVERROR(ENOMEM);
/* the active SPS changed in the source thread: re-apply it here */
3122 if (s->ps.sps != s0->ps.sps)
3123 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3126 s->seq_decode = s0->seq_decode;
3127 s->seq_output = s0->seq_output;
3128 s->pocTid0 = s0->pocTid0;
3129 s->max_ra = s0->max_ra;
3132 s->is_nalff = s0->is_nalff;
3133 s->nal_length_size = s0->nal_length_size;
3135 s->threads_number = s0->threads_number;
3136 s->threads_type = s0->threads_type;
/* presumably guarded by an (elided) eos check: start a new sequence */
3139 s->seq_decode = (s->seq_decode + 1) & 0xff;
3140 s->max_ra = INT_MAX;
3146 static int hevc_decode_extradata(HEVCContext *s)
3148 AVCodecContext *avctx = s->avctx;
3152 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3154 if (avctx->extradata_size > 3 &&
3155 (avctx->extradata[0] || avctx->extradata[1] ||
3156 avctx->extradata[2] > 1)) {
3157 /* It seems the extradata is encoded as hvcC format.
3158 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3159 * is finalized. When finalized, configurationVersion will be 1 and we
3160 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3161 int i, j, num_arrays, nal_len_size;
3165 bytestream2_skip(&gb, 21);
3166 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3167 num_arrays = bytestream2_get_byte(&gb);
3169 /* nal units in the hvcC always have length coded with 2 bytes,
3170 * so put a fake nal_length_size = 2 while parsing them */
3171 s->nal_length_size = 2;
3173 /* Decode nal units from hvcC. */
3174 for (i = 0; i < num_arrays; i++) {
3175 int type = bytestream2_get_byte(&gb) & 0x3f;
3176 int cnt = bytestream2_get_be16(&gb);
3178 for (j = 0; j < cnt; j++) {
3179 // +2 for the nal size field
3180 int nalsize = bytestream2_peek_be16(&gb) + 2;
3181 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3182 av_log(s->avctx, AV_LOG_ERROR,
3183 "Invalid NAL unit size in extradata.\n");
3184 return AVERROR_INVALIDDATA;
3187 ret = decode_nal_units(s, gb.buffer, nalsize);
3189 av_log(avctx, AV_LOG_ERROR,
3190 "Decoding nal unit %d %d from hvcC failed\n",
3194 bytestream2_skip(&gb, nalsize);
3198 /* Now store right nal length size, that will be used to parse
3200 s->nal_length_size = nal_len_size;
3203 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3208 /* export stream parameters from the first SPS */
3209 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3210 if (s->ps.sps_list[i]) {
3211 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3212 export_stream_params(s->avctx, s, sps);
3220 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3222 HEVCContext *s = avctx->priv_data;
3225 ff_init_cabac_states();
3227 avctx->internal->allocate_progress = 1;
3229 ret = hevc_init_context(avctx);
3233 s->enable_parallel_tiles = 0;
3234 s->picture_struct = 0;
3236 if(avctx->active_thread_type & FF_THREAD_SLICE)
3237 s->threads_number = avctx->thread_count;
3239 s->threads_number = 1;
3241 if (avctx->extradata_size > 0 && avctx->extradata) {
3242 ret = hevc_decode_extradata(s);
3244 hevc_decode_free(avctx);
3249 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3250 s->threads_type = FF_THREAD_FRAME;
3252 s->threads_type = FF_THREAD_SLICE;
3257 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3259 HEVCContext *s = avctx->priv_data;
3262 memset(s, 0, sizeof(*s));
3264 ret = hevc_init_context(avctx);
3271 static void hevc_decode_flush(AVCodecContext *avctx)
3273 HEVCContext *s = avctx->priv_data;
3274 ff_hevc_flush_dpb(s);
3275 s->max_ra = INT_MAX;
/* helpers for the AVOption table below */
#define OFFSET(x) offsetof(HEVCContext, x)
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3281 static const AVProfile profiles[] = {
3282 { FF_PROFILE_HEVC_MAIN, "Main" },
3283 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3284 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3285 { FF_PROFILE_HEVC_REXT, "Rext" },
3286 { FF_PROFILE_UNKNOWN },
3289 static const AVOption options[] = {
3290 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3291 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3292 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3293 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3297 static const AVClass hevc_decoder_class = {
3298 .class_name = "HEVC decoder",
3299 .item_name = av_default_item_name,
3301 .version = LIBAVUTIL_VERSION_INT,
3304 AVCodec ff_hevc_decoder = {
3306 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3307 .type = AVMEDIA_TYPE_VIDEO,
3308 .id = AV_CODEC_ID_HEVC,
3309 .priv_data_size = sizeof(HEVCContext),
3310 .priv_class = &hevc_decoder_class,
3311 .init = hevc_decode_init,
3312 .close = hevc_decode_free,
3313 .decode = hevc_decode_frame,
3314 .flush = hevc_decode_flush,
3315 .update_thread_context = hevc_update_thread_context,
3316 .init_thread_copy = hevc_init_thread_copy,
3317 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3318 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3319 .profiles = NULL_IF_CONFIG_SMALL(profiles),