git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo corresponds to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_mallocz(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146     int luma_log2_weight_denom;
 147
 148     luma_log2_weight_denom = get_ue_golomb_long(gb);
 149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 151     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 152     if (s->ps.sps->chroma_format_idc != 0) {
 153         int delta = get_se_golomb(gb);
 154         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
 155     }
 156
 157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 158         luma_weight_l0_flag[i] = get_bits1(gb);
 159         if (!luma_weight_l0_flag[i]) {
 160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 161             s->sh.luma_offset_l0[i] = 0;
 162         }
 163     }
 164     if (s->ps.sps->chroma_format_idc != 0) {
 165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 166             chroma_weight_l0_flag[i] = get_bits1(gb);
 167     } else {
 168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 169             chroma_weight_l0_flag[i] = 0;
 170     }
 171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 172         if (luma_weight_l0_flag[i]) {
 173             int delta_luma_weight_l0 = get_se_golomb(gb);
 174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 176         }
 177         if (chroma_weight_l0_flag[i]) {
 178             for (j = 0; j < 2; j++) {
 179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 184             }
 185         } else {
 186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 187             s->sh.chroma_offset_l0[i][0] = 0;
 188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 189             s->sh.chroma_offset_l0[i][1] = 0;
 190         }
 191     }
 192     if (s->sh.slice_type == B_SLICE) {
 193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 194             luma_weight_l1_flag[i] = get_bits1(gb);
 195             if (!luma_weight_l1_flag[i]) {
 196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 197                 s->sh.luma_offset_l1[i] = 0;
 198             }
 199         }
 200         if (s->ps.sps->chroma_format_idc != 0) {
 201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 202                 chroma_weight_l1_flag[i] = get_bits1(gb);
 203         } else {
 204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 205                 chroma_weight_l1_flag[i] = 0;
 206         }
 207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 208             if (luma_weight_l1_flag[i]) {
 209                 int delta_luma_weight_l1 = get_se_golomb(gb);
 210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 212             }
 213             if (chroma_weight_l1_flag[i]) {
 214                 for (j = 0; j < 2; j++) {
 215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 220                 }
 221             } else {
 222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 223                 s->sh.chroma_offset_l1[i][0] = 0;
 224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 225                 s->sh.chroma_offset_l1[i][1] = 0;
 226             }
 227         }
 228     }
 229 }
 230
 231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 232 {
 233     const HEVCSPS *sps = s->ps.sps;
 234     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 235     int prev_delta_msb = 0;
 236     unsigned int nb_sps = 0, nb_sh;
 237     int i;
 238
 239     rps->nb_refs = 0;
 240     if (!sps->long_term_ref_pics_present_flag)
 241         return 0;
 242
 243     if (sps->num_long_term_ref_pics_sps > 0)
 244         nb_sps = get_ue_golomb_long(gb);
 245     nb_sh = get_ue_golomb_long(gb);
 246
 247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 248         return AVERROR_INVALIDDATA;
 249
 250     rps->nb_refs = nb_sh + nb_sps;
 251
 252     for (i = 0; i < rps->nb_refs; i++) {
 253         uint8_t delta_poc_msb_present;
 254
 255         if (i < nb_sps) {
 256             uint8_t lt_idx_sps = 0;
 257
 258             if (sps->num_long_term_ref_pics_sps > 1)
 259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 260
 261             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 263         } else {
 264             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 265             rps->used[i] = get_bits1(gb);
 266         }
 267
 268         delta_poc_msb_present = get_bits1(gb);
 269         if (delta_poc_msb_present) {
 270             int delta = get_ue_golomb_long(gb);
 271
 272             if (i && i != nb_sps)
 273                 delta += prev_delta_msb;
 274
 275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 276             prev_delta_msb = delta;
 277         }
 278     }
 279
 280     return 0;
 281 }
 282
 283 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 284                                  const HEVCSPS *sps)
 285 {
 286     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 287     unsigned int num = 0, den = 0;
 288
 289     avctx->pix_fmt             = sps->pix_fmt;
 290     avctx->coded_width         = sps->width;
 291     avctx->coded_height        = sps->height;
 292     avctx->width               = sps->output_width;
 293     avctx->height              = sps->output_height;
 294     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 296     avctx->level               = sps->ptl.general_ptl.level_idc;
 297
 298     ff_set_sar(avctx, sps->vui.sar);
 299
 300     if (sps->vui.video_signal_type_present_flag)
 301         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 302                                                             : AVCOL_RANGE_MPEG;
 303     else
 304         avctx->color_range = AVCOL_RANGE_MPEG;
 305
 306     if (sps->vui.colour_description_present_flag) {
 307         avctx->color_primaries = sps->vui.colour_primaries;
 308         avctx->color_trc       = sps->vui.transfer_characteristic;
 309         avctx->colorspace      = sps->vui.matrix_coeffs;
 310     } else {
 311         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 312         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 313         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 314     }
 315
 316     if (vps->vps_timing_info_present_flag) {
 317         num = vps->vps_num_units_in_tick;
 318         den = vps->vps_time_scale;
 319     } else if (sps->vui.vui_timing_info_present_flag) {
 320         num = sps->vui.vui_num_units_in_tick;
 321         den = sps->vui.vui_time_scale;
 322     }
 323
 324     if (num != 0 && den != 0)
 325         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 326                   num, den, 1 << 30);
 327 }
 328
 329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 330 {
 331     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
 332     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 333     int ret, i;
 334
 335     pic_arrays_free(s);
 336     s->ps.sps = NULL;
 337     s->ps.vps = NULL;
 338
 339     if (!sps)
 340         return 0;
 341
 342     ret = pic_arrays_init(s, sps);
 343     if (ret < 0)
 344         goto fail;
 345
 346     export_stream_params(s->avctx, &s->ps, sps);
 347
 348     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 349 #if CONFIG_HEVC_DXVA2_HWACCEL
 350         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 351 #endif
 352 #if CONFIG_HEVC_D3D11VA_HWACCEL
 353         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 354 #endif
 355 #if CONFIG_HEVC_VAAPI_HWACCEL
 356         *fmt++ = AV_PIX_FMT_VAAPI;
 357 #endif
 358 #if CONFIG_HEVC_VDPAU_HWACCEL
 359         *fmt++ = AV_PIX_FMT_VDPAU;
 360 #endif
 361     }
 362
 363     if (pix_fmt == AV_PIX_FMT_NONE) {
 364         *fmt++ = sps->pix_fmt;
 365         *fmt = AV_PIX_FMT_NONE;
 366
 367         ret = ff_thread_get_format(s->avctx, pix_fmts);
 368         if (ret < 0)
 369             goto fail;
 370         s->avctx->pix_fmt = ret;
 371     }
 372     else {
 373         s->avctx->pix_fmt = pix_fmt;
 374     }
 375
 376     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 377     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 378     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 379
 380     for (i = 0; i < 3; i++) {
 381         av_freep(&s->sao_pixel_buffer_h[i]);
 382         av_freep(&s->sao_pixel_buffer_v[i]);
 383     }
 384
 385     if (sps->sao_enabled && !s->avctx->hwaccel) {
 386         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 387         int c_idx;
 388
 389         for(c_idx = 0; c_idx < c_count; c_idx++) {
 390             int w = sps->width >> sps->hshift[c_idx];
 391             int h = sps->height >> sps->vshift[c_idx];
 392             s->sao_pixel_buffer_h[c_idx] =
 393                 av_malloc((w * 2 * sps->ctb_height) <<
 394                           sps->pixel_shift);
 395             s->sao_pixel_buffer_v[c_idx] =
 396                 av_malloc((h * 2 * sps->ctb_width) <<
 397                           sps->pixel_shift);
 398         }
 399     }
 400
 401     s->ps.sps = sps;
 402     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 403
 404     return 0;
 405
 406 fail:
 407     pic_arrays_free(s);
 408     s->ps.sps = NULL;
 409     return ret;
 410 }
 411
 412 static int hls_slice_header(HEVCContext *s)
 413 {
 414     GetBitContext *gb = &s->HEVClc->gb;
 415     SliceHeader *sh   = &s->sh;
 416     int i, ret;
 417
 418     // Coded parameters
 419     sh->first_slice_in_pic_flag = get_bits1(gb);
 420     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 421         s->seq_decode = (s->seq_decode + 1) & 0xff;
 422         s->max_ra     = INT_MAX;
 423         if (IS_IDR(s))
 424             ff_hevc_clear_refs(s);
 425     }
 426     sh->no_output_of_prior_pics_flag = 0;
 427     if (IS_IRAP(s))
 428         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 429
 430     sh->pps_id = get_ue_golomb_long(gb);
 431     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 432         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 433         return AVERROR_INVALIDDATA;
 434     }
 435     if (!sh->first_slice_in_pic_flag &&
 436         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 437         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 438         return AVERROR_INVALIDDATA;
 439     }
 440     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 441     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 442         sh->no_output_of_prior_pics_flag = 1;
 443
 444     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 445         const HEVCSPS* last_sps = s->ps.sps;
 446         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 447         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 448             if (s->ps.sps->width !=  last_sps->width || s->ps.sps->height != last_sps->height ||
 449                 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
 450                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 451                 sh->no_output_of_prior_pics_flag = 0;
 452         }
 453         ff_hevc_clear_refs(s);
 454         ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
 455         if (ret < 0)
 456             return ret;
 457
 458         s->seq_decode = (s->seq_decode + 1) & 0xff;
 459         s->max_ra     = INT_MAX;
 460     }
 461
 462     sh->dependent_slice_segment_flag = 0;
 463     if (!sh->first_slice_in_pic_flag) {
 464         int slice_address_length;
 465
 466         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 467             sh->dependent_slice_segment_flag = get_bits1(gb);
 468
 469         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 470                                             s->ps.sps->ctb_height);
 471         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 472         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 473             av_log(s->avctx, AV_LOG_ERROR,
 474                    "Invalid slice segment address: %u.\n",
 475                    sh->slice_segment_addr);
 476             return AVERROR_INVALIDDATA;
 477         }
 478
 479         if (!sh->dependent_slice_segment_flag) {
 480             sh->slice_addr = sh->slice_segment_addr;
 481             s->slice_idx++;
 482         }
 483     } else {
 484         sh->slice_segment_addr = sh->slice_addr = 0;
 485         s->slice_idx           = 0;
 486         s->slice_initialized   = 0;
 487     }
 488
 489     if (!sh->dependent_slice_segment_flag) {
 490         s->slice_initialized = 0;
 491
 492         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 493             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 494
 495         sh->slice_type = get_ue_golomb_long(gb);
 496         if (!(sh->slice_type == I_SLICE ||
 497               sh->slice_type == P_SLICE ||
 498               sh->slice_type == B_SLICE)) {
 499             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 500                    sh->slice_type);
 501             return AVERROR_INVALIDDATA;
 502         }
 503         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 504             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 505             return AVERROR_INVALIDDATA;
 506         }
 507
 508         // when flag is not present, picture is inferred to be output
 509         sh->pic_output_flag = 1;
 510         if (s->ps.pps->output_flag_present_flag)
 511             sh->pic_output_flag = get_bits1(gb);
 512
 513         if (s->ps.sps->separate_colour_plane_flag)
 514             sh->colour_plane_id = get_bits(gb, 2);
 515
 516         if (!IS_IDR(s)) {
 517             int poc, pos;
 518
 519             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 520             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 521             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 522                 av_log(s->avctx, AV_LOG_WARNING,
 523                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 524                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 525                     return AVERROR_INVALIDDATA;
 526                 poc = s->poc;
 527             }
 528             s->poc = poc;
 529
 530             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 531             pos = get_bits_left(gb);
 532             if (!sh->short_term_ref_pic_set_sps_flag) {
 533                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 534                 if (ret < 0)
 535                     return ret;
 536
 537                 sh->short_term_rps = &sh->slice_rps;
 538             } else {
 539                 int numbits, rps_idx;
 540
 541                 if (!s->ps.sps->nb_st_rps) {
 542                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 543                     return AVERROR_INVALIDDATA;
 544                 }
 545
 546                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 547                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 548                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 549             }
 550             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 551
 552             pos = get_bits_left(gb);
 553             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 554             if (ret < 0) {
 555                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 556                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 557                     return AVERROR_INVALIDDATA;
 558             }
 559             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 560
 561             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 562                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 563             else
 564                 sh->slice_temporal_mvp_enabled_flag = 0;
 565         } else {
 566             s->sh.short_term_rps = NULL;
 567             s->poc               = 0;
 568         }
 569
 570         /* 8.3.1 */
 571         if (s->temporal_id == 0 &&
 572             s->nal_unit_type != NAL_TRAIL_N &&
 573             s->nal_unit_type != NAL_TSA_N   &&
 574             s->nal_unit_type != NAL_STSA_N  &&
 575             s->nal_unit_type != NAL_RADL_N  &&
 576             s->nal_unit_type != NAL_RADL_R  &&
 577             s->nal_unit_type != NAL_RASL_N  &&
 578             s->nal_unit_type != NAL_RASL_R)
 579             s->pocTid0 = s->poc;
 580
 581         if (s->ps.sps->sao_enabled) {
 582             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 583             if (s->ps.sps->chroma_format_idc) {
 584                 sh->slice_sample_adaptive_offset_flag[1] =
 585                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 586             }
 587         } else {
 588             sh->slice_sample_adaptive_offset_flag[0] = 0;
 589             sh->slice_sample_adaptive_offset_flag[1] = 0;
 590             sh->slice_sample_adaptive_offset_flag[2] = 0;
 591         }
 592
 593         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 594         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 595             int nb_refs;
 596
 597             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 598             if (sh->slice_type == B_SLICE)
 599                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 600
 601             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 602                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 603                 if (sh->slice_type == B_SLICE)
 604                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 605             }
 606             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 607                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 608                        sh->nb_refs[L0], sh->nb_refs[L1]);
 609                 return AVERROR_INVALIDDATA;
 610             }
 611
 612             sh->rpl_modification_flag[0] = 0;
 613             sh->rpl_modification_flag[1] = 0;
 614             nb_refs = ff_hevc_frame_nb_refs(s);
 615             if (!nb_refs) {
 616                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 617                 return AVERROR_INVALIDDATA;
 618             }
 619
 620             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 621                 sh->rpl_modification_flag[0] = get_bits1(gb);
 622                 if (sh->rpl_modification_flag[0]) {
 623                     for (i = 0; i < sh->nb_refs[L0]; i++)
 624                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 625                 }
 626
 627                 if (sh->slice_type == B_SLICE) {
 628                     sh->rpl_modification_flag[1] = get_bits1(gb);
 629                     if (sh->rpl_modification_flag[1] == 1)
 630                         for (i = 0; i < sh->nb_refs[L1]; i++)
 631                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 632                 }
 633             }
 634
 635             if (sh->slice_type == B_SLICE)
 636                 sh->mvd_l1_zero_flag = get_bits1(gb);
 637
 638             if (s->ps.pps->cabac_init_present_flag)
 639                 sh->cabac_init_flag = get_bits1(gb);
 640             else
 641                 sh->cabac_init_flag = 0;
 642
 643             sh->collocated_ref_idx = 0;
 644             if (sh->slice_temporal_mvp_enabled_flag) {
 645                 sh->collocated_list = L0;
 646                 if (sh->slice_type == B_SLICE)
 647                     sh->collocated_list = !get_bits1(gb);
 648
 649                 if (sh->nb_refs[sh->collocated_list] > 1) {
 650                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 651                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 652                         av_log(s->avctx, AV_LOG_ERROR,
 653                                "Invalid collocated_ref_idx: %d.\n",
 654                                sh->collocated_ref_idx);
 655                         return AVERROR_INVALIDDATA;
 656                     }
 657                 }
 658             }
 659
 660             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 661                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 662                 pred_weight_table(s, gb);
 663             }
 664
 665             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 666             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 667                 av_log(s->avctx, AV_LOG_ERROR,
 668                        "Invalid number of merging MVP candidates: %d.\n",
 669                        sh->max_num_merge_cand);
 670                 return AVERROR_INVALIDDATA;
 671             }
 672         }
 673
 674         sh->slice_qp_delta = get_se_golomb(gb);
 675
 676         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 677             sh->slice_cb_qp_offset = get_se_golomb(gb);
 678             sh->slice_cr_qp_offset = get_se_golomb(gb);
 679         } else {
 680             sh->slice_cb_qp_offset = 0;
 681             sh->slice_cr_qp_offset = 0;
 682         }
 683
 684         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 685             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 686         else
 687             sh->cu_chroma_qp_offset_enabled_flag = 0;
 688
 689         if (s->ps.pps->deblocking_filter_control_present_flag) {
 690             int deblocking_filter_override_flag = 0;
 691
 692             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 693                 deblocking_filter_override_flag = get_bits1(gb);
 694
 695             if (deblocking_filter_override_flag) {
 696                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 697                 if (!sh->disable_deblocking_filter_flag) {
 698                     sh->beta_offset = get_se_golomb(gb) * 2;
 699                     sh->tc_offset   = get_se_golomb(gb) * 2;
 700                 }
 701             } else {
 702                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 703                 sh->beta_offset                    = s->ps.pps->beta_offset;
 704                 sh->tc_offset                      = s->ps.pps->tc_offset;
 705             }
 706         } else {
 707             sh->disable_deblocking_filter_flag = 0;
 708             sh->beta_offset                    = 0;
 709             sh->tc_offset                      = 0;
 710         }
 711
 712         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 713             (sh->slice_sample_adaptive_offset_flag[0] ||
 714              sh->slice_sample_adaptive_offset_flag[1] ||
 715              !sh->disable_deblocking_filter_flag)) {
 716             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 717         } else {
 718             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 719         }
 720     } else if (!s->slice_initialized) {
 721         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 722         return AVERROR_INVALIDDATA;
 723     }
 724
 725     sh->num_entry_point_offsets = 0;
 726     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 727         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 728         // It would be possible to bound this tighter but this here is simpler
 729         if (num_entry_point_offsets > get_bits_left(gb)) {
 730             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 731             return AVERROR_INVALIDDATA;
 732         }
 733
 734         sh->num_entry_point_offsets = num_entry_point_offsets;
 735         if (sh->num_entry_point_offsets > 0) {
 736             int offset_len = get_ue_golomb_long(gb) + 1;
 737
 738             if (offset_len < 1 || offset_len > 32) {
 739                 sh->num_entry_point_offsets = 0;
 740                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 741                 return AVERROR_INVALIDDATA;
 742             }
 743
 744             av_freep(&sh->entry_point_offset);
 745             av_freep(&sh->offset);
 746             av_freep(&sh->size);
 747             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 748             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 749             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 750             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 751                 sh->num_entry_point_offsets = 0;
 752                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 753                 return AVERROR(ENOMEM);
 754             }
 755             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 756                 unsigned val = get_bits_long(gb, offset_len);
 757                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 758             }
 759             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 760                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 761                 s->threads_number = 1;
 762             } else
 763                 s->enable_parallel_tiles = 0;
 764         } else
 765             s->enable_parallel_tiles = 0;
 766     }
 767
 768     if (s->ps.pps->slice_header_extension_present_flag) {
 769         unsigned int length = get_ue_golomb_long(gb);
 770         if (length*8LL > get_bits_left(gb)) {
 771             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 772             return AVERROR_INVALIDDATA;
 773         }
 774         for (i = 0; i < length; i++)
 775             skip_bits(gb, 8);  // slice_header_extension_data_byte
 776     }
 777
 778     // Inferred parameters
 779     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 780     if (sh->slice_qp > 51 ||
 781         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 782         av_log(s->avctx, AV_LOG_ERROR,
 783                "The slice_qp %d is outside the valid range "
 784                "[%d, 51].\n",
 785                sh->slice_qp,
 786                -s->ps.sps->qp_bd_offset);
 787         return AVERROR_INVALIDDATA;
 788     }
 789
 790     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 791
 792     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 793         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 794         return AVERROR_INVALIDDATA;
 795     }
 796
 797     if (get_bits_left(gb) < 0) {
 798         av_log(s->avctx, AV_LOG_ERROR,
 799                "Overread slice header by %d bits\n", -get_bits_left(gb));
 800         return AVERROR_INVALIDDATA;
 801     }
 802
 803     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 804
 805     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 806         s->HEVClc->qp_y = s->sh.slice_qp;
 807
 808     s->slice_initialized = 1;
 809     s->HEVClc->tu.cu_qp_offset_cb = 0;
 810     s->HEVClc->tu.cu_qp_offset_cr = 0;
 811
 812     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == NAL_CRA_NUT && s->last_eos);
 813
 814     return 0;
 815 }
 816
 817 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 818
 819 #define SET_SAO(elem, value)                            \
 820 do {                                                    \
 821     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 822         sao->elem = value;                              \
 823     else if (sao_merge_left_flag)                       \
 824         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 825     else if (sao_merge_up_flag)                         \
 826         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 827     else                                                \
 828         sao->elem = 0;                                  \
 829 } while (0)
 830
 831 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 832 {
 833     HEVCLocalContext *lc    = s->HEVClc;
 834     int sao_merge_left_flag = 0;
 835     int sao_merge_up_flag   = 0;
 836     SAOParams *sao          = &CTB(s->sao, rx, ry);
 837     int c_idx, i;
 838
 839     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 840         s->sh.slice_sample_adaptive_offset_flag[1]) {
 841         if (rx > 0) {
 842             if (lc->ctb_left_flag)
 843                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 844         }
 845         if (ry > 0 && !sao_merge_left_flag) {
 846             if (lc->ctb_up_flag)
 847                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 848         }
 849     }
 850
 851     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
 852         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
 853                                                  s->ps.pps->log2_sao_offset_scale_chroma;
 854
 855         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 856             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 857             continue;
 858         }
 859
 860         if (c_idx == 2) {
 861             sao->type_idx[2] = sao->type_idx[1];
 862             sao->eo_class[2] = sao->eo_class[1];
 863         } else {
 864             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 865         }
 866
 867         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 868             continue;
 869
 870         for (i = 0; i < 4; i++)
 871             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 872
 873         if (sao->type_idx[c_idx] == SAO_BAND) {
 874             for (i = 0; i < 4; i++) {
 875                 if (sao->offset_abs[c_idx][i]) {
 876                     SET_SAO(offset_sign[c_idx][i],
 877                             ff_hevc_sao_offset_sign_decode(s));
 878                 } else {
 879                     sao->offset_sign[c_idx][i] = 0;
 880                 }
 881             }
 882             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 883         } else if (c_idx != 2) {
 884             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 885         }
 886
 887         // Inferred parameters
 888         sao->offset_val[c_idx][0] = 0;
 889         for (i = 0; i < 4; i++) {
 890             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 891             if (sao->type_idx[c_idx] == SAO_EDGE) {
 892                 if (i > 1)
 893                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 894             } else if (sao->offset_sign[c_idx][i]) {
 895                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 896             }
 897             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 898         }
 899     }
 900 }
 901
 902 #undef SET_SAO
 903 #undef CTB
 904
 905 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 906     HEVCLocalContext *lc    = s->HEVClc;
 907     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 908
 909     if (log2_res_scale_abs_plus1 !=  0) {
 910         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 911         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 912                                (1 - 2 * res_scale_sign_flag);
 913     } else {
 914         lc->tu.res_scale_val = 0;
 915     }
 916
 917
 918     return 0;
 919 }
 920
 921 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 922                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 923                               int log2_cb_size, int log2_trafo_size,
 924                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 925 {
 926     HEVCLocalContext *lc = s->HEVClc;
 927     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
 928     int i;
 929
 930     if (lc->cu.pred_mode == MODE_INTRA) {
 931         int trafo_size = 1 << log2_trafo_size;
 932         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 933
 934         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 935     }
 936
 937     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 938         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 939         int scan_idx   = SCAN_DIAG;
 940         int scan_idx_c = SCAN_DIAG;
 941         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 942                          (s->ps.sps->chroma_format_idc == 2 &&
 943                          (cbf_cb[1] || cbf_cr[1]));
 944
 945         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 946             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 947             if (lc->tu.cu_qp_delta != 0)
 948                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 949                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 950             lc->tu.is_cu_qp_delta_coded = 1;
 951
 952             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
 953                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
 954                 av_log(s->avctx, AV_LOG_ERROR,
 955                        "The cu_qp_delta %d is outside the valid range "
 956                        "[%d, %d].\n",
 957                        lc->tu.cu_qp_delta,
 958                        -(26 + s->ps.sps->qp_bd_offset / 2),
 959                         (25 + s->ps.sps->qp_bd_offset / 2));
 960                 return AVERROR_INVALIDDATA;
 961             }
 962
 963             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 964         }
 965
 966         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 967             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 968             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 969             if (cu_chroma_qp_offset_flag) {
 970                 int cu_chroma_qp_offset_idx  = 0;
 971                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
 972                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 973                     av_log(s->avctx, AV_LOG_ERROR,
 974                         "cu_chroma_qp_offset_idx not yet tested.\n");
 975                 }
 976                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 977                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 978             } else {
 979                 lc->tu.cu_qp_offset_cb = 0;
 980                 lc->tu.cu_qp_offset_cr = 0;
 981             }
 982             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 983         }
 984
 985         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 986             if (lc->tu.intra_pred_mode >= 6 &&
 987                 lc->tu.intra_pred_mode <= 14) {
 988                 scan_idx = SCAN_VERT;
 989             } else if (lc->tu.intra_pred_mode >= 22 &&
 990                        lc->tu.intra_pred_mode <= 30) {
 991                 scan_idx = SCAN_HORIZ;
 992             }
 993
 994             if (lc->tu.intra_pred_mode_c >=  6 &&
 995                 lc->tu.intra_pred_mode_c <= 14) {
 996                 scan_idx_c = SCAN_VERT;
 997             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 998                        lc->tu.intra_pred_mode_c <= 30) {
 999                 scan_idx_c = SCAN_HORIZ;
1000             }
1001         }
1002
1003         lc->tu.cross_pf = 0;
1004
1005         if (cbf_luma)
1006             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1007         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1008             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1009             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1010             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1011                                 (lc->cu.pred_mode == MODE_INTER ||
1012                                  (lc->tu.chroma_mode_c ==  4)));
1013
1014             if (lc->tu.cross_pf) {
1015                 hls_cross_component_pred(s, 0);
1016             }
1017             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1018                 if (lc->cu.pred_mode == MODE_INTRA) {
1019                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1020                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1021                 }
1022                 if (cbf_cb[i])
1023                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1024                                                 log2_trafo_size_c, scan_idx_c, 1);
1025                 else
1026                     if (lc->tu.cross_pf) {
1027                         ptrdiff_t stride = s->frame->linesize[1];
1028                         int hshift = s->ps.sps->hshift[1];
1029                         int vshift = s->ps.sps->vshift[1];
1030                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1031                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1032                         int size = 1 << log2_trafo_size_c;
1033
1034                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1035                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1036                         for (i = 0; i < (size * size); i++) {
1037                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1038                         }
1039                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1040                     }
1041             }
1042
1043             if (lc->tu.cross_pf) {
1044                 hls_cross_component_pred(s, 1);
1045             }
1046             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1047                 if (lc->cu.pred_mode == MODE_INTRA) {
1048                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1049                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1050                 }
1051                 if (cbf_cr[i])
1052                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1053                                                 log2_trafo_size_c, scan_idx_c, 2);
1054                 else
1055                     if (lc->tu.cross_pf) {
1056                         ptrdiff_t stride = s->frame->linesize[2];
1057                         int hshift = s->ps.sps->hshift[2];
1058                         int vshift = s->ps.sps->vshift[2];
1059                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1060                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1061                         int size = 1 << log2_trafo_size_c;
1062
1063                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1064                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1065                         for (i = 0; i < (size * size); i++) {
1066                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1067                         }
1068                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1069                     }
1070             }
1071         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1072             int trafo_size_h = 1 << (log2_trafo_size + 1);
1073             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1074             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1075                 if (lc->cu.pred_mode == MODE_INTRA) {
1076                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1077                                                     trafo_size_h, trafo_size_v);
1078                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1079                 }
1080                 if (cbf_cb[i])
1081                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1082                                                 log2_trafo_size, scan_idx_c, 1);
1083             }
1084             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1085                 if (lc->cu.pred_mode == MODE_INTRA) {
1086                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1087                                                 trafo_size_h, trafo_size_v);
1088                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1089                 }
1090                 if (cbf_cr[i])
1091                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1092                                                 log2_trafo_size, scan_idx_c, 2);
1093             }
1094         }
1095     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1096         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1097             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1098             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1099             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1100             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1101             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1102             if (s->ps.sps->chroma_format_idc == 2) {
1103                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1104                                                 trafo_size_h, trafo_size_v);
1105                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1106                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1107             }
1108         } else if (blk_idx == 3) {
1109             int trafo_size_h = 1 << (log2_trafo_size + 1);
1110             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1111             ff_hevc_set_neighbour_available(s, xBase, yBase,
1112                                             trafo_size_h, trafo_size_v);
1113             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1114             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1115             if (s->ps.sps->chroma_format_idc == 2) {
1116                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1117                                                 trafo_size_h, trafo_size_v);
1118                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1119                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1120             }
1121         }
1122     }
1123
1124     return 0;
1125 }
1126
1127 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1128 {
1129     int cb_size          = 1 << log2_cb_size;
1130     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1131
1132     int min_pu_width     = s->ps.sps->min_pu_width;
1133     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1134     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1135     int i, j;
1136
1137     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1138         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1139             s->is_pcm[i + j * min_pu_width] = 2;
1140 }
1141
1142 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1143                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1144                               int log2_cb_size, int log2_trafo_size,
1145                               int trafo_depth, int blk_idx,
1146                               const int *base_cbf_cb, const int *base_cbf_cr)
1147 {
1148     HEVCLocalContext *lc = s->HEVClc;
1149     uint8_t split_transform_flag;
1150     int cbf_cb[2];
1151     int cbf_cr[2];
1152     int ret;
1153
1154     cbf_cb[0] = base_cbf_cb[0];
1155     cbf_cb[1] = base_cbf_cb[1];
1156     cbf_cr[0] = base_cbf_cr[0];
1157     cbf_cr[1] = base_cbf_cr[1];
1158
1159     if (lc->cu.intra_split_flag) {
1160         if (trafo_depth == 1) {
1161             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1162             if (s->ps.sps->chroma_format_idc == 3) {
1163                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1164                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1165             } else {
1166                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1167                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1168             }
1169         }
1170     } else {
1171         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1172         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1173         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1174     }
1175
1176     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1177         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1178         trafo_depth     < lc->cu.max_trafo_depth       &&
1179         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1180         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1181     } else {
1182         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1183                           lc->cu.pred_mode == MODE_INTER &&
1184                           lc->cu.part_mode != PART_2Nx2N &&
1185                           trafo_depth == 0;
1186
1187         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1188                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1189                                inter_split;
1190     }
1191
1192     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1193         if (trafo_depth == 0 || cbf_cb[0]) {
1194             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1195             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1196                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1197             }
1198         }
1199
1200         if (trafo_depth == 0 || cbf_cr[0]) {
1201             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1202             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1203                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1204             }
1205         }
1206     }
1207
1208     if (split_transform_flag) {
1209         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1210         const int x1 = x0 + trafo_size_split;
1211         const int y1 = y0 + trafo_size_split;
1212
1213 #define SUBDIVIDE(x, y, idx)                                                    \
1214 do {                                                                            \
1215     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1216                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1217                              cbf_cb, cbf_cr);                                   \
1218     if (ret < 0)                                                                \
1219         return ret;                                                             \
1220 } while (0)
1221
1222         SUBDIVIDE(x0, y0, 0);
1223         SUBDIVIDE(x1, y0, 1);
1224         SUBDIVIDE(x0, y1, 2);
1225         SUBDIVIDE(x1, y1, 3);
1226
1227 #undef SUBDIVIDE
1228     } else {
1229         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1230         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1231         int min_tu_width     = s->ps.sps->min_tb_width;
1232         int cbf_luma         = 1;
1233
1234         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1235             cbf_cb[0] || cbf_cr[0] ||
1236             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1237             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1238         }
1239
1240         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1241                                  log2_cb_size, log2_trafo_size,
1242                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1243         if (ret < 0)
1244             return ret;
1245         // TODO: store cbf_luma somewhere else
1246         if (cbf_luma) {
1247             int i, j;
1248             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1249                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1250                     int x_tu = (x0 + j) >> log2_min_tu_size;
1251                     int y_tu = (y0 + i) >> log2_min_tu_size;
1252                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1253                 }
1254         }
1255         if (!s->sh.disable_deblocking_filter_flag) {
1256             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1257             if (s->ps.pps->transquant_bypass_enable_flag &&
1258                 lc->cu.cu_transquant_bypass_flag)
1259                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1260         }
1261     }
1262     return 0;
1263 }
1264
1265 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1266 {
1267     HEVCLocalContext *lc = s->HEVClc;
1268     GetBitContext gb;
1269     int cb_size   = 1 << log2_cb_size;
1270     int stride0   = s->frame->linesize[0];
1271     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1272     int   stride1 = s->frame->linesize[1];
1273     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1274     int   stride2 = s->frame->linesize[2];
1275     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1276
1277     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1278                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1279                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1280                           s->ps.sps->pcm.bit_depth_chroma;
1281     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1282     int ret;
1283
1284     if (!s->sh.disable_deblocking_filter_flag)
1285         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1286
1287     ret = init_get_bits(&gb, pcm, length);
1288     if (ret < 0)
1289         return ret;
1290
1291     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1292     if (s->ps.sps->chroma_format_idc) {
1293         s->hevcdsp.put_pcm(dst1, stride1,
1294                            cb_size >> s->ps.sps->hshift[1],
1295                            cb_size >> s->ps.sps->vshift[1],
1296                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1297         s->hevcdsp.put_pcm(dst2, stride2,
1298                            cb_size >> s->ps.sps->hshift[2],
1299                            cb_size >> s->ps.sps->vshift[2],
1300                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1301     }
1302
1303     return 0;
1304 }
1305
1306 /**
1307  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1308  *
1309  * @param s HEVC decoding context
1310  * @param dst target buffer for block data at block position
1311  * @param dststride stride of the dst buffer
1312  * @param ref reference picture buffer at origin (0, 0)
1313  * @param mv motion vector (relative to block position) to get pixel data from
1314  * @param x_off horizontal position of block from origin (0, 0)
1315  * @param y_off vertical position of block from origin (0, 0)
1316  * @param block_w width of block
1317  * @param block_h height of block
1318  * @param luma_weight weighting factor applied to the luma prediction
1319  * @param luma_offset additive offset applied to the luma prediction value
1320  */
1321
1322 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1323                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1324                         int block_w, int block_h, int luma_weight, int luma_offset)
1325 {
1326     HEVCLocalContext *lc = s->HEVClc;
1327     uint8_t *src         = ref->data[0];
1328     ptrdiff_t srcstride  = ref->linesize[0];
1329     int pic_width        = s->ps.sps->width;
1330     int pic_height       = s->ps.sps->height;
1331     int mx               = mv->x & 3;
1332     int my               = mv->y & 3;
1333     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1334                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1335     int idx              = ff_hevc_pel_weight[block_w];
1336
1337     x_off += mv->x >> 2;
1338     y_off += mv->y >> 2;
1339     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1340
1341     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1342         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1343         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1344         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1345         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1346         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1347
1348         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1349                                  edge_emu_stride, srcstride,
1350                                  block_w + QPEL_EXTRA,
1351                                  block_h + QPEL_EXTRA,
1352                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1353                                  pic_width, pic_height);
1354         src = lc->edge_emu_buffer + buf_offset;
1355         srcstride = edge_emu_stride;
1356     }
1357
1358     if (!weight_flag)
1359         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1360                                                       block_h, mx, my, block_w);
1361     else
1362         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1363                                                         block_h, s->sh.luma_log2_weight_denom,
1364                                                         luma_weight, luma_offset, mx, my, block_w);
1365 }
1366
1367 /**
1368  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1369  *
1370  * @param s HEVC decoding context
1371  * @param dst target buffer for block data at block position
1372  * @param dststride stride of the dst buffer
1373  * @param ref0 reference picture0 buffer at origin (0, 0)
1374  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1375  * @param x_off horizontal position of block from origin (0, 0)
1376  * @param y_off vertical position of block from origin (0, 0)
1377  * @param block_w width of block
1378  * @param block_h height of block
1379  * @param ref1 reference picture1 buffer at origin (0, 0)
1380  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1381  * @param current_mv current motion vector structure
1382  */
1383  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1384                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1385                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1386 {
1387     HEVCLocalContext *lc = s->HEVClc;
1388     ptrdiff_t src0stride  = ref0->linesize[0];
1389     ptrdiff_t src1stride  = ref1->linesize[0];
1390     int pic_width        = s->ps.sps->width;
1391     int pic_height       = s->ps.sps->height;
1392     int mx0              = mv0->x & 3;
1393     int my0              = mv0->y & 3;
1394     int mx1              = mv1->x & 3;
1395     int my1              = mv1->y & 3;
1396     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1397                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1398     int x_off0           = x_off + (mv0->x >> 2);
1399     int y_off0           = y_off + (mv0->y >> 2);
1400     int x_off1           = x_off + (mv1->x >> 2);
1401     int y_off1           = y_off + (mv1->y >> 2);
1402     int idx              = ff_hevc_pel_weight[block_w];
1403
1404     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1405     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1406
1407     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1408         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1409         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1410         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1411         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1412         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1413
1414         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1415                                  edge_emu_stride, src0stride,
1416                                  block_w + QPEL_EXTRA,
1417                                  block_h + QPEL_EXTRA,
1418                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1419                                  pic_width, pic_height);
1420         src0 = lc->edge_emu_buffer + buf_offset;
1421         src0stride = edge_emu_stride;
1422     }
1423
1424     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1425         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1426         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1427         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1428         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1429         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1430
1431         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1432                                  edge_emu_stride, src1stride,
1433                                  block_w + QPEL_EXTRA,
1434                                  block_h + QPEL_EXTRA,
1435                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1436                                  pic_width, pic_height);
1437         src1 = lc->edge_emu_buffer2 + buf_offset;
1438         src1stride = edge_emu_stride;
1439     }
1440
1441     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1442                                                 block_h, mx0, my0, block_w);
1443     if (!weight_flag)
1444         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1445                                                        block_h, mx1, my1, block_w);
1446     else
1447         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1448                                                          block_h, s->sh.luma_log2_weight_denom,
1449                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1450                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1451                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1452                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1453                                                          mx1, my1, block_w);
1454
1455 }
1456
1457 /**
1458  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1459  *
1460  * @param s HEVC decoding context
1461  * @param dst1 target buffer for block data at block position (U plane)
1462  * @param dst2 target buffer for block data at block position (V plane)
1463  * @param dststride stride of the dst1 and dst2 buffers
1464  * @param ref reference picture buffer at origin (0, 0)
1465  * @param mv motion vector (relative to block position) to get pixel data from
1466  * @param x_off horizontal position of block from origin (0, 0)
1467  * @param y_off vertical position of block from origin (0, 0)
1468  * @param block_w width of block
1469  * @param block_h height of block
1470  * @param chroma_weight weighting factor applied to the chroma prediction
1471  * @param chroma_offset additive offset applied to the chroma prediction value
1472  */
1473
1474 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1475                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1476                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1477 {
1478     HEVCLocalContext *lc = s->HEVClc;
1479     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1480     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1481     const Mv *mv         = &current_mv->mv[reflist];
1482     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1483                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1484     int idx              = ff_hevc_pel_weight[block_w];
1485     int hshift           = s->ps.sps->hshift[1];
1486     int vshift           = s->ps.sps->vshift[1];
1487     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1488     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1489     intptr_t _mx         = mx << (1 - hshift);
1490     intptr_t _my         = my << (1 - vshift);
1491
1492     x_off += mv->x >> (2 + hshift);
1493     y_off += mv->y >> (2 + vshift);
1494     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1495
1496     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1497         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1498         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1499         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1500         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1501         int buf_offset0 = EPEL_EXTRA_BEFORE *
1502                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1503         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1504                                  edge_emu_stride, srcstride,
1505                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1506                                  x_off - EPEL_EXTRA_BEFORE,
1507                                  y_off - EPEL_EXTRA_BEFORE,
1508                                  pic_width, pic_height);
1509
1510         src0 = lc->edge_emu_buffer + buf_offset0;
1511         srcstride = edge_emu_stride;
1512     }
1513     if (!weight_flag)
1514         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1515                                                   block_h, _mx, _my, block_w);
1516     else
1517         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1518                                                         block_h, s->sh.chroma_log2_weight_denom,
1519                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1520 }
1521
1522 /**
1523  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1524  *
1525  * @param s HEVC decoding context
1526  * @param dst target buffer for block data at block position
1527  * @param dststride stride of the dst buffer
1528  * @param ref0 reference picture0 buffer at origin (0, 0)
1529  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1530  * @param x_off horizontal position of block from origin (0, 0)
1531  * @param y_off vertical position of block from origin (0, 0)
1532  * @param block_w width of block
1533  * @param block_h height of block
1534  * @param ref1 reference picture1 buffer at origin (0, 0)
1535  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1536  * @param current_mv current motion vector structure
1537  * @param cidx chroma component(cb, cr)
1538  */
1539 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1540                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1541 {
1542     HEVCLocalContext *lc = s->HEVClc;
1543     uint8_t *src1        = ref0->data[cidx+1];
1544     uint8_t *src2        = ref1->data[cidx+1];
1545     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1546     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1547     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1548                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1549     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1550     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1551     Mv *mv0              = &current_mv->mv[0];
1552     Mv *mv1              = &current_mv->mv[1];
1553     int hshift = s->ps.sps->hshift[1];
1554     int vshift = s->ps.sps->vshift[1];
1555
1556     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1557     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1558     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1559     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1560     intptr_t _mx0 = mx0 << (1 - hshift);
1561     intptr_t _my0 = my0 << (1 - vshift);
1562     intptr_t _mx1 = mx1 << (1 - hshift);
1563     intptr_t _my1 = my1 << (1 - vshift);
1564
1565     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1566     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1567     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1568     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1569     int idx = ff_hevc_pel_weight[block_w];
1570     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1571     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1572
1573     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1574         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1575         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1576         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1577         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1578         int buf_offset1 = EPEL_EXTRA_BEFORE *
1579                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1580
1581         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1582                                  edge_emu_stride, src1stride,
1583                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1584                                  x_off0 - EPEL_EXTRA_BEFORE,
1585                                  y_off0 - EPEL_EXTRA_BEFORE,
1586                                  pic_width, pic_height);
1587
1588         src1 = lc->edge_emu_buffer + buf_offset1;
1589         src1stride = edge_emu_stride;
1590     }
1591
1592     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1593         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1594         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1595         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1596         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1597         int buf_offset1 = EPEL_EXTRA_BEFORE *
1598                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1599
1600         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1601                                  edge_emu_stride, src2stride,
1602                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603                                  x_off1 - EPEL_EXTRA_BEFORE,
1604                                  y_off1 - EPEL_EXTRA_BEFORE,
1605                                  pic_width, pic_height);
1606
1607         src2 = lc->edge_emu_buffer2 + buf_offset1;
1608         src2stride = edge_emu_stride;
1609     }
1610
1611     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1612                                                 block_h, _mx0, _my0, block_w);
1613     if (!weight_flag)
1614         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1615                                                        src2, src2stride, lc->tmp,
1616                                                        block_h, _mx1, _my1, block_w);
1617     else
1618         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1619                                                          src2, src2stride, lc->tmp,
1620                                                          block_h,
1621                                                          s->sh.chroma_log2_weight_denom,
1622                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1623                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1624                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1625                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1626                                                          _mx1, _my1, block_w);
1627 }
1628
1629 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1630                                 const Mv *mv, int y0, int height)
1631 {
1632     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1633
1634     if (s->threads_type == FF_THREAD_FRAME )
1635         ff_thread_await_progress(&ref->tf, y, 0);
1636 }
1637
1638 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1639                                   int nPbH, int log2_cb_size, int part_idx,
1640                                   int merge_idx, MvField *mv)
1641 {
1642     HEVCLocalContext *lc = s->HEVClc;
1643     enum InterPredIdc inter_pred_idc = PRED_L0;
1644     int mvp_flag;
1645
1646     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1647     mv->pred_flag = 0;
1648     if (s->sh.slice_type == B_SLICE)
1649         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1650
1651     if (inter_pred_idc != PRED_L1) {
1652         if (s->sh.nb_refs[L0])
1653             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1654
1655         mv->pred_flag = PF_L0;
1656         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1657         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1658         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1659                                  part_idx, merge_idx, mv, mvp_flag, 0);
1660         mv->mv[0].x += lc->pu.mvd.x;
1661         mv->mv[0].y += lc->pu.mvd.y;
1662     }
1663
1664     if (inter_pred_idc != PRED_L0) {
1665         if (s->sh.nb_refs[L1])
1666             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1667
1668         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1669             AV_ZERO32(&lc->pu.mvd);
1670         } else {
1671             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1672         }
1673
1674         mv->pred_flag += PF_L1;
1675         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1676         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1677                                  part_idx, merge_idx, mv, mvp_flag, 1);
1678         mv->mv[1].x += lc->pu.mvd.x;
1679         mv->mv[1].y += lc->pu.mvd.y;
1680     }
1681 }
1682
1683 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1684                                 int nPbW, int nPbH,
1685                                 int log2_cb_size, int partIdx, int idx)
1686 {
1687 #define POS(c_idx, x, y)                                                              \
1688     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1689                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1690     HEVCLocalContext *lc = s->HEVClc;
1691     int merge_idx = 0;
1692     struct MvField current_mv = {{{ 0 }}};
1693
1694     int min_pu_width = s->ps.sps->min_pu_width;
1695
1696     MvField *tab_mvf = s->ref->tab_mvf;
1697     RefPicList  *refPicList = s->ref->refPicList;
1698     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1699     uint8_t *dst0 = POS(0, x0, y0);
1700     uint8_t *dst1 = POS(1, x0, y0);
1701     uint8_t *dst2 = POS(2, x0, y0);
1702     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1703     int min_cb_width     = s->ps.sps->min_cb_width;
1704     int x_cb             = x0 >> log2_min_cb_size;
1705     int y_cb             = y0 >> log2_min_cb_size;
1706     int x_pu, y_pu;
1707     int i, j;
1708
1709     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1710
1711     if (!skip_flag)
1712         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1713
1714     if (skip_flag || lc->pu.merge_flag) {
1715         if (s->sh.max_num_merge_cand > 1)
1716             merge_idx = ff_hevc_merge_idx_decode(s);
1717         else
1718             merge_idx = 0;
1719
1720         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1721                                    partIdx, merge_idx, &current_mv);
1722     } else {
1723         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1724                               partIdx, merge_idx, &current_mv);
1725     }
1726
1727     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1728     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1729
1730     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1731         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1732             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1733
1734     if (current_mv.pred_flag & PF_L0) {
1735         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1736         if (!ref0)
1737             return;
1738         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1739     }
1740     if (current_mv.pred_flag & PF_L1) {
1741         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1742         if (!ref1)
1743             return;
1744         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1745     }
1746
1747     if (current_mv.pred_flag == PF_L0) {
1748         int x0_c = x0 >> s->ps.sps->hshift[1];
1749         int y0_c = y0 >> s->ps.sps->vshift[1];
1750         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1751         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1752
1753         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1754                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1755                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1756                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1757
1758         if (s->ps.sps->chroma_format_idc) {
1759             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1760                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1761                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1762             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1763                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1764                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1765         }
1766     } else if (current_mv.pred_flag == PF_L1) {
1767         int x0_c = x0 >> s->ps.sps->hshift[1];
1768         int y0_c = y0 >> s->ps.sps->vshift[1];
1769         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1770         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1771
1772         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1773                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1774                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1775                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1776
1777         if (s->ps.sps->chroma_format_idc) {
1778             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1779                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1780                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1781
1782             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1783                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1784                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1785         }
1786     } else if (current_mv.pred_flag == PF_BI) {
1787         int x0_c = x0 >> s->ps.sps->hshift[1];
1788         int y0_c = y0 >> s->ps.sps->vshift[1];
1789         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1790         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1791
1792         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1793                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1794                    ref1->frame, &current_mv.mv[1], &current_mv);
1795
1796         if (s->ps.sps->chroma_format_idc) {
1797             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1798                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1799
1800             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1801                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1802         }
1803     }
1804 }
1805
1806 /**
1807  * 8.4.1
1808  */
1809 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1810                                 int prev_intra_luma_pred_flag)
1811 {
1812     HEVCLocalContext *lc = s->HEVClc;
1813     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1814     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1815     int min_pu_width     = s->ps.sps->min_pu_width;
1816     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1817     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1818     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1819
1820     int cand_up   = (lc->ctb_up_flag || y0b) ?
1821                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1822     int cand_left = (lc->ctb_left_flag || x0b) ?
1823                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1824
1825     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1826
1827     MvField *tab_mvf = s->ref->tab_mvf;
1828     int intra_pred_mode;
1829     int candidate[3];
1830     int i, j;
1831
1832     // intra_pred_mode prediction does not cross vertical CTB boundaries
1833     if ((y0 - 1) < y_ctb)
1834         cand_up = INTRA_DC;
1835
1836     if (cand_left == cand_up) {
1837         if (cand_left < 2) {
1838             candidate[0] = INTRA_PLANAR;
1839             candidate[1] = INTRA_DC;
1840             candidate[2] = INTRA_ANGULAR_26;
1841         } else {
1842             candidate[0] = cand_left;
1843             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1844             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1845         }
1846     } else {
1847         candidate[0] = cand_left;
1848         candidate[1] = cand_up;
1849         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1850             candidate[2] = INTRA_PLANAR;
1851         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1852             candidate[2] = INTRA_DC;
1853         } else {
1854             candidate[2] = INTRA_ANGULAR_26;
1855         }
1856     }
1857
1858     if (prev_intra_luma_pred_flag) {
1859         intra_pred_mode = candidate[lc->pu.mpm_idx];
1860     } else {
1861         if (candidate[0] > candidate[1])
1862             FFSWAP(uint8_t, candidate[0], candidate[1]);
1863         if (candidate[0] > candidate[2])
1864             FFSWAP(uint8_t, candidate[0], candidate[2]);
1865         if (candidate[1] > candidate[2])
1866             FFSWAP(uint8_t, candidate[1], candidate[2]);
1867
1868         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1869         for (i = 0; i < 3; i++)
1870             if (intra_pred_mode >= candidate[i])
1871                 intra_pred_mode++;
1872     }
1873
1874     /* write the intra prediction units into the mv array */
1875     if (!size_in_pus)
1876         size_in_pus = 1;
1877     for (i = 0; i < size_in_pus; i++) {
1878         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1879                intra_pred_mode, size_in_pus);
1880
1881         for (j = 0; j < size_in_pus; j++) {
1882             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1883         }
1884     }
1885
1886     return intra_pred_mode;
1887 }
1888
1889 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1890                                           int log2_cb_size, int ct_depth)
1891 {
1892     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1893     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1894     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1895     int y;
1896
1897     for (y = 0; y < length; y++)
1898         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1899                ct_depth, length);
1900 }
1901
/*
 * Remapping of the intra prediction modes 0..34, indexed by mode.
 * intra_prediction_unit() applies it to the derived chroma mode when
 * chroma_format_idc == 2.
 */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 8 */  0,  1,  2,  2,  2,  2,  3,  5,  7,
    /*  9..17 */  8, 10, 12, 13, 15, 17, 18, 19, 20,
    /* 18..26 */ 21, 22, 23, 23, 24, 24, 25, 25, 26,
    /* 27..34 */ 27, 27, 28, 28, 29, 29, 30, 31,
};
1905
1906 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1907                                   int log2_cb_size)
1908 {
1909     HEVCLocalContext *lc = s->HEVClc;
1910     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1911     uint8_t prev_intra_luma_pred_flag[4];
1912     int split   = lc->cu.part_mode == PART_NxN;
1913     int pb_size = (1 << log2_cb_size) >> split;
1914     int side    = split + 1;
1915     int chroma_mode;
1916     int i, j;
1917
1918     for (i = 0; i < side; i++)
1919         for (j = 0; j < side; j++)
1920             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1921
1922     for (i = 0; i < side; i++) {
1923         for (j = 0; j < side; j++) {
1924             if (prev_intra_luma_pred_flag[2 * i + j])
1925                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1926             else
1927                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1928
1929             lc->pu.intra_pred_mode[2 * i + j] =
1930                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1931                                      prev_intra_luma_pred_flag[2 * i + j]);
1932         }
1933     }
1934
1935     if (s->ps.sps->chroma_format_idc == 3) {
1936         for (i = 0; i < side; i++) {
1937             for (j = 0; j < side; j++) {
1938                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1939                 if (chroma_mode != 4) {
1940                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1941                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1942                     else
1943                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1944                 } else {
1945                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1946                 }
1947             }
1948         }
1949     } else if (s->ps.sps->chroma_format_idc == 2) {
1950         int mode_idx;
1951         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952         if (chroma_mode != 4) {
1953             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954                 mode_idx = 34;
1955             else
1956                 mode_idx = intra_chroma_table[chroma_mode];
1957         } else {
1958             mode_idx = lc->pu.intra_pred_mode[0];
1959         }
1960         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1961     } else if (s->ps.sps->chroma_format_idc != 0) {
1962         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1963         if (chroma_mode != 4) {
1964             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1965                 lc->pu.intra_pred_mode_c[0] = 34;
1966             else
1967                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1968         } else {
1969             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1970         }
1971     }
1972 }
1973
1974 static void intra_prediction_unit_default_value(HEVCContext *s,
1975                                                 int x0, int y0,
1976                                                 int log2_cb_size)
1977 {
1978     HEVCLocalContext *lc = s->HEVClc;
1979     int pb_size          = 1 << log2_cb_size;
1980     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
1981     int min_pu_width     = s->ps.sps->min_pu_width;
1982     MvField *tab_mvf     = s->ref->tab_mvf;
1983     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1984     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1985     int j, k;
1986
1987     if (size_in_pus == 0)
1988         size_in_pus = 1;
1989     for (j = 0; j < size_in_pus; j++)
1990         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1991     if (lc->cu.pred_mode == MODE_INTRA)
1992         for (j = 0; j < size_in_pus; j++)
1993             for (k = 0; k < size_in_pus; k++)
1994                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1995 }
1996
1997 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1998 {
1999     int cb_size          = 1 << log2_cb_size;
2000     HEVCLocalContext *lc = s->HEVClc;
2001     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2002     int length           = cb_size >> log2_min_cb_size;
2003     int min_cb_width     = s->ps.sps->min_cb_width;
2004     int x_cb             = x0 >> log2_min_cb_size;
2005     int y_cb             = y0 >> log2_min_cb_size;
2006     int idx              = log2_cb_size - 2;
2007     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2008     int x, y, ret;
2009
2010     lc->cu.x                = x0;
2011     lc->cu.y                = y0;
2012     lc->cu.pred_mode        = MODE_INTRA;
2013     lc->cu.part_mode        = PART_2Nx2N;
2014     lc->cu.intra_split_flag = 0;
2015
2016     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2017     for (x = 0; x < 4; x++)
2018         lc->pu.intra_pred_mode[x] = 1;
2019     if (s->ps.pps->transquant_bypass_enable_flag) {
2020         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2021         if (lc->cu.cu_transquant_bypass_flag)
2022             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2023     } else
2024         lc->cu.cu_transquant_bypass_flag = 0;
2025
2026     if (s->sh.slice_type != I_SLICE) {
2027         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2028
2029         x = y_cb * min_cb_width + x_cb;
2030         for (y = 0; y < length; y++) {
2031             memset(&s->skip_flag[x], skip_flag, length);
2032             x += min_cb_width;
2033         }
2034         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2035     } else {
2036         x = y_cb * min_cb_width + x_cb;
2037         for (y = 0; y < length; y++) {
2038             memset(&s->skip_flag[x], 0, length);
2039             x += min_cb_width;
2040         }
2041     }
2042
2043     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2044         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2045         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2046
2047         if (!s->sh.disable_deblocking_filter_flag)
2048             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2049     } else {
2050         int pcm_flag = 0;
2051
2052         if (s->sh.slice_type != I_SLICE)
2053             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2054         if (lc->cu.pred_mode != MODE_INTRA ||
2055             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2056             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2057             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2058                                       lc->cu.pred_mode == MODE_INTRA;
2059         }
2060
2061         if (lc->cu.pred_mode == MODE_INTRA) {
2062             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2063                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2064                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2065                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2066             }
2067             if (pcm_flag) {
2068                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2070                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2071                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2072
2073                 if (ret < 0)
2074                     return ret;
2075             } else {
2076                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2077             }
2078         } else {
2079             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2080             switch (lc->cu.part_mode) {
2081             case PART_2Nx2N:
2082                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2083                 break;
2084             case PART_2NxN:
2085                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2086                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2087                 break;
2088             case PART_Nx2N:
2089                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2090                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2091                 break;
2092             case PART_2NxnU:
2093                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2094                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2095                 break;
2096             case PART_2NxnD:
2097                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2098                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2099                 break;
2100             case PART_nLx2N:
2101                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2102                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2103                 break;
2104             case PART_nRx2N:
2105                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2106                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2107                 break;
2108             case PART_NxN:
2109                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2110                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2111                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2112                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2113                 break;
2114             }
2115         }
2116
2117         if (!pcm_flag) {
2118             int rqt_root_cbf = 1;
2119
2120             if (lc->cu.pred_mode != MODE_INTRA &&
2121                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2122                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2123             }
2124             if (rqt_root_cbf) {
2125                 const static int cbf[2] = { 0 };
2126                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2127                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2128                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2129                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2130                                          log2_cb_size,
2131                                          log2_cb_size, 0, 0, cbf, cbf);
2132                 if (ret < 0)
2133                     return ret;
2134             } else {
2135                 if (!s->sh.disable_deblocking_filter_flag)
2136                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2137             }
2138         }
2139     }
2140
2141     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2142         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2143
2144     x = y_cb * min_cb_width + x_cb;
2145     for (y = 0; y < length; y++) {
2146         memset(&s->qp_y_tab[x], lc->qp_y, length);
2147         x += min_cb_width;
2148     }
2149
2150     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2151        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2152         lc->qPy_pred = lc->qp_y;
2153     }
2154
2155     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2156
2157     return 0;
2158 }
2159
2160 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2161                                int log2_cb_size, int cb_depth)
2162 {
2163     HEVCLocalContext *lc = s->HEVClc;
2164     const int cb_size    = 1 << log2_cb_size;
2165     int ret;
2166     int split_cu;
2167
2168     lc->ct_depth = cb_depth;
2169     if (x0 + cb_size <= s->ps.sps->width  &&
2170         y0 + cb_size <= s->ps.sps->height &&
2171         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2172         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2173     } else {
2174         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2175     }
2176     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2177         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2178         lc->tu.is_cu_qp_delta_coded = 0;
2179         lc->tu.cu_qp_delta          = 0;
2180     }
2181
2182     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2183         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2184         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2185     }
2186
2187     if (split_cu) {
2188         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2189         const int cb_size_split = cb_size >> 1;
2190         const int x1 = x0 + cb_size_split;
2191         const int y1 = y0 + cb_size_split;
2192
2193         int more_data = 0;
2194
2195         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2196         if (more_data < 0)
2197             return more_data;
2198
2199         if (more_data && x1 < s->ps.sps->width) {
2200             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2201             if (more_data < 0)
2202                 return more_data;
2203         }
2204         if (more_data && y1 < s->ps.sps->height) {
2205             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2206             if (more_data < 0)
2207                 return more_data;
2208         }
2209         if (more_data && x1 < s->ps.sps->width &&
2210             y1 < s->ps.sps->height) {
2211             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2212             if (more_data < 0)
2213                 return more_data;
2214         }
2215
2216         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2217             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2218             lc->qPy_pred = lc->qp_y;
2219
2220         if (more_data)
2221             return ((x1 + cb_size_split) < s->ps.sps->width ||
2222                     (y1 + cb_size_split) < s->ps.sps->height);
2223         else
2224             return 0;
2225     } else {
2226         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2227         if (ret < 0)
2228             return ret;
2229         if ((!((x0 + cb_size) %
2230                (1 << (s->ps.sps->log2_ctb_size))) ||
2231              (x0 + cb_size >= s->ps.sps->width)) &&
2232             (!((y0 + cb_size) %
2233                (1 << (s->ps.sps->log2_ctb_size))) ||
2234              (y0 + cb_size >= s->ps.sps->height))) {
2235             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2236             return !end_of_slice_flag;
2237         } else {
2238             return 1;
2239         }
2240     }
2241
2242     return 0;
2243 }
2244
2245 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2246                                  int ctb_addr_ts)
2247 {
2248     HEVCLocalContext *lc  = s->HEVClc;
2249     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2250     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2251     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2252
2253     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2254
2255     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2256         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2257             lc->first_qp_group = 1;
2258         lc->end_of_tiles_x = s->ps.sps->width;
2259     } else if (s->ps.pps->tiles_enabled_flag) {
2260         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2261             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2262             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2263             lc->first_qp_group   = 1;
2264         }
2265     } else {
2266         lc->end_of_tiles_x = s->ps.sps->width;
2267     }
2268
2269     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2270
2271     lc->boundary_flags = 0;
2272     if (s->ps.pps->tiles_enabled_flag) {
2273         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2274             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2275         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2276             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2277         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2278             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2279         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2280             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2281     } else {
2282         if (ctb_addr_in_slice <= 0)
2283             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2284         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2285             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2286     }
2287
2288     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2289     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2290     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2291     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2292 }
2293
2294 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2295 {
2296     HEVCContext *s  = avctxt->priv_data;
2297     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2298     int more_data   = 1;
2299     int x_ctb       = 0;
2300     int y_ctb       = 0;
2301     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2302
2303     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2304         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2305         return AVERROR_INVALIDDATA;
2306     }
2307
2308     if (s->sh.dependent_slice_segment_flag) {
2309         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2310         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2311             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2312             return AVERROR_INVALIDDATA;
2313         }
2314     }
2315
2316     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2317         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2318
2319         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2320         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2321         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2322
2323         ff_hevc_cabac_init(s, ctb_addr_ts);
2324
2325         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2326
2327         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2328         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2329         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2330
2331         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2332         if (more_data < 0) {
2333             s->tab_slice_address[ctb_addr_rs] = -1;
2334             return more_data;
2335         }
2336
2337
2338         ctb_addr_ts++;
2339         ff_hevc_save_states(s, ctb_addr_ts);
2340         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2341     }
2342
2343     if (x_ctb + ctb_size >= s->ps.sps->width &&
2344         y_ctb + ctb_size >= s->ps.sps->height)
2345         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2346
2347     return ctb_addr_ts;
2348 }
2349
2350 static int hls_slice_data(HEVCContext *s)
2351 {
2352     int arg[2];
2353     int ret[2];
2354
2355     arg[0] = 0;
2356     arg[1] = 1;
2357
2358     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2359     return ret[0];
2360 }
2361 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2362 {
2363     HEVCContext *s1  = avctxt->priv_data, *s;
2364     HEVCLocalContext *lc;
2365     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2366     int more_data   = 1;
2367     int *ctb_row_p    = input_ctb_row;
2368     int ctb_row = ctb_row_p[job];
2369     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2370     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2371     int thread = ctb_row % s1->threads_number;
2372     int ret;
2373
2374     s = s1->sList[self_id];
2375     lc = s->HEVClc;
2376
2377     if(ctb_row) {
2378         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2379
2380         if (ret < 0)
2381             return ret;
2382         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2383     }
2384
2385     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2386         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2387         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2388
2389         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2390
2391         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2392
2393         if (avpriv_atomic_int_get(&s1->wpp_err)){
2394             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2395             return 0;
2396         }
2397
2398         ff_hevc_cabac_init(s, ctb_addr_ts);
2399         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2400         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2401
2402         if (more_data < 0) {
2403             s->tab_slice_address[ctb_addr_rs] = -1;
2404             avpriv_atomic_int_set(&s1->wpp_err,  1);
2405             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2406             return more_data;
2407         }
2408
2409         ctb_addr_ts++;
2410
2411         ff_hevc_save_states(s, ctb_addr_ts);
2412         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2413         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2414
2415         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2416             avpriv_atomic_int_set(&s1->wpp_err,  1);
2417             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2418             return 0;
2419         }
2420
2421         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2422             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2423             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2424             return ctb_addr_ts;
2425         }
2426         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2427         x_ctb+=ctb_size;
2428
2429         if(x_ctb >= s->ps.sps->width) {
2430             break;
2431         }
2432     }
2433     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2434
2435     return 0;
2436 }
2437
2438 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2439 {
2440     const uint8_t *data = nal->data;
2441     int length          = nal->size;
2442     HEVCLocalContext *lc = s->HEVClc;
2443     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2444     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2445     int offset;
2446     int startheader, cmpt = 0;
2447     int i, j, res = 0;
2448
2449     if (!ret || !arg) {
2450         av_free(ret);
2451         av_free(arg);
2452         return AVERROR(ENOMEM);
2453     }
2454
2455
2456     if (!s->sList[1]) {
2457         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2458
2459
2460         for (i = 1; i < s->threads_number; i++) {
2461             s->sList[i] = av_malloc(sizeof(HEVCContext));
2462             memcpy(s->sList[i], s, sizeof(HEVCContext));
2463             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2464             s->sList[i]->HEVClc = s->HEVClcList[i];
2465         }
2466     }
2467
2468     offset = (lc->gb.index >> 3);
2469
2470     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2471         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2472             startheader--;
2473             cmpt++;
2474         }
2475     }
2476
2477     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2478         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2479         for (j = 0, cmpt = 0, startheader = offset
2480              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2481             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2482                 startheader--;
2483                 cmpt++;
2484             }
2485         }
2486         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2487         s->sh.offset[i - 1] = offset;
2488
2489     }
2490     if (s->sh.num_entry_point_offsets != 0) {
2491         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2492         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2493         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2494
2495     }
2496     s->data = data;
2497
2498     for (i = 1; i < s->threads_number; i++) {
2499         s->sList[i]->HEVClc->first_qp_group = 1;
2500         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2501         memcpy(s->sList[i], s, sizeof(HEVCContext));
2502         s->sList[i]->HEVClc = s->HEVClcList[i];
2503     }
2504
2505     avpriv_atomic_int_set(&s->wpp_err, 0);
2506     ff_reset_entries(s->avctx);
2507
2508     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2509         arg[i] = i;
2510         ret[i] = 0;
2511     }
2512
2513     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2514         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2515
2516     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2517         res += ret[i];
2518     av_free(ret);
2519     av_free(arg);
2520     return res;
2521 }
2522
2523 static int set_side_data(HEVCContext *s)
2524 {
2525     AVFrame *out = s->ref->frame;
2526
2527     if (s->sei_frame_packing_present &&
2528         s->frame_packing_arrangement_type >= 3 &&
2529         s->frame_packing_arrangement_type <= 5 &&
2530         s->content_interpretation_type > 0 &&
2531         s->content_interpretation_type < 3) {
2532         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2533         if (!stereo)
2534             return AVERROR(ENOMEM);
2535
2536         switch (s->frame_packing_arrangement_type) {
2537         case 3:
2538             if (s->quincunx_subsampling)
2539                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2540             else
2541                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2542             break;
2543         case 4:
2544             stereo->type = AV_STEREO3D_TOPBOTTOM;
2545             break;
2546         case 5:
2547             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2548             break;
2549         }
2550
2551         if (s->content_interpretation_type == 2)
2552             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2553     }
2554
2555     if (s->sei_display_orientation_present &&
2556         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2557         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2558         AVFrameSideData *rotation = av_frame_new_side_data(out,
2559                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2560                                                            sizeof(int32_t) * 9);
2561         if (!rotation)
2562             return AVERROR(ENOMEM);
2563
2564         av_display_rotation_set((int32_t *)rotation->data, angle);
2565         av_display_matrix_flip((int32_t *)rotation->data,
2566                                s->sei_hflip, s->sei_vflip);
2567     }
2568
2569     return 0;
2570 }
2571
2572 static int hevc_frame_start(HEVCContext *s)
2573 {
2574     HEVCLocalContext *lc = s->HEVClc;
2575     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2576                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2577     int ret;
2578
2579     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2580     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2581     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2582     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2583     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2584
2585     s->is_decoded        = 0;
2586     s->first_nal_type    = s->nal_unit_type;
2587
2588     if (s->ps.pps->tiles_enabled_flag)
2589         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2590
2591     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2592     if (ret < 0)
2593         goto fail;
2594
2595     ret = ff_hevc_frame_rps(s);
2596     if (ret < 0) {
2597         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2598         goto fail;
2599     }
2600
2601     s->ref->frame->key_frame = IS_IRAP(s);
2602
2603     ret = set_side_data(s);
2604     if (ret < 0)
2605         goto fail;
2606
2607     s->frame->pict_type = 3 - s->sh.slice_type;
2608
2609     if (!IS_IRAP(s))
2610         ff_hevc_bump_frame(s);
2611
2612     av_frame_unref(s->output_frame);
2613     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2614     if (ret < 0)
2615         goto fail;
2616
2617     if (!s->avctx->hwaccel)
2618         ff_thread_finish_setup(s->avctx);
2619
2620     return 0;
2621
2622 fail:
2623     if (s->ref)
2624         ff_hevc_unref_frame(s, s->ref, ~0);
2625     s->ref = NULL;
2626     return ret;
2627 }
2628
2629 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2630 {
2631     HEVCLocalContext *lc = s->HEVClc;
2632     GetBitContext *gb    = &lc->gb;
2633     int ctb_addr_ts, ret;
2634
2635     *gb              = nal->gb;
2636     s->nal_unit_type = nal->type;
2637     s->temporal_id   = nal->temporal_id;
2638
2639     switch (s->nal_unit_type) {
2640     case NAL_VPS:
2641         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2642         if (ret < 0)
2643             goto fail;
2644         break;
2645     case NAL_SPS:
2646         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2647                                      s->apply_defdispwin);
2648         if (ret < 0)
2649             goto fail;
2650         break;
2651     case NAL_PPS:
2652         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2653         if (ret < 0)
2654             goto fail;
2655         break;
2656     case NAL_SEI_PREFIX:
2657     case NAL_SEI_SUFFIX:
2658         ret = ff_hevc_decode_nal_sei(s);
2659         if (ret < 0)
2660             goto fail;
2661         break;
2662     case NAL_TRAIL_R:
2663     case NAL_TRAIL_N:
2664     case NAL_TSA_N:
2665     case NAL_TSA_R:
2666     case NAL_STSA_N:
2667     case NAL_STSA_R:
2668     case NAL_BLA_W_LP:
2669     case NAL_BLA_W_RADL:
2670     case NAL_BLA_N_LP:
2671     case NAL_IDR_W_RADL:
2672     case NAL_IDR_N_LP:
2673     case NAL_CRA_NUT:
2674     case NAL_RADL_N:
2675     case NAL_RADL_R:
2676     case NAL_RASL_N:
2677     case NAL_RASL_R:
2678         ret = hls_slice_header(s);
2679         if (ret < 0)
2680             return ret;
2681
2682         if (s->max_ra == INT_MAX) {
2683             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2684                 s->max_ra = s->poc;
2685             } else {
2686                 if (IS_IDR(s))
2687                     s->max_ra = INT_MIN;
2688             }
2689         }
2690
2691         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2692             s->poc <= s->max_ra) {
2693             s->is_decoded = 0;
2694             break;
2695         } else {
2696             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2697                 s->max_ra = INT_MIN;
2698         }
2699
2700         if (s->sh.first_slice_in_pic_flag) {
2701             ret = hevc_frame_start(s);
2702             if (ret < 0)
2703                 return ret;
2704         } else if (!s->ref) {
2705             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2706             goto fail;
2707         }
2708
2709         if (s->nal_unit_type != s->first_nal_type) {
2710             av_log(s->avctx, AV_LOG_ERROR,
2711                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2712                    s->first_nal_type, s->nal_unit_type);
2713             return AVERROR_INVALIDDATA;
2714         }
2715
2716         if (!s->sh.dependent_slice_segment_flag &&
2717             s->sh.slice_type != I_SLICE) {
2718             ret = ff_hevc_slice_rpl(s);
2719             if (ret < 0) {
2720                 av_log(s->avctx, AV_LOG_WARNING,
2721                        "Error constructing the reference lists for the current slice.\n");
2722                 goto fail;
2723             }
2724         }
2725
2726         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2727             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2728             if (ret < 0)
2729                 goto fail;
2730         }
2731
2732         if (s->avctx->hwaccel) {
2733             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2734             if (ret < 0)
2735                 goto fail;
2736         } else {
2737             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2738                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2739             else
2740                 ctb_addr_ts = hls_slice_data(s);
2741             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2742                 s->is_decoded = 1;
2743             }
2744
2745             if (ctb_addr_ts < 0) {
2746                 ret = ctb_addr_ts;
2747                 goto fail;
2748             }
2749         }
2750         break;
2751     case NAL_EOS_NUT:
2752     case NAL_EOB_NUT:
2753         s->seq_decode = (s->seq_decode + 1) & 0xff;
2754         s->max_ra     = INT_MAX;
2755         break;
2756     case NAL_AUD:
2757     case NAL_FD_NUT:
2758         break;
2759     default:
2760         av_log(s->avctx, AV_LOG_INFO,
2761                "Skipping NAL unit %d\n", s->nal_unit_type);
2762     }
2763
2764     return 0;
2765 fail:
2766     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2767         return ret;
2768     return 0;
2769 }
2770
2771 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2772 {
2773     int i, ret = 0;
2774
2775     s->ref = NULL;
2776     s->last_eos = s->eos;
2777     s->eos = 0;
2778
2779     /* split the input packet into NAL units, so we know the upper bound on the
2780      * number of slices in the frame */
2781     ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2782                                s->nal_length_size);
2783     if (ret < 0) {
2784         av_log(s->avctx, AV_LOG_ERROR,
2785                "Error splitting the input into NAL units.\n");
2786         return ret;
2787     }
2788
2789     for (i = 0; i < s->pkt.nb_nals; i++) {
2790         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2791             s->pkt.nals[i].type == NAL_EOS_NUT)
2792             s->eos = 1;
2793     }
2794
2795     /* decode the NAL units */
2796     for (i = 0; i < s->pkt.nb_nals; i++) {
2797         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2798         if (ret < 0) {
2799             av_log(s->avctx, AV_LOG_WARNING,
2800                    "Error parsing NAL unit #%d.\n", i);
2801             goto fail;
2802         }
2803     }
2804
2805 fail:
2806     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2807         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2808
2809     return ret;
2810 }
2811
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal characters,
 * without any separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end) {
        av_log(log_ctx, level, "%02"PRIx8, *cur);
        cur++;
    }
}
2818
2819 static int verify_md5(HEVCContext *s, AVFrame *frame)
2820 {
2821     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2822     int pixel_shift;
2823     int i, j;
2824
2825     if (!desc)
2826         return AVERROR(EINVAL);
2827
2828     pixel_shift = desc->comp[0].depth > 8;
2829
2830     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2831            s->poc);
2832
2833     /* the checksums are LE, so we have to byteswap for >8bpp formats
2834      * on BE arches */
2835 #if HAVE_BIGENDIAN
2836     if (pixel_shift && !s->checksum_buf) {
2837         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2838                        FFMAX3(frame->linesize[0], frame->linesize[1],
2839                               frame->linesize[2]));
2840         if (!s->checksum_buf)
2841             return AVERROR(ENOMEM);
2842     }
2843 #endif
2844
2845     for (i = 0; frame->data[i]; i++) {
2846         int width  = s->avctx->coded_width;
2847         int height = s->avctx->coded_height;
2848         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2849         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2850         uint8_t md5[16];
2851
2852         av_md5_init(s->md5_ctx);
2853         for (j = 0; j < h; j++) {
2854             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2855 #if HAVE_BIGENDIAN
2856             if (pixel_shift) {
2857                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2858                                     (const uint16_t *) src, w);
2859                 src = s->checksum_buf;
2860             }
2861 #endif
2862             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2863         }
2864         av_md5_final(s->md5_ctx, md5);
2865
2866         if (!memcmp(md5, s->md5[i], 16)) {
2867             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2868             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2869             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2870         } else {
2871             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2872             print_md5(s->avctx, AV_LOG_ERROR, md5);
2873             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2874             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2875             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2876             return AVERROR_INVALIDDATA;
2877         }
2878     }
2879
2880     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2881
2882     return 0;
2883 }
2884
2885 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2886                              AVPacket *avpkt)
2887 {
2888     int ret;
2889     HEVCContext *s = avctx->priv_data;
2890
2891     if (!avpkt->size) {
2892         ret = ff_hevc_output_frame(s, data, 1);
2893         if (ret < 0)
2894             return ret;
2895
2896         *got_output = ret;
2897         return 0;
2898     }
2899
2900     s->ref = NULL;
2901     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2902     if (ret < 0)
2903         return ret;
2904
2905     if (avctx->hwaccel) {
2906         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
2907             av_log(avctx, AV_LOG_ERROR,
2908                    "hardware accelerator failed to decode picture\n");
2909             ff_hevc_unref_frame(s, s->ref, ~0);
2910             return ret;
2911         }
2912     } else {
2913         /* verify the SEI checksum */
2914         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2915             s->is_md5) {
2916             ret = verify_md5(s, s->ref->frame);
2917             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2918                 ff_hevc_unref_frame(s, s->ref, ~0);
2919                 return ret;
2920             }
2921         }
2922     }
2923     s->is_md5 = 0;
2924
2925     if (s->is_decoded) {
2926         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2927         s->is_decoded = 0;
2928     }
2929
2930     if (s->output_frame->buf[0]) {
2931         av_frame_move_ref(data, s->output_frame);
2932         *got_output = 1;
2933     }
2934
2935     return avpkt->size;
2936 }
2937
2938 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2939 {
2940     int ret;
2941
2942     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2943     if (ret < 0)
2944         return ret;
2945
2946     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2947     if (!dst->tab_mvf_buf)
2948         goto fail;
2949     dst->tab_mvf = src->tab_mvf;
2950
2951     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2952     if (!dst->rpl_tab_buf)
2953         goto fail;
2954     dst->rpl_tab = src->rpl_tab;
2955
2956     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2957     if (!dst->rpl_buf)
2958         goto fail;
2959
2960     dst->poc        = src->poc;
2961     dst->ctb_count  = src->ctb_count;
2962     dst->window     = src->window;
2963     dst->flags      = src->flags;
2964     dst->sequence   = src->sequence;
2965
2966     if (src->hwaccel_picture_private) {
2967         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2968         if (!dst->hwaccel_priv_buf)
2969             goto fail;
2970         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2971     }
2972
2973     return 0;
2974 fail:
2975     ff_hevc_unref_frame(s, dst, ~0);
2976     return AVERROR(ENOMEM);
2977 }
2978
2979 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2980 {
2981     HEVCContext       *s = avctx->priv_data;
2982     int i;
2983
2984     pic_arrays_free(s);
2985
2986     av_freep(&s->md5_ctx);
2987
2988     av_freep(&s->cabac_state);
2989
2990     for (i = 0; i < 3; i++) {
2991         av_freep(&s->sao_pixel_buffer_h[i]);
2992         av_freep(&s->sao_pixel_buffer_v[i]);
2993     }
2994     av_frame_free(&s->output_frame);
2995
2996     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2997         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2998         av_frame_free(&s->DPB[i].frame);
2999     }
3000
3001     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3002         av_buffer_unref(&s->ps.vps_list[i]);
3003     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3004         av_buffer_unref(&s->ps.sps_list[i]);
3005     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3006         av_buffer_unref(&s->ps.pps_list[i]);
3007     s->ps.sps = NULL;
3008     s->ps.pps = NULL;
3009     s->ps.vps = NULL;
3010
3011     av_freep(&s->sh.entry_point_offset);
3012     av_freep(&s->sh.offset);
3013     av_freep(&s->sh.size);
3014
3015     for (i = 1; i < s->threads_number; i++) {
3016         HEVCLocalContext *lc = s->HEVClcList[i];
3017         if (lc) {
3018             av_freep(&s->HEVClcList[i]);
3019             av_freep(&s->sList[i]);
3020         }
3021     }
3022     if (s->HEVClc == s->HEVClcList[0])
3023         s->HEVClc = NULL;
3024     av_freep(&s->HEVClcList[0]);
3025
3026     for (i = 0; i < s->pkt.nals_allocated; i++) {
3027         av_freep(&s->pkt.nals[i].rbsp_buffer);
3028         av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3029     }
3030     av_freep(&s->pkt.nals);
3031     s->pkt.nals_allocated = 0;
3032
3033     return 0;
3034 }
3035
3036 static av_cold int hevc_init_context(AVCodecContext *avctx)
3037 {
3038     HEVCContext *s = avctx->priv_data;
3039     int i;
3040
3041     s->avctx = avctx;
3042
3043     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3044     if (!s->HEVClc)
3045         goto fail;
3046     s->HEVClcList[0] = s->HEVClc;
3047     s->sList[0] = s;
3048
3049     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3050     if (!s->cabac_state)
3051         goto fail;
3052
3053     s->output_frame = av_frame_alloc();
3054     if (!s->output_frame)
3055         goto fail;
3056
3057     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3058         s->DPB[i].frame = av_frame_alloc();
3059         if (!s->DPB[i].frame)
3060             goto fail;
3061         s->DPB[i].tf.f = s->DPB[i].frame;
3062     }
3063
3064     s->max_ra = INT_MAX;
3065
3066     s->md5_ctx = av_md5_alloc();
3067     if (!s->md5_ctx)
3068         goto fail;
3069
3070     ff_bswapdsp_init(&s->bdsp);
3071
3072     s->context_initialized = 1;
3073     s->eos = 0;
3074
3075     return 0;
3076
3077 fail:
3078     hevc_decode_free(avctx);
3079     return AVERROR(ENOMEM);
3080 }
3081
3082 static int hevc_update_thread_context(AVCodecContext *dst,
3083                                       const AVCodecContext *src)
3084 {
3085     HEVCContext *s  = dst->priv_data;
3086     HEVCContext *s0 = src->priv_data;
3087     int i, ret;
3088
3089     if (!s->context_initialized) {
3090         ret = hevc_init_context(dst);
3091         if (ret < 0)
3092             return ret;
3093     }
3094
3095     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3096         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3097         if (s0->DPB[i].frame->buf[0]) {
3098             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3099             if (ret < 0)
3100                 return ret;
3101         }
3102     }
3103
3104     if (s->ps.sps != s0->ps.sps)
3105         s->ps.sps = NULL;
3106     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3107         av_buffer_unref(&s->ps.vps_list[i]);
3108         if (s0->ps.vps_list[i]) {
3109             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3110             if (!s->ps.vps_list[i])
3111                 return AVERROR(ENOMEM);
3112         }
3113     }
3114
3115     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3116         av_buffer_unref(&s->ps.sps_list[i]);
3117         if (s0->ps.sps_list[i]) {
3118             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3119             if (!s->ps.sps_list[i])
3120                 return AVERROR(ENOMEM);
3121         }
3122     }
3123
3124     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3125         av_buffer_unref(&s->ps.pps_list[i]);
3126         if (s0->ps.pps_list[i]) {
3127             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3128             if (!s->ps.pps_list[i])
3129                 return AVERROR(ENOMEM);
3130         }
3131     }
3132
3133     if (s->ps.sps != s0->ps.sps)
3134         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3135             return ret;
3136
3137     s->seq_decode = s0->seq_decode;
3138     s->seq_output = s0->seq_output;
3139     s->pocTid0    = s0->pocTid0;
3140     s->max_ra     = s0->max_ra;
3141     s->eos        = s0->eos;
3142     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3143
3144     s->is_nalff        = s0->is_nalff;
3145     s->nal_length_size = s0->nal_length_size;
3146
3147     s->threads_number      = s0->threads_number;
3148     s->threads_type        = s0->threads_type;
3149
3150     if (s0->eos) {
3151         s->seq_decode = (s->seq_decode + 1) & 0xff;
3152         s->max_ra = INT_MAX;
3153     }
3154
3155     return 0;
3156 }
3157
3158 static int hevc_decode_extradata(HEVCContext *s)
3159 {
3160     AVCodecContext *avctx = s->avctx;
3161     GetByteContext gb;
3162     int ret, i;
3163
3164     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3165
3166     if (avctx->extradata_size > 3 &&
3167         (avctx->extradata[0] || avctx->extradata[1] ||
3168          avctx->extradata[2] > 1)) {
3169         /* It seems the extradata is encoded as hvcC format.
3170          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3171          * is finalized. When finalized, configurationVersion will be 1 and we
3172          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3173         int i, j, num_arrays, nal_len_size;
3174
3175         s->is_nalff = 1;
3176
3177         bytestream2_skip(&gb, 21);
3178         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3179         num_arrays   = bytestream2_get_byte(&gb);
3180
3181         /* nal units in the hvcC always have length coded with 2 bytes,
3182          * so put a fake nal_length_size = 2 while parsing them */
3183         s->nal_length_size = 2;
3184
3185         /* Decode nal units from hvcC. */
3186         for (i = 0; i < num_arrays; i++) {
3187             int type = bytestream2_get_byte(&gb) & 0x3f;
3188             int cnt  = bytestream2_get_be16(&gb);
3189
3190             for (j = 0; j < cnt; j++) {
3191                 // +2 for the nal size field
3192                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3193                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3194                     av_log(s->avctx, AV_LOG_ERROR,
3195                            "Invalid NAL unit size in extradata.\n");
3196                     return AVERROR_INVALIDDATA;
3197                 }
3198
3199                 ret = decode_nal_units(s, gb.buffer, nalsize);
3200                 if (ret < 0) {
3201                     av_log(avctx, AV_LOG_ERROR,
3202                            "Decoding nal unit %d %d from hvcC failed\n",
3203                            type, i);
3204                     return ret;
3205                 }
3206                 bytestream2_skip(&gb, nalsize);
3207             }
3208         }
3209
3210         /* Now store right nal length size, that will be used to parse
3211          * all other nals */
3212         s->nal_length_size = nal_len_size;
3213     } else {
3214         s->is_nalff = 0;
3215         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3216         if (ret < 0)
3217             return ret;
3218     }
3219
3220     /* export stream parameters from the first SPS */
3221     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3222         if (s->ps.sps_list[i]) {
3223             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3224             export_stream_params(s->avctx, &s->ps, sps);
3225             break;
3226         }
3227     }
3228
3229     return 0;
3230 }
3231
3232 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3233 {
3234     HEVCContext *s = avctx->priv_data;
3235     int ret;
3236
3237     ff_init_cabac_states();
3238
3239     avctx->internal->allocate_progress = 1;
3240
3241     ret = hevc_init_context(avctx);
3242     if (ret < 0)
3243         return ret;
3244
3245     s->enable_parallel_tiles = 0;
3246     s->picture_struct = 0;
3247     s->eos = 1;
3248
3249     if(avctx->active_thread_type & FF_THREAD_SLICE)
3250         s->threads_number = avctx->thread_count;
3251     else
3252         s->threads_number = 1;
3253
3254     if (avctx->extradata_size > 0 && avctx->extradata) {
3255         ret = hevc_decode_extradata(s);
3256         if (ret < 0) {
3257             hevc_decode_free(avctx);
3258             return ret;
3259         }
3260     }
3261
3262     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3263             s->threads_type = FF_THREAD_FRAME;
3264         else
3265             s->threads_type = FF_THREAD_SLICE;
3266
3267     return 0;
3268 }
3269
3270 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3271 {
3272     HEVCContext *s = avctx->priv_data;
3273     int ret;
3274
3275     memset(s, 0, sizeof(*s));
3276
3277     ret = hevc_init_context(avctx);
3278     if (ret < 0)
3279         return ret;
3280
3281     return 0;
3282 }
3283
3284 static void hevc_decode_flush(AVCodecContext *avctx)
3285 {
3286     HEVCContext *s = avctx->priv_data;
3287     ff_hevc_flush_dpb(s);
3288     s->max_ra = INT_MAX;
3289     s->eos = 1;
3290 }
3291
/* Byte offset of member x inside HEVCContext. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Flag set for options that are video decoding parameters. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3294
3295 static const AVProfile profiles[] = {
3296     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3297     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3298     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3299     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3300     { FF_PROFILE_UNKNOWN },
3301 };
3302
3303 static const AVOption options[] = {
3304     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3305         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3306     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3307         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3308     { NULL },
3309 };
3310
3311 static const AVClass hevc_decoder_class = {
3312     .class_name = "HEVC decoder",
3313     .item_name  = av_default_item_name,
3314     .option     = options,
3315     .version    = LIBAVUTIL_VERSION_INT,
3316 };
3317
3318 AVCodec ff_hevc_decoder = {
3319     .name                  = "hevc",
3320     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3321     .type                  = AVMEDIA_TYPE_VIDEO,
3322     .id                    = AV_CODEC_ID_HEVC,
3323     .priv_data_size        = sizeof(HEVCContext),
3324     .priv_class            = &hevc_decoder_class,
3325     .init                  = hevc_decode_init,
3326     .close                 = hevc_decode_free,
3327     .decode                = hevc_decode_frame,
3328     .flush                 = hevc_decode_flush,
3329     .update_thread_context = hevc_update_thread_context,
3330     .init_thread_copy      = hevc_init_thread_copy,
3331     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3332                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3333     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3334 };