git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 static const uint8_t diag_scan4x4_inv[4][4] = {
  86     { 0,  2,  5,  9, },
  87     { 1,  4,  8, 12, },
  88     { 3,  7, 11, 14, },
  89     { 6, 10, 13, 15, },
  90 };
  91
  92 static const uint8_t diag_scan8x8_inv[8][8] = {
  93     {  0,  2,  5,  9, 14, 20, 27, 35, },
  94     {  1,  4,  8, 13, 19, 26, 34, 42, },
  95     {  3,  7, 12, 18, 25, 33, 41, 48, },
  96     {  6, 11, 17, 24, 32, 40, 47, 53, },
  97     { 10, 16, 23, 31, 39, 46, 52, 57, },
  98     { 15, 22, 30, 38, 45, 51, 56, 60, },
  99     { 21, 29, 37, 44, 50, 55, 59, 62, },
 100     { 28, 36, 43, 49, 54, 58, 61, 63, },
 101 };
 102
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 107
 108 /**
 109  * Section 5.7
 110  */
 111
 112 /* free everything allocated  by pic_arrays_init() */
 113 static void pic_arrays_free(HEVCContext *s)
 114 {
 115     av_freep(&s->sao);
 116     av_freep(&s->deblock);
 117
 118     av_freep(&s->skip_flag);
 119     av_freep(&s->tab_ct_depth);
 120
 121     av_freep(&s->tab_ipm);
 122     av_freep(&s->cbf_luma);
 123     av_freep(&s->is_pcm);
 124
 125     av_freep(&s->qp_y_tab);
 126     av_freep(&s->tab_slice_address);
 127     av_freep(&s->filter_slice_edges);
 128
 129     av_freep(&s->horizontal_bs);
 130     av_freep(&s->vertical_bs);
 131
 132     av_buffer_pool_uninit(&s->tab_mvf_pool);
 133     av_buffer_pool_uninit(&s->rpl_tab_pool);
 134 }
 135
 136 /* allocate arrays that depend on frame dimensions */
 137 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 138 {
 139     int log2_min_cb_size = sps->log2_min_cb_size;
 140     int width            = sps->width;
 141     int height           = sps->height;
 142     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 143                            ((height >> log2_min_cb_size) + 1);
 144     int ctb_count        = sps->ctb_width * sps->ctb_height;
 145     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 146
 147     s->bs_width  = width  >> 3;
 148     s->bs_height = height >> 3;
 149
 150     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 151     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 152     if (!s->sao || !s->deblock)
 153         goto fail;
 154
 155     s->skip_flag    = av_malloc(pic_size_in_ctb);
 156     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 157     if (!s->skip_flag || !s->tab_ct_depth)
 158         goto fail;
 159
 160     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 161     s->tab_ipm  = av_mallocz(min_pu_size);
 162     s->is_pcm   = av_malloc(min_pu_size);
 163     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 164         goto fail;
 165
 166     s->filter_slice_edges = av_malloc(ctb_count);
 167     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 168                                       sizeof(*s->tab_slice_address));
 169     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 170                                       sizeof(*s->qp_y_tab));
 171     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 172         goto fail;
 173
 174     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 175     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 176     if (!s->horizontal_bs || !s->vertical_bs)
 177         goto fail;
 178
 179     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 180                                           av_buffer_alloc);
 181     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 182                                           av_buffer_allocz);
 183     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 184         goto fail;
 185
 186     return 0;
 187
 188 fail:
 189     pic_arrays_free(s);
 190     return AVERROR(ENOMEM);
 191 }
 192
 193 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 194 {
 195     int i = 0;
 196     int j = 0;
 197     uint8_t luma_weight_l0_flag[16];
 198     uint8_t chroma_weight_l0_flag[16];
 199     uint8_t luma_weight_l1_flag[16];
 200     uint8_t chroma_weight_l1_flag[16];
 201
 202     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 203     if (s->ps.sps->chroma_format_idc != 0) {
 204         int delta = get_se_golomb(gb);
 205         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 206     }
 207
 208     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 209         luma_weight_l0_flag[i] = get_bits1(gb);
 210         if (!luma_weight_l0_flag[i]) {
 211             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 212             s->sh.luma_offset_l0[i] = 0;
 213         }
 214     }
 215     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 216         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 217             chroma_weight_l0_flag[i] = get_bits1(gb);
 218     } else {
 219         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 220             chroma_weight_l0_flag[i] = 0;
 221     }
 222     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 223         if (luma_weight_l0_flag[i]) {
 224             int delta_luma_weight_l0 = get_se_golomb(gb);
 225             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 226             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 227         }
 228         if (chroma_weight_l0_flag[i]) {
 229             for (j = 0; j < 2; j++) {
 230                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 231                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 232                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 233                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 234                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 235             }
 236         } else {
 237             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 238             s->sh.chroma_offset_l0[i][0] = 0;
 239             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 240             s->sh.chroma_offset_l0[i][1] = 0;
 241         }
 242     }
 243     if (s->sh.slice_type == B_SLICE) {
 244         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 245             luma_weight_l1_flag[i] = get_bits1(gb);
 246             if (!luma_weight_l1_flag[i]) {
 247                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 248                 s->sh.luma_offset_l1[i] = 0;
 249             }
 250         }
 251         if (s->ps.sps->chroma_format_idc != 0) {
 252             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 253                 chroma_weight_l1_flag[i] = get_bits1(gb);
 254         } else {
 255             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 256                 chroma_weight_l1_flag[i] = 0;
 257         }
 258         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 259             if (luma_weight_l1_flag[i]) {
 260                 int delta_luma_weight_l1 = get_se_golomb(gb);
 261                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 262                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 263             }
 264             if (chroma_weight_l1_flag[i]) {
 265                 for (j = 0; j < 2; j++) {
 266                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 267                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 268                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 269                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 270                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 271                 }
 272             } else {
 273                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 274                 s->sh.chroma_offset_l1[i][0] = 0;
 275                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 276                 s->sh.chroma_offset_l1[i][1] = 0;
 277             }
 278         }
 279     }
 280 }
 281
 282 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 283 {
 284     const HEVCSPS *sps = s->ps.sps;
 285     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 286     int prev_delta_msb = 0;
 287     unsigned int nb_sps = 0, nb_sh;
 288     int i;
 289
 290     rps->nb_refs = 0;
 291     if (!sps->long_term_ref_pics_present_flag)
 292         return 0;
 293
 294     if (sps->num_long_term_ref_pics_sps > 0)
 295         nb_sps = get_ue_golomb_long(gb);
 296     nb_sh = get_ue_golomb_long(gb);
 297
 298     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 299         return AVERROR_INVALIDDATA;
 300
 301     rps->nb_refs = nb_sh + nb_sps;
 302
 303     for (i = 0; i < rps->nb_refs; i++) {
 304         uint8_t delta_poc_msb_present;
 305
 306         if (i < nb_sps) {
 307             uint8_t lt_idx_sps = 0;
 308
 309             if (sps->num_long_term_ref_pics_sps > 1)
 310                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 311
 312             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 313             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 314         } else {
 315             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 316             rps->used[i] = get_bits1(gb);
 317         }
 318
 319         delta_poc_msb_present = get_bits1(gb);
 320         if (delta_poc_msb_present) {
 321             int delta = get_ue_golomb_long(gb);
 322
 323             if (i && i != nb_sps)
 324                 delta += prev_delta_msb;
 325
 326             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 327             prev_delta_msb = delta;
 328         }
 329     }
 330
 331     return 0;
 332 }
 333
 334 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 335                                  const HEVCSPS *sps)
 336 {
 337     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 338     unsigned int num = 0, den = 0;
 339
 340     avctx->pix_fmt             = sps->pix_fmt;
 341     avctx->coded_width         = sps->width;
 342     avctx->coded_height        = sps->height;
 343     avctx->width               = sps->output_width;
 344     avctx->height              = sps->output_height;
 345     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 346     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 347     avctx->level               = sps->ptl.general_ptl.level_idc;
 348
 349     ff_set_sar(avctx, sps->vui.sar);
 350
 351     if (sps->vui.video_signal_type_present_flag)
 352         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 353                                                             : AVCOL_RANGE_MPEG;
 354     else
 355         avctx->color_range = AVCOL_RANGE_MPEG;
 356
 357     if (sps->vui.colour_description_present_flag) {
 358         avctx->color_primaries = sps->vui.colour_primaries;
 359         avctx->color_trc       = sps->vui.transfer_characteristic;
 360         avctx->colorspace      = sps->vui.matrix_coeffs;
 361     } else {
 362         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 363         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 364         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 365     }
 366
 367     if (vps->vps_timing_info_present_flag) {
 368         num = vps->vps_num_units_in_tick;
 369         den = vps->vps_time_scale;
 370     } else if (sps->vui.vui_timing_info_present_flag) {
 371         num = sps->vui.vui_num_units_in_tick;
 372         den = sps->vui.vui_time_scale;
 373     }
 374
 375     if (num != 0 && den != 0)
 376         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 377                   num, den, 1 << 30);
 378 }
 379
 380 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 381 {
 382     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
 383     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 384     int ret;
 385
 386     pic_arrays_free(s);
 387     s->ps.sps = NULL;
 388     s->ps.vps = NULL;
 389
 390     if (!sps)
 391         return 0;
 392
 393     ret = pic_arrays_init(s, sps);
 394     if (ret < 0)
 395         goto fail;
 396
 397     export_stream_params(s->avctx, &s->ps, sps);
 398
 399     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 400 #if CONFIG_HEVC_DXVA2_HWACCEL
 401         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 402 #endif
 403 #if CONFIG_HEVC_D3D11VA_HWACCEL
 404         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 405 #endif
 406     }
 407
 408     *fmt++ = sps->pix_fmt;
 409     *fmt = AV_PIX_FMT_NONE;
 410
 411     ret = ff_get_format(s->avctx, pix_fmts);
 412     if (ret < 0)
 413         goto fail;
 414     s->avctx->pix_fmt = ret;
 415
 416     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 417     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 418     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 419
 420     if (sps->sao_enabled && !s->avctx->hwaccel) {
 421         av_frame_unref(s->tmp_frame);
 422         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 423         if (ret < 0)
 424             goto fail;
 425         s->frame = s->tmp_frame;
 426     }
 427
 428     s->ps.sps = sps;
 429     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 430
 431     return 0;
 432
 433 fail:
 434     pic_arrays_free(s);
 435     s->ps.sps = NULL;
 436     return ret;
 437 }
 438
 439 static int hls_slice_header(HEVCContext *s)
 440 {
 441     GetBitContext *gb = &s->HEVClc.gb;
 442     SliceHeader *sh   = &s->sh;
 443     int i, ret;
 444
 445     // Coded parameters
 446     sh->first_slice_in_pic_flag = get_bits1(gb);
 447     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 448         s->seq_decode = (s->seq_decode + 1) & 0xff;
 449         s->max_ra     = INT_MAX;
 450         if (IS_IDR(s))
 451             ff_hevc_clear_refs(s);
 452     }
 453     if (IS_IRAP(s))
 454         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 455
 456     sh->pps_id = get_ue_golomb_long(gb);
 457     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 458         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 459         return AVERROR_INVALIDDATA;
 460     }
 461     if (!sh->first_slice_in_pic_flag &&
 462         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 463         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 464         return AVERROR_INVALIDDATA;
 465     }
 466     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 467
 468     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 469         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 470
 471         ff_hevc_clear_refs(s);
 472         ret = set_sps(s, s->ps.sps);
 473         if (ret < 0)
 474             return ret;
 475
 476         s->seq_decode = (s->seq_decode + 1) & 0xff;
 477         s->max_ra     = INT_MAX;
 478     }
 479
 480     sh->dependent_slice_segment_flag = 0;
 481     if (!sh->first_slice_in_pic_flag) {
 482         int slice_address_length;
 483
 484         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 485             sh->dependent_slice_segment_flag = get_bits1(gb);
 486
 487         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 488                                             s->ps.sps->ctb_height);
 489         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 490         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 491             av_log(s->avctx, AV_LOG_ERROR,
 492                    "Invalid slice segment address: %u.\n",
 493                    sh->slice_segment_addr);
 494             return AVERROR_INVALIDDATA;
 495         }
 496
 497         if (!sh->dependent_slice_segment_flag) {
 498             sh->slice_addr = sh->slice_segment_addr;
 499             s->slice_idx++;
 500         }
 501     } else {
 502         sh->slice_segment_addr = sh->slice_addr = 0;
 503         s->slice_idx           = 0;
 504         s->slice_initialized   = 0;
 505     }
 506
 507     if (!sh->dependent_slice_segment_flag) {
 508         s->slice_initialized = 0;
 509
 510         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 511             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 512
 513         sh->slice_type = get_ue_golomb_long(gb);
 514         if (!(sh->slice_type == I_SLICE ||
 515               sh->slice_type == P_SLICE ||
 516               sh->slice_type == B_SLICE)) {
 517             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 518                    sh->slice_type);
 519             return AVERROR_INVALIDDATA;
 520         }
 521         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 522             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 523             return AVERROR_INVALIDDATA;
 524         }
 525
 526         // when flag is not present, picture is inferred to be output
 527         sh->pic_output_flag = 1;
 528         if (s->ps.pps->output_flag_present_flag)
 529             sh->pic_output_flag = get_bits1(gb);
 530
 531         if (s->ps.sps->separate_colour_plane_flag)
 532             sh->colour_plane_id = get_bits(gb, 2);
 533
 534         if (!IS_IDR(s)) {
 535             int poc;
 536
 537             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 538             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 539             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 540                 av_log(s->avctx, AV_LOG_WARNING,
 541                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 542                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 543                     return AVERROR_INVALIDDATA;
 544                 poc = s->poc;
 545             }
 546             s->poc = poc;
 547
 548             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 549             if (!sh->short_term_ref_pic_set_sps_flag) {
 550                 int pos = get_bits_left(gb);
 551                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 552                 if (ret < 0)
 553                     return ret;
 554
 555                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 556                 sh->short_term_rps = &sh->slice_rps;
 557             } else {
 558                 int numbits, rps_idx;
 559
 560                 if (!s->ps.sps->nb_st_rps) {
 561                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 562                     return AVERROR_INVALIDDATA;
 563                 }
 564
 565                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 566                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 567                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 568             }
 569
 570             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 571             if (ret < 0) {
 572                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 573                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 574                     return AVERROR_INVALIDDATA;
 575             }
 576
 577             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 578                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 579             else
 580                 sh->slice_temporal_mvp_enabled_flag = 0;
 581         } else {
 582             s->sh.short_term_rps = NULL;
 583             s->poc               = 0;
 584         }
 585
 586         /* 8.3.1 */
 587         if (s->temporal_id == 0 &&
 588             s->nal_unit_type != NAL_TRAIL_N &&
 589             s->nal_unit_type != NAL_TSA_N   &&
 590             s->nal_unit_type != NAL_STSA_N  &&
 591             s->nal_unit_type != NAL_RADL_N  &&
 592             s->nal_unit_type != NAL_RADL_R  &&
 593             s->nal_unit_type != NAL_RASL_N  &&
 594             s->nal_unit_type != NAL_RASL_R)
 595             s->pocTid0 = s->poc;
 596
 597         if (s->ps.sps->sao_enabled) {
 598             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 599             sh->slice_sample_adaptive_offset_flag[1] =
 600             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 601         } else {
 602             sh->slice_sample_adaptive_offset_flag[0] = 0;
 603             sh->slice_sample_adaptive_offset_flag[1] = 0;
 604             sh->slice_sample_adaptive_offset_flag[2] = 0;
 605         }
 606
 607         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 608         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 609             int nb_refs;
 610
 611             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 612             if (sh->slice_type == B_SLICE)
 613                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 614
 615             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 616                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 617                 if (sh->slice_type == B_SLICE)
 618                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 619             }
 620             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 621                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 622                        sh->nb_refs[L0], sh->nb_refs[L1]);
 623                 return AVERROR_INVALIDDATA;
 624             }
 625
 626             sh->rpl_modification_flag[0] = 0;
 627             sh->rpl_modification_flag[1] = 0;
 628             nb_refs = ff_hevc_frame_nb_refs(s);
 629             if (!nb_refs) {
 630                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 631                 return AVERROR_INVALIDDATA;
 632             }
 633
 634             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 635                 sh->rpl_modification_flag[0] = get_bits1(gb);
 636                 if (sh->rpl_modification_flag[0]) {
 637                     for (i = 0; i < sh->nb_refs[L0]; i++)
 638                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 639                 }
 640
 641                 if (sh->slice_type == B_SLICE) {
 642                     sh->rpl_modification_flag[1] = get_bits1(gb);
 643                     if (sh->rpl_modification_flag[1] == 1)
 644                         for (i = 0; i < sh->nb_refs[L1]; i++)
 645                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 646                 }
 647             }
 648
 649             if (sh->slice_type == B_SLICE)
 650                 sh->mvd_l1_zero_flag = get_bits1(gb);
 651
 652             if (s->ps.pps->cabac_init_present_flag)
 653                 sh->cabac_init_flag = get_bits1(gb);
 654             else
 655                 sh->cabac_init_flag = 0;
 656
 657             sh->collocated_ref_idx = 0;
 658             if (sh->slice_temporal_mvp_enabled_flag) {
 659                 sh->collocated_list = L0;
 660                 if (sh->slice_type == B_SLICE)
 661                     sh->collocated_list = !get_bits1(gb);
 662
 663                 if (sh->nb_refs[sh->collocated_list] > 1) {
 664                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 665                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 666                         av_log(s->avctx, AV_LOG_ERROR,
 667                                "Invalid collocated_ref_idx: %d.\n",
 668                                sh->collocated_ref_idx);
 669                         return AVERROR_INVALIDDATA;
 670                     }
 671                 }
 672             }
 673
 674             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 675                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 676                 pred_weight_table(s, gb);
 677             }
 678
 679             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 680             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 681                 av_log(s->avctx, AV_LOG_ERROR,
 682                        "Invalid number of merging MVP candidates: %d.\n",
 683                        sh->max_num_merge_cand);
 684                 return AVERROR_INVALIDDATA;
 685             }
 686         }
 687
 688         sh->slice_qp_delta = get_se_golomb(gb);
 689
 690         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 691             sh->slice_cb_qp_offset = get_se_golomb(gb);
 692             sh->slice_cr_qp_offset = get_se_golomb(gb);
 693         } else {
 694             sh->slice_cb_qp_offset = 0;
 695             sh->slice_cr_qp_offset = 0;
 696         }
 697
 698         if (s->ps.pps->deblocking_filter_control_present_flag) {
 699             int deblocking_filter_override_flag = 0;
 700
 701             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 702                 deblocking_filter_override_flag = get_bits1(gb);
 703
 704             if (deblocking_filter_override_flag) {
 705                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 706                 if (!sh->disable_deblocking_filter_flag) {
 707                     sh->beta_offset = get_se_golomb(gb) * 2;
 708                     sh->tc_offset   = get_se_golomb(gb) * 2;
 709                 }
 710             } else {
 711                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 712                 sh->beta_offset                    = s->ps.pps->beta_offset;
 713                 sh->tc_offset                      = s->ps.pps->tc_offset;
 714             }
 715         } else {
 716             sh->disable_deblocking_filter_flag = 0;
 717             sh->beta_offset                    = 0;
 718             sh->tc_offset                      = 0;
 719         }
 720
 721         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 722             (sh->slice_sample_adaptive_offset_flag[0] ||
 723              sh->slice_sample_adaptive_offset_flag[1] ||
 724              !sh->disable_deblocking_filter_flag)) {
 725             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 726         } else {
 727             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 728         }
 729     } else if (!s->slice_initialized) {
 730         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 731         return AVERROR_INVALIDDATA;
 732     }
 733
 734     sh->num_entry_point_offsets = 0;
 735     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 736         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 737         if (sh->num_entry_point_offsets > 0) {
 738             int offset_len = get_ue_golomb_long(gb) + 1;
 739
 740             for (i = 0; i < sh->num_entry_point_offsets; i++)
 741                 skip_bits(gb, offset_len);
 742         }
 743     }
 744
 745     if (s->ps.pps->slice_header_extension_present_flag) {
 746         unsigned int length = get_ue_golomb_long(gb);
 747         for (i = 0; i < length; i++)
 748             skip_bits(gb, 8);  // slice_header_extension_data_byte
 749     }
 750
 751     // Inferred parameters
 752     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 753     if (sh->slice_qp > 51 ||
 754         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 755         av_log(s->avctx, AV_LOG_ERROR,
 756                "The slice_qp %d is outside the valid range "
 757                "[%d, 51].\n",
 758                sh->slice_qp,
 759                -s->ps.sps->qp_bd_offset);
 760         return AVERROR_INVALIDDATA;
 761     }
 762
 763     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 764
 765     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 766         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 767         return AVERROR_INVALIDDATA;
 768     }
 769
 770     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 771
 772     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 773         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 774                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 775
 776     s->slice_initialized = 1;
 777
 778     return 0;
 779 }
 780
 781 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 782
 783 #define SET_SAO(elem, value)                            \
 784 do {                                                    \
 785     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 786         sao->elem = value;                              \
 787     else if (sao_merge_left_flag)                       \
 788         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 789     else if (sao_merge_up_flag)                         \
 790         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 791     else                                                \
 792         sao->elem = 0;                                  \
 793 } while (0)
 794
 795 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 796 {
 797     HEVCLocalContext *lc    = &s->HEVClc;
 798     int sao_merge_left_flag = 0;
 799     int sao_merge_up_flag   = 0;
 800     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 801     SAOParams *sao          = &CTB(s->sao, rx, ry);
 802     int c_idx, i;
 803
 804     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 805         s->sh.slice_sample_adaptive_offset_flag[1]) {
 806         if (rx > 0) {
 807             if (lc->ctb_left_flag)
 808                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 809         }
 810         if (ry > 0 && !sao_merge_left_flag) {
 811             if (lc->ctb_up_flag)
 812                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 813         }
 814     }
 815
 816     for (c_idx = 0; c_idx < 3; c_idx++) {
 817         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 818             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 819             continue;
 820         }
 821
 822         if (c_idx == 2) {
 823             sao->type_idx[2] = sao->type_idx[1];
 824             sao->eo_class[2] = sao->eo_class[1];
 825         } else {
 826             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 827         }
 828
 829         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 830             continue;
 831
 832         for (i = 0; i < 4; i++)
 833             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 834
 835         if (sao->type_idx[c_idx] == SAO_BAND) {
 836             for (i = 0; i < 4; i++) {
 837                 if (sao->offset_abs[c_idx][i]) {
 838                     SET_SAO(offset_sign[c_idx][i],
 839                             ff_hevc_sao_offset_sign_decode(s));
 840                 } else {
 841                     sao->offset_sign[c_idx][i] = 0;
 842                 }
 843             }
 844             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 845         } else if (c_idx != 2) {
 846             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 847         }
 848
 849         // Inferred parameters
 850         sao->offset_val[c_idx][0] = 0;
 851         for (i = 0; i < 4; i++) {
 852             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 853             if (sao->type_idx[c_idx] == SAO_EDGE) {
 854                 if (i > 1)
 855                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 856             } else if (sao->offset_sign[c_idx][i]) {
 857                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 858             }
 859         }
 860     }
 861 }
 862
 863 #undef SET_SAO
 864 #undef CTB
 865
 866 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 867                                 int log2_trafo_size, enum ScanType scan_idx,
 868                                 int c_idx)
 869 {
 870 #define GET_COORD(offset, n)                                    \
 871     do {                                                        \
 872         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 873         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 874     } while (0)
 875     HEVCLocalContext *lc    = &s->HEVClc;
 876     int transform_skip_flag = 0;
 877
 878     int last_significant_coeff_x, last_significant_coeff_y;
 879     int last_scan_pos;
 880     int n_end;
 881     int num_coeff    = 0;
 882     int greater1_ctx = 1;
 883
 884     int num_last_subset;
 885     int x_cg_last_sig, y_cg_last_sig;
 886
 887     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 888
 889     ptrdiff_t stride = s->frame->linesize[c_idx];
 890     int hshift       = s->ps.sps->hshift[c_idx];
 891     int vshift       = s->ps.sps->vshift[c_idx];
 892     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 893                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 894     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 895     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 896
 897     int trafo_size = 1 << log2_trafo_size;
 898     int i, qp, shift, add, scale, scale_m;
 899     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 900     const uint8_t *scale_matrix;
 901     uint8_t dc_scale;
 902
 903     // Derive QP for dequant
 904     if (!lc->cu.cu_transquant_bypass_flag) {
 905         static const int qp_c[] = {
 906             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 907         };
 908
 909         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 910             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 911             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 912             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 913         };
 914
 915         static const uint8_t div6[51 + 2 * 6 + 1] = {
 916             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 917             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 918             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 919         };
 920         int qp_y = lc->qp_y;
 921
 922         if (c_idx == 0) {
 923             qp = qp_y + s->ps.sps->qp_bd_offset;
 924         } else {
 925             int qp_i, offset;
 926
 927             if (c_idx == 1)
 928                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 929             else
 930                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 931
 932             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 933             if (qp_i < 30)
 934                 qp = qp_i;
 935             else if (qp_i > 43)
 936                 qp = qp_i - 6;
 937             else
 938                 qp = qp_c[qp_i - 30];
 939
 940             qp += s->ps.sps->qp_bd_offset;
 941         }
 942
 943         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 944         add      = 1 << (shift - 1);
 945         scale    = level_scale[rem6[qp]] << (div6[qp]);
 946         scale_m  = 16; // default when no custom scaling lists.
 947         dc_scale = 16;
 948
 949         if (s->ps.sps->scaling_list_enable_flag) {
 950             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 951                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 952             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 953
 954             if (log2_trafo_size != 5)
 955                 matrix_id = 3 * matrix_id + c_idx;
 956
 957             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 958             if (log2_trafo_size >= 4)
 959                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 960         }
 961     }
 962
 963     if (s->ps.pps->transform_skip_enabled_flag &&
 964         !lc->cu.cu_transquant_bypass_flag   &&
 965         log2_trafo_size == 2) {
 966         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 967     }
 968
 969     last_significant_coeff_x =
 970         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 971     last_significant_coeff_y =
 972         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 973
 974     if (last_significant_coeff_x > 3) {
 975         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 976         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 977                                    (2 + (last_significant_coeff_x & 1)) +
 978                                    suffix;
 979     }
 980
 981     if (last_significant_coeff_y > 3) {
 982         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 983         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 984                                    (2 + (last_significant_coeff_y & 1)) +
 985                                    suffix;
 986     }
 987
 988     if (scan_idx == SCAN_VERT)
 989         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 990
 991     x_cg_last_sig = last_significant_coeff_x >> 2;
 992     y_cg_last_sig = last_significant_coeff_y >> 2;
 993
 994     switch (scan_idx) {
 995     case SCAN_DIAG: {
 996         int last_x_c = last_significant_coeff_x & 3;
 997         int last_y_c = last_significant_coeff_y & 3;
 998
 999         scan_x_off = ff_hevc_diag_scan4x4_x;
1000         scan_y_off = ff_hevc_diag_scan4x4_y;
1001         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1002         if (trafo_size == 4) {
1003             scan_x_cg = scan_1x1;
1004             scan_y_cg = scan_1x1;
1005         } else if (trafo_size == 8) {
1006             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1007             scan_x_cg  = diag_scan2x2_x;
1008             scan_y_cg  = diag_scan2x2_y;
1009         } else if (trafo_size == 16) {
1010             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1011             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1012             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1013         } else { // trafo_size == 32
1014             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1015             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1016             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1017         }
1018         break;
1019     }
1020     case SCAN_HORIZ:
1021         scan_x_cg  = horiz_scan2x2_x;
1022         scan_y_cg  = horiz_scan2x2_y;
1023         scan_x_off = horiz_scan4x4_x;
1024         scan_y_off = horiz_scan4x4_y;
1025         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1026         break;
1027     default: //SCAN_VERT
1028         scan_x_cg  = horiz_scan2x2_y;
1029         scan_y_cg  = horiz_scan2x2_x;
1030         scan_x_off = horiz_scan4x4_y;
1031         scan_y_off = horiz_scan4x4_x;
1032         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1033         break;
1034     }
1035     num_coeff++;
1036     num_last_subset = (num_coeff - 1) >> 4;
1037
1038     for (i = num_last_subset; i >= 0; i--) {
1039         int n, m;
1040         int x_cg, y_cg, x_c, y_c;
1041         int implicit_non_zero_coeff = 0;
1042         int64_t trans_coeff_level;
1043         int prev_sig = 0;
1044         int offset   = i << 4;
1045
1046         uint8_t significant_coeff_flag_idx[16];
1047         uint8_t nb_significant_coeff_flag = 0;
1048
1049         x_cg = scan_x_cg[i];
1050         y_cg = scan_y_cg[i];
1051
1052         if (i < num_last_subset && i > 0) {
1053             int ctx_cg = 0;
1054             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1055                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1056             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1057                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1058
1059             significant_coeff_group_flag[x_cg][y_cg] =
1060                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1061             implicit_non_zero_coeff = 1;
1062         } else {
1063             significant_coeff_group_flag[x_cg][y_cg] =
1064                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1065                  (x_cg == 0 && y_cg == 0));
1066         }
1067
1068         last_scan_pos = num_coeff - offset - 1;
1069
1070         if (i == num_last_subset) {
1071             n_end                         = last_scan_pos - 1;
1072             significant_coeff_flag_idx[0] = last_scan_pos;
1073             nb_significant_coeff_flag     = 1;
1074         } else {
1075             n_end = 15;
1076         }
1077
1078         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1079             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1080         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1081             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1082
1083         for (n = n_end; n >= 0; n--) {
1084             GET_COORD(offset, n);
1085
1086             if (significant_coeff_group_flag[x_cg][y_cg] &&
1087                 (n > 0 || implicit_non_zero_coeff == 0)) {
1088                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1089                                                           log2_trafo_size,
1090                                                           scan_idx,
1091                                                           prev_sig) == 1) {
1092                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1093                     nb_significant_coeff_flag++;
1094                     implicit_non_zero_coeff = 0;
1095                 }
1096             } else {
1097                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1098                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1099                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1100                     nb_significant_coeff_flag++;
1101                 }
1102             }
1103         }
1104
1105         n_end = nb_significant_coeff_flag;
1106
1107         if (n_end) {
1108             int first_nz_pos_in_cg = 16;
1109             int last_nz_pos_in_cg = -1;
1110             int c_rice_param = 0;
1111             int first_greater1_coeff_idx = -1;
1112             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1113             uint16_t coeff_sign_flag;
1114             int sum_abs = 0;
1115             int sign_hidden = 0;
1116
1117             // initialize first elem of coeff_bas_level_greater1_flag
1118             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1119
1120             if (!(i == num_last_subset) && greater1_ctx == 0)
1121                 ctx_set++;
1122             greater1_ctx      = 1;
1123             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1124
1125             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1126                 int n_idx = significant_coeff_flag_idx[m];
1127                 int inc   = (ctx_set << 2) + greater1_ctx;
1128                 coeff_abs_level_greater1_flag[n_idx] =
1129                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1130                 if (coeff_abs_level_greater1_flag[n_idx]) {
1131                     greater1_ctx = 0;
1132                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1133                     greater1_ctx++;
1134                 }
1135
1136                 if (coeff_abs_level_greater1_flag[n_idx] &&
1137                     first_greater1_coeff_idx == -1)
1138                     first_greater1_coeff_idx = n_idx;
1139             }
1140             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1141             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1142                                  !lc->cu.cu_transquant_bypass_flag;
1143
1144             if (first_greater1_coeff_idx != -1) {
1145                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1146             }
1147             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1148                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1149             } else {
1150                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1151             }
1152
1153             for (m = 0; m < n_end; m++) {
1154                 n = significant_coeff_flag_idx[m];
1155                 GET_COORD(offset, n);
1156                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1157                 if (trans_coeff_level == ((m < 8) ?
1158                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1159                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1160
1161                     trans_coeff_level += last_coeff_abs_level_remaining;
1162                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1163                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1164                 }
1165                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1166                     sum_abs += trans_coeff_level;
1167                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1168                         trans_coeff_level = -trans_coeff_level;
1169                 }
1170                 if (coeff_sign_flag >> 15)
1171                     trans_coeff_level = -trans_coeff_level;
1172                 coeff_sign_flag <<= 1;
1173                 if (!lc->cu.cu_transquant_bypass_flag) {
1174                     if (s->ps.sps->scaling_list_enable_flag) {
1175                         if (y_c || x_c || log2_trafo_size < 4) {
1176                             int pos;
1177                             switch (log2_trafo_size) {
1178                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1179                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1180                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1181                             default: pos = (y_c        << 2) +  x_c;
1182                             }
1183                             scale_m = scale_matrix[pos];
1184                         } else {
1185                             scale_m = dc_scale;
1186                         }
1187                     }
1188                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1189                     if(trans_coeff_level < 0) {
1190                         if((~trans_coeff_level) & 0xFffffffffff8000)
1191                             trans_coeff_level = -32768;
1192                     } else {
1193                         if (trans_coeff_level & 0xffffffffffff8000)
1194                             trans_coeff_level = 32767;
1195                     }
1196                 }
1197                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1198             }
1199         }
1200     }
1201
1202     if (lc->cu.cu_transquant_bypass_flag) {
1203         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1204     } else {
1205         if (transform_skip_flag)
1206             s->hevcdsp.transform_skip(dst, coeffs, stride);
1207         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1208                  log2_trafo_size == 2)
1209             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1210         else
1211             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1212     }
1213 }
1214
1215 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1216                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1217                               int log2_cb_size, int log2_trafo_size,
1218                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1219 {
1220     HEVCLocalContext *lc = &s->HEVClc;
1221
1222     if (lc->cu.pred_mode == MODE_INTRA) {
1223         int trafo_size = 1 << log2_trafo_size;
1224         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1225
1226         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1227         if (log2_trafo_size > 2) {
1228             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1229             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1230             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1231             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1232         } else if (blk_idx == 3) {
1233             trafo_size = trafo_size << s->ps.sps->hshift[1];
1234             ff_hevc_set_neighbour_available(s, xBase, yBase,
1235                                             trafo_size, trafo_size);
1236             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1237             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1238         }
1239     }
1240
1241     if (cbf_luma || cbf_cb || cbf_cr) {
1242         int scan_idx   = SCAN_DIAG;
1243         int scan_idx_c = SCAN_DIAG;
1244
1245         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1246             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1247             if (lc->tu.cu_qp_delta != 0)
1248                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1249                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1250             lc->tu.is_cu_qp_delta_coded = 1;
1251
1252             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1253                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1254                 av_log(s->avctx, AV_LOG_ERROR,
1255                        "The cu_qp_delta %d is outside the valid range "
1256                        "[%d, %d].\n",
1257                        lc->tu.cu_qp_delta,
1258                        -(26 + s->ps.sps->qp_bd_offset / 2),
1259                         (25 + s->ps.sps->qp_bd_offset / 2));
1260                 return AVERROR_INVALIDDATA;
1261             }
1262
1263             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1264         }
1265
1266         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1267             if (lc->tu.cur_intra_pred_mode >= 6 &&
1268                 lc->tu.cur_intra_pred_mode <= 14) {
1269                 scan_idx = SCAN_VERT;
1270             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1271                        lc->tu.cur_intra_pred_mode <= 30) {
1272                 scan_idx = SCAN_HORIZ;
1273             }
1274
1275             if (lc->pu.intra_pred_mode_c >=  6 &&
1276                 lc->pu.intra_pred_mode_c <= 14) {
1277                 scan_idx_c = SCAN_VERT;
1278             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1279                        lc->pu.intra_pred_mode_c <= 30) {
1280                 scan_idx_c = SCAN_HORIZ;
1281             }
1282         }
1283
1284         if (cbf_luma)
1285             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1286         if (log2_trafo_size > 2) {
1287             if (cbf_cb)
1288                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1289             if (cbf_cr)
1290                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1291         } else if (blk_idx == 3) {
1292             if (cbf_cb)
1293                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1294             if (cbf_cr)
1295                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1296         }
1297     }
1298     return 0;
1299 }
1300
1301 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1302 {
1303     int cb_size          = 1 << log2_cb_size;
1304     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1305
1306     int min_pu_width     = s->ps.sps->min_pu_width;
1307     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1308     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1309     int i, j;
1310
1311     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1312         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1313             s->is_pcm[i + j * min_pu_width] = 2;
1314 }
1315
1316 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1317                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1318                               int log2_cb_size, int log2_trafo_size,
1319                               int trafo_depth, int blk_idx,
1320                               int cbf_cb, int cbf_cr)
1321 {
1322     HEVCLocalContext *lc = &s->HEVClc;
1323     uint8_t split_transform_flag;
1324     int ret;
1325
1326     if (lc->cu.intra_split_flag) {
1327         if (trafo_depth == 1)
1328             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1329     } else {
1330         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1331     }
1332
1333     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1334         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1335         trafo_depth     < lc->cu.max_trafo_depth       &&
1336         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1337         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1338     } else {
1339         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1340                           lc->cu.pred_mode == MODE_INTER &&
1341                           lc->cu.part_mode != PART_2Nx2N &&
1342                           trafo_depth == 0;
1343
1344         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1345                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1346                                inter_split;
1347     }
1348
1349     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1350         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1351     else if (log2_trafo_size > 2 || trafo_depth == 0)
1352         cbf_cb = 0;
1353     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1354         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1355     else if (log2_trafo_size > 2 || trafo_depth == 0)
1356         cbf_cr = 0;
1357
1358     if (split_transform_flag) {
1359         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1360         const int x1 = x0 + trafo_size_split;
1361         const int y1 = y0 + trafo_size_split;
1362
1363 #define SUBDIVIDE(x, y, idx)                                                    \
1364 do {                                                                            \
1365     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1366                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1367                              cbf_cb, cbf_cr);                                   \
1368     if (ret < 0)                                                                \
1369         return ret;                                                             \
1370 } while (0)
1371
1372         SUBDIVIDE(x0, y0, 0);
1373         SUBDIVIDE(x1, y0, 1);
1374         SUBDIVIDE(x0, y1, 2);
1375         SUBDIVIDE(x1, y1, 3);
1376
1377 #undef SUBDIVIDE
1378     } else {
1379         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1380         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1381         int min_tu_width     = s->ps.sps->min_tb_width;
1382         int cbf_luma         = 1;
1383
1384         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1385             cbf_cb || cbf_cr)
1386             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1387
1388         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1389                                  log2_cb_size, log2_trafo_size,
1390                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1391         if (ret < 0)
1392             return ret;
1393         // TODO: store cbf_luma somewhere else
1394         if (cbf_luma) {
1395             int i, j;
1396             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1397                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1398                     int x_tu = (x0 + j) >> log2_min_tu_size;
1399                     int y_tu = (y0 + i) >> log2_min_tu_size;
1400                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1401                 }
1402         }
1403         if (!s->sh.disable_deblocking_filter_flag) {
1404             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1405             if (s->ps.pps->transquant_bypass_enable_flag &&
1406                 lc->cu.cu_transquant_bypass_flag)
1407                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1408         }
1409     }
1410     return 0;
1411 }
1412
1413 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1414 {
1415     //TODO: non-4:2:0 support
1416     HEVCLocalContext *lc = &s->HEVClc;
1417     GetBitContext gb;
1418     int cb_size   = 1 << log2_cb_size;
1419     int stride0   = s->frame->linesize[0];
1420     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1421     int   stride1 = s->frame->linesize[1];
1422     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1423     int   stride2 = s->frame->linesize[2];
1424     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1425
1426     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1427     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1428     int ret;
1429
1430     if (!s->sh.disable_deblocking_filter_flag)
1431         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1432
1433     ret = init_get_bits(&gb, pcm, length);
1434     if (ret < 0)
1435         return ret;
1436
1437     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1438     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1439     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1440     return 0;
1441 }
1442
1443 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1444 {
1445     HEVCLocalContext *lc = &s->HEVClc;
1446     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1447     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1448
1449     if (x)
1450         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1451     if (y)
1452         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1453
1454     switch (x) {
1455     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1456     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1457     case 0: lc->pu.mvd.x = 0;                               break;
1458     }
1459
1460     switch (y) {
1461     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1462     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1463     case 0: lc->pu.mvd.y = 0;                               break;
1464     }
1465 }
1466
1467 /**
1468  * 8.5.3.2.2.1 Luma sample interpolation process
1469  *
1470  * @param s HEVC decoding context
1471  * @param dst target buffer for block data at block position
1472  * @param dststride stride of the dst buffer
1473  * @param ref reference picture buffer at origin (0, 0)
1474  * @param mv motion vector (relative to block position) to get pixel data from
1475  * @param x_off horizontal position of block from origin (0, 0)
1476  * @param y_off vertical position of block from origin (0, 0)
1477  * @param block_w width of block
1478  * @param block_h height of block
1479  */
1480 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1481                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1482                     int block_w, int block_h, int pred_idx)
1483 {
1484     HEVCLocalContext *lc = &s->HEVClc;
1485     uint8_t *src         = ref->data[0];
1486     ptrdiff_t srcstride  = ref->linesize[0];
1487     int pic_width        = s->ps.sps->width;
1488     int pic_height       = s->ps.sps->height;
1489
1490     int mx         = mv->x & 3;
1491     int my         = mv->y & 3;
1492     int extra_left = ff_hevc_qpel_extra_before[mx];
1493     int extra_top  = ff_hevc_qpel_extra_before[my];
1494
1495     x_off += mv->x >> 2;
1496     y_off += mv->y >> 2;
1497     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1498
1499     if (x_off < extra_left || y_off < extra_top ||
1500         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1501         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1502         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1503         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1504         int buf_offset = extra_top *
1505                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1506
1507         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1508                                  edge_emu_stride, srcstride,
1509                                  block_w + ff_hevc_qpel_extra[mx],
1510                                  block_h + ff_hevc_qpel_extra[my],
1511                                  x_off - extra_left, y_off - extra_top,
1512                                  pic_width, pic_height);
1513         src = lc->edge_emu_buffer + buf_offset;
1514         srcstride = edge_emu_stride;
1515     }
1516     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1517                                                    block_h, mx, my, lc->mc_buffer);
1518 }
1519
1520 /**
1521  * 8.5.3.2.2.2 Chroma sample interpolation process
1522  *
1523  * @param s HEVC decoding context
1524  * @param dst1 target buffer for block data at block position (U plane)
1525  * @param dst2 target buffer for block data at block position (V plane)
1526  * @param dststride stride of the dst1 and dst2 buffers
1527  * @param ref reference picture buffer at origin (0, 0)
1528  * @param mv motion vector (relative to block position) to get pixel data from
1529  * @param x_off horizontal position of block from origin (0, 0)
1530  * @param y_off vertical position of block from origin (0, 0)
1531  * @param block_w width of block
1532  * @param block_h height of block
1533  */
1534 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1535                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1536                       int x_off, int y_off, int block_w, int block_h)
1537 {
1538     HEVCLocalContext *lc = &s->HEVClc;
1539     uint8_t *src1        = ref->data[1];
1540     uint8_t *src2        = ref->data[2];
1541     ptrdiff_t src1stride = ref->linesize[1];
1542     ptrdiff_t src2stride = ref->linesize[2];
1543     int pic_width        = s->ps.sps->width >> 1;
1544     int pic_height       = s->ps.sps->height >> 1;
1545
1546     int mx = mv->x & 7;
1547     int my = mv->y & 7;
1548
1549     x_off += mv->x >> 3;
1550     y_off += mv->y >> 3;
1551     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1552     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1553
1554     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1555         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1556         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1557         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1558         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1559         int buf_offset1 = EPEL_EXTRA_BEFORE *
1560                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1561         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1562         int buf_offset2 = EPEL_EXTRA_BEFORE *
1563                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1564
1565         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1566                                  edge_emu_stride, src1stride,
1567                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1568                                  x_off - EPEL_EXTRA_BEFORE,
1569                                  y_off - EPEL_EXTRA_BEFORE,
1570                                  pic_width, pic_height);
1571
1572         src1 = lc->edge_emu_buffer + buf_offset1;
1573         src1stride = edge_emu_stride;
1574         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1575                                              block_w, block_h, mx, my, lc->mc_buffer);
1576
1577         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1578                                  edge_emu_stride, src2stride,
1579                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1580                                  x_off - EPEL_EXTRA_BEFORE,
1581                                  y_off - EPEL_EXTRA_BEFORE,
1582                                  pic_width, pic_height);
1583         src2 = lc->edge_emu_buffer + buf_offset2;
1584         src2stride = edge_emu_stride;
1585
1586         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1587                                              block_w, block_h, mx, my,
1588                                              lc->mc_buffer);
1589     } else {
1590         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1591                                              block_w, block_h, mx, my,
1592                                              lc->mc_buffer);
1593         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1594                                              block_w, block_h, mx, my,
1595                                              lc->mc_buffer);
1596     }
1597 }
1598
1599 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1600                                 const Mv *mv, int y0, int height)
1601 {
1602     int y = (mv->y >> 2) + y0 + height + 9;
1603     ff_thread_await_progress(&ref->tf, y, 0);
1604 }
1605
1606 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1607                                   int nPbH, int log2_cb_size, int part_idx,
1608                                   int merge_idx, MvField *mv)
1609 {
1610     HEVCLocalContext *lc             = &s->HEVClc;
1611     enum InterPredIdc inter_pred_idc = PRED_L0;
1612     int mvp_flag;
1613
1614     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1615     if (s->sh.slice_type == B_SLICE)
1616         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1617
1618     if (inter_pred_idc != PRED_L1) {
1619         if (s->sh.nb_refs[L0])
1620             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1621
1622         mv->pred_flag[0] = 1;
1623         hls_mvd_coding(s, x0, y0, 0);
1624         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1625         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1626                                  part_idx, merge_idx, mv, mvp_flag, 0);
1627         mv->mv[0].x += lc->pu.mvd.x;
1628         mv->mv[0].y += lc->pu.mvd.y;
1629     }
1630
1631     if (inter_pred_idc != PRED_L0) {
1632         if (s->sh.nb_refs[L1])
1633             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1634
1635         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1636             AV_ZERO32(&lc->pu.mvd);
1637         } else {
1638             hls_mvd_coding(s, x0, y0, 1);
1639         }
1640
1641         mv->pred_flag[1] = 1;
1642         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1643         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1644                                  part_idx, merge_idx, mv, mvp_flag, 1);
1645         mv->mv[1].x += lc->pu.mvd.x;
1646         mv->mv[1].y += lc->pu.mvd.y;
1647     }
1648 }
1649
1650 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1651                                 int nPbW, int nPbH,
1652                                 int log2_cb_size, int partIdx)
1653 {
1654     static const int pred_indices[] = {
1655         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1656     };
1657     const int pred_idx = pred_indices[nPbW];
1658
1659 #define POS(c_idx, x, y)                                                              \
1660     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1661                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1662     HEVCLocalContext *lc = &s->HEVClc;
1663     int merge_idx = 0;
1664     struct MvField current_mv = {{{ 0 }}};
1665
1666     int min_pu_width = s->ps.sps->min_pu_width;
1667
1668     MvField *tab_mvf = s->ref->tab_mvf;
1669     RefPicList  *refPicList = s->ref->refPicList;
1670     HEVCFrame *ref0, *ref1;
1671
1672     int tmpstride = MAX_PB_SIZE;
1673
1674     uint8_t *dst0 = POS(0, x0, y0);
1675     uint8_t *dst1 = POS(1, x0, y0);
1676     uint8_t *dst2 = POS(2, x0, y0);
1677     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1678     int min_cb_width     = s->ps.sps->min_cb_width;
1679     int x_cb             = x0 >> log2_min_cb_size;
1680     int y_cb             = y0 >> log2_min_cb_size;
1681     int x_pu, y_pu;
1682     int i, j;
1683
1684     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1685
1686     if (!skip_flag)
1687         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1688
1689     if (skip_flag || lc->pu.merge_flag) {
1690         if (s->sh.max_num_merge_cand > 1)
1691             merge_idx = ff_hevc_merge_idx_decode(s);
1692         else
1693             merge_idx = 0;
1694
1695         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1696                                    partIdx, merge_idx, &current_mv);
1697     } else {
1698         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1699                               partIdx, merge_idx, &current_mv);
1700     }
1701
1702     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1703     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1704
1705     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1706         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1707             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1708
1709     if (current_mv.pred_flag[0]) {
1710         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1711         if (!ref0)
1712             return;
1713         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1714     }
1715     if (current_mv.pred_flag[1]) {
1716         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1717         if (!ref1)
1718             return;
1719         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1720     }
1721
1722     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1723         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1724         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1725
1726         luma_mc(s, tmp, tmpstride, ref0->frame,
1727                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1728
1729         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1730             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1731             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1732                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1733                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1734                                      dst0, s->frame->linesize[0], tmp,
1735                                      tmpstride, nPbW, nPbH);
1736         } else {
1737             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1738         }
1739         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1740                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1741
1742         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1743             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1744             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1745                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1746                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1747                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1748                                      nPbW / 2, nPbH / 2);
1749             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1750                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1751                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1752                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1753                                      nPbW / 2, nPbH / 2);
1754         } else {
1755             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1756             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1757         }
1758     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1759         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1760         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1761
1762         luma_mc(s, tmp, tmpstride, ref1->frame,
1763                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1764
1765         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1766             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1767             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1768                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1769                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1770                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1771                                       nPbW, nPbH);
1772         } else {
1773             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1774         }
1775
1776         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1777                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1778
1779         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1780             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1781             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1782                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1783                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1784                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1785             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1786                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1787                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1788                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1789         } else {
1790             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1791             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1792         }
1793     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1794         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1795         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1796         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1797         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1798
1799         luma_mc(s, tmp, tmpstride, ref0->frame,
1800                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1801         luma_mc(s, tmp2, tmpstride, ref1->frame,
1802                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1803
1804         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1805             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1806             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1807                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1808                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1809                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1810                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1811                                          dst0, s->frame->linesize[0],
1812                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1813         } else {
1814             s->hevcdsp.put_unweighted_pred_avg(dst0, s->frame->linesize[0],
1815                                                tmp, tmp2, tmpstride, nPbW, nPbH);
1816         }
1817
1818         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1819                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1820         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1821                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1822
1823         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1824             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1825             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1826                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1827                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1828                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1829                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1830                                          dst1, s->frame->linesize[1], tmp, tmp3,
1831                                          tmpstride, nPbW / 2, nPbH / 2);
1832             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1833                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1834                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1835                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1836                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1837                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1838                                          tmpstride, nPbW / 2, nPbH / 2);
1839         } else {
1840             s->hevcdsp.put_unweighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1841             s->hevcdsp.put_unweighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1842         }
1843     }
1844 }
1845
1846 /**
1847  * 8.4.1
1848  */
1849 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1850                                 int prev_intra_luma_pred_flag)
1851 {
1852     HEVCLocalContext *lc = &s->HEVClc;
1853     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1854     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1855     int min_pu_width     = s->ps.sps->min_pu_width;
1856     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1857     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1858     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1859
1860     int cand_up   = (lc->ctb_up_flag || y0b) ?
1861                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1862     int cand_left = (lc->ctb_left_flag || x0b) ?
1863                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1864
1865     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1866
1867     MvField *tab_mvf = s->ref->tab_mvf;
1868     int intra_pred_mode;
1869     int candidate[3];
1870     int i, j;
1871
1872     // intra_pred_mode prediction does not cross vertical CTB boundaries
1873     if ((y0 - 1) < y_ctb)
1874         cand_up = INTRA_DC;
1875
1876     if (cand_left == cand_up) {
1877         if (cand_left < 2) {
1878             candidate[0] = INTRA_PLANAR;
1879             candidate[1] = INTRA_DC;
1880             candidate[2] = INTRA_ANGULAR_26;
1881         } else {
1882             candidate[0] = cand_left;
1883             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1884             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1885         }
1886     } else {
1887         candidate[0] = cand_left;
1888         candidate[1] = cand_up;
1889         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1890             candidate[2] = INTRA_PLANAR;
1891         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1892             candidate[2] = INTRA_DC;
1893         } else {
1894             candidate[2] = INTRA_ANGULAR_26;
1895         }
1896     }
1897
1898     if (prev_intra_luma_pred_flag) {
1899         intra_pred_mode = candidate[lc->pu.mpm_idx];
1900     } else {
1901         if (candidate[0] > candidate[1])
1902             FFSWAP(uint8_t, candidate[0], candidate[1]);
1903         if (candidate[0] > candidate[2])
1904             FFSWAP(uint8_t, candidate[0], candidate[2]);
1905         if (candidate[1] > candidate[2])
1906             FFSWAP(uint8_t, candidate[1], candidate[2]);
1907
1908         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1909         for (i = 0; i < 3; i++)
1910             if (intra_pred_mode >= candidate[i])
1911                 intra_pred_mode++;
1912     }
1913
1914     /* write the intra prediction units into the mv array */
1915     if (!size_in_pus)
1916         size_in_pus = 1;
1917     for (i = 0; i < size_in_pus; i++) {
1918         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1919                intra_pred_mode, size_in_pus);
1920
1921         for (j = 0; j < size_in_pus; j++) {
1922             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1923             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1924             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1925             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1926             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1927             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1928             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1929             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1930             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1931         }
1932     }
1933
1934     return intra_pred_mode;
1935 }
1936
1937 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1938                                           int log2_cb_size, int ct_depth)
1939 {
1940     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1941     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1942     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1943     int y;
1944
1945     for (y = 0; y < length; y++)
1946         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1947                ct_depth, length);
1948 }
1949
1950 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1951                                   int log2_cb_size)
1952 {
1953     HEVCLocalContext *lc = &s->HEVClc;
1954     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1955     uint8_t prev_intra_luma_pred_flag[4];
1956     int split   = lc->cu.part_mode == PART_NxN;
1957     int pb_size = (1 << log2_cb_size) >> split;
1958     int side    = split + 1;
1959     int chroma_mode;
1960     int i, j;
1961
1962     for (i = 0; i < side; i++)
1963         for (j = 0; j < side; j++)
1964             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1965
1966     for (i = 0; i < side; i++) {
1967         for (j = 0; j < side; j++) {
1968             if (prev_intra_luma_pred_flag[2 * i + j])
1969                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1970             else
1971                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1972
1973             lc->pu.intra_pred_mode[2 * i + j] =
1974                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1975                                      prev_intra_luma_pred_flag[2 * i + j]);
1976         }
1977     }
1978
1979     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1980     if (chroma_mode != 4) {
1981         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1982             lc->pu.intra_pred_mode_c = 34;
1983         else
1984             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1985     } else {
1986         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1987     }
1988 }
1989
1990 static void intra_prediction_unit_default_value(HEVCContext *s,
1991                                                 int x0, int y0,
1992                                                 int log2_cb_size)
1993 {
1994     HEVCLocalContext *lc = &s->HEVClc;
1995     int pb_size          = 1 << log2_cb_size;
1996     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
1997     int min_pu_width     = s->ps.sps->min_pu_width;
1998     MvField *tab_mvf     = s->ref->tab_mvf;
1999     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2000     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2001     int j, k;
2002
2003     if (size_in_pus == 0)
2004         size_in_pus = 1;
2005     for (j = 0; j < size_in_pus; j++) {
2006         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2007         for (k = 0; k < size_in_pus; k++)
2008             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2009     }
2010 }
2011
2012 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2013 {
2014     int cb_size          = 1 << log2_cb_size;
2015     HEVCLocalContext *lc = &s->HEVClc;
2016     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2017     int length           = cb_size >> log2_min_cb_size;
2018     int min_cb_width     = s->ps.sps->min_cb_width;
2019     int x_cb             = x0 >> log2_min_cb_size;
2020     int y_cb             = y0 >> log2_min_cb_size;
2021     int x, y, ret;
2022
2023     lc->cu.x                = x0;
2024     lc->cu.y                = y0;
2025     lc->cu.pred_mode        = MODE_INTRA;
2026     lc->cu.part_mode        = PART_2Nx2N;
2027     lc->cu.intra_split_flag = 0;
2028
2029     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2030     for (x = 0; x < 4; x++)
2031         lc->pu.intra_pred_mode[x] = 1;
2032     if (s->ps.pps->transquant_bypass_enable_flag) {
2033         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2034         if (lc->cu.cu_transquant_bypass_flag)
2035             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2036     } else
2037         lc->cu.cu_transquant_bypass_flag = 0;
2038
2039     if (s->sh.slice_type != I_SLICE) {
2040         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2041
2042         x = y_cb * min_cb_width + x_cb;
2043         for (y = 0; y < length; y++) {
2044             memset(&s->skip_flag[x], skip_flag, length);
2045             x += min_cb_width;
2046         }
2047         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2048     }
2049
2050     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2051         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2052         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2053
2054         if (!s->sh.disable_deblocking_filter_flag)
2055             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2056     } else {
2057         int pcm_flag = 0;
2058
2059         if (s->sh.slice_type != I_SLICE)
2060             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2061         if (lc->cu.pred_mode != MODE_INTRA ||
2062             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2063             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2064             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2065                                       lc->cu.pred_mode == MODE_INTRA;
2066         }
2067
2068         if (lc->cu.pred_mode == MODE_INTRA) {
2069             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2070                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2071                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2072                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2073             }
2074             if (pcm_flag) {
2075                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2076                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2077                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2078                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2079
2080                 if (ret < 0)
2081                     return ret;
2082             } else {
2083                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2084             }
2085         } else {
2086             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2087             switch (lc->cu.part_mode) {
2088             case PART_2Nx2N:
2089                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2090                 break;
2091             case PART_2NxN:
2092                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2093                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2094                 break;
2095             case PART_Nx2N:
2096                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2097                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2098                 break;
2099             case PART_2NxnU:
2100                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2101                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2102                 break;
2103             case PART_2NxnD:
2104                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2105                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2106                 break;
2107             case PART_nLx2N:
2108                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2109                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2110                 break;
2111             case PART_nRx2N:
2112                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2113                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2114                 break;
2115             case PART_NxN:
2116                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2117                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2118                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2119                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2120                 break;
2121             }
2122         }
2123
2124         if (!pcm_flag) {
2125             int rqt_root_cbf = 1;
2126
2127             if (lc->cu.pred_mode != MODE_INTRA &&
2128                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2129                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2130             }
2131             if (rqt_root_cbf) {
2132                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2133                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2134                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2135                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2136                                          log2_cb_size,
2137                                          log2_cb_size, 0, 0, 0, 0);
2138                 if (ret < 0)
2139                     return ret;
2140             } else {
2141                 if (!s->sh.disable_deblocking_filter_flag)
2142                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2143             }
2144         }
2145     }
2146
2147     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2148         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2149
2150     x = y_cb * min_cb_width + x_cb;
2151     for (y = 0; y < length; y++) {
2152         memset(&s->qp_y_tab[x], lc->qp_y, length);
2153         x += min_cb_width;
2154     }
2155
2156     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2157
2158     return 0;
2159 }
2160
2161 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2162                                int log2_cb_size, int cb_depth)
2163 {
2164     HEVCLocalContext *lc = &s->HEVClc;
2165     const int cb_size    = 1 << log2_cb_size;
2166     int split_cu;
2167
2168     lc->ct.depth = cb_depth;
2169     if (x0 + cb_size <= s->ps.sps->width  &&
2170         y0 + cb_size <= s->ps.sps->height &&
2171         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2172         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2173     } else {
2174         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2175     }
2176     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2177         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2178         lc->tu.is_cu_qp_delta_coded = 0;
2179         lc->tu.cu_qp_delta          = 0;
2180     }
2181
2182     if (split_cu) {
2183         const int cb_size_split = cb_size >> 1;
2184         const int x1 = x0 + cb_size_split;
2185         const int y1 = y0 + cb_size_split;
2186
2187         log2_cb_size--;
2188         cb_depth++;
2189
2190 #define SUBDIVIDE(x, y)                                                \
2191 do {                                                                   \
2192     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2193         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2194         if (ret < 0)                                                   \
2195             return ret;                                                \
2196     }                                                                  \
2197 } while (0)
2198
2199         SUBDIVIDE(x0, y0);
2200         SUBDIVIDE(x1, y0);
2201         SUBDIVIDE(x0, y1);
2202         SUBDIVIDE(x1, y1);
2203     } else {
2204         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2205         if (ret < 0)
2206             return ret;
2207     }
2208
2209     return 0;
2210 }
2211
2212 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2213                                  int ctb_addr_ts)
2214 {
2215     HEVCLocalContext *lc  = &s->HEVClc;
2216     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2217     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2218     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2219
2220     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2221
2222     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2223         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2224             lc->first_qp_group = 1;
2225         lc->end_of_tiles_x = s->ps.sps->width;
2226     } else if (s->ps.pps->tiles_enabled_flag) {
2227         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2228             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2229             lc->start_of_tiles_x = x_ctb;
2230             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2231             lc->first_qp_group   = 1;
2232         }
2233     } else {
2234         lc->end_of_tiles_x = s->ps.sps->width;
2235     }
2236
2237     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2238
2239     lc->boundary_flags = 0;
2240     if (s->ps.pps->tiles_enabled_flag) {
2241         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2242             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2243         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2244             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2245         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2246             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2247         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2248             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2249     } else {
2250         if (!ctb_addr_in_slice)
2251             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2252         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2253             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2254     }
2255
2256     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2257     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2258     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2259     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2260 }
2261
2262 static int hls_slice_data(HEVCContext *s)
2263 {
2264     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2265     int more_data   = 1;
2266     int x_ctb       = 0;
2267     int y_ctb       = 0;
2268     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2269     int ret;
2270
2271     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2272         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2273
2274         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2275         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2276         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2277
2278         ff_hevc_cabac_init(s, ctb_addr_ts);
2279
2280         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2281
2282         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2283         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2284         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2285
2286         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2287         if (ret < 0)
2288             return ret;
2289         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2290
2291         ctb_addr_ts++;
2292         ff_hevc_save_states(s, ctb_addr_ts);
2293         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2294     }
2295
2296     if (x_ctb + ctb_size >= s->ps.sps->width &&
2297         y_ctb + ctb_size >= s->ps.sps->height)
2298         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2299
2300     return ctb_addr_ts;
2301 }
2302
2303 static void restore_tqb_pixels(HEVCContext *s)
2304 {
2305     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2306     int x, y, c_idx;
2307
2308     for (c_idx = 0; c_idx < 3; c_idx++) {
2309         ptrdiff_t stride = s->frame->linesize[c_idx];
2310         int hshift       = s->ps.sps->hshift[c_idx];
2311         int vshift       = s->ps.sps->vshift[c_idx];
2312         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2313             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2314                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2315                     int n;
2316                     int len      = min_pu_size >> hshift;
2317                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2318                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2319                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2320                         memcpy(dst, src, len);
2321                         src += stride;
2322                         dst += stride;
2323                     }
2324                 }
2325             }
2326         }
2327     }
2328 }
2329
2330 static int set_side_data(HEVCContext *s)
2331 {
2332     AVFrame *out = s->ref->frame;
2333
2334     if (s->sei_frame_packing_present &&
2335         s->frame_packing_arrangement_type >= 3 &&
2336         s->frame_packing_arrangement_type <= 5 &&
2337         s->content_interpretation_type > 0 &&
2338         s->content_interpretation_type < 3) {
2339         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2340         if (!stereo)
2341             return AVERROR(ENOMEM);
2342
2343         switch (s->frame_packing_arrangement_type) {
2344         case 3:
2345             if (s->quincunx_subsampling)
2346                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2347             else
2348                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2349             break;
2350         case 4:
2351             stereo->type = AV_STEREO3D_TOPBOTTOM;
2352             break;
2353         case 5:
2354             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2355             break;
2356         }
2357
2358         if (s->content_interpretation_type == 2)
2359             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2360     }
2361
2362     if (s->sei_display_orientation_present &&
2363         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2364         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2365         AVFrameSideData *rotation = av_frame_new_side_data(out,
2366                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2367                                                            sizeof(int32_t) * 9);
2368         if (!rotation)
2369             return AVERROR(ENOMEM);
2370
2371         av_display_rotation_set((int32_t *)rotation->data, angle);
2372         av_display_matrix_flip((int32_t *)rotation->data,
2373                                s->sei_hflip, s->sei_vflip);
2374     }
2375
2376     return 0;
2377 }
2378
2379 static int hevc_frame_start(HEVCContext *s)
2380 {
2381     HEVCLocalContext *lc = &s->HEVClc;
2382     int ret;
2383
2384     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2385     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2386     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2387     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2388
2389     lc->start_of_tiles_x = 0;
2390     s->is_decoded        = 0;
2391     s->first_nal_type    = s->nal_unit_type;
2392
2393     if (s->ps.pps->tiles_enabled_flag)
2394         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2395
2396     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2397                               s->poc);
2398     if (ret < 0)
2399         goto fail;
2400
2401     ret = ff_hevc_frame_rps(s);
2402     if (ret < 0) {
2403         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2404         goto fail;
2405     }
2406
2407     s->ref->frame->key_frame = IS_IRAP(s);
2408
2409     ret = set_side_data(s);
2410     if (ret < 0)
2411         goto fail;
2412
2413     av_frame_unref(s->output_frame);
2414     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2415     if (ret < 0)
2416         goto fail;
2417
2418     ff_thread_finish_setup(s->avctx);
2419
2420     return 0;
2421
2422 fail:
2423     if (s->ref)
2424         ff_hevc_unref_frame(s, s->ref, ~0);
2425     s->ref = NULL;
2426     return ret;
2427 }
2428
2429 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2430 {
2431     HEVCLocalContext *lc = &s->HEVClc;
2432     GetBitContext *gb    = &lc->gb;
2433     int ctb_addr_ts, ret;
2434
2435     *gb              = nal->gb;
2436     s->nal_unit_type = nal->type;
2437     s->temporal_id   = nal->temporal_id;
2438
2439     switch (s->nal_unit_type) {
2440     case NAL_VPS:
2441         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2442         if (ret < 0)
2443             goto fail;
2444         break;
2445     case NAL_SPS:
2446         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2447                                      s->apply_defdispwin);
2448         if (ret < 0)
2449             goto fail;
2450         break;
2451     case NAL_PPS:
2452         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2453         if (ret < 0)
2454             goto fail;
2455         break;
2456     case NAL_SEI_PREFIX:
2457     case NAL_SEI_SUFFIX:
2458         ret = ff_hevc_decode_nal_sei(s);
2459         if (ret < 0)
2460             goto fail;
2461         break;
2462     case NAL_TRAIL_R:
2463     case NAL_TRAIL_N:
2464     case NAL_TSA_N:
2465     case NAL_TSA_R:
2466     case NAL_STSA_N:
2467     case NAL_STSA_R:
2468     case NAL_BLA_W_LP:
2469     case NAL_BLA_W_RADL:
2470     case NAL_BLA_N_LP:
2471     case NAL_IDR_W_RADL:
2472     case NAL_IDR_N_LP:
2473     case NAL_CRA_NUT:
2474     case NAL_RADL_N:
2475     case NAL_RADL_R:
2476     case NAL_RASL_N:
2477     case NAL_RASL_R:
2478         ret = hls_slice_header(s);
2479         if (ret < 0)
2480             return ret;
2481
2482         if (s->max_ra == INT_MAX) {
2483             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2484                 s->max_ra = s->poc;
2485             } else {
2486                 if (IS_IDR(s))
2487                     s->max_ra = INT_MIN;
2488             }
2489         }
2490
2491         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2492             s->poc <= s->max_ra) {
2493             s->is_decoded = 0;
2494             break;
2495         } else {
2496             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2497                 s->max_ra = INT_MIN;
2498         }
2499
2500         if (s->sh.first_slice_in_pic_flag) {
2501             ret = hevc_frame_start(s);
2502             if (ret < 0)
2503                 return ret;
2504         } else if (!s->ref) {
2505             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2506             goto fail;
2507         }
2508
2509         if (s->nal_unit_type != s->first_nal_type) {
2510             av_log(s->avctx, AV_LOG_ERROR,
2511                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2512                    s->first_nal_type, s->nal_unit_type);
2513             return AVERROR_INVALIDDATA;
2514         }
2515
2516         if (!s->sh.dependent_slice_segment_flag &&
2517             s->sh.slice_type != I_SLICE) {
2518             ret = ff_hevc_slice_rpl(s);
2519             if (ret < 0) {
2520                 av_log(s->avctx, AV_LOG_WARNING,
2521                        "Error constructing the reference lists for the current slice.\n");
2522                 goto fail;
2523             }
2524         }
2525
2526         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2527             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2528             if (ret < 0)
2529                 goto fail;
2530         }
2531
2532         if (s->avctx->hwaccel) {
2533             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2534             if (ret < 0)
2535                 goto fail;
2536         } else {
2537             ctb_addr_ts = hls_slice_data(s);
2538             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2539                 s->is_decoded = 1;
2540                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2541                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2542                     s->ps.sps->sao_enabled)
2543                     restore_tqb_pixels(s);
2544             }
2545
2546             if (ctb_addr_ts < 0) {
2547                 ret = ctb_addr_ts;
2548                 goto fail;
2549             }
2550         }
2551         break;
2552     case NAL_EOS_NUT:
2553     case NAL_EOB_NUT:
2554         s->seq_decode = (s->seq_decode + 1) & 0xff;
2555         s->max_ra     = INT_MAX;
2556         break;
2557     case NAL_AUD:
2558     case NAL_FD_NUT:
2559         break;
2560     default:
2561         av_log(s->avctx, AV_LOG_INFO,
2562                "Skipping NAL unit %d\n", s->nal_unit_type);
2563     }
2564
2565     return 0;
2566 fail:
2567     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2568         return ret;
2569     return 0;
2570 }
2571
2572 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2573 {
2574     int i, ret = 0;
2575
2576     s->ref = NULL;
2577     s->eos = 0;
2578
2579     /* split the input packet into NAL units, so we know the upper bound on the
2580      * number of slices in the frame */
2581     ret = ff_hevc_split_packet(&s->pkt, buf, length, s->avctx, s->is_nalff,
2582                                s->nal_length_size);
2583     if (ret < 0) {
2584         av_log(s->avctx, AV_LOG_ERROR,
2585                "Error splitting the input into NAL units.\n");
2586         return ret;
2587     }
2588
2589     for (i = 0; i < s->pkt.nb_nals; i++) {
2590         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2591             s->pkt.nals[i].type == NAL_EOS_NUT)
2592             s->eos = 1;
2593     }
2594
2595     /* decode the NAL units */
2596     for (i = 0; i < s->pkt.nb_nals; i++) {
2597         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2598         if (ret < 0) {
2599             av_log(s->avctx, AV_LOG_WARNING,
2600                    "Error parsing NAL unit #%d.\n", i);
2601             goto fail;
2602         }
2603     }
2604
2605 fail:
2606     if (s->ref)
2607         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2608
2609     return ret;
2610 }
2611
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx logging context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2618
2619 static int verify_md5(HEVCContext *s, AVFrame *frame)
2620 {
2621     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2622     int pixel_shift;
2623     int i, j;
2624
2625     if (!desc)
2626         return AVERROR(EINVAL);
2627
2628     pixel_shift = desc->comp[0].depth > 8;
2629
2630     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2631            s->poc);
2632
2633     /* the checksums are LE, so we have to byteswap for >8bpp formats
2634      * on BE arches */
2635 #if HAVE_BIGENDIAN
2636     if (pixel_shift && !s->checksum_buf) {
2637         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2638                        FFMAX3(frame->linesize[0], frame->linesize[1],
2639                               frame->linesize[2]));
2640         if (!s->checksum_buf)
2641             return AVERROR(ENOMEM);
2642     }
2643 #endif
2644
2645     for (i = 0; frame->data[i]; i++) {
2646         int width  = s->avctx->coded_width;
2647         int height = s->avctx->coded_height;
2648         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2649         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2650         uint8_t md5[16];
2651
2652         av_md5_init(s->md5_ctx);
2653         for (j = 0; j < h; j++) {
2654             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2655 #if HAVE_BIGENDIAN
2656             if (pixel_shift) {
2657                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2658                                     (const uint16_t *) src, w);
2659                 src = s->checksum_buf;
2660             }
2661 #endif
2662             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2663         }
2664         av_md5_final(s->md5_ctx, md5);
2665
2666         if (!memcmp(md5, s->md5[i], 16)) {
2667             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2668             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2669             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2670         } else {
2671             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2672             print_md5(s->avctx, AV_LOG_ERROR, md5);
2673             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2674             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2675             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2676             return AVERROR_INVALIDDATA;
2677         }
2678     }
2679
2680     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2681
2682     return 0;
2683 }
2684
2685 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2686                              AVPacket *avpkt)
2687 {
2688     int ret;
2689     HEVCContext *s = avctx->priv_data;
2690
2691     if (!avpkt->size) {
2692         ret = ff_hevc_output_frame(s, data, 1);
2693         if (ret < 0)
2694             return ret;
2695
2696         *got_output = ret;
2697         return 0;
2698     }
2699
2700     s->ref = NULL;
2701     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2702     if (ret < 0)
2703         return ret;
2704
2705     if (avctx->hwaccel) {
2706         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2707             av_log(avctx, AV_LOG_ERROR,
2708                    "hardware accelerator failed to decode picture\n");
2709     } else {
2710         /* verify the SEI checksum */
2711         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2712             s->is_md5) {
2713             ret = verify_md5(s, s->ref->frame);
2714             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2715                 ff_hevc_unref_frame(s, s->ref, ~0);
2716                 return ret;
2717             }
2718         }
2719     }
2720     s->is_md5 = 0;
2721
2722     if (s->is_decoded) {
2723         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2724         s->is_decoded = 0;
2725     }
2726
2727     if (s->output_frame->buf[0]) {
2728         av_frame_move_ref(data, s->output_frame);
2729         *got_output = 1;
2730     }
2731
2732     return avpkt->size;
2733 }
2734
2735 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2736 {
2737     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2738     if (ret < 0)
2739         return ret;
2740
2741     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2742     if (!dst->tab_mvf_buf)
2743         goto fail;
2744     dst->tab_mvf = src->tab_mvf;
2745
2746     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2747     if (!dst->rpl_tab_buf)
2748         goto fail;
2749     dst->rpl_tab = src->rpl_tab;
2750
2751     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2752     if (!dst->rpl_buf)
2753         goto fail;
2754
2755     dst->poc        = src->poc;
2756     dst->ctb_count  = src->ctb_count;
2757     dst->window     = src->window;
2758     dst->flags      = src->flags;
2759     dst->sequence   = src->sequence;
2760
2761     if (src->hwaccel_picture_private) {
2762         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2763         if (!dst->hwaccel_priv_buf)
2764             goto fail;
2765         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2766     }
2767
2768     return 0;
2769 fail:
2770     ff_hevc_unref_frame(s, dst, ~0);
2771     return AVERROR(ENOMEM);
2772 }
2773
2774 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2775 {
2776     HEVCContext       *s = avctx->priv_data;
2777     int i;
2778
2779     pic_arrays_free(s);
2780
2781     av_freep(&s->md5_ctx);
2782
2783     av_frame_free(&s->tmp_frame);
2784     av_frame_free(&s->output_frame);
2785
2786     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2787         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2788         av_frame_free(&s->DPB[i].frame);
2789     }
2790
2791     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2792         av_buffer_unref(&s->ps.vps_list[i]);
2793     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2794         av_buffer_unref(&s->ps.sps_list[i]);
2795     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2796         av_buffer_unref(&s->ps.pps_list[i]);
2797
2798     for (i = 0; i < s->pkt.nals_allocated; i++)
2799         av_freep(&s->pkt.nals[i].rbsp_buffer);
2800     av_freep(&s->pkt.nals);
2801     s->pkt.nals_allocated = 0;
2802
2803     return 0;
2804 }
2805
2806 static av_cold int hevc_init_context(AVCodecContext *avctx)
2807 {
2808     HEVCContext *s = avctx->priv_data;
2809     int i;
2810
2811     s->avctx = avctx;
2812
2813     s->tmp_frame = av_frame_alloc();
2814     if (!s->tmp_frame)
2815         goto fail;
2816
2817     s->output_frame = av_frame_alloc();
2818     if (!s->output_frame)
2819         goto fail;
2820
2821     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2822         s->DPB[i].frame = av_frame_alloc();
2823         if (!s->DPB[i].frame)
2824             goto fail;
2825         s->DPB[i].tf.f = s->DPB[i].frame;
2826     }
2827
2828     s->max_ra = INT_MAX;
2829
2830     s->md5_ctx = av_md5_alloc();
2831     if (!s->md5_ctx)
2832         goto fail;
2833
2834     ff_bswapdsp_init(&s->bdsp);
2835
2836     s->context_initialized = 1;
2837
2838     return 0;
2839
2840 fail:
2841     hevc_decode_free(avctx);
2842     return AVERROR(ENOMEM);
2843 }
2844
2845 static int hevc_update_thread_context(AVCodecContext *dst,
2846                                       const AVCodecContext *src)
2847 {
2848     HEVCContext *s  = dst->priv_data;
2849     HEVCContext *s0 = src->priv_data;
2850     int i, ret;
2851
2852     if (!s->context_initialized) {
2853         ret = hevc_init_context(dst);
2854         if (ret < 0)
2855             return ret;
2856     }
2857
2858     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2859         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2860         if (s0->DPB[i].frame->buf[0]) {
2861             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2862             if (ret < 0)
2863                 return ret;
2864         }
2865     }
2866
2867     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2868         av_buffer_unref(&s->ps.vps_list[i]);
2869         if (s0->ps.vps_list[i]) {
2870             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2871             if (!s->ps.vps_list[i])
2872                 return AVERROR(ENOMEM);
2873         }
2874     }
2875
2876     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2877         av_buffer_unref(&s->ps.sps_list[i]);
2878         if (s0->ps.sps_list[i]) {
2879             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2880             if (!s->ps.sps_list[i])
2881                 return AVERROR(ENOMEM);
2882         }
2883     }
2884
2885     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2886         av_buffer_unref(&s->ps.pps_list[i]);
2887         if (s0->ps.pps_list[i]) {
2888             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2889             if (!s->ps.pps_list[i])
2890                 return AVERROR(ENOMEM);
2891         }
2892     }
2893
2894     if (s->ps.sps != s0->ps.sps)
2895         ret = set_sps(s, s0->ps.sps);
2896
2897     s->seq_decode = s0->seq_decode;
2898     s->seq_output = s0->seq_output;
2899     s->pocTid0    = s0->pocTid0;
2900     s->max_ra     = s0->max_ra;
2901
2902     s->is_nalff        = s0->is_nalff;
2903     s->nal_length_size = s0->nal_length_size;
2904
2905     if (s0->eos) {
2906         s->seq_decode = (s->seq_decode + 1) & 0xff;
2907         s->max_ra = INT_MAX;
2908     }
2909
2910     return 0;
2911 }
2912
2913 static int hevc_decode_extradata(HEVCContext *s)
2914 {
2915     AVCodecContext *avctx = s->avctx;
2916     GetByteContext gb;
2917     int ret, i;
2918
2919     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2920
2921     if (avctx->extradata_size > 3 &&
2922         (avctx->extradata[0] || avctx->extradata[1] ||
2923          avctx->extradata[2] > 1)) {
2924         /* It seems the extradata is encoded as hvcC format.
2925          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2926          * is finalized. When finalized, configurationVersion will be 1 and we
2927          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2928         int i, j, num_arrays, nal_len_size;
2929
2930         s->is_nalff = 1;
2931
2932         bytestream2_skip(&gb, 21);
2933         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2934         num_arrays   = bytestream2_get_byte(&gb);
2935
2936         /* nal units in the hvcC always have length coded with 2 bytes,
2937          * so put a fake nal_length_size = 2 while parsing them */
2938         s->nal_length_size = 2;
2939
2940         /* Decode nal units from hvcC. */
2941         for (i = 0; i < num_arrays; i++) {
2942             int type = bytestream2_get_byte(&gb) & 0x3f;
2943             int cnt  = bytestream2_get_be16(&gb);
2944
2945             for (j = 0; j < cnt; j++) {
2946                 // +2 for the nal size field
2947                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2948                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2949                     av_log(s->avctx, AV_LOG_ERROR,
2950                            "Invalid NAL unit size in extradata.\n");
2951                     return AVERROR_INVALIDDATA;
2952                 }
2953
2954                 ret = decode_nal_units(s, gb.buffer, nalsize);
2955                 if (ret < 0) {
2956                     av_log(avctx, AV_LOG_ERROR,
2957                            "Decoding nal unit %d %d from hvcC failed\n",
2958                            type, i);
2959                     return ret;
2960                 }
2961                 bytestream2_skip(&gb, nalsize);
2962             }
2963         }
2964
2965         /* Now store right nal length size, that will be used to parse
2966          * all other nals */
2967         s->nal_length_size = nal_len_size;
2968     } else {
2969         s->is_nalff = 0;
2970         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2971         if (ret < 0)
2972             return ret;
2973     }
2974
2975     /* export stream parameters from the first SPS */
2976     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2977         if (s->ps.sps_list[i]) {
2978             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2979             export_stream_params(s->avctx, &s->ps, sps);
2980             break;
2981         }
2982     }
2983
2984     return 0;
2985 }
2986
2987 static av_cold int hevc_decode_init(AVCodecContext *avctx)
2988 {
2989     HEVCContext *s = avctx->priv_data;
2990     int ret;
2991
2992     avctx->internal->allocate_progress = 1;
2993
2994     ret = hevc_init_context(avctx);
2995     if (ret < 0)
2996         return ret;
2997
2998     if (avctx->extradata_size > 0 && avctx->extradata) {
2999         ret = hevc_decode_extradata(s);
3000         if (ret < 0) {
3001             hevc_decode_free(avctx);
3002             return ret;
3003         }
3004     }
3005
3006     return 0;
3007 }
3008
3009 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3010 {
3011     HEVCContext *s = avctx->priv_data;
3012     int ret;
3013
3014     memset(s, 0, sizeof(*s));
3015
3016     ret = hevc_init_context(avctx);
3017     if (ret < 0)
3018         return ret;
3019
3020     return 0;
3021 }
3022
3023 static void hevc_decode_flush(AVCodecContext *avctx)
3024 {
3025     HEVCContext *s = avctx->priv_data;
3026     ff_hevc_flush_dpb(s);
3027     s->max_ra = INT_MAX;
3028 }
3029
/* Byte offset of a field inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the decoder's options: video, decoding-side. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3032
3033 static const AVProfile profiles[] = {
3034     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3035     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3036     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3037     { FF_PROFILE_UNKNOWN },
3038 };
3039
3040 static const AVOption options[] = {
3041     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3042         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3043     { NULL },
3044 };
3045
3046 static const AVClass hevc_decoder_class = {
3047     .class_name = "HEVC decoder",
3048     .item_name  = av_default_item_name,
3049     .option     = options,
3050     .version    = LIBAVUTIL_VERSION_INT,
3051 };
3052
3053 AVCodec ff_hevc_decoder = {
3054     .name                  = "hevc",
3055     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3056     .type                  = AVMEDIA_TYPE_VIDEO,
3057     .id                    = AV_CODEC_ID_HEVC,
3058     .priv_data_size        = sizeof(HEVCContext),
3059     .priv_class            = &hevc_decoder_class,
3060     .init                  = hevc_decode_init,
3061     .close                 = hevc_decode_free,
3062     .decode                = hevc_decode_frame,
3063     .flush                 = hevc_decode_flush,
3064     .update_thread_context = hevc_update_thread_context,
3065     .init_thread_copy      = hevc_init_thread_copy,
3066     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3067                              AV_CODEC_CAP_FRAME_THREADS,
3068     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3069 };