]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
avcodec/hevc: add "apply_defdispwin" alias for compatibility with 064698d381e1e7790f21...
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33
34 #include "bytestream.h"
35 #include "cabac_functions.h"
36 #include "dsputil.h"
37 #include "golomb.h"
38 #include "hevc.h"
39
/*
 * Lookup tables with four entries each, exported for use outside this file.
 * Entry i of ff_hevc_qpel_extra equals the sum of entry i of
 * ff_hevc_qpel_extra_before and entry i of ff_hevc_qpel_extra_after.
 * NOTE(review): presumably the number of extra samples needed around a block
 * for luma quarter-pel interpolation, indexed by the fractional part of the
 * motion vector -- confirm against the users of these tables.
 */
40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
41 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
42 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
43
44 /**
45  * NOTE: Each function hls_foo corresponds to the function foo in the
46  * specification (HLS stands for High Level Syntax).
47  */
48
49 /**
50  * Section 5.7
51  */
52
53 /* free everything allocated  by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
55 {
56     av_freep(&s->sao);
57     av_freep(&s->deblock);
58     av_freep(&s->split_cu_flag);
59
60     av_freep(&s->skip_flag);
61     av_freep(&s->tab_ct_depth);
62
63     av_freep(&s->tab_ipm);
64     av_freep(&s->cbf_luma);
65     av_freep(&s->is_pcm);
66
67     av_freep(&s->qp_y_tab);
68     av_freep(&s->tab_slice_address);
69     av_freep(&s->filter_slice_edges);
70
71     av_freep(&s->horizontal_bs);
72     av_freep(&s->vertical_bs);
73
74     av_freep(&s->sh.entry_point_offset);
75     av_freep(&s->sh.size);
76     av_freep(&s->sh.offset);
77
78     av_buffer_pool_uninit(&s->tab_mvf_pool);
79     av_buffer_pool_uninit(&s->rpl_tab_pool);
80 }
81
82 /* allocate arrays that depend on frame dimensions */
83 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 {
85     int log2_min_cb_size = sps->log2_min_cb_size;
86     int width            = sps->width;
87     int height           = sps->height;
88     int pic_size         = width * height;
89     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
90                            ((height >> log2_min_cb_size) + 1);
91     int ctb_count        = sps->ctb_width * sps->ctb_height;
92     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
93
94     s->bs_width  = width  >> 3;
95     s->bs_height = height >> 3;
96
97     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
98     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
99     s->split_cu_flag = av_malloc(pic_size);
100     if (!s->sao || !s->deblock || !s->split_cu_flag)
101         goto fail;
102
103     s->skip_flag    = av_malloc(pic_size_in_ctb);
104     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
105     if (!s->skip_flag || !s->tab_ct_depth)
106         goto fail;
107
108     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
109     s->tab_ipm  = av_malloc(min_pu_size);
110     s->is_pcm   = av_malloc(min_pu_size);
111     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
112         goto fail;
113
114     s->filter_slice_edges = av_malloc(ctb_count);
115     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
116                                       sizeof(*s->tab_slice_address));
117     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
118                                       sizeof(*s->qp_y_tab));
119     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
120         goto fail;
121
122     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
123     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
124     if (!s->horizontal_bs || !s->vertical_bs)
125         goto fail;
126
127     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
128                                           av_buffer_alloc);
129     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
130                                           av_buffer_allocz);
131     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
132         goto fail;
133
134     return 0;
135
136 fail:
137     pic_arrays_free(s);
138     return AVERROR(ENOMEM);
139 }
140
141 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 {
143     int i = 0;
144     int j = 0;
145     uint8_t luma_weight_l0_flag[16];
146     uint8_t chroma_weight_l0_flag[16];
147     uint8_t luma_weight_l1_flag[16];
148     uint8_t chroma_weight_l1_flag[16];
149
150     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
151     if (s->sps->chroma_format_idc != 0) {
152         int delta = get_se_golomb(gb);
153         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
154     }
155
156     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
157         luma_weight_l0_flag[i] = get_bits1(gb);
158         if (!luma_weight_l0_flag[i]) {
159             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
160             s->sh.luma_offset_l0[i] = 0;
161         }
162     }
163     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
165             chroma_weight_l0_flag[i] = get_bits1(gb);
166     } else {
167         for (i = 0; i < s->sh.nb_refs[L0]; i++)
168             chroma_weight_l0_flag[i] = 0;
169     }
170     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
171         if (luma_weight_l0_flag[i]) {
172             int delta_luma_weight_l0 = get_se_golomb(gb);
173             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
174             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
175         }
176         if (chroma_weight_l0_flag[i]) {
177             for (j = 0; j < 2; j++) {
178                 int delta_chroma_weight_l0 = get_se_golomb(gb);
179                 int delta_chroma_offset_l0 = get_se_golomb(gb);
180                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
181                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
182                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
183             }
184         } else {
185             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
186             s->sh.chroma_offset_l0[i][0] = 0;
187             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
188             s->sh.chroma_offset_l0[i][1] = 0;
189         }
190     }
191     if (s->sh.slice_type == B_SLICE) {
192         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
193             luma_weight_l1_flag[i] = get_bits1(gb);
194             if (!luma_weight_l1_flag[i]) {
195                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
196                 s->sh.luma_offset_l1[i] = 0;
197             }
198         }
199         if (s->sps->chroma_format_idc != 0) {
200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
201                 chroma_weight_l1_flag[i] = get_bits1(gb);
202         } else {
203             for (i = 0; i < s->sh.nb_refs[L1]; i++)
204                 chroma_weight_l1_flag[i] = 0;
205         }
206         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
207             if (luma_weight_l1_flag[i]) {
208                 int delta_luma_weight_l1 = get_se_golomb(gb);
209                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
210                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
211             }
212             if (chroma_weight_l1_flag[i]) {
213                 for (j = 0; j < 2; j++) {
214                     int delta_chroma_weight_l1 = get_se_golomb(gb);
215                     int delta_chroma_offset_l1 = get_se_golomb(gb);
216                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
217                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
218                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
219                 }
220             } else {
221                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
222                 s->sh.chroma_offset_l1[i][0] = 0;
223                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
224                 s->sh.chroma_offset_l1[i][1] = 0;
225             }
226         }
227     }
228 }
229
230 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
231 {
232     const HEVCSPS *sps = s->sps;
233     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
234     int prev_delta_msb = 0;
235     int nb_sps = 0, nb_sh;
236     int i;
237
238     rps->nb_refs = 0;
239     if (!sps->long_term_ref_pics_present_flag)
240         return 0;
241
242     if (sps->num_long_term_ref_pics_sps > 0)
243         nb_sps = get_ue_golomb_long(gb);
244     nb_sh = get_ue_golomb_long(gb);
245
246     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
247         return AVERROR_INVALIDDATA;
248
249     rps->nb_refs = nb_sh + nb_sps;
250
251     for (i = 0; i < rps->nb_refs; i++) {
252         uint8_t delta_poc_msb_present;
253
254         if (i < nb_sps) {
255             uint8_t lt_idx_sps = 0;
256
257             if (sps->num_long_term_ref_pics_sps > 1)
258                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
259
260             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
261             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
262         } else {
263             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
264             rps->used[i] = get_bits1(gb);
265         }
266
267         delta_poc_msb_present = get_bits1(gb);
268         if (delta_poc_msb_present) {
269             int delta = get_ue_golomb_long(gb);
270
271             if (i && i != nb_sps)
272                 delta += prev_delta_msb;
273
274             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
275             prev_delta_msb = delta;
276         }
277     }
278
279     return 0;
280 }
281
282 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
283 {
284     int ret;
285
286     pic_arrays_free(s);
287     ret = pic_arrays_init(s, sps);
288     if (ret < 0)
289         goto fail;
290
291     s->avctx->coded_width         = sps->width;
292     s->avctx->coded_height        = sps->height;
293     s->avctx->width               = sps->output_width;
294     s->avctx->height              = sps->output_height;
295     s->avctx->pix_fmt             = sps->pix_fmt;
296     s->avctx->sample_aspect_ratio = sps->vui.sar;
297     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
298
299     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
300     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
301     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
302
303     if (sps->sao_enabled) {
304         av_frame_unref(s->tmp_frame);
305         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
306         if (ret < 0)
307             goto fail;
308         s->frame = s->tmp_frame;
309     }
310
311     s->sps = sps;
312     s->vps = s->vps_list[s->sps->vps_id];
313     return 0;
314
315 fail:
316     pic_arrays_free(s);
317     s->sps = NULL;
318     return ret;
319 }
320
321 static int hls_slice_header(HEVCContext *s)
322 {
323     GetBitContext *gb = &s->HEVClc->gb;
324     SliceHeader *sh   = &s->sh;
325     int i, j, ret;
326
327     // Coded parameters
328     sh->first_slice_in_pic_flag = get_bits1(gb);
329     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
330         s->seq_decode = (s->seq_decode + 1) & 0xff;
331         s->max_ra     = INT_MAX;
332         if (IS_IDR(s))
333             ff_hevc_clear_refs(s);
334     }
335     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
336         sh->no_output_of_prior_pics_flag = get_bits1(gb);
337
338     sh->pps_id = get_ue_golomb_long(gb);
339     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
340         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
341         return AVERROR_INVALIDDATA;
342     }
343     if (!sh->first_slice_in_pic_flag &&
344         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
345         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
346         return AVERROR_INVALIDDATA;
347     }
348     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
349
350     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
351         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
352
353         ff_hevc_clear_refs(s);
354         ret = set_sps(s, s->sps);
355         if (ret < 0)
356             return ret;
357
358         s->seq_decode = (s->seq_decode + 1) & 0xff;
359         s->max_ra     = INT_MAX;
360     }
361
362     sh->dependent_slice_segment_flag = 0;
363     if (!sh->first_slice_in_pic_flag) {
364         int slice_address_length;
365
366         if (s->pps->dependent_slice_segments_enabled_flag)
367             sh->dependent_slice_segment_flag = get_bits1(gb);
368
369         slice_address_length = av_ceil_log2(s->sps->ctb_width *
370                                             s->sps->ctb_height);
371         sh->slice_segment_addr = get_bits(gb, slice_address_length);
372         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
373             av_log(s->avctx, AV_LOG_ERROR,
374                    "Invalid slice segment address: %u.\n",
375                    sh->slice_segment_addr);
376             return AVERROR_INVALIDDATA;
377         }
378
379         if (!sh->dependent_slice_segment_flag) {
380             sh->slice_addr = sh->slice_segment_addr;
381             s->slice_idx++;
382         }
383     } else {
384         sh->slice_segment_addr = sh->slice_addr = 0;
385         s->slice_idx           = 0;
386         s->slice_initialized   = 0;
387     }
388
389     if (!sh->dependent_slice_segment_flag) {
390         s->slice_initialized = 0;
391
392         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
393             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
394
395         sh->slice_type = get_ue_golomb_long(gb);
396         if (!(sh->slice_type == I_SLICE ||
397               sh->slice_type == P_SLICE ||
398               sh->slice_type == B_SLICE)) {
399             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
400                    sh->slice_type);
401             return AVERROR_INVALIDDATA;
402         }
403         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
404             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
405             return AVERROR_INVALIDDATA;
406         }
407
408         if (s->pps->output_flag_present_flag)
409             sh->pic_output_flag = get_bits1(gb);
410
411         if (s->sps->separate_colour_plane_flag)
412             sh->colour_plane_id = get_bits(gb, 2);
413
414         if (!IS_IDR(s)) {
415             int short_term_ref_pic_set_sps_flag, poc;
416
417             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
418             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
419             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
420                 av_log(s->avctx, AV_LOG_WARNING,
421                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
422                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
423                     return AVERROR_INVALIDDATA;
424                 poc = s->poc;
425             }
426             s->poc = poc;
427
428             short_term_ref_pic_set_sps_flag = get_bits1(gb);
429             if (!short_term_ref_pic_set_sps_flag) {
430                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
431                 if (ret < 0)
432                     return ret;
433
434                 sh->short_term_rps = &sh->slice_rps;
435             } else {
436                 int numbits, rps_idx;
437
438                 if (!s->sps->nb_st_rps) {
439                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
440                     return AVERROR_INVALIDDATA;
441                 }
442
443                 numbits = av_ceil_log2(s->sps->nb_st_rps);
444                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
445                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
446             }
447
448             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
449             if (ret < 0) {
450                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
451                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
452                     return AVERROR_INVALIDDATA;
453             }
454
455             if (s->sps->sps_temporal_mvp_enabled_flag)
456                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
457             else
458                 sh->slice_temporal_mvp_enabled_flag = 0;
459         } else {
460             s->sh.short_term_rps = NULL;
461             s->poc               = 0;
462         }
463
464         /* 8.3.1 */
465         if (s->temporal_id == 0 &&
466             s->nal_unit_type != NAL_TRAIL_N &&
467             s->nal_unit_type != NAL_TSA_N   &&
468             s->nal_unit_type != NAL_STSA_N  &&
469             s->nal_unit_type != NAL_RADL_N  &&
470             s->nal_unit_type != NAL_RADL_R  &&
471             s->nal_unit_type != NAL_RASL_N  &&
472             s->nal_unit_type != NAL_RASL_R)
473             s->pocTid0 = s->poc;
474
475         if (s->sps->sao_enabled) {
476             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
477             sh->slice_sample_adaptive_offset_flag[1] =
478             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
479         } else {
480             sh->slice_sample_adaptive_offset_flag[0] = 0;
481             sh->slice_sample_adaptive_offset_flag[1] = 0;
482             sh->slice_sample_adaptive_offset_flag[2] = 0;
483         }
484
485         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
486         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
487             int nb_refs;
488
489             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
490             if (sh->slice_type == B_SLICE)
491                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
492
493             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
494                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
495                 if (sh->slice_type == B_SLICE)
496                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
497             }
498             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
499                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
500                        sh->nb_refs[L0], sh->nb_refs[L1]);
501                 return AVERROR_INVALIDDATA;
502             }
503
504             sh->rpl_modification_flag[0] = 0;
505             sh->rpl_modification_flag[1] = 0;
506             nb_refs = ff_hevc_frame_nb_refs(s);
507             if (!nb_refs) {
508                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
509                 return AVERROR_INVALIDDATA;
510             }
511
512             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
513                 sh->rpl_modification_flag[0] = get_bits1(gb);
514                 if (sh->rpl_modification_flag[0]) {
515                     for (i = 0; i < sh->nb_refs[L0]; i++)
516                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
517                 }
518
519                 if (sh->slice_type == B_SLICE) {
520                     sh->rpl_modification_flag[1] = get_bits1(gb);
521                     if (sh->rpl_modification_flag[1] == 1)
522                         for (i = 0; i < sh->nb_refs[L1]; i++)
523                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
524                 }
525             }
526
527             if (sh->slice_type == B_SLICE)
528                 sh->mvd_l1_zero_flag = get_bits1(gb);
529
530             if (s->pps->cabac_init_present_flag)
531                 sh->cabac_init_flag = get_bits1(gb);
532             else
533                 sh->cabac_init_flag = 0;
534
535             sh->collocated_ref_idx = 0;
536             if (sh->slice_temporal_mvp_enabled_flag) {
537                 sh->collocated_list = L0;
538                 if (sh->slice_type == B_SLICE)
539                     sh->collocated_list = !get_bits1(gb);
540
541                 if (sh->nb_refs[sh->collocated_list] > 1) {
542                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
543                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
544                         av_log(s->avctx, AV_LOG_ERROR,
545                                "Invalid collocated_ref_idx: %d.\n",
546                                sh->collocated_ref_idx);
547                         return AVERROR_INVALIDDATA;
548                     }
549                 }
550             }
551
552             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
553                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
554                 pred_weight_table(s, gb);
555             }
556
557             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
558             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
559                 av_log(s->avctx, AV_LOG_ERROR,
560                        "Invalid number of merging MVP candidates: %d.\n",
561                        sh->max_num_merge_cand);
562                 return AVERROR_INVALIDDATA;
563             }
564         }
565
566         sh->slice_qp_delta = get_se_golomb(gb);
567         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
568             sh->slice_cb_qp_offset = get_se_golomb(gb);
569             sh->slice_cr_qp_offset = get_se_golomb(gb);
570         } else {
571             sh->slice_cb_qp_offset = 0;
572             sh->slice_cr_qp_offset = 0;
573         }
574
575         if (s->pps->deblocking_filter_control_present_flag) {
576             int deblocking_filter_override_flag = 0;
577
578             if (s->pps->deblocking_filter_override_enabled_flag)
579                 deblocking_filter_override_flag = get_bits1(gb);
580
581             if (deblocking_filter_override_flag) {
582                 sh->disable_deblocking_filter_flag = get_bits1(gb);
583                 if (!sh->disable_deblocking_filter_flag) {
584                     sh->beta_offset = get_se_golomb(gb) * 2;
585                     sh->tc_offset   = get_se_golomb(gb) * 2;
586                 }
587             } else {
588                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
589                 sh->beta_offset                    = s->pps->beta_offset;
590                 sh->tc_offset                      = s->pps->tc_offset;
591             }
592         } else {
593             sh->disable_deblocking_filter_flag = 0;
594             sh->beta_offset                    = 0;
595             sh->tc_offset                      = 0;
596         }
597
598         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
599             (sh->slice_sample_adaptive_offset_flag[0] ||
600              sh->slice_sample_adaptive_offset_flag[1] ||
601              !sh->disable_deblocking_filter_flag)) {
602             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
603         } else {
604             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
605         }
606     } else if (!s->slice_initialized) {
607         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
608         return AVERROR_INVALIDDATA;
609     }
610
611     sh->num_entry_point_offsets = 0;
612     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
613         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
614         if (sh->num_entry_point_offsets > 0) {
615             int offset_len = get_ue_golomb_long(gb) + 1;
616             int segments = offset_len >> 4;
617             int rest = (offset_len & 15);
618             av_freep(&sh->entry_point_offset);
619             av_freep(&sh->offset);
620             av_freep(&sh->size);
621             sh->entry_point_offset = av_malloc(sh->num_entry_point_offsets * sizeof(int));
622             sh->offset = av_malloc(sh->num_entry_point_offsets * sizeof(int));
623             sh->size = av_malloc(sh->num_entry_point_offsets * sizeof(int));
624             for (i = 0; i < sh->num_entry_point_offsets; i++) {
625                 int val = 0;
626                 for (j = 0; j < segments; j++) {
627                     val <<= 16;
628                     val += get_bits(gb, 16);
629                 }
630                 if (rest) {
631                     val <<= rest;
632                     val += get_bits(gb, rest);
633                 }
634                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
635             }
636             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
637                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
638                 s->threads_number = 1;
639             } else
640                 s->enable_parallel_tiles = 0;
641         } else
642             s->enable_parallel_tiles = 0;
643     }
644
645     if (s->pps->slice_header_extension_present_flag) {
646         int length = get_ue_golomb_long(gb);
647         for (i = 0; i < length; i++)
648             skip_bits(gb, 8);  // slice_header_extension_data_byte
649     }
650
651     // Inferred parameters
652     sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
653     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
654
655     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
656
657     if (!s->pps->cu_qp_delta_enabled_flag)
658         s->HEVClc->qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
659                           (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
660
661     s->slice_initialized = 1;
662
663     return 0;
664 }
665
666 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
667
668 #define SET_SAO(elem, value)                            \
669 do {                                                    \
670     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
671         sao->elem = value;                              \
672     else if (sao_merge_left_flag)                       \
673         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
674     else if (sao_merge_up_flag)                         \
675         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
676     else                                                \
677         sao->elem = 0;                                  \
678 } while (0)
679
680 static void hls_sao_param(HEVCContext *s, int rx, int ry)
681 {
682     HEVCLocalContext *lc    = s->HEVClc;
683     int sao_merge_left_flag = 0;
684     int sao_merge_up_flag   = 0;
685     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
686     SAOParams *sao          = &CTB(s->sao, rx, ry);
687     int c_idx, i;
688
689     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
690         s->sh.slice_sample_adaptive_offset_flag[1]) {
691         if (rx > 0) {
692             if (lc->ctb_left_flag)
693                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
694         }
695         if (ry > 0 && !sao_merge_left_flag) {
696             if (lc->ctb_up_flag)
697                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
698         }
699     }
700
701     for (c_idx = 0; c_idx < 3; c_idx++) {
702         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
703             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
704             continue;
705         }
706
707         if (c_idx == 2) {
708             sao->type_idx[2] = sao->type_idx[1];
709             sao->eo_class[2] = sao->eo_class[1];
710         } else {
711             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
712         }
713
714         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
715             continue;
716
717         for (i = 0; i < 4; i++)
718             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
719
720         if (sao->type_idx[c_idx] == SAO_BAND) {
721             for (i = 0; i < 4; i++) {
722                 if (sao->offset_abs[c_idx][i]) {
723                     SET_SAO(offset_sign[c_idx][i],
724                             ff_hevc_sao_offset_sign_decode(s));
725                 } else {
726                     sao->offset_sign[c_idx][i] = 0;
727                 }
728             }
729             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
730         } else if (c_idx != 2) {
731             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
732         }
733
734         // Inferred parameters
735         sao->offset_val[c_idx][0] = 0;
736         for (i = 0; i < 4; i++) {
737             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
738             if (sao->type_idx[c_idx] == SAO_EDGE) {
739                 if (i > 1)
740                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
741             } else if (sao->offset_sign[c_idx][i]) {
742                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
743             }
744         }
745     }
746 }
747
748 #undef SET_SAO
749 #undef CTB
750
751 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
752                                int xBase, int yBase, int cb_xBase, int cb_yBase,
753                                int log2_cb_size, int log2_trafo_size,
754                                int trafo_depth, int blk_idx)
755 {
756     HEVCLocalContext *lc = s->HEVClc;
757
758     if (lc->cu.pred_mode == MODE_INTRA) {
759         int trafo_size = 1 << log2_trafo_size;
760         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
761
762         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
763         if (log2_trafo_size > 2) {
764             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
765             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
766             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
767             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
768         } else if (blk_idx == 3) {
769             trafo_size = trafo_size << s->sps->hshift[1];
770             ff_hevc_set_neighbour_available(s, xBase, yBase,
771                                             trafo_size, trafo_size);
772             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
773             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
774         }
775     }
776
777     if (lc->tt.cbf_luma ||
778         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
779         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
780         int scan_idx   = SCAN_DIAG;
781         int scan_idx_c = SCAN_DIAG;
782
783         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
784             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
785             if (lc->tu.cu_qp_delta != 0)
786                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
787                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
788             lc->tu.is_cu_qp_delta_coded = 1;
789             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
790         }
791
792         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
793             if (lc->tu.cur_intra_pred_mode >= 6 &&
794                 lc->tu.cur_intra_pred_mode <= 14) {
795                 scan_idx = SCAN_VERT;
796             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
797                        lc->tu.cur_intra_pred_mode <= 30) {
798                 scan_idx = SCAN_HORIZ;
799             }
800
801             if (lc->pu.intra_pred_mode_c >=  6 &&
802                 lc->pu.intra_pred_mode_c <= 14) {
803                 scan_idx_c = SCAN_VERT;
804             } else if (lc->pu.intra_pred_mode_c >= 22 &&
805                        lc->pu.intra_pred_mode_c <= 30) {
806                 scan_idx_c = SCAN_HORIZ;
807             }
808         }
809
810         if (lc->tt.cbf_luma)
811             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
812         if (log2_trafo_size > 2) {
813             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
814                 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
815             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
816                 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
817         } else if (blk_idx == 3) {
818             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
819                 ff_hevc_hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
820             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
821                 ff_hevc_hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
822         }
823     }
824 }
825
826 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
827 {
828     int cb_size          = 1 << log2_cb_size;
829     int log2_min_pu_size = s->sps->log2_min_pu_size;
830
831     int min_pu_width     = s->sps->min_pu_width;
832     int x_end = FFMIN(x0 + cb_size, s->sps->width);
833     int y_end = FFMIN(y0 + cb_size, s->sps->height);
834     int i, j;
835
836     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
837         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
838             s->is_pcm[i + j * min_pu_width] = 2;
839 }
840
841 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
842                                int xBase, int yBase, int cb_xBase, int cb_yBase,
843                                int log2_cb_size, int log2_trafo_size,
844                                int trafo_depth, int blk_idx)
845 {
846     HEVCLocalContext *lc = s->HEVClc;
847     uint8_t split_transform_flag;
848
849     if (trafo_depth > 0 && log2_trafo_size == 2) {
850         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
851             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
852         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
853             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
854     } else {
855         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
856         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
857     }
858
859     if (lc->cu.intra_split_flag) {
860         if (trafo_depth == 1)
861             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
862     } else {
863         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
864     }
865
866     lc->tt.cbf_luma = 1;
867
868     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
869                               lc->cu.pred_mode == MODE_INTER &&
870                               lc->cu.part_mode != PART_2Nx2N &&
871                               trafo_depth == 0;
872
873     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
874         log2_trafo_size >  s->sps->log2_min_tb_size    &&
875         trafo_depth     < lc->cu.max_trafo_depth       &&
876         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
877         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
878     } else {
879         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
880                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
881                                lc->tt.inter_split_flag;
882     }
883
884     if (log2_trafo_size > 2) {
885         if (trafo_depth == 0 ||
886             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
887             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
888                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
889         }
890
891         if (trafo_depth == 0 ||
892             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
893             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
894                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
895         }
896     }
897
898     if (split_transform_flag) {
899         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
900         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
901
902         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
903                            log2_trafo_size - 1, trafo_depth + 1, 0);
904         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
905                            log2_trafo_size - 1, trafo_depth + 1, 1);
906         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
907                            log2_trafo_size - 1, trafo_depth + 1, 2);
908         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
909                            log2_trafo_size - 1, trafo_depth + 1, 3);
910     } else {
911         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
912         int log2_min_tu_size = s->sps->log2_min_tb_size;
913         int min_tu_width     = s->sps->min_tb_width;
914
915         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
916             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
917             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
918             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
919         }
920
921         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
922                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
923
924         // TODO: store cbf_luma somewhere else
925         if (lc->tt.cbf_luma) {
926             int i, j;
927             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
928                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
929                     int x_tu = (x0 + j) >> log2_min_tu_size;
930                     int y_tu = (y0 + i) >> log2_min_tu_size;
931                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
932                 }
933         }
934         if (!s->sh.disable_deblocking_filter_flag) {
935             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
936                                                   lc->slice_or_tiles_up_boundary,
937                                                   lc->slice_or_tiles_left_boundary);
938             if (s->pps->transquant_bypass_enable_flag &&
939                 lc->cu.cu_transquant_bypass_flag)
940                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
941         }
942     }
943 }
944
945 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
946 {
947     //TODO: non-4:2:0 support
948     HEVCLocalContext *lc = s->HEVClc;
949     GetBitContext gb;
950     int cb_size   = 1 << log2_cb_size;
951     int stride0   = s->frame->linesize[0];
952     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
953     int   stride1 = s->frame->linesize[1];
954     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
955     int   stride2 = s->frame->linesize[2];
956     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
957
958     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
959     const uint8_t *pcm = skip_bytes(&s->HEVClc->cc, (length + 7) >> 3);
960     int ret;
961
962     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
963                                           lc->slice_or_tiles_up_boundary,
964                                           lc->slice_or_tiles_left_boundary);
965
966     ret = init_get_bits(&gb, pcm, length);
967     if (ret < 0)
968         return ret;
969
970     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
971     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
972     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
973     return 0;
974 }
975
976 /**
977  * 8.5.3.2.2.1 Luma sample interpolation process
978  *
979  * @param s HEVC decoding context
980  * @param dst target buffer for block data at block position
981  * @param dststride stride of the dst buffer
982  * @param ref reference picture buffer at origin (0, 0)
983  * @param mv motion vector (relative to block position) to get pixel data from
984  * @param x_off horizontal position of block from origin (0, 0)
985  * @param y_off vertical position of block from origin (0, 0)
986  * @param block_w width of block
987  * @param block_h height of block
988  */
989 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
990                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
991                     int block_w, int block_h)
992 {
993     HEVCLocalContext *lc = s->HEVClc;
994     uint8_t *src         = ref->data[0];
995     ptrdiff_t srcstride  = ref->linesize[0];
996     int pic_width        = s->sps->width;
997     int pic_height       = s->sps->height;
998
999     int mx         = mv->x & 3;
1000     int my         = mv->y & 3;
1001     int extra_left = ff_hevc_qpel_extra_before[mx];
1002     int extra_top  = ff_hevc_qpel_extra_before[my];
1003
1004     x_off += mv->x >> 2;
1005     y_off += mv->y >> 2;
1006     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1007
1008     if (x_off < extra_left || y_off < extra_top ||
1009         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1010         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1011         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1012
1013         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, srcstride, src - offset, srcstride,
1014                                  block_w + ff_hevc_qpel_extra[mx],
1015                                  block_h + ff_hevc_qpel_extra[my],
1016                                  x_off - extra_left, y_off - extra_top,
1017                                  pic_width, pic_height);
1018         src = lc->edge_emu_buffer + offset;
1019     }
1020     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1021                                      block_h, lc->mc_buffer);
1022 }
1023
1024 /**
1025  * 8.5.3.2.2.2 Chroma sample interpolation process
1026  *
1027  * @param s HEVC decoding context
1028  * @param dst1 target buffer for block data at block position (U plane)
1029  * @param dst2 target buffer for block data at block position (V plane)
1030  * @param dststride stride of the dst1 and dst2 buffers
1031  * @param ref reference picture buffer at origin (0, 0)
1032  * @param mv motion vector (relative to block position) to get pixel data from
1033  * @param x_off horizontal position of block from origin (0, 0)
1034  * @param y_off vertical position of block from origin (0, 0)
1035  * @param block_w width of block
1036  * @param block_h height of block
1037  */
1038 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1039                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1040                       int x_off, int y_off, int block_w, int block_h)
1041 {
1042     HEVCLocalContext *lc = s->HEVClc;
1043     uint8_t *src1        = ref->data[1];
1044     uint8_t *src2        = ref->data[2];
1045     ptrdiff_t src1stride = ref->linesize[1];
1046     ptrdiff_t src2stride = ref->linesize[2];
1047     int pic_width        = s->sps->width >> 1;
1048     int pic_height       = s->sps->height >> 1;
1049
1050     int mx = mv->x & 7;
1051     int my = mv->y & 7;
1052
1053     x_off += mv->x >> 3;
1054     y_off += mv->y >> 3;
1055     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1056     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1057
1058     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1059         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1060         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1061         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1062         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1063
1064         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1stride, src1 - offset1, src1stride,
1065                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1066                                  x_off - EPEL_EXTRA_BEFORE,
1067                                  y_off - EPEL_EXTRA_BEFORE,
1068                                  pic_width, pic_height);
1069
1070         src1 = lc->edge_emu_buffer + offset1;
1071         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1072                                              block_w, block_h, mx, my, lc->mc_buffer);
1073
1074         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2stride, src2 - offset2, src2stride,
1075                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1076                                  x_off - EPEL_EXTRA_BEFORE,
1077                                  y_off - EPEL_EXTRA_BEFORE,
1078                                  pic_width, pic_height);
1079         src2 = lc->edge_emu_buffer + offset2;
1080         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1081                                              block_w, block_h, mx, my,
1082                                              lc->mc_buffer);
1083     } else {
1084         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1085                                              block_w, block_h, mx, my,
1086                                              lc->mc_buffer);
1087         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1088                                              block_w, block_h, mx, my,
1089                                              lc->mc_buffer);
1090     }
1091 }
1092
1093 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1094                                 const Mv *mv, int y0, int height)
1095 {
1096     int y = (mv->y >> 2) + y0 + height + 9;
1097
1098     if (s->threads_type == FF_THREAD_FRAME )
1099         ff_thread_await_progress(&ref->tf, y, 0);
1100 }
1101
1102 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1103                                 int nPbW, int nPbH,
1104                                 int log2_cb_size, int partIdx)
1105 {
1106 #define POS(c_idx, x, y)                                                              \
1107     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1108                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1109     HEVCLocalContext *lc = s->HEVClc;
1110     int merge_idx = 0;
1111     struct MvField current_mv = {{{ 0 }}};
1112
1113     int min_pu_width = s->sps->min_pu_width;
1114
1115     MvField *tab_mvf = s->ref->tab_mvf;
1116     RefPicList  *refPicList = s->ref->refPicList;
1117     HEVCFrame *ref0, *ref1;
1118
1119     int tmpstride = MAX_PB_SIZE;
1120
1121     uint8_t *dst0 = POS(0, x0, y0);
1122     uint8_t *dst1 = POS(1, x0, y0);
1123     uint8_t *dst2 = POS(2, x0, y0);
1124     int log2_min_cb_size = s->sps->log2_min_cb_size;
1125     int min_cb_width     = s->sps->min_cb_width;
1126     int x_cb             = x0 >> log2_min_cb_size;
1127     int y_cb             = y0 >> log2_min_cb_size;
1128     int ref_idx[2];
1129     int mvp_flag[2];
1130     int x_pu, y_pu;
1131     int i, j;
1132
1133     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1134         if (s->sh.max_num_merge_cand > 1)
1135             merge_idx = ff_hevc_merge_idx_decode(s);
1136         else
1137             merge_idx = 0;
1138
1139         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1140                                    1 << log2_cb_size,
1141                                    1 << log2_cb_size,
1142                                    log2_cb_size, partIdx,
1143                                    merge_idx, &current_mv);
1144         x_pu = x0 >> s->sps->log2_min_pu_size;
1145         y_pu = y0 >> s->sps->log2_min_pu_size;
1146
1147         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1148             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1149                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1150     } else { /* MODE_INTER */
1151         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1152         if (lc->pu.merge_flag) {
1153             if (s->sh.max_num_merge_cand > 1)
1154                 merge_idx = ff_hevc_merge_idx_decode(s);
1155             else
1156                 merge_idx = 0;
1157
1158             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1159                                        partIdx, merge_idx, &current_mv);
1160             x_pu = x0 >> s->sps->log2_min_pu_size;
1161             y_pu = y0 >> s->sps->log2_min_pu_size;
1162
1163             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1164                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1165                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1166         } else {
1167             enum InterPredIdc inter_pred_idc = PRED_L0;
1168             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1169             if (s->sh.slice_type == B_SLICE)
1170                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1171
1172             if (inter_pred_idc != PRED_L1) {
1173                 if (s->sh.nb_refs[L0]) {
1174                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1175                     current_mv.ref_idx[0] = ref_idx[0];
1176                 }
1177                 current_mv.pred_flag[0] = 1;
1178                 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1179                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1180                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1181                                          partIdx, merge_idx, &current_mv,
1182                                          mvp_flag[0], 0);
1183                 current_mv.mv[0].x += lc->pu.mvd.x;
1184                 current_mv.mv[0].y += lc->pu.mvd.y;
1185             }
1186
1187             if (inter_pred_idc != PRED_L0) {
1188                 if (s->sh.nb_refs[L1]) {
1189                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1190                     current_mv.ref_idx[1] = ref_idx[1];
1191                 }
1192
1193                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1194                     lc->pu.mvd.x = 0;
1195                     lc->pu.mvd.y = 0;
1196                 } else {
1197                     ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1198                 }
1199
1200                 current_mv.pred_flag[1] = 1;
1201                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1202                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1203                                          partIdx, merge_idx, &current_mv,
1204                                          mvp_flag[1], 1);
1205                 current_mv.mv[1].x += lc->pu.mvd.x;
1206                 current_mv.mv[1].y += lc->pu.mvd.y;
1207             }
1208
1209             x_pu = x0 >> s->sps->log2_min_pu_size;
1210             y_pu = y0 >> s->sps->log2_min_pu_size;
1211
1212             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1213                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1214                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1215         }
1216     }
1217
1218     if (current_mv.pred_flag[0]) {
1219         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1220         if (!ref0)
1221             return;
1222         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1223     }
1224     if (current_mv.pred_flag[1]) {
1225         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1226         if (!ref1)
1227             return;
1228         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1229     }
1230
1231     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1232         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1233         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1234
1235         luma_mc(s, tmp, tmpstride, ref0->frame,
1236                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1237
1238         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1239             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1240             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1241                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1242                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1243                                      dst0, s->frame->linesize[0], tmp,
1244                                      tmpstride, nPbW, nPbH);
1245         } else {
1246             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1247         }
1248         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1249                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1250
1251         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1252             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1253             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1254                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1255                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1256                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1257                                      nPbW / 2, nPbH / 2);
1258             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1259                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1260                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1261                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1262                                      nPbW / 2, nPbH / 2);
1263         } else {
1264             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1265             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1266         }
1267     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1268         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1269         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1270
1271         if (!ref1)
1272             return;
1273
1274         luma_mc(s, tmp, tmpstride, ref1->frame,
1275                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1276
1277         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1278             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1279             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1280                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1281                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1282                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1283                                       nPbW, nPbH);
1284         } else {
1285             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1286         }
1287
1288         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1289                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1290
1291         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1292             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1293             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1294                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1295                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1296                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1297             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1298                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1299                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1300                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1301         } else {
1302             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1303             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1304         }
1305     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1306         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1307         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1308         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1309         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1310         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1311         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1312
1313         if (!ref0 || !ref1)
1314             return;
1315
1316         luma_mc(s, tmp, tmpstride, ref0->frame,
1317                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1318         luma_mc(s, tmp2, tmpstride, ref1->frame,
1319                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1320
1321         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1322             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1323             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1324                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1325                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1326                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1327                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1328                                          dst0, s->frame->linesize[0],
1329                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1330         } else {
1331             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1332                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1333         }
1334
1335         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1336                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1337         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1338                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1339
1340         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1341             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1342             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1343                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1344                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1345                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1346                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1347                                          dst1, s->frame->linesize[1], tmp, tmp3,
1348                                          tmpstride, nPbW / 2, nPbH / 2);
1349             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1350                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1351                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1352                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1353                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1354                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1355                                          tmpstride, nPbW / 2, nPbH / 2);
1356         } else {
1357             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1358             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1359         }
1360     }
1361 }
1362
1363 /**
1364  * 8.4.1
1365  */
1366 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1367                                 int prev_intra_luma_pred_flag)
1368 {
1369     HEVCLocalContext *lc = s->HEVClc;
1370     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1371     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1372     int min_pu_width     = s->sps->min_pu_width;
1373     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1374     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1375     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1376
1377     int cand_up   = (lc->ctb_up_flag || y0b) ?
1378                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1379     int cand_left = (lc->ctb_left_flag || x0b) ?
1380                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1381
1382     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1383
1384     MvField *tab_mvf = s->ref->tab_mvf;
1385     int intra_pred_mode;
1386     int candidate[3];
1387     int i, j;
1388
1389     // intra_pred_mode prediction does not cross vertical CTB boundaries
1390     if ((y0 - 1) < y_ctb)
1391         cand_up = INTRA_DC;
1392
1393     if (cand_left == cand_up) {
1394         if (cand_left < 2) {
1395             candidate[0] = INTRA_PLANAR;
1396             candidate[1] = INTRA_DC;
1397             candidate[2] = INTRA_ANGULAR_26;
1398         } else {
1399             candidate[0] = cand_left;
1400             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1401             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1402         }
1403     } else {
1404         candidate[0] = cand_left;
1405         candidate[1] = cand_up;
1406         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1407             candidate[2] = INTRA_PLANAR;
1408         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1409             candidate[2] = INTRA_DC;
1410         } else {
1411             candidate[2] = INTRA_ANGULAR_26;
1412         }
1413     }
1414
1415     if (prev_intra_luma_pred_flag) {
1416         intra_pred_mode = candidate[lc->pu.mpm_idx];
1417     } else {
1418         if (candidate[0] > candidate[1])
1419             FFSWAP(uint8_t, candidate[0], candidate[1]);
1420         if (candidate[0] > candidate[2])
1421             FFSWAP(uint8_t, candidate[0], candidate[2]);
1422         if (candidate[1] > candidate[2])
1423             FFSWAP(uint8_t, candidate[1], candidate[2]);
1424
1425         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1426         for (i = 0; i < 3; i++)
1427             if (intra_pred_mode >= candidate[i])
1428                 intra_pred_mode++;
1429     }
1430
1431     /* write the intra prediction units into the mv array */
1432     if (!size_in_pus)
1433         size_in_pus = 1;
1434     for (i = 0; i < size_in_pus; i++) {
1435         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1436                intra_pred_mode, size_in_pus);
1437
1438         for (j = 0; j < size_in_pus; j++) {
1439             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1440             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1441             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1442             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1443             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1444             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1445             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1446             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1447             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1448         }
1449     }
1450
1451     return intra_pred_mode;
1452 }
1453
1454 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1455                                           int log2_cb_size, int ct_depth)
1456 {
1457     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1458     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1459     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1460     int y;
1461
1462     for (y = 0; y < length; y++)
1463         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1464                ct_depth, length);
1465 }
1466
1467 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1468                                   int log2_cb_size)
1469 {
1470     HEVCLocalContext *lc = s->HEVClc;
1471     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1472     uint8_t prev_intra_luma_pred_flag[4];
1473     int split   = lc->cu.part_mode == PART_NxN;
1474     int pb_size = (1 << log2_cb_size) >> split;
1475     int side    = split + 1;
1476     int chroma_mode;
1477     int i, j;
1478
1479     for (i = 0; i < side; i++)
1480         for (j = 0; j < side; j++)
1481             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1482
1483     for (i = 0; i < side; i++) {
1484         for (j = 0; j < side; j++) {
1485             if (prev_intra_luma_pred_flag[2 * i + j])
1486                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1487             else
1488                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1489
1490             lc->pu.intra_pred_mode[2 * i + j] =
1491                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1492                                      prev_intra_luma_pred_flag[2 * i + j]);
1493         }
1494     }
1495
1496     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1497     if (chroma_mode != 4) {
1498         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1499             lc->pu.intra_pred_mode_c = 34;
1500         else
1501             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1502     } else {
1503         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1504     }
1505 }
1506
1507 static void intra_prediction_unit_default_value(HEVCContext *s,
1508                                                 int x0, int y0,
1509                                                 int log2_cb_size)
1510 {
1511     HEVCLocalContext *lc = s->HEVClc;
1512     int pb_size          = 1 << log2_cb_size;
1513     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1514     int min_pu_width     = s->sps->min_pu_width;
1515     MvField *tab_mvf     = s->ref->tab_mvf;
1516     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1517     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1518     int j, k;
1519
1520     if (size_in_pus == 0)
1521         size_in_pus = 1;
1522     for (j = 0; j < size_in_pus; j++) {
1523         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1524         for (k = 0; k < size_in_pus; k++)
1525             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
1526     }
1527 }
1528
1529 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1530 {
1531     int cb_size          = 1 << log2_cb_size;
1532     HEVCLocalContext *lc = s->HEVClc;
1533     int log2_min_cb_size = s->sps->log2_min_cb_size;
1534     int length           = cb_size >> log2_min_cb_size;
1535     int min_cb_width     = s->sps->min_cb_width;
1536     int x_cb             = x0 >> log2_min_cb_size;
1537     int y_cb             = y0 >> log2_min_cb_size;
1538     int x, y;
1539
1540     lc->cu.x                = x0;
1541     lc->cu.y                = y0;
1542     lc->cu.rqt_root_cbf     = 1;
1543     lc->cu.pred_mode        = MODE_INTRA;
1544     lc->cu.part_mode        = PART_2Nx2N;
1545     lc->cu.intra_split_flag = 0;
1546     lc->cu.pcm_flag         = 0;
1547
1548     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1549     for (x = 0; x < 4; x++)
1550         lc->pu.intra_pred_mode[x] = 1;
1551     if (s->pps->transquant_bypass_enable_flag) {
1552         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1553         if (lc->cu.cu_transquant_bypass_flag)
1554             set_deblocking_bypass(s, x0, y0, log2_cb_size);
1555     } else
1556         lc->cu.cu_transquant_bypass_flag = 0;
1557
1558     if (s->sh.slice_type != I_SLICE) {
1559         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
1560
1561         lc->cu.pred_mode = MODE_SKIP;
1562         x = y_cb * min_cb_width + x_cb;
1563         for (y = 0; y < length; y++) {
1564             memset(&s->skip_flag[x], skip_flag, length);
1565             x += min_cb_width;
1566         }
1567         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
1568     }
1569
1570     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1571         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
1572         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1573
1574         if (!s->sh.disable_deblocking_filter_flag)
1575             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1576                                                   lc->slice_or_tiles_up_boundary,
1577                                                   lc->slice_or_tiles_left_boundary);
1578     } else {
1579         if (s->sh.slice_type != I_SLICE)
1580             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
1581         if (lc->cu.pred_mode != MODE_INTRA ||
1582             log2_cb_size == s->sps->log2_min_cb_size) {
1583             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
1584             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
1585                                       lc->cu.pred_mode == MODE_INTRA;
1586         }
1587
1588         if (lc->cu.pred_mode == MODE_INTRA) {
1589             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
1590                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
1591                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
1592                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
1593             }
1594             if (lc->cu.pcm_flag) {
1595                 int ret;
1596                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1597                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
1598                 if (s->sps->pcm.loop_filter_disable_flag)
1599                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
1600
1601                 if (ret < 0)
1602                     return ret;
1603             } else {
1604                 intra_prediction_unit(s, x0, y0, log2_cb_size);
1605             }
1606         } else {
1607             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1608             switch (lc->cu.part_mode) {
1609             case PART_2Nx2N:
1610                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
1611                 break;
1612             case PART_2NxN:
1613                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
1614                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
1615                 break;
1616             case PART_Nx2N:
1617                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
1618                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
1619                 break;
1620             case PART_2NxnU:
1621                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
1622                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
1623                 break;
1624             case PART_2NxnD:
1625                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
1626                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
1627                 break;
1628             case PART_nLx2N:
1629                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
1630                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
1631                 break;
1632             case PART_nRx2N:
1633                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
1634                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
1635                 break;
1636             case PART_NxN:
1637                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
1638                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
1639                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
1640                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
1641                 break;
1642             }
1643         }
1644
1645         if (!lc->cu.pcm_flag) {
1646             if (lc->cu.pred_mode != MODE_INTRA &&
1647                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
1648                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
1649             }
1650             if (lc->cu.rqt_root_cbf) {
1651                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
1652                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
1653                                          s->sps->max_transform_hierarchy_depth_inter;
1654                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
1655                                    log2_cb_size, 0, 0);
1656             } else {
1657                 if (!s->sh.disable_deblocking_filter_flag)
1658                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1659                                                           lc->slice_or_tiles_up_boundary,
1660                                                           lc->slice_or_tiles_left_boundary);
1661             }
1662         }
1663     }
1664
1665     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
1666         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
1667
1668     x = y_cb * min_cb_width + x_cb;
1669     for (y = 0; y < length; y++) {
1670         memset(&s->qp_y_tab[x], lc->qp_y, length);
1671         x += min_cb_width;
1672     }
1673
1674     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
1675
1676     return 0;
1677 }
1678
1679 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
1680                                int log2_cb_size, int cb_depth)
1681 {
1682     HEVCLocalContext *lc = s->HEVClc;
1683     const int cb_size    = 1 << log2_cb_size;
1684     int ret;
1685
1686     lc->ct.depth = cb_depth;
1687     if (x0 + cb_size <= s->sps->width  &&
1688         y0 + cb_size <= s->sps->height &&
1689         log2_cb_size > s->sps->log2_min_cb_size) {
1690         SAMPLE(s->split_cu_flag, x0, y0) =
1691             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
1692     } else {
1693         SAMPLE(s->split_cu_flag, x0, y0) =
1694             (log2_cb_size > s->sps->log2_min_cb_size);
1695     }
1696     if (s->pps->cu_qp_delta_enabled_flag &&
1697         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
1698         lc->tu.is_cu_qp_delta_coded = 0;
1699         lc->tu.cu_qp_delta          = 0;
1700     }
1701
1702     if (SAMPLE(s->split_cu_flag, x0, y0)) {
1703         const int cb_size_split = cb_size >> 1;
1704         const int x1 = x0 + cb_size_split;
1705         const int y1 = y0 + cb_size_split;
1706
1707         int more_data = 0;
1708
1709         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
1710         if (more_data < 0)
1711             return more_data;
1712
1713         if (more_data && x1 < s->sps->width)
1714             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
1715         if (more_data && y1 < s->sps->height)
1716             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
1717         if (more_data && x1 < s->sps->width &&
1718             y1 < s->sps->height) {
1719             return hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
1720         }
1721         if (more_data)
1722             return ((x1 + cb_size_split) < s->sps->width ||
1723                     (y1 + cb_size_split) < s->sps->height);
1724         else
1725             return 0;
1726     } else {
1727         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
1728         if (ret < 0)
1729             return ret;
1730         if ((!((x0 + cb_size) %
1731                (1 << (s->sps->log2_ctb_size))) ||
1732              (x0 + cb_size >= s->sps->width)) &&
1733             (!((y0 + cb_size) %
1734                (1 << (s->sps->log2_ctb_size))) ||
1735              (y0 + cb_size >= s->sps->height))) {
1736             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
1737             return !end_of_slice_flag;
1738         } else {
1739             return 1;
1740         }
1741     }
1742
1743     return 0;
1744 }
1745
1746 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
1747                                  int ctb_addr_ts)
1748 {
1749     HEVCLocalContext *lc  = s->HEVClc;
1750     int ctb_size          = 1 << s->sps->log2_ctb_size;
1751     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
1752     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
1753
1754     int tile_left_boundary, tile_up_boundary;
1755     int slice_left_boundary, slice_up_boundary;
1756
1757     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
1758
1759     if (s->pps->entropy_coding_sync_enabled_flag) {
1760         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
1761             lc->first_qp_group = 1;
1762         lc->end_of_tiles_x = s->sps->width;
1763     } else if (s->pps->tiles_enabled_flag) {
1764         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
1765             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
1766             lc->start_of_tiles_x = x_ctb;
1767             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
1768             lc->first_qp_group   = 1;
1769         }
1770     } else {
1771         lc->end_of_tiles_x = s->sps->width;
1772     }
1773
1774     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
1775
1776     if (s->pps->tiles_enabled_flag) {
1777         tile_left_boundary  = x_ctb > 0 &&
1778                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
1779         slice_left_boundary = x_ctb > 0 &&
1780                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
1781         tile_up_boundary  = y_ctb > 0 &&
1782                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
1783         slice_up_boundary = y_ctb > 0 &&
1784                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
1785     } else {
1786         tile_left_boundary  =
1787         tile_up_boundary    = 1;
1788         slice_left_boundary = ctb_addr_in_slice > 0;
1789         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
1790     }
1791     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
1792     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
1793     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
1794     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
1795     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
1796     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
1797 }
1798
1799 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
1800 {
1801     HEVCContext *s  = avctxt->priv_data;
1802     int ctb_size    = 1 << s->sps->log2_ctb_size;
1803     int more_data   = 1;
1804     int x_ctb       = 0;
1805     int y_ctb       = 0;
1806     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
1807
1808     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
1809         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
1810
1811         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
1812         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
1813         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
1814
1815         ff_hevc_cabac_init(s, ctb_addr_ts);
1816
1817         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
1818
1819         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
1820         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
1821         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
1822
1823         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
1824         if (more_data < 0)
1825             return more_data;
1826
1827         ctb_addr_ts++;
1828         ff_hevc_save_states(s, ctb_addr_ts);
1829         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
1830     }
1831
1832     if (x_ctb + ctb_size >= s->sps->width &&
1833         y_ctb + ctb_size >= s->sps->height)
1834         ff_hevc_hls_filter(s, x_ctb, y_ctb);
1835
1836     return ctb_addr_ts;
1837 }
1838
1839 static int hls_slice_data(HEVCContext *s)
1840 {
1841     int arg[2];
1842     int ret[2];
1843
1844     arg[0] = 0;
1845     arg[1] = 1;
1846
1847     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
1848     return ret[0];
1849 }
1850 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
1851 {
1852     HEVCContext *s1  = avctxt->priv_data, *s;
1853     HEVCLocalContext *lc;
1854     int ctb_size    = 1<< s1->sps->log2_ctb_size;
1855     int more_data   = 1;
1856     int *ctb_row_p    = input_ctb_row;
1857     int ctb_row = ctb_row_p[job];
1858     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
1859     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
1860     int thread = ctb_row % s1->threads_number;
1861     int ret;
1862
1863     s = s1->sList[self_id];
1864     lc = s->HEVClc;
1865
1866     if(ctb_row) {
1867         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
1868
1869         if (ret < 0)
1870             return ret;
1871         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
1872     }
1873
1874     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
1875         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
1876         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
1877
1878         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
1879
1880         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
1881
1882         if (avpriv_atomic_int_get(&s1->wpp_err)){
1883             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
1884             return 0;
1885         }
1886
1887         ff_hevc_cabac_init(s, ctb_addr_ts);
1888         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
1889         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
1890
1891         if (more_data < 0)
1892             return more_data;
1893
1894         ctb_addr_ts++;
1895
1896         ff_hevc_save_states(s, ctb_addr_ts);
1897         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
1898         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
1899
1900         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
1901             avpriv_atomic_int_set(&s1->wpp_err,  1);
1902             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
1903             return 0;
1904         }
1905
1906         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
1907             ff_hevc_hls_filter(s, x_ctb, y_ctb);
1908             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
1909             return ctb_addr_ts;
1910         }
1911         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
1912         x_ctb+=ctb_size;
1913
1914         if(x_ctb >= s->sps->width) {
1915             break;
1916         }
1917     }
1918     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
1919
1920     return 0;
1921 }
1922
1923 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
1924 {
1925     HEVCLocalContext *lc = s->HEVClc;
1926     int *ret = av_malloc((s->sh.num_entry_point_offsets + 1) * sizeof(int));
1927     int *arg = av_malloc((s->sh.num_entry_point_offsets + 1) * sizeof(int));
1928     int offset;
1929     int startheader, cmpt = 0;
1930     int i, j, res = 0;
1931
1932
1933     if (!s->sList[1]) {
1934         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
1935
1936
1937         for (i = 1; i < s->threads_number; i++) {
1938             s->sList[i] = av_malloc(sizeof(HEVCContext));
1939             memcpy(s->sList[i], s, sizeof(HEVCContext));
1940             s->HEVClcList[i] = av_malloc(sizeof(HEVCLocalContext));
1941             s->HEVClcList[i]->edge_emu_buffer = av_malloc((MAX_PB_SIZE + 7) * s->frame->linesize[0]);
1942             s->sList[i]->HEVClc = s->HEVClcList[i];
1943         }
1944     }
1945
1946     offset = (lc->gb.index >> 3);
1947
1948     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
1949         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
1950             startheader--;
1951             cmpt++;
1952         }
1953     }
1954
1955     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
1956         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
1957         for (j = 0, cmpt = 0, startheader = offset
1958              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
1959             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
1960                 startheader--;
1961                 cmpt++;
1962             }
1963         }
1964         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
1965         s->sh.offset[i - 1] = offset;
1966
1967     }
1968     if (s->sh.num_entry_point_offsets != 0) {
1969         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
1970         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
1971         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
1972
1973     }
1974     s->data = nal;
1975
1976     for (i = 1; i < s->threads_number; i++) {
1977         s->sList[i]->HEVClc->first_qp_group = 1;
1978         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
1979         memcpy(s->sList[i], s, sizeof(HEVCContext));
1980         s->sList[i]->HEVClc = s->HEVClcList[i];
1981     }
1982
1983     avpriv_atomic_int_set(&s->wpp_err, 0);
1984     ff_reset_entries(s->avctx);
1985
1986     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
1987         arg[i] = i;
1988         ret[i] = 0;
1989     }
1990
1991     if (s->pps->entropy_coding_sync_enabled_flag)
1992         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
1993
1994     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
1995         res += ret[i];
1996     av_free(ret);
1997     av_free(arg);
1998     return res;
1999 }
2000
2001 /**
2002  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2003  * 0 if the unit should be skipped, 1 otherwise
2004  */
2005 static int hls_nal_unit(HEVCContext *s)
2006 {
2007     GetBitContext *gb = &s->HEVClc->gb;
2008     int nuh_layer_id;
2009
2010     if (get_bits1(gb) != 0)
2011         return AVERROR_INVALIDDATA;
2012
2013     s->nal_unit_type = get_bits(gb, 6);
2014
2015     nuh_layer_id   = get_bits(gb, 6);
2016     s->temporal_id = get_bits(gb, 3) - 1;
2017     if (s->temporal_id < 0)
2018         return AVERROR_INVALIDDATA;
2019
2020     av_log(s->avctx, AV_LOG_DEBUG,
2021            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2022            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2023
2024     return nuh_layer_id == 0;
2025 }
2026
2027 static void restore_tqb_pixels(HEVCContext *s)
2028 {
2029     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2030     int x, y, c_idx;
2031
2032     for (c_idx = 0; c_idx < 3; c_idx++) {
2033         ptrdiff_t stride = s->frame->linesize[c_idx];
2034         int hshift       = s->sps->hshift[c_idx];
2035         int vshift       = s->sps->vshift[c_idx];
2036         for (y = 0; y < s->sps->min_pu_height; y++) {
2037             for (x = 0; x < s->sps->min_pu_width; x++) {
2038                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2039                     int n;
2040                     int len      = min_pu_size >> hshift;
2041                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2042                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2043                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2044                         memcpy(dst, src, len);
2045                         src += stride;
2046                         dst += stride;
2047                     }
2048                 }
2049             }
2050         }
2051     }
2052 }
2053
2054 static int hevc_frame_start(HEVCContext *s)
2055 {
2056     HEVCLocalContext *lc = s->HEVClc;
2057     int ret;
2058
2059     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2060     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2061     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2062     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2063
2064     lc->start_of_tiles_x = 0;
2065     s->is_decoded        = 0;
2066
2067     if (s->pps->tiles_enabled_flag)
2068         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2069
2070     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2071                               s->poc);
2072     if (ret < 0)
2073         goto fail;
2074
2075     av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2076                    (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2077     if (!lc->edge_emu_buffer) {
2078         ret = AVERROR(ENOMEM);
2079         goto fail;
2080     }
2081
2082     ret = ff_hevc_frame_rps(s);
2083     if (ret < 0) {
2084         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2085         goto fail;
2086     }
2087
2088     av_frame_unref(s->output_frame);
2089     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2090     if (ret < 0)
2091         goto fail;
2092
2093     ff_thread_finish_setup(s->avctx);
2094
2095     return 0;
2096
2097 fail:
2098     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2099         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2100     s->ref = NULL;
2101     return ret;
2102 }
2103
2104 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2105 {
2106     HEVCLocalContext *lc = s->HEVClc;
2107     GetBitContext *gb    = &lc->gb;
2108     int ctb_addr_ts, ret;
2109
2110     ret = init_get_bits8(gb, nal, length);
2111     if (ret < 0)
2112         return ret;
2113
2114     ret = hls_nal_unit(s);
2115     if (ret < 0) {
2116         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2117                s->nal_unit_type);
2118         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2119             return ret;
2120         return 0;
2121     } else if (!ret)
2122         return 0;
2123
2124     switch (s->nal_unit_type) {
2125     case NAL_VPS:
2126         ret = ff_hevc_decode_nal_vps(s);
2127         if (ret < 0)
2128             return ret;
2129         break;
2130     case NAL_SPS:
2131         ret = ff_hevc_decode_nal_sps(s);
2132         if (ret < 0)
2133             return ret;
2134         break;
2135     case NAL_PPS:
2136         ret = ff_hevc_decode_nal_pps(s);
2137         if (ret < 0)
2138             return ret;
2139         break;
2140     case NAL_SEI_PREFIX:
2141     case NAL_SEI_SUFFIX:
2142         ret = ff_hevc_decode_nal_sei(s);
2143         if (ret < 0)
2144             return ret;
2145         break;
2146     case NAL_TRAIL_R:
2147     case NAL_TRAIL_N:
2148     case NAL_TSA_N:
2149     case NAL_TSA_R:
2150     case NAL_STSA_N:
2151     case NAL_STSA_R:
2152     case NAL_BLA_W_LP:
2153     case NAL_BLA_W_RADL:
2154     case NAL_BLA_N_LP:
2155     case NAL_IDR_W_RADL:
2156     case NAL_IDR_N_LP:
2157     case NAL_CRA_NUT:
2158     case NAL_RADL_N:
2159     case NAL_RADL_R:
2160     case NAL_RASL_N:
2161     case NAL_RASL_R:
2162         ret = hls_slice_header(s);
2163         if (ret < 0)
2164             return ret;
2165
2166         if (s->max_ra == INT_MAX) {
2167             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2168                 s->max_ra = s->poc;
2169             } else {
2170                 if (IS_IDR(s))
2171                     s->max_ra = INT_MIN;
2172             }
2173         }
2174
2175         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2176             s->poc <= s->max_ra) {
2177             s->is_decoded = 0;
2178             break;
2179         } else {
2180             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2181                 s->max_ra = INT_MIN;
2182         }
2183
2184         if (s->sh.first_slice_in_pic_flag) {
2185             ret = hevc_frame_start(s);
2186             if (ret < 0)
2187                 return ret;
2188         } else if (!s->ref) {
2189             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2190             return AVERROR_INVALIDDATA;
2191         }
2192
2193         if (!s->sh.dependent_slice_segment_flag &&
2194             s->sh.slice_type != I_SLICE) {
2195             ret = ff_hevc_slice_rpl(s);
2196             if (ret < 0) {
2197                 av_log(s->avctx, AV_LOG_WARNING,
2198                        "Error constructing the reference lists for the current slice.\n");
2199                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2200                     return ret;
2201             }
2202         }
2203
2204         if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2205             ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2206         else
2207             ctb_addr_ts = hls_slice_data(s);
2208         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2209             s->is_decoded = 1;
2210             if ((s->pps->transquant_bypass_enable_flag ||
2211                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2212                 s->sps->sao_enabled)
2213                 restore_tqb_pixels(s);
2214         }
2215
2216         if (ctb_addr_ts < 0)
2217             return ctb_addr_ts;
2218         break;
2219     case NAL_EOS_NUT:
2220     case NAL_EOB_NUT:
2221         s->seq_decode = (s->seq_decode + 1) & 0xff;
2222         s->max_ra     = INT_MAX;
2223         break;
2224     case NAL_AUD:
2225     case NAL_FD_NUT:
2226         break;
2227     default:
2228         av_log(s->avctx, AV_LOG_INFO,
2229                "Skipping NAL unit %d\n", s->nal_unit_type);
2230     }
2231
2232     return 0;
2233 }
2234
2235 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2236    between these functions would be nice. */
2237 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2238                          HEVCNAL *nal)
2239 {
2240     int i, si, di;
2241     uint8_t *dst;
2242
2243     s->skipped_bytes = 0;
2244 #define STARTCODE_TEST                                                  \
2245         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2246             if (src[i + 2] != 3) {                                      \
2247                 /* startcode, so we must be past the end */             \
2248                 length = i;                                             \
2249             }                                                           \
2250             break;                                                      \
2251         }
2252 #if HAVE_FAST_UNALIGNED
2253 #define FIND_FIRST_ZERO                                                 \
2254         if (i > 0 && !src[i])                                           \
2255             i--;                                                        \
2256         while (src[i])                                                  \
2257             i++
2258 #if HAVE_FAST_64BIT
2259     for (i = 0; i + 1 < length; i += 9) {
2260         if (!((~AV_RN64A(src + i) &
2261                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2262               0x8000800080008080ULL))
2263             continue;
2264         FIND_FIRST_ZERO;
2265         STARTCODE_TEST;
2266         i -= 7;
2267     }
2268 #else
2269     for (i = 0; i + 1 < length; i += 5) {
2270         if (!((~AV_RN32A(src + i) &
2271                (AV_RN32A(src + i) - 0x01000101U)) &
2272               0x80008080U))
2273             continue;
2274         FIND_FIRST_ZERO;
2275         STARTCODE_TEST;
2276         i -= 3;
2277     }
2278 #endif /* HAVE_FAST_64BIT */
2279 #else
2280     for (i = 0; i + 1 < length; i += 2) {
2281         if (src[i])
2282             continue;
2283         if (i > 0 && src[i - 1] == 0)
2284             i--;
2285         STARTCODE_TEST;
2286     }
2287 #endif /* HAVE_FAST_UNALIGNED */
2288
2289     if (i >= length - 1) { // no escaped 0
2290         nal->data = src;
2291         nal->size = length;
2292         return length;
2293     }
2294
2295     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2296                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2297     if (!nal->rbsp_buffer)
2298         return AVERROR(ENOMEM);
2299
2300     dst = nal->rbsp_buffer;
2301
2302     memcpy(dst, src, i);
2303     si = di = i;
2304     while (si + 2 < length) {
2305         // remove escapes (very rare 1:2^22)
2306         if (src[si + 2] > 3) {
2307             dst[di++] = src[si++];
2308             dst[di++] = src[si++];
2309         } else if (src[si] == 0 && src[si + 1] == 0) {
2310             if (src[si + 2] == 3) { // escape
2311                 dst[di++] = 0;
2312                 dst[di++] = 0;
2313                 si       += 3;
2314
2315                 s->skipped_bytes++;
2316                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2317                     s->skipped_bytes_pos_size *= 2;
2318                     av_reallocp_array(&s->skipped_bytes_pos,
2319                             s->skipped_bytes_pos_size,
2320                             sizeof(*s->skipped_bytes_pos));
2321                     if (!s->skipped_bytes_pos)
2322                         return AVERROR(ENOMEM);
2323                 }
2324                 if (s->skipped_bytes_pos)
2325                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2326                 continue;
2327             } else // next start code
2328                 goto nsc;
2329         }
2330
2331         dst[di++] = src[si++];
2332     }
2333     while (si < length)
2334         dst[di++] = src[si++];
2335
2336 nsc:
2337     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2338
2339     nal->data = dst;
2340     nal->size = di;
2341     return si;
2342 }
2343
2344 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2345 {
2346     int i, consumed, ret = 0;
2347
2348     s->ref = NULL;
2349     s->eos = 0;
2350
2351     /* split the input packet into NAL units, so we know the upper bound on the
2352      * number of slices in the frame */
2353     s->nb_nals = 0;
2354     while (length >= 4) {
2355         HEVCNAL *nal;
2356         int extract_length = 0;
2357
2358         if (s->is_nalff) {
2359             int i;
2360             for (i = 0; i < s->nal_length_size; i++)
2361                 extract_length = (extract_length << 8) | buf[i];
2362             buf    += s->nal_length_size;
2363             length -= s->nal_length_size;
2364
2365             if (extract_length > length) {
2366                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2367                 ret = AVERROR_INVALIDDATA;
2368                 goto fail;
2369             }
2370         } else {
2371             /* search start code */
2372             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2373                 ++buf;
2374                 --length;
2375                 if (length < 4) {
2376                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2377                     ret = AVERROR_INVALIDDATA;
2378                     goto fail;
2379                 }
2380             }
2381
2382             buf           += 3;
2383             length        -= 3;
2384         }
2385
2386         if (!s->is_nalff)
2387             extract_length = length;
2388
2389         if (s->nals_allocated < s->nb_nals + 1) {
2390             int new_size = s->nals_allocated + 1;
2391             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2392             if (!tmp) {
2393                 ret = AVERROR(ENOMEM);
2394                 goto fail;
2395             }
2396             s->nals = tmp;
2397             memset(s->nals + s->nals_allocated, 0,
2398                    (new_size - s->nals_allocated) * sizeof(*tmp));
2399             av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2400             av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2401             av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2402             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2403             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2404             s->nals_allocated = new_size;
2405         }
2406         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2407         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2408         nal = &s->nals[s->nb_nals];
2409
2410         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2411
2412         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2413         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2414         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2415
2416
2417         if (consumed < 0) {
2418             ret = consumed;
2419             goto fail;
2420         }
2421
2422         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2423         if (ret < 0)
2424             goto fail;
2425         hls_nal_unit(s);
2426
2427         if (s->nal_unit_type == NAL_EOB_NUT ||
2428             s->nal_unit_type == NAL_EOS_NUT)
2429             s->eos = 1;
2430
2431         buf    += consumed;
2432         length -= consumed;
2433     }
2434
2435     /* parse the NAL units */
2436     for (i = 0; i < s->nb_nals; i++) {
2437         int ret;
2438         s->skipped_bytes = s->skipped_bytes_nal[i];
2439         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2440
2441         ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2442         if (ret < 0) {
2443             av_log(s->avctx, AV_LOG_WARNING,
2444                    "Error parsing NAL unit #%d.\n", i);
2445             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2446                 goto fail;
2447         }
2448     }
2449
2450 fail:
2451     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2452         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2453
2454     return ret;
2455 }
2456
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx logging context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2463
2464 static int verify_md5(HEVCContext *s, AVFrame *frame)
2465 {
2466     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2467     int pixel_shift;
2468     int i, j;
2469
2470     if (!desc)
2471         return AVERROR(EINVAL);
2472
2473     pixel_shift = desc->comp[0].depth_minus1 > 7;
2474
2475     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2476            s->poc);
2477
2478     /* the checksums are LE, so we have to byteswap for >8bpp formats
2479      * on BE arches */
2480 #if HAVE_BIGENDIAN
2481     if (pixel_shift && !s->checksum_buf) {
2482         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2483                        FFMAX3(frame->linesize[0], frame->linesize[1],
2484                               frame->linesize[2]));
2485         if (!s->checksum_buf)
2486             return AVERROR(ENOMEM);
2487     }
2488 #endif
2489
2490     for (i = 0; frame->data[i]; i++) {
2491         int width  = s->avctx->coded_width;
2492         int height = s->avctx->coded_height;
2493         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2494         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2495         uint8_t md5[16];
2496
2497         av_md5_init(s->md5_ctx);
2498         for (j = 0; j < h; j++) {
2499             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2500 #if HAVE_BIGENDIAN
2501             if (pixel_shift) {
2502                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2503                                    (const uint16_t*)src, w);
2504                 src = s->checksum_buf;
2505             }
2506 #endif
2507             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2508         }
2509         av_md5_final(s->md5_ctx, md5);
2510
2511         if (!memcmp(md5, s->md5[i], 16)) {
2512             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2513             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2514             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2515         } else {
2516             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2517             print_md5(s->avctx, AV_LOG_ERROR, md5);
2518             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2519             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2520             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2521             return AVERROR_INVALIDDATA;
2522         }
2523     }
2524
2525     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2526
2527     return 0;
2528 }
2529
2530 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2531                              AVPacket *avpkt)
2532 {
2533     int ret;
2534     HEVCContext *s = avctx->priv_data;
2535
2536     if (!avpkt->size) {
2537         ret = ff_hevc_output_frame(s, data, 1);
2538         if (ret < 0)
2539             return ret;
2540
2541         *got_output = ret;
2542         return 0;
2543     }
2544
2545     s->ref = NULL;
2546     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2547     if (ret < 0)
2548         return ret;
2549
2550     /* verify the SEI checksum */
2551     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2552         avctx->err_recognition & AV_EF_EXPLODE &&
2553         s->is_md5) {
2554         ret = verify_md5(s, s->ref->frame);
2555         if (ret < 0) {
2556             ff_hevc_unref_frame(s, s->ref, ~0);
2557             return ret;
2558         }
2559     }
2560     s->is_md5 = 0;
2561
2562     if (s->is_decoded) {
2563         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2564         s->is_decoded = 0;
2565     }
2566
2567     if (s->output_frame->buf[0]) {
2568         av_frame_move_ref(data, s->output_frame);
2569         *got_output = 1;
2570     }
2571
2572     return avpkt->size;
2573 }
2574
2575 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2576 {
2577     int ret;
2578
2579     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2580     if (ret < 0)
2581         return ret;
2582
2583     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2584     if (!dst->tab_mvf_buf)
2585         goto fail;
2586     dst->tab_mvf = src->tab_mvf;
2587
2588     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2589     if (!dst->rpl_tab_buf)
2590         goto fail;
2591     dst->rpl_tab = src->rpl_tab;
2592
2593     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2594     if (!dst->rpl_buf)
2595         goto fail;
2596
2597     dst->poc        = src->poc;
2598     dst->ctb_count  = src->ctb_count;
2599     dst->window     = src->window;
2600     dst->flags      = src->flags;
2601     dst->sequence   = src->sequence;
2602
2603     return 0;
2604 fail:
2605     ff_hevc_unref_frame(s, dst, ~0);
2606     return AVERROR(ENOMEM);
2607 }
2608
2609 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2610 {
2611     HEVCContext       *s = avctx->priv_data;
2612     HEVCLocalContext *lc = s->HEVClc;
2613     int i;
2614
2615     pic_arrays_free(s);
2616
2617     if (lc)
2618         av_freep(&lc->edge_emu_buffer);
2619     av_freep(&s->md5_ctx);
2620
2621     for(i=0; i < s->nals_allocated; i++) {
2622         av_freep(&s->skipped_bytes_pos_nal[i]);
2623     }
2624     av_freep(&s->skipped_bytes_pos_size_nal);
2625     av_freep(&s->skipped_bytes_nal);
2626     av_freep(&s->skipped_bytes_pos_nal);
2627
2628     av_freep(&s->cabac_state);
2629
2630     av_frame_free(&s->tmp_frame);
2631     av_frame_free(&s->output_frame);
2632
2633     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2634         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2635         av_frame_free(&s->DPB[i].frame);
2636     }
2637
2638     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2639         av_freep(&s->vps_list[i]);
2640     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2641         av_buffer_unref(&s->sps_list[i]);
2642     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2643         av_buffer_unref(&s->pps_list[i]);
2644
2645     av_freep(&s->sh.entry_point_offset);
2646     av_freep(&s->sh.offset);
2647     av_freep(&s->sh.size);
2648
2649     for (i = 1; i < s->threads_number; i++) {
2650         lc = s->HEVClcList[i];
2651         if (lc) {
2652             av_freep(&lc->edge_emu_buffer);
2653
2654             av_freep(&s->HEVClcList[i]);
2655             av_freep(&s->sList[i]);
2656         }
2657     }
2658     av_freep(&s->HEVClcList[0]);
2659
2660     for (i = 0; i < s->nals_allocated; i++)
2661         av_freep(&s->nals[i].rbsp_buffer);
2662     av_freep(&s->nals);
2663     s->nals_allocated = 0;
2664
2665     return 0;
2666 }
2667
2668 static av_cold int hevc_init_context(AVCodecContext *avctx)
2669 {
2670     HEVCContext *s = avctx->priv_data;
2671     int i;
2672
2673     s->avctx = avctx;
2674
2675     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
2676     if (!s->HEVClc)
2677         goto fail;
2678     s->HEVClcList[0] = s->HEVClc;
2679     s->sList[0] = s;
2680
2681     s->cabac_state = av_malloc(HEVC_CONTEXTS);
2682     if (!s->cabac_state)
2683         goto fail;
2684
2685     s->tmp_frame = av_frame_alloc();
2686     if (!s->tmp_frame)
2687         goto fail;
2688
2689     s->output_frame = av_frame_alloc();
2690     if (!s->output_frame)
2691         goto fail;
2692
2693     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2694         s->DPB[i].frame = av_frame_alloc();
2695         if (!s->DPB[i].frame)
2696             goto fail;
2697         s->DPB[i].tf.f = s->DPB[i].frame;
2698     }
2699
2700     s->max_ra = INT_MAX;
2701
2702     s->md5_ctx = av_md5_alloc();
2703     if (!s->md5_ctx)
2704         goto fail;
2705
2706     ff_dsputil_init(&s->dsp, avctx);
2707
2708     s->context_initialized = 1;
2709
2710     return 0;
2711
2712 fail:
2713     hevc_decode_free(avctx);
2714     return AVERROR(ENOMEM);
2715 }
2716
2717 static int hevc_update_thread_context(AVCodecContext *dst,
2718                                       const AVCodecContext *src)
2719 {
2720     HEVCContext *s  = dst->priv_data;
2721     HEVCContext *s0 = src->priv_data;
2722     int i, ret;
2723
2724     if (!s->context_initialized) {
2725         ret = hevc_init_context(dst);
2726         if (ret < 0)
2727             return ret;
2728     }
2729
2730     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2731         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2732         if (s0->DPB[i].frame->buf[0]) {
2733             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2734             if (ret < 0)
2735                 return ret;
2736         }
2737     }
2738
2739     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
2740         av_buffer_unref(&s->sps_list[i]);
2741         if (s0->sps_list[i]) {
2742             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
2743             if (!s->sps_list[i])
2744                 return AVERROR(ENOMEM);
2745         }
2746     }
2747
2748     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
2749         av_buffer_unref(&s->pps_list[i]);
2750         if (s0->pps_list[i]) {
2751             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
2752             if (!s->pps_list[i])
2753                 return AVERROR(ENOMEM);
2754         }
2755     }
2756
2757     if (s->sps != s0->sps)
2758         ret = set_sps(s, s0->sps);
2759
2760     s->seq_decode = s0->seq_decode;
2761     s->seq_output = s0->seq_output;
2762     s->pocTid0    = s0->pocTid0;
2763     s->max_ra     = s0->max_ra;
2764
2765     s->is_nalff        = s0->is_nalff;
2766     s->nal_length_size = s0->nal_length_size;
2767
2768     s->threads_number      = s0->threads_number;
2769     s->threads_type        = s0->threads_type;
2770
2771     if (s0->eos) {
2772         s->seq_decode = (s->seq_decode + 1) & 0xff;
2773         s->max_ra = INT_MAX;
2774     }
2775
2776     return 0;
2777 }
2778
2779 static int hevc_decode_extradata(HEVCContext *s)
2780 {
2781     AVCodecContext *avctx = s->avctx;
2782     GetByteContext gb;
2783     int ret;
2784
2785     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2786
2787     if (avctx->extradata_size > 3 &&
2788         (avctx->extradata[0] || avctx->extradata[1] ||
2789          avctx->extradata[2] > 1)) {
2790         /* It seems the extradata is encoded as hvcC format.
2791          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2792          * finalized. When finalized, configurationVersion will be 1 and we
2793          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2794         int i, j, num_arrays, nal_len_size;
2795
2796         s->is_nalff = 1;
2797
2798         bytestream2_skip(&gb, 21);
2799         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2800         num_arrays   = bytestream2_get_byte(&gb);
2801
2802         /* nal units in the hvcC always have length coded with 2 bytes,
2803          * so put a fake nal_length_size = 2 while parsing them */
2804         s->nal_length_size = 2;
2805
2806         /* Decode nal units from hvcC. */
2807         for (i = 0; i < num_arrays; i++) {
2808             int type = bytestream2_get_byte(&gb) & 0x3f;
2809             int cnt  = bytestream2_get_be16(&gb);
2810
2811             for (j = 0; j < cnt; j++) {
2812                 // +2 for the nal size field
2813                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2814                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2815                     av_log(s->avctx, AV_LOG_ERROR,
2816                            "Invalid NAL unit size in extradata.\n");
2817                     return AVERROR_INVALIDDATA;
2818                 }
2819
2820                 ret = decode_nal_units(s, gb.buffer, nalsize);
2821                 if (ret < 0) {
2822                     av_log(avctx, AV_LOG_ERROR,
2823                            "Decoding nal unit %d %d from hvcC failed\n",
2824                            type, i);
2825                     return ret;
2826                 }
2827                 bytestream2_skip(&gb, nalsize);
2828             }
2829         }
2830
2831         /* Now store right nal length size, that will be used to parse
2832          * all other nals */
2833         s->nal_length_size = nal_len_size;
2834     } else {
2835         s->is_nalff = 0;
2836         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2837         if (ret < 0)
2838             return ret;
2839     }
2840     return 0;
2841 }
2842
2843 static av_cold int hevc_decode_init(AVCodecContext *avctx)
2844 {
2845     HEVCContext *s = avctx->priv_data;
2846     int ret;
2847
2848     ff_init_cabac_states();
2849
2850     avctx->internal->allocate_progress = 1;
2851
2852     ret = hevc_init_context(avctx);
2853     if (ret < 0)
2854         return ret;
2855
2856     s->enable_parallel_tiles = 0;
2857     s->picture_struct = 0;
2858
2859     if(avctx->active_thread_type & FF_THREAD_SLICE)
2860         s->threads_number = avctx->thread_count;
2861     else
2862         s->threads_number = 1;
2863
2864     if (avctx->extradata_size > 0 && avctx->extradata) {
2865         ret = hevc_decode_extradata(s);
2866         if (ret < 0) {
2867             hevc_decode_free(avctx);
2868             return ret;
2869         }
2870     }
2871
2872     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
2873             s->threads_type = FF_THREAD_FRAME;
2874         else
2875             s->threads_type = FF_THREAD_SLICE;
2876
2877     return 0;
2878 }
2879
2880 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
2881 {
2882     HEVCContext *s = avctx->priv_data;
2883     int ret;
2884
2885     memset(s, 0, sizeof(*s));
2886
2887     ret = hevc_init_context(avctx);
2888     if (ret < 0)
2889         return ret;
2890
2891     return 0;
2892 }
2893
2894 static void hevc_decode_flush(AVCodecContext *avctx)
2895 {
2896     HEVCContext *s = avctx->priv_data;
2897     ff_hevc_flush_dpb(s);
2898     s->max_ra = INT_MAX;
2899 }
2900
2901 #define OFFSET(x) offsetof(HEVCContext, x)
2902 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
2903 static const AVOption options[] = {
2904     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
2905         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
2906     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
2907         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
2908     { NULL },
2909 };
2910
2911 static const AVClass hevc_decoder_class = {
2912     .class_name = "HEVC decoder",
2913     .item_name  = av_default_item_name,
2914     .option     = options,
2915     .version    = LIBAVUTIL_VERSION_INT,
2916 };
2917
2918 AVCodec ff_hevc_decoder = {
2919     .name                  = "hevc",
2920     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
2921     .type                  = AVMEDIA_TYPE_VIDEO,
2922     .id                    = AV_CODEC_ID_HEVC,
2923     .priv_data_size        = sizeof(HEVCContext),
2924     .priv_class            = &hevc_decoder_class,
2925     .init                  = hevc_decode_init,
2926     .close                 = hevc_decode_free,
2927     .decode                = hevc_decode_frame,
2928     .flush                 = hevc_decode_flush,
2929     .update_thread_context = hevc_update_thread_context,
2930     .init_thread_copy      = hevc_init_thread_copy,
2931     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
2932                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
2933 };