]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc_filter.c
avformat/avio: Add Metacube support
[ffmpeg] / libavcodec / hevc_filter.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2013 Seppo Tomperi
6  * Copyright (C) 2013 Wassim Hamidouche
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 #include "libavutil/common.h"
26 #include "libavutil/internal.h"
27
28 #include "cabac_functions.h"
29 #include "hevcdec.h"
30
31 #include "bit_depth_template.c"
32
/* Colour-plane indices. LUMA is used to index frame->data[] and
 * frame->linesize[] in the deblocking code of this file; CB and CR
 * presumably name the two chroma planes (1 and 2) -- they are not
 * referenced in the part of the file reviewed here. */
33 #define LUMA 0
34 #define CB 1
35 #define CR 2
36
/*
 * Deblocking "tc" lookup table, 54 entries (valid indices 0..53).
 * Users in this file (chroma_tc() and the TC_CALC() macro) clip the
 * QP-derived index into the table range before reading it.
 */
37 static const uint8_t tctable[54] = {
38     0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 1, // QP  0...18
39     1, 1, 1, 1, 1, 1, 1,  1,  2,  2,  2,  2,  3,  3,  3,  3, 4, 4, 4, // QP 19...37
40     5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24           // QP 38...53
41 };
42
/*
 * Deblocking "beta" lookup table, 52 entries (valid indices 0..51).
 * deblocking_filter_CTB() reads it as
 * betatable[av_clip(qp + beta_offset, 0, MAX_QP)], so MAX_QP must not
 * exceed 51 (presumably it is exactly 51 -- defined outside this file).
 */
43 static const uint8_t betatable[52] = {
44      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  8, // QP 0...18
45      9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, // QP 19...37
46     38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64                      // QP 38...51
47 };
48
49 static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
50 {
51     static const int qp_c[] = {
52         29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
53     };
54     int qp, qp_i, offset, idxt;
55
56     // slice qp offset is not used for deblocking
57     if (c_idx == 1)
58         offset = s->ps.pps->cb_qp_offset;
59     else
60         offset = s->ps.pps->cr_qp_offset;
61
62     qp_i = av_clip(qp_y + offset, 0, 57);
63     if (s->ps.sps->chroma_format_idc == 1) {
64         if (qp_i < 30)
65             qp = qp_i;
66         else if (qp_i > 43)
67             qp = qp_i - 6;
68         else
69             qp = qp_c[qp_i - 30];
70     } else {
71         qp = av_clip(qp_i, 0, 51);
72     }
73
74     idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53);
75     return tctable[idxt];
76 }
77
78 static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
79 {
80     HEVCLocalContext *lc     = s->HEVClc;
81     int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
82     int MinCuQpDeltaSizeMask = (1 << (s->ps.sps->log2_ctb_size -
83                                       s->ps.pps->diff_cu_qp_delta_depth)) - 1;
84     int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask);
85     int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask);
86     int min_cb_width         = s->ps.sps->min_cb_width;
87     int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
88     int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
89     int availableA           = (xBase   & ctb_size_mask) &&
90                                (xQgBase & ctb_size_mask);
91     int availableB           = (yBase   & ctb_size_mask) &&
92                                (yQgBase & ctb_size_mask);
93     int qPy_pred, qPy_a, qPy_b;
94
95     // qPy_pred
96     if (lc->first_qp_group || (!xQgBase && !yQgBase)) {
97         lc->first_qp_group = !lc->tu.is_cu_qp_delta_coded;
98         qPy_pred = s->sh.slice_qp;
99     } else {
100         qPy_pred = lc->qPy_pred;
101     }
102
103     // qPy_a
104     if (availableA == 0)
105         qPy_a = qPy_pred;
106     else
107         qPy_a = s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width];
108
109     // qPy_b
110     if (availableB == 0)
111         qPy_b = qPy_pred;
112     else
113         qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];
114
115     av_assert2(qPy_a >= -s->ps.sps->qp_bd_offset && qPy_a < 52);
116     av_assert2(qPy_b >= -s->ps.sps->qp_bd_offset && qPy_b < 52);
117
118     return (qPy_a + qPy_b + 1) >> 1;
119 }
120
121 void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
122 {
123     int qp_y = get_qPy_pred(s, xBase, yBase, log2_cb_size);
124
125     if (s->HEVClc->tu.cu_qp_delta != 0) {
126         int off = s->ps.sps->qp_bd_offset;
127         s->HEVClc->qp_y = FFUMOD(qp_y + s->HEVClc->tu.cu_qp_delta + 52 + 2 * off,
128                                  52 + off) - off;
129     } else
130         s->HEVClc->qp_y = qp_y;
131 }
132
133 static int get_qPy(HEVCContext *s, int xC, int yC)
134 {
135     int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
136     int x                 = xC >> log2_min_cb_size;
137     int y                 = yC >> log2_min_cb_size;
138     return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
139 }
140
/**
 * Copy a rectangle of bytes row by row.
 *
 * Rows are copied in fixed-size chunks: 16 bytes at a time when both
 * pointers and both strides are multiples of 16, otherwise 8 bytes at a time
 * with unaligned accesses. Because of the chunking, up to one chunk minus
 * one byte beyond @p width may be read and written on every row, so both
 * buffers must tolerate that overrun.
 *
 * @param dst        destination of the first row
 * @param src        source of the first row
 * @param width      row length in bytes (callers pass pixels << pixel_shift)
 * @param height     number of rows
 * @param stride_dst byte distance between destination rows
 * @param stride_src byte distance between source rows
 */
static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
                     ptrdiff_t stride_dst, ptrdiff_t stride_src)
{
    /* decided once up front: with 16-byte aligned bases and strides every
     * row start stays 16-byte aligned */
    const int aligned16 = !(((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15);
    int row, col;

    for (row = 0; row < height; row++) {
        if (aligned16) {
            for (col = 0; col < width; col += 16)
                AV_COPY128(dst + col, src + col);
        } else {
            for (col = 0; col < width; col += 8)
                AV_COPY64U(dst + col, src + col);
        }
        dst += stride_dst;
        src += stride_src;
    }
}
162
/**
 * Copy a single pixel.
 *
 * The 16-bit case uses a fixed-size memcpy() instead of dereferencing
 * casted uint16_t pointers: that keeps the const qualifier of @p src and
 * makes no assumption about the alignment of either pointer.
 *
 * @param dst         destination pixel
 * @param src         source pixel; must not overlap @p dst
 * @param pixel_shift 0 for one byte per pixel, non-zero for two bytes
 */
static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
{
    if (pixel_shift)
        memcpy(dst, src, sizeof(uint16_t));
    else
        *dst = *src;
}
170
/**
 * Copy one pixel per row for @p height rows, i.e. a column of pixels.
 *
 * The 16-bit case uses a fixed-size memcpy() instead of dereferencing
 * casted uint16_t pointers: that keeps the const qualifier of @p src and
 * makes no assumption about the alignment of either pointer.
 *
 * @param dst         first destination pixel
 * @param src         first source pixel; the two columns must not overlap
 * @param pixel_shift 0 for one byte per pixel, non-zero for two bytes
 * @param height      number of pixels to copy
 * @param stride_dst  byte distance between consecutive destination pixels
 * @param stride_src  byte distance between consecutive source pixels
 */
static void copy_vert(uint8_t *dst, const uint8_t *src,
                      int pixel_shift, int height,
                      ptrdiff_t stride_dst, ptrdiff_t stride_src)
{
    int i;

    if (pixel_shift == 0) {
        for (i = 0; i < height; i++) {
            *dst = *src;
            dst += stride_dst;
            src += stride_src;
        }
    } else {
        for (i = 0; i < height; i++) {
            /* constant size so the compiler can turn it into a plain
             * 16-bit load/store */
            memcpy(dst, src, sizeof(uint16_t));
            dst += stride_dst;
            src += stride_src;
        }
    }
}
190
191 static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
192                            ptrdiff_t stride_src, int x, int y, int width, int height,
193                            int c_idx, int x_ctb, int y_ctb)
194 {
195     int sh = s->ps.sps->pixel_shift;
196     int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
197     int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
198
199     /* copy horizontal edges */
200     memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
201         src, width << sh);
202     memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),
203         src + stride_src * (height - 1), width << sh);
204
205     /* copy vertical edges */
206     copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);
207
208     copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
209 }
210
211 static void restore_tqb_pixels(HEVCContext *s,
212                                uint8_t *src1, const uint8_t *dst1,
213                                ptrdiff_t stride_src, ptrdiff_t stride_dst,
214                                int x0, int y0, int width, int height, int c_idx)
215 {
216     if ( s->ps.pps->transquant_bypass_enable_flag ||
217             (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
218         int x, y;
219         int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
220         int hshift       = s->ps.sps->hshift[c_idx];
221         int vshift       = s->ps.sps->vshift[c_idx];
222         int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size);
223         int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size);
224         int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size);
225         int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size);
226         int len          = (min_pu_size >> hshift) << s->ps.sps->pixel_shift;
227         for (y = y_min; y < y_max; y++) {
228             for (x = x_min; x < x_max; x++) {
229                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
230                     int n;
231                     uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
232                     const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
233                     for (n = 0; n < (min_pu_size >> vshift); n++) {
234                         memcpy(src, dst, len);
235                         src += stride_src;
236                         dst += stride_dst;
237                     }
238                 }
239             }
240         }
241     }
242 }
243
/* Entry of a per-CTB table for CTB column x, row y. The table is laid out
 * row-major with s->ps.sps->ctb_width entries per row. The macro expands to
 * an lvalue and needs a context pointer named `s` in scope where it is used. */
244 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
245
/**
 * Apply sample adaptive offset (SAO) to one CTB, writing into s->frame.
 *
 * For every colour plane (only plane 0 when chroma_format_idc is 0) the
 * CTB's SAO type selects band or edge filtering; any other type leaves the
 * plane untouched. Before a plane is filtered, its first/last rows and
 * columns are saved with copy_CTB_to_hv(); the edge path of neighbouring
 * CTBs later reads those saved samples instead of the frame whenever the
 * neighbour is already marked SAO_APPLIED. After filtering,
 * sao->type_idx[c_idx] is overwritten with SAO_APPLIED.
 *
 * @param s decoder context
 * @param x horizontal luma position of the CTB's top-left sample
 * @param y vertical luma position of the CTB's top-left sample
 */
246 static void sao_filter_CTB(HEVCContext *s, int x, int y)
247 {
    /* maps (width rounded up to a multiple of 8) / 8 - 1 to the index used
     * for the sao_band_filter[] / sao_edge_filter[] function tables */
248     static const uint8_t sao_tab[8] = { 0, 1, 2, 2, 3, 3, 4, 4 };
249     HEVCLocalContext *lc = s->HEVClc;
250     int c_idx;
251     int edges[4];  // 0 left 1 top 2 right 3 bottom
252     int x_ctb                = x >> s->ps.sps->log2_ctb_size;
253     int y_ctb                = y >> s->ps.sps->log2_ctb_size;
254     int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
255     int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
256     SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
257     // flags indicating unfilterable edges
258     uint8_t vert_edge[]      = { 0, 0 };
259     uint8_t horiz_edge[]     = { 0, 0 };
260     uint8_t diag_edge[]      = { 0, 0, 0, 0 };
261     uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
262     uint8_t no_tile_filter   = s->ps.pps->tiles_enabled_flag &&
263                                !s->ps.pps->loop_filter_across_tiles_enabled_flag;
    /* non-zero when some neighbour may have to be excluded from filtering;
     * also selects the sao_edge_restore[] variant further down */
264     uint8_t restore          = no_tile_filter || !lfase;
265     uint8_t left_tile_edge   = 0;
266     uint8_t right_tile_edge  = 0;
267     uint8_t up_tile_edge     = 0;
268     uint8_t bottom_tile_edge = 0;
269
    /* picture-border flags for this CTB */
270     edges[0]   = x_ctb == 0;
271     edges[1]   = y_ctb == 0;
272     edges[2]   = x_ctb == s->ps.sps->ctb_width  - 1;
273     edges[3]   = y_ctb == s->ps.sps->ctb_height - 1;
274
    /* For each existing neighbour (left, right, up, down, then the four
     * diagonals) flag the shared edge when the neighbour has a different
     * slice address while lfase is 0, or a different tile id while
     * no_tile_filter is set. */
275     if (restore) {
276         if (!edges[0]) {
277             left_tile_edge  = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
278             vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
279         }
280         if (!edges[2]) {
281             right_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
282             vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
283         }
284         if (!edges[1]) {
285             up_tile_edge     = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]];
286             horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
287         }
288         if (!edges[3]) {
289             bottom_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->ps.sps->ctb_width]];
290             horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
291         }
292         if (!edges[0] && !edges[1]) {
293             diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
294         }
295         if (!edges[1] && !edges[2]) {
296             diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
297         }
298         if (!edges[2] && !edges[3]) {
299             diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
300         }
301         if (!edges[0] && !edges[3]) {
302             diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
303         }
304     }
305
306     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
        /* x0/y0/width/height are in samples of the current plane; width
         * and height are cropped at the picture border */
307         int x0       = x >> s->ps.sps->hshift[c_idx];
308         int y0       = y >> s->ps.sps->vshift[c_idx];
309         ptrdiff_t stride_src = s->frame->linesize[c_idx];
310         int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
311         int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx];
312         int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> s->ps.sps->hshift[c_idx]) - x0);
313         int height   = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0);
314         int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
315         uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)];
316         ptrdiff_t stride_dst;
317         uint8_t *dst;
318
319         switch (sao->type_idx[c_idx]) {
320         case SAO_BAND:
321             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
322                            x_ctb, y_ctb);
            /* When bypassed blocks may exist, filter from a scratch copy in
             * edge_emu_buffer so that restore_tqb_pixels() can copy the
             * untouched samples of those blocks back into the frame;
             * otherwise the filter is called with the frame as both
             * arguments. */
            /* NOTE(review): restore_tqb_pixels() is given the luma position
             * (x, y) together with the plane-sized width/height, and adds
             * them before converting to PU units -- for subsampled chroma
             * this looks like it scans only part of the CTB; confirm. */
323             if (s->ps.pps->transquant_bypass_enable_flag ||
324                 (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
325             dst = lc->edge_emu_buffer;
326             stride_dst = 2*MAX_PB_SIZE;
327             copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src);
328             s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
329                                             sao->offset_val[c_idx], sao->band_position[c_idx],
330                                             width, height);
331             restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
332                                x, y, width, height, c_idx);
333             } else {
334             s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src,
335                                             sao->offset_val[c_idx], sao->band_position[c_idx],
336                                             width, height);
337             }
338             sao->type_idx[c_idx] = SAO_APPLIED;
339             break;
340         case SAO_EDGE:
341         {
342             int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
343             int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
344             int left_edge = edges[0];
345             int top_edge = edges[1];
346             int right_edge = edges[2];
347             int bottom_edge = edges[3];
348             int sh = s->ps.sps->pixel_shift;
349             int left_pixels, right_pixels;
350
            /* dst points one row and AV_INPUT_BUFFER_PADDING_SIZE bytes
             * into the scratch buffer, leaving room for the one-sample
             * border that is assembled around the CTB copy below */
351             stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
352             dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE;
353
            /* Row above the CTB (plus corner samples where a left/right
             * neighbour exists). Each piece is taken from the frame
             * (src1[0]) or, if the CTB it belongs to is already marked
             * SAO_APPLIED, from the saved line buffer (src1[1]). */
354             if (!top_edge) {
355                 int left = 1 - left_edge;
356                 int right = 1 - right_edge;
357                 const uint8_t *src1[2];
358                 uint8_t *dst1;
359                 int src_idx, pos;
360
361                 dst1 = dst - stride_dst - (left << sh);
362                 src1[0] = src - stride_src - (left << sh);
363                 src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
364                 pos = 0;
365                 if (left) {
366                     src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
367                                SAO_APPLIED);
368                     copy_pixel(dst1, src1[src_idx], sh);
369                     pos += (1 << sh);
370                 }
371                 src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
372                            SAO_APPLIED);
373                 memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
374                 if (right) {
375                     pos += width << sh;
376                     src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
377                                SAO_APPLIED);
378                     copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
379                 }
380             }
            /* Row below the CTB, assembled the same way. */
381             if (!bottom_edge) {
382                 int left = 1 - left_edge;
383                 int right = 1 - right_edge;
384                 const uint8_t *src1[2];
385                 uint8_t *dst1;
386                 int src_idx, pos;
387
388                 dst1 = dst + height * stride_dst - (left << sh);
389                 src1[0] = src + height * stride_src - (left << sh);
390                 src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
391                 pos = 0;
392                 if (left) {
393                     src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
394                                SAO_APPLIED);
395                     copy_pixel(dst1, src1[src_idx], sh);
396                     pos += (1 << sh);
397                 }
398                 src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
399                            SAO_APPLIED);
400                 memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
401                 if (right) {
402                     pos += width << sh;
403                     src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
404                                SAO_APPLIED);
405                     copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
406                 }
407             }
            /* Column left/right of the CTB: taken from the saved column
             * buffer when that neighbour is already SAO_APPLIED; otherwise
             * left_pixels/right_pixels widen the frame copy below by one
             * sample on that side. */
408             left_pixels = 0;
409             if (!left_edge) {
410                 if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
411                     copy_vert(dst - (1 << sh),
412                               s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
413                               sh, height, stride_dst, 1 << sh);
414                 } else {
415                     left_pixels = 1;
416                 }
417             }
418             right_pixels = 0;
419             if (!right_edge) {
420                 if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
421                     copy_vert(dst + (width << sh),
422                               s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
423                               sh, height, stride_dst, 1 << sh);
424                 } else {
425                     right_pixels = 1;
426                 }
427             }
428
429             copy_CTB(dst - (left_pixels << sh),
430                      src - (left_pixels << sh),
431                      (width + left_pixels + right_pixels) << sh,
432                      height, stride_dst, stride_src);
433
            /* the border samples must be saved before the frame is
             * modified by the filter call that follows */
434             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
435                            x_ctb, y_ctb);
436             s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx],
437                                             sao->eo_class[c_idx], width, height);
438             s->hevcdsp.sao_edge_restore[restore](src, dst,
439                                                 stride_src, stride_dst,
440                                                 sao,
441                                                 edges, width,
442                                                 height, c_idx,
443                                                 vert_edge,
444                                                 horiz_edge,
445                                                 diag_edge);
446             restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
447                                x, y, width, height, c_idx);
448             sao->type_idx[c_idx] = SAO_APPLIED;
449             break;
450         }
451         }
452     }
453 }
454
455 static int get_pcm(HEVCContext *s, int x, int y)
456 {
457     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
458     int x_pu, y_pu;
459
460     if (x < 0 || y < 0)
461         return 2;
462
463     x_pu = x >> log2_min_pu_size;
464     y_pu = y >> log2_min_pu_size;
465
466     if (x_pu >= s->ps.sps->min_pu_width || y_pu >= s->ps.sps->min_pu_height)
467         return 2;
468     return s->is_pcm[y_pu * s->ps.sps->min_pu_width + x_pu];
469 }
470
/*
 * Luma tc lookup for an edge with QP `qp` and boundary strength `bs`.
 * The table index is qp, plus DEFAULT_INTRA_TC_OFFSET when bs is 2 (the
 * factor (bs - 1) is 0 for bs == 1), plus tc_offset with its lowest bit
 * cleared, clipped to [0, MAX_QP + DEFAULT_INTRA_TC_OFFSET].
 * Reads a variable named `tc_offset` from the scope where it is expanded.
 */
471 #define TC_CALC(qp, bs)                                                 \
472     tctable[av_clip((qp) + DEFAULT_INTRA_TC_OFFSET * ((bs) - 1) +       \
473                     (tc_offset & -2),                                   \
474                     0, MAX_QP + DEFAULT_INTRA_TC_OFFSET)]
475
/**
 * Run the deblocking filter for the CTB whose top-left luma sample is
 * (x0, y0), modifying s->frame in place.
 *
 * Luma edges are visited on an 8x8 grid, one row of 8 luma lines at a time:
 * first the vertical edges inside this CTB, then the horizontal edges. The
 * horizontal pass is shifted left by 8 luma samples: it starts at x0 - 8
 * (inside the left CTB) and, unless the CTB reaches the right picture
 * border, stops 8 samples before x_end. Edges on the top picture border
 * (y == 0) and the left picture border (x == 0) are skipped.
 * Chroma edges are visited on a grid of 8*h x 8*v luma samples and only
 * filtered where the stored boundary strength equals 2.
 *
 * When PCM/transquant bypass may be in use (pcmf), per-side bypass flags
 * from get_pcm() are passed along and the *_c variants of the DSP
 * functions are called instead of the default ones.
 *
 * @param s  decoder context
 * @param x0 horizontal luma position of the CTB's top-left sample
 * @param y0 vertical luma position of the CTB's top-left sample
 */
476 static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
477 {
478     uint8_t *src;
479     int x, y;
480     int chroma, beta;
481     int32_t c_tc[2], tc[2];
482     uint8_t no_p[2] = { 0 };
483     uint8_t no_q[2] = { 0 };
484
485     int log2_ctb_size = s->ps.sps->log2_ctb_size;
486     int x_end, x_end2, y_end;
487     int ctb_size        = 1 << log2_ctb_size;
488     int ctb             = (x0 >> log2_ctb_size) +
489                           (y0 >> log2_ctb_size) * s->ps.sps->ctb_width;
490     int cur_tc_offset   = s->deblock[ctb].tc_offset;
491     int cur_beta_offset = s->deblock[ctb].beta_offset;
492     int left_tc_offset, left_beta_offset;
493     int tc_offset, beta_offset;
494     int pcmf = (s->ps.sps->pcm_enabled_flag &&
495                 s->ps.sps->pcm.loop_filter_disable_flag) ||
496                s->ps.pps->transquant_bypass_enable_flag;
497
    /* offsets of the CTB to the left, used for the part of the horizontal
     * pass that lies inside that CTB */
498     if (x0) {
499         left_tc_offset   = s->deblock[ctb - 1].tc_offset;
500         left_beta_offset = s->deblock[ctb - 1].beta_offset;
501     } else {
502         left_tc_offset   = 0;
503         left_beta_offset = 0;
504     }
505
506     x_end = x0 + ctb_size;
507     if (x_end > s->ps.sps->width)
508         x_end = s->ps.sps->width;
509     y_end = y0 + ctb_size;
510     if (y_end > s->ps.sps->height)
511         y_end = s->ps.sps->height;
512
    /* tc_offset (read implicitly by TC_CALC) and beta_offset start out as
     * the current CTB's values. */
    /* NOTE(review): both are reassigned inside the horizontal passes below
     * and never reset, so a later vertical pass uses whatever was stored
     * last -- possibly the left CTB's offsets. Confirm this carry-over is
     * intended before reordering anything in this function. */
513     tc_offset   = cur_tc_offset;
514     beta_offset = cur_beta_offset;
515
516     x_end2 = x_end;
517     if (x_end2 != s->ps.sps->width)
518         x_end2 -= 8;
519     for (y = y0; y < y_end; y += 8) {
520         // vertical filtering luma
521         for (x = x0 ? x0 : 8; x < x_end; x += 8) {
            /* boundary strengths of the upper and lower 4-line halves */
522             const int bs0 = s->vertical_bs[(x +  y      * s->bs_width) >> 2];
523             const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
524             if (bs0 || bs1) {
525                 const int qp = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1;
526
527                 beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
528
529                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
530                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
531                 src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
532                 if (pcmf) {
533                     no_p[0] = get_pcm(s, x - 1, y);
534                     no_p[1] = get_pcm(s, x - 1, y + 4);
535                     no_q[0] = get_pcm(s, x, y);
536                     no_q[1] = get_pcm(s, x, y + 4);
537                     s->hevcdsp.hevc_v_loop_filter_luma_c(src,
538                                                          s->frame->linesize[LUMA],
539                                                          beta, tc, no_p, no_q);
540                 } else
541                     s->hevcdsp.hevc_v_loop_filter_luma(src,
542                                                        s->frame->linesize[LUMA],
543                                                        beta, tc, no_p, no_q);
544             }
545         }
546
        /* no horizontal edge on the top picture border */
547         if(!y)
548              continue;
549
550         // horizontal filtering luma
551         for (x = x0 ? x0 - 8 : 0; x < x_end2; x += 8) {
            /* boundary strengths of the left and right 4-sample halves */
552             const int bs0 = s->horizontal_bs[( x      + y * s->bs_width) >> 2];
553             const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
554             if (bs0 || bs1) {
555                 const int qp = (get_qPy(s, x, y - 1)     + get_qPy(s, x, y)     + 1) >> 1;
556
                /* the segment starting at x0 - 8 lies in the left CTB and
                 * uses that CTB's offsets */
557                 tc_offset   = x >= x0 ? cur_tc_offset : left_tc_offset;
558                 beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
559
560                 beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
561                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
562                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
563                 src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
564                 if (pcmf) {
565                     no_p[0] = get_pcm(s, x, y - 1);
566                     no_p[1] = get_pcm(s, x + 4, y - 1);
567                     no_q[0] = get_pcm(s, x, y);
568                     no_q[1] = get_pcm(s, x + 4, y);
569                     s->hevcdsp.hevc_h_loop_filter_luma_c(src,
570                                                          s->frame->linesize[LUMA],
571                                                          beta, tc, no_p, no_q);
572                 } else
573                     s->hevcdsp.hevc_h_loop_filter_luma(src,
574                                                        s->frame->linesize[LUMA],
575                                                        beta, tc, no_p, no_q);
576             }
577         }
578     }
579
580     if (s->ps.sps->chroma_format_idc) {
581         for (chroma = 1; chroma <= 2; chroma++) {
            /* subsampling factors: one chroma sample spans h x v luma samples */
582             int h = 1 << s->ps.sps->hshift[chroma];
583             int v = 1 << s->ps.sps->vshift[chroma];
584
585             // vertical filtering chroma
586             for (y = y0; y < y_end; y += (8 * v)) {
587                 for (x = x0 ? x0 : 8 * h; x < x_end; x += (8 * h)) {
588                     const int bs0 = s->vertical_bs[(x +  y            * s->bs_width) >> 2];
589                     const int bs1 = s->vertical_bs[(x + (y + (4 * v)) * s->bs_width) >> 2];
590
591                     if ((bs0 == 2) || (bs1 == 2)) {
592                         const int qp0 = (get_qPy(s, x - 1, y)           + get_qPy(s, x, y)           + 1) >> 1;
593                         const int qp1 = (get_qPy(s, x - 1, y + (4 * v)) + get_qPy(s, x, y + (4 * v)) + 1) >> 1;
594
                        /* NOTE(review): tc_offset here is whatever the
                         * preceding luma or chroma horizontal pass left
                         * behind, not necessarily cur_tc_offset -- confirm. */
595                         c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
596                         c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
597                         src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[chroma]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[chroma]) << s->ps.sps->pixel_shift)];
598                         if (pcmf) {
599                             no_p[0] = get_pcm(s, x - 1, y);
600                             no_p[1] = get_pcm(s, x - 1, y + (4 * v));
601                             no_q[0] = get_pcm(s, x, y);
602                             no_q[1] = get_pcm(s, x, y + (4 * v));
603                             s->hevcdsp.hevc_v_loop_filter_chroma_c(src,
604                                                                    s->frame->linesize[chroma],
605                                                                    c_tc, no_p, no_q);
606                         } else
607                             s->hevcdsp.hevc_v_loop_filter_chroma(src,
608                                                                  s->frame->linesize[chroma],
609                                                                  c_tc, no_p, no_q);
610                     }
611                 }
612
613                 if(!y)
614                     continue;
615
616                 // horizontal filtering chroma
                /* NOTE(review): for x0 != 0 the first half-segment (c_tc[0])
                 * uses the left CTB's offset at every x of the loop and the
                 * second half (c_tc[1]) always uses the current CTB's;
                 * confirm this split is intended for all chroma formats. */
617                 tc_offset = x0 ? left_tc_offset : cur_tc_offset;
618                 x_end2 = x_end;
619                 if (x_end != s->ps.sps->width)
620                     x_end2 = x_end - 8 * h;
621                 for (x = x0 ? x0 - 8 * h : 0; x < x_end2; x += (8 * h)) {
622                     const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
623                     const int bs1 = s->horizontal_bs[((x + 4 * h) + y * s->bs_width) >> 2];
624                     if ((bs0 == 2) || (bs1 == 2)) {
625                         const int qp0 = bs0 == 2 ? (get_qPy(s, x,           y - 1) + get_qPy(s, x,           y) + 1) >> 1 : 0;
626                         const int qp1 = bs1 == 2 ? (get_qPy(s, x + (4 * h), y - 1) + get_qPy(s, x + (4 * h), y) + 1) >> 1 : 0;
627
628                         c_tc[0]   = bs0 == 2 ? chroma_tc(s, qp0, chroma, tc_offset)     : 0;
629                         c_tc[1]   = bs1 == 2 ? chroma_tc(s, qp1, chroma, cur_tc_offset) : 0;
                        /* NOTE(review): indexes vshift[1]/hshift[1] where the
                         * vertical pass uses [chroma]; equivalent only if both
                         * chroma planes share the same subsampling. */
630                         src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[1]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
631                         if (pcmf) {
632                             no_p[0] = get_pcm(s, x,           y - 1);
633                             no_p[1] = get_pcm(s, x + (4 * h), y - 1);
634                             no_q[0] = get_pcm(s, x,           y);
635                             no_q[1] = get_pcm(s, x + (4 * h), y);
636                             s->hevcdsp.hevc_h_loop_filter_chroma_c(src,
637                                                                    s->frame->linesize[chroma],
638                                                                    c_tc, no_p, no_q);
639                         } else
640                             s->hevcdsp.hevc_h_loop_filter_chroma(src,
641                                                                  s->frame->linesize[chroma],
642                                                                  c_tc, no_p, no_q);
643                     }
644                 }
645             }
646         }
647     }
648 }
649
650 static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh,
651                              RefPicList *neigh_refPicList)
652 {
653     if (curr->pred_flag == PF_BI &&  neigh->pred_flag == PF_BI) {
654         // same L0 and L1
655         if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]]  &&
656             s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] &&
657             neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) {
658             if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
659                  FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) &&
660                 (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
661                  FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4))
662                 return 1;
663             else
664                 return 0;
665         } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
666                    neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
667             if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
668                 FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4)
669                 return 1;
670             else
671                 return 0;
672         } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
673                    neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
674             if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
675                 FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)
676                 return 1;
677             else
678                 return 0;
679         } else {
680             return 1;
681         }
682     } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV
683         Mv A, B;
684         int ref_A, ref_B;
685
686         if (curr->pred_flag & 1) {
687             A     = curr->mv[0];
688             ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]];
689         } else {
690             A     = curr->mv[1];
691             ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]];
692         }
693
694         if (neigh->pred_flag & 1) {
695             B     = neigh->mv[0];
696             ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]];
697         } else {
698             B     = neigh->mv[1];
699             ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]];
700         }
701
702         if (ref_A == ref_B) {
703             if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4)
704                 return 1;
705             else
706                 return 0;
707         } else
708             return 1;
709     }
710
711     return 1;
712 }
713
714 void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
715                                            int log2_trafo_size)
716 {
717     HEVCLocalContext *lc = s->HEVClc;
718     MvField *tab_mvf     = s->ref->tab_mvf;
719     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
720     int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
721     int min_pu_width     = s->ps.sps->min_pu_width;
722     int min_tu_width     = s->ps.sps->min_tb_width;
723     int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width +
724                            (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
725     int boundary_upper, boundary_left;
726     int i, j, bs;
727
728     boundary_upper = y0 > 0 && !(y0 & 7);
729     if (boundary_upper &&
730         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
731           lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
732           (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
733          (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
734           lc->boundary_flags & BOUNDARY_UPPER_TILE &&
735           (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
736         boundary_upper = 0;
737
738     if (boundary_upper) {
739         RefPicList *rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ?
740                               ff_hevc_get_ref_list(s, s->ref, x0, y0 - 1) :
741                               s->ref->refPicList;
742         int yp_pu = (y0 - 1) >> log2_min_pu_size;
743         int yq_pu =  y0      >> log2_min_pu_size;
744         int yp_tu = (y0 - 1) >> log2_min_tu_size;
745         int yq_tu =  y0      >> log2_min_tu_size;
746
747             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
748                 int x_pu = (x0 + i) >> log2_min_pu_size;
749                 int x_tu = (x0 + i) >> log2_min_tu_size;
750                 MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
751                 MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
752                 uint8_t top_cbf_luma  = s->cbf_luma[yp_tu * min_tu_width + x_tu];
753                 uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu];
754
755                 if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA)
756                     bs = 2;
757                 else if (curr_cbf_luma || top_cbf_luma)
758                     bs = 1;
759                 else
760                     bs = boundary_strength(s, curr, top, rpl_top);
761                 s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs;
762             }
763     }
764
765     // bs for vertical TU boundaries
766     boundary_left = x0 > 0 && !(x0 & 7);
767     if (boundary_left &&
768         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
769           lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
770           (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
771          (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
772           lc->boundary_flags & BOUNDARY_LEFT_TILE &&
773           (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
774         boundary_left = 0;
775
776     if (boundary_left) {
777         RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ?
778                                ff_hevc_get_ref_list(s, s->ref, x0 - 1, y0) :
779                                s->ref->refPicList;
780         int xp_pu = (x0 - 1) >> log2_min_pu_size;
781         int xq_pu =  x0      >> log2_min_pu_size;
782         int xp_tu = (x0 - 1) >> log2_min_tu_size;
783         int xq_tu =  x0      >> log2_min_tu_size;
784
785             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
786                 int y_pu      = (y0 + i) >> log2_min_pu_size;
787                 int y_tu      = (y0 + i) >> log2_min_tu_size;
788                 MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
789                 MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
790                 uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
791                 uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
792
793                 if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA)
794                     bs = 2;
795                 else if (curr_cbf_luma || left_cbf_luma)
796                     bs = 1;
797                 else
798                     bs = boundary_strength(s, curr, left, rpl_left);
799                 s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs;
800             }
801     }
802
803     if (log2_trafo_size > log2_min_pu_size && !is_intra) {
804         RefPicList *rpl = s->ref->refPicList;
805
806         // bs for TU internal horizontal PU boundaries
807         for (j = 8; j < (1 << log2_trafo_size); j += 8) {
808             int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
809             int yq_pu = (y0 + j)     >> log2_min_pu_size;
810
811             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
812                 int x_pu = (x0 + i) >> log2_min_pu_size;
813                 MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
814                 MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
815
816                 bs = boundary_strength(s, curr, top, rpl);
817                 s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
818             }
819         }
820
821         // bs for TU internal vertical PU boundaries
822         for (j = 0; j < (1 << log2_trafo_size); j += 4) {
823             int y_pu = (y0 + j) >> log2_min_pu_size;
824
825             for (i = 8; i < (1 << log2_trafo_size); i += 8) {
826                 int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
827                 int xq_pu = (x0 + i)     >> log2_min_pu_size;
828                 MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
829                 MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
830
831                 bs = boundary_strength(s, curr, left, rpl);
832                 s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
833             }
834         }
835     }
836 }
837
838 #undef LUMA
839 #undef CB
840 #undef CR
841
842 void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
843 {
844     int x_end = x >= s->ps.sps->width  - ctb_size;
845     int skip = 0;
846     if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
847         (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && !IS_IDR(s)) ||
848         (s->avctx->skip_loop_filter >= AVDISCARD_NONINTRA &&
849          s->sh.slice_type != HEVC_SLICE_I) ||
850         (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR &&
851          s->sh.slice_type == HEVC_SLICE_B) ||
852         (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
853         ff_hevc_nal_is_nonref(s->nal_unit_type)))
854         skip = 1;
855
856     if (!skip)
857         deblocking_filter_CTB(s, x, y);
858     if (s->ps.sps->sao_enabled && !skip) {
859         int y_end = y >= s->ps.sps->height - ctb_size;
860         if (y && x)
861             sao_filter_CTB(s, x - ctb_size, y - ctb_size);
862         if (x && y_end)
863             sao_filter_CTB(s, x - ctb_size, y);
864         if (y && x_end) {
865             sao_filter_CTB(s, x, y - ctb_size);
866             if (s->threads_type & FF_THREAD_FRAME )
867                 ff_thread_report_progress(&s->ref->tf, y, 0);
868         }
869         if (x_end && y_end) {
870             sao_filter_CTB(s, x , y);
871             if (s->threads_type & FF_THREAD_FRAME )
872                 ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0);
873         }
874     } else if (s->threads_type & FF_THREAD_FRAME && x_end)
875         ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0);
876 }
877
878 void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size)
879 {
880     int x_end = x_ctb >= s->ps.sps->width  - ctb_size;
881     int y_end = y_ctb >= s->ps.sps->height - ctb_size;
882     if (y_ctb && x_ctb)
883         ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
884     if (y_ctb && x_end)
885         ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size);
886     if (x_ctb && y_end)
887         ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size);
888 }