4 * Copyright (C) 2012 - 2013 Guillaume Martres
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/pixdesc.h"
25 #include "bit_depth_template.c"
/* Sample at column x, row y relative to `src`, using `stride` as the row pitch
 * in elements of `src`. Both `src` and `stride` must be in scope wherever the
 * macro is expanded. */
28 #define POS(x, y) src[(x) + stride * (y)]
/**
 * Build the left/top reference sample arrays for one intra-predicted block of
 * plane c_idx and hand them to the planar, DC or angular predictor in s->hpc.
 *
 * @param s         decoder context; reads s->ps.sps, s->ps.pps, s->ref,
 *                  s->frame, s->HEVClc and s->hpc
 * @param x0        block position; compared against sps->width and shifted by
 *                  log2_min_tb_size / log2_min_pu_size
 * @param y0        block position; compared against sps->height, same scaling
 * @param log2_size log2 of the block size in samples of this plane
 *                  (size = 1 << log2_size)
 * @param c_idx     plane index; selects hshift/vshift, linesize, data plane
 *                  and which prediction mode of lc->tu is used
 */
30 static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
31 int log2_size, int c_idx)
34 ((x) >> s->ps.sps->log2_min_pu_size)
36 (s->ref->tab_mvf[(x) + (y) * min_pu_width])
// MVF_PU: scale a plane-sample offset (x, y) by the chroma shifts, add it to
// (x0, y0), map both coordinates through PU() and fetch the entry via MVF().
37 #define MVF_PU(x, y) \
38 MVF(PU(x0 + ((x) * (1 << hshift))), PU(y0 + ((y) * (1 << vshift))))
// IS_INTRA: true when the entry found by MVF_PU has pred_flag == PF_INTRA.
39 #define IS_INTRA(x, y) \
40 (MVF_PU(x, y).pred_flag == PF_INTRA)
// MIN_TB_ADDR_ZS: lookup in pps->min_tb_addr_zs with a row pitch of tb_mask + 2.
41 #define MIN_TB_ADDR_ZS(x, y) \
42 s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
// EXTEND: store the 4-pixel splat of `val` at ptr[0], ptr[4], ... while
// i < len, so writes are rounded up to a multiple of four samples and a
// non-positive len writes nothing. Uses the enclosing function's `i`.
// The EXTEND_*_CIP macros that follow walk a reference row/column in the named
// direction, test IS_INTRA for each position, and use the running value `a`
// (declared by the enclosing function) when storing into `ptr`.
43 #define EXTEND(ptr, val, len) \
45 pixel4 pix = PIXEL_SPLAT_X4(val); \
46 for (i = 0; i < (len); i += 4) \
47 AV_WN4P(ptr + i, pix); \
50 #define EXTEND_RIGHT_CIP(ptr, start, length) \
51 for (i = start; i < (start) + (length); i += 4) \
52 if (!IS_INTRA(i, -1)) \
53 AV_WN4P(&ptr[i], a); \
55 a = PIXEL_SPLAT_X4(ptr[i+3])
56 #define EXTEND_LEFT_CIP(ptr, start, length) \
57 for (i = start; i > (start) - (length); i--) \
58 if (!IS_INTRA(i - 1, -1)) \
60 #define EXTEND_UP_CIP(ptr, start, length) \
61 for (i = (start); i > (start) - (length); i -= 4) \
62 if (!IS_INTRA(-1, i - 3)) \
63 AV_WN4P(&ptr[i - 3], a); \
65 a = PIXEL_SPLAT_X4(ptr[i - 3])
66 #define EXTEND_DOWN_CIP(ptr, start, length) \
67 for (i = start; i < (start) + (length); i += 4) \
68 if (!IS_INTRA(-1, i)) \
69 AV_WN4P(&ptr[i], a); \
71 a = PIXEL_SPLAT_X4(ptr[i + 3])
73 HEVCLocalContext *lc = s->HEVClc;
// Block geometry: `size` is in samples of this plane, size_in_luma_* scales it
// by the chroma shift, size_in_tbs_* expresses that in minimum-TB units.
75 int hshift = s->ps.sps->hshift[c_idx];
76 int vshift = s->ps.sps->vshift[c_idx];
77 int size = (1 << log2_size);
78 int size_in_luma_h = size << hshift;
79 int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
80 int size_in_luma_v = size << vshift;
81 int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
// Position of the block in minimum-TB units, wrapped with tb_mask, and the
// address of that TB taken from min_tb_addr_zs.
84 int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
85 int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
87 int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
// linesize is divided by sizeof(pixel), so `stride` counts pixels, not bytes.
89 ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
90 pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;
92 int min_pu_width = s->ps.sps->min_pu_width;
// Chroma planes (c_idx != 0) use the chroma mode, plane 0 the luma mode.
94 enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
95 lc->tu.intra_pred_mode;
// Reference buffers hold 2 * MAX_TB_SIZE samples plus one extra slot; the
// pointers below are offset by one so that index -1 (the corner sample) is valid.
97 pixel left_array[2 * MAX_TB_SIZE + 1];
98 pixel filtered_left_array[2 * MAX_TB_SIZE + 1];
99 pixel top_array[2 * MAX_TB_SIZE + 1];
100 pixel filtered_top_array[2 * MAX_TB_SIZE + 1];
102 pixel *left = left_array + 1;
103 pixel *top = top_array + 1;
104 pixel *filtered_left = filtered_left_array + 1;
105 pixel *filtered_top = filtered_top_array + 1;
// Neighbour availability starts from lc->na. Bottom-left and up-right also
// require that the neighbouring minimum TB has a smaller min_tb_addr_zs
// address than the current one (presumably: it has already been decoded).
106 int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->ps.sps->tb_mask);
107 int cand_left = lc->na.cand_left;
108 int cand_up_left = lc->na.cand_up_left;
109 int cand_up = lc->na.cand_up;
110 int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask, y_tb - 1);
// Number of below-left rows / above-right columns that still lie inside
// sps->height / sps->width, converted back to samples of this plane.
112 int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->ps.sps->height) -
113 (y0 + size_in_luma_v)) >> vshift;
114 int top_right_size = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) -
115 (x0 + size_in_luma_h)) >> hshift;
// Constrained intra prediction: availability flags are re-derived by OR-ing
// PF_INTRA tests over the neighbouring PUs (stepping two PUs at a time),
// with the scan length clipped to the picture size in PU units.
117 if (s->ps.pps->constrained_intra_pred_flag == 1) {
118 int size_in_luma_pu_v = PU(size_in_luma_v);
119 int size_in_luma_pu_h = PU(size_in_luma_h);
// on_pu_edge_*: the coordinate has no remainder modulo 2^log2_min_pu_size.
120 int on_pu_edge_x = !av_mod_uintp2(x0, s->ps.sps->log2_min_pu_size);
121 int on_pu_edge_y = !av_mod_uintp2(y0, s->ps.sps->log2_min_pu_size);
122 if (!size_in_luma_pu_h)
124 if (cand_bottom_left == 1 && on_pu_edge_x) {
125 int x_left_pu = PU(x0 - 1);
126 int y_bottom_pu = PU(y0 + size_in_luma_v);
127 int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_bottom_pu);
128 cand_bottom_left = 0;
129 for (i = 0; i < max; i += 2)
130 cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
132 if (cand_left == 1 && on_pu_edge_x) {
133 int x_left_pu = PU(x0 - 1);
134 int y_left_pu = PU(y0);
135 int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_left_pu);
137 for (i = 0; i < max; i += 2)
138 cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
140 if (cand_up_left == 1) {
141 int x_left_pu = PU(x0 - 1);
142 int y_top_pu = PU(y0 - 1);
143 cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
145 if (cand_up == 1 && on_pu_edge_y) {
146 int x_top_pu = PU(x0);
147 int y_top_pu = PU(y0 - 1);
148 int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_top_pu);
150 for (i = 0; i < max; i += 2)
151 cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
153 if (cand_up_right == 1 && on_pu_edge_y) {
154 int y_top_pu = PU(y0 - 1);
155 int x_right_pu = PU(x0 + size_in_luma_h);
156 int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_right_pu);
158 for (i = 0; i < max; i += 2)
159 cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
// Pre-fill both reference buffers; memset writes the byte value 128 into
// every byte of the 2 * MAX_TB_SIZE pixels.
161 memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel));
162 memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel));
// Gather reference samples from the frame: the corner, the row above, the
// above-right run, the left column and the below-left run. Runs that are cut
// short by the picture edge are padded with their last in-picture sample.
166 left[-1] = POS(-1, -1);
170 memcpy(top, src - stride, size * sizeof(pixel));
172 memcpy(top + size, src - stride + size, size * sizeof(pixel));
173 EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
174 size - top_right_size);
177 for (i = 0; i < size; i++)
178 left[i] = POS(-1, i);
179 if (cand_bottom_left) {
180 for (i = size; i < size + bottom_left_size; i++)
181 left[i] = POS(-1, i);
182 EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
183 size - bottom_left_size);
// Constrained intra prediction, sample substitution: positions whose PU fails
// IS_INTRA are overwritten through the EXTEND_*_CIP helpers.
186 if (s->ps.pps->constrained_intra_pred_flag == 1) {
187 if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
// size_max_x / size_max_y: length of the top / left reference run, limited to
// what fits inside the picture; shortened to `size` when the up-right /
// bottom-left neighbour is not usable.
188 int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
189 2 * size : (s->ps.sps->width - x0) >> hshift;
190 int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
191 2 * size : (s->ps.sps->height - y0) >> vshift;
192 int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
193 if (!cand_up_right) {
194 size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
195 size : (s->ps.sps->width - x0) >> hshift;
197 if (!cand_bottom_left) {
198 size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
199 size : (s->ps.sps->height - y0) >> vshift;
// Search for an intra-coded reference position, starting from the bottom of
// the left column (index j) and, failing that, along the top row.
201 if (cand_bottom_left || cand_left || cand_up_left) {
202 while (j > -1 && !IS_INTRA(-1, j))
204 if (!IS_INTRA(-1, j)) {
206 while (j < size_max_x && !IS_INTRA(j, -1))
208 EXTEND_LEFT_CIP(top, j, j + 1);
213 while (j < size_max_x && !IS_INTRA(j, -1))
217 EXTEND_LEFT_CIP(top, j, j + 1);
219 EXTEND_LEFT_CIP(top, j, j);
225 if (cand_bottom_left || cand_left) {
226 a = PIXEL_SPLAT_X4(left[-1]);
227 EXTEND_DOWN_CIP(left, 0, size_max_y);
230 EXTEND(left, left[-1], size);
231 if (!cand_bottom_left)
232 EXTEND(left + size, left[size - 1], size);
233 if (x0 != 0 && y0 != 0) {
234 a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
235 EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
236 if (!IS_INTRA(-1, - 1))
238 } else if (x0 == 0) {
239 EXTEND(left, 0, size_max_y);
241 a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
242 EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
246 a = PIXEL_SPLAT_X4(left[-1]);
247 EXTEND_RIGHT_CIP(top, 0, size_max_x);
251 // Infer the unavailable samples
252 if (!cand_bottom_left) {
254 EXTEND(left + size, left[size - 1], size);
255 } else if (cand_up_left) {
256 EXTEND(left, left[-1], 2 * size);
258 } else if (cand_up) {
260 EXTEND(left, left[-1], 2 * size);
263 } else if (cand_up_right) {
264 EXTEND(top, top[size], size);
265 left[-1] = top[size];
266 EXTEND(left, left[-1], 2 * size);
270 } else { // No samples available
// With nothing to copy from, every reference sample becomes the mid-range
// value 1 << (BIT_DEPTH - 1).
271 left[-1] = (1 << (BIT_DEPTH - 1));
272 EXTEND(top, left[-1], 2 * size);
273 EXTEND(left, left[-1], 2 * size);
278 EXTEND(left, left[size], size);
283 EXTEND(top, left[-1], size);
285 EXTEND(top + size, top[size - 1], size);
// Reference smoothing: skipped when the SPS disables it, and applied only to
// plane 0 or when chroma_format_idc == 3. DC mode and size 4 are never filtered.
290 if (!s->ps.sps->intra_smoothing_disabled_flag && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) {
291 if (mode != INTRA_DC && size != 4){
// Filter only when the mode is far enough from modes 26 and 10; the minimum
// distance allowed is looked up by log2_size - 3.
292 int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
293 int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
294 FFABS((int)(mode - 10U)));
295 if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
// Strong smoothing: when both reference runs are close to linear (second
// difference around index 31 below `threshold`), replace samples 0..62 by a
// linear blend between the corner sample and sample 63.
296 int threshold = 1 << (BIT_DEPTH - 5);
297 if (s->ps.sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
299 FFABS(top[-1] + top[63] - 2 * top[31]) < threshold &&
300 FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
301 // We can't just overwrite values in top because it could be
302 // a pointer into src
303 filtered_top[-1] = top[-1];
304 filtered_top[63] = top[63];
305 for (i = 0; i < 63; i++)
306 filtered_top[i] = ((64 - (i + 1)) * top[-1] +
307 (i + 1) * top[63] + 32) >> 6;
308 for (i = 0; i < 63; i++)
309 left[i] = ((64 - (i + 1)) * left[-1] +
310 (i + 1) * left[63] + 32) >> 6;
// Regular smoothing: (1, 2, 1) / 4 filter with rounding along both runs. The
// last sample of each run is copied unfiltered and the corner sample is
// filtered across left[0] and top[0].
313 filtered_left[2 * size - 1] = left[2 * size - 1];
314 filtered_top[2 * size - 1] = top[2 * size - 1];
315 for (i = 2 * size - 2; i >= 0; i--)
316 filtered_left[i] = (left[i + 1] + 2 * left[i] +
317 left[i - 1] + 2) >> 2;
319 filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
320 for (i = 2 * size - 2; i >= 0; i--)
321 filtered_top[i] = (top[i + 1] + 2 * top[i] +
322 top[i - 1] + 2) >> 2;
323 left = filtered_left;
// Run the predictor through the s->hpc function pointers. Planar and angular
// are tables indexed by log2_size - 2; DC receives log2_size as an argument.
332 s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
333 (uint8_t *)left, stride);
336 s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
337 (uint8_t *)left, stride, log2_size, c_idx);
340 s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
341 (uint8_t *)left, stride, c_idx,
/* Generates intra_pred_<size>(), a fixed-size entry point that forwards to
 * intra_pred() with `size` passed in the log2_size argument position. */
347 #define INTRA_PRED(size) \
348 static void FUNC(intra_pred_ ## size)(HEVCContext *s, int x0, int y0, int c_idx) \
350 FUNC(intra_pred)(s, x0, y0, size, c_idx); \
/**
 * Planar prediction of a size x size block, where size = 1 << trafo_size.
 *
 * Each output sample is a weighted sum of left[y], top[x] and the two
 * samples just past the block edge (top[size] and left[size]), rounded by
 * adding `size` and shifted right by trafo_size + 1.
 *
 * @param _src   destination block, reinterpreted as pixel *
 * @param _top   reference row above, reinterpreted as const pixel *;
 *               indices 0..size are read
 * @param _left  reference column to the left, reinterpreted as const pixel *;
 *               indices 0..size are read
 * @param stride row pitch of the destination, in pixels (used by POS)
 */
360 static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
361 const uint8_t *_left, ptrdiff_t stride,
365 pixel *src = (pixel *)_src;
366 const pixel *top = (const pixel *)_top;
367 const pixel *left = (const pixel *)_left;
368 int size = 1 << trafo_size;
369 for (y = 0; y < size; y++)
370 for (x = 0; x < size; x++)
371 POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size] +
372 (size - 1 - y) * top[x] + (y + 1) * left[size] + size) >> (trafo_size + 1);
/* Generates pred_planar_<size>(), which calls pred_planar() with
 * trafo_size = size + 2. */
375 #define PRED_PLANAR(size)\
376 static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top, \
377 const uint8_t *left, ptrdiff_t stride) \
379 FUNC(pred_planar)(src, top, left, stride, size + 2); \
/**
 * DC prediction of a size x size block, where size = 1 << log2_size.
 *
 * @param _src      destination block, reinterpreted as pixel *
 * @param _top      reference row above, reinterpreted as const pixel *
 * @param _left     reference column to the left, reinterpreted as const pixel *
 * @param stride    row pitch of the destination, in pixels (used by POS)
 * @param log2_size log2 of the block size
 * @param c_idx     plane index; edge smoothing is applied only when it is 0
 */
389 static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
390 const uint8_t *_left,
391 ptrdiff_t stride, int log2_size, int c_idx)
394 int size = (1 << log2_size);
395 pixel *src = (pixel *)_src;
396 const pixel *top = (const pixel *)_top;
397 const pixel *left = (const pixel *)_left;
// Accumulate the first `size` samples of both reference runs into dc.
400 for (i = 0; i < size; i++)
401 dc += left[i] + top[i];
// 2 * size samples were added, hence the shift by log2_size + 1.
403 dc >>= log2_size + 1;
405 a = PIXEL_SPLAT_X4(dc);
// Flood the whole block with dc, four pixels per store.
407 for (i = 0; i < size; i++)
408 for (j = 0; j < size; j+=4)
409 AV_WN4P(&POS(j, i), a);
// For plane 0 and blocks smaller than 32, blend the first row and first
// column with the adjacent reference samples (weights 1:3, or 1:2:1 for the
// top-left sample).
411 if (c_idx == 0 && size < 32) {
412 POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
413 for (x = 1; x < size; x++)
414 POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;
415 for (y = 1; y < size; y++)
416 POS(0, y) = (left[y] + 3 * dc + 2) >> 2;
/**
 * Angular prediction of a size x size block.
 *
 * The body contains two symmetric paths. One builds its reference from `top`
 * and fills the block row by row; it carries the mode 26 edge adjustment. The
 * other builds it from `left` and fills column by column; it carries the
 * mode 10 edge adjustment. NOTE(review): the path is presumably selected by
 * `mode` -- confirm against the full function.
 *
 * @param _src   destination block, reinterpreted as pixel *
 * @param _left  reference column to the left, reinterpreted as const pixel *
 * @param stride row pitch of the destination, in pixels (used by POS)
 * @param c_idx  plane index; the edge adjustments apply only when it is 0
 */
420 static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
422 const uint8_t *_left,
423 ptrdiff_t stride, int c_idx,
427 pixel *src = (pixel *)_src;
428 const pixel *top = (const pixel *)_top;
429 const pixel *left = (const pixel *)_left;
// 33 entries, indexed by mode - 2 (modes 2..34).
431 static const int intra_pred_angle[] = {
432 32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
433 -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32
// 15 entries, indexed by mode - 11 (modes 11..25, the ones whose
// intra_pred_angle entry is negative).
435 static const int inv_angle[] = {
436 -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
437 -630, -910, -1638, -4096
440 int angle = intra_pred_angle[mode - 2];
// ref_tmp points `size` elements into ref_array so it can be indexed down to
// `last`, which is no lower than -size because angle is never below -32.
441 pixel ref_array[3 * MAX_TB_SIZE + 4];
442 pixel *ref_tmp = ref_array + size;
444 int last = (size * angle) >> 5;
// Negative angle reaching past index -1: build an extended reference with
// ref_tmp[k] = top[k - 1] for k >= 0, and samples projected from `left` via
// inv_angle for k in last..-1.
448 if (angle < 0 && last < -1) {
449 for (x = 0; x <= size; x += 4)
450 AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
451 for (x = last; x <= -1; x++)
452 ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
// Per row: idx is the whole-sample displacement and fact the 1/32 fractional
// part. The first loop interpolates between two neighbouring reference
// samples with weights (32 - fact) and fact; the second copies four samples
// at a time (presumably the fact == 0 case).
456 for (y = 0; y < size; y++) {
457 int idx = ((y + 1) * angle) >> 5;
458 int fact = ((y + 1) * angle) & 31;
460 for (x = 0; x < size; x += 4) {
461 POS(x , y) = ((32 - fact) * ref[x + idx + 1] +
462 fact * ref[x + idx + 2] + 16) >> 5;
463 POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
464 fact * ref[x + 1 + idx + 2] + 16) >> 5;
465 POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
466 fact * ref[x + 2 + idx + 2] + 16) >> 5;
467 POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
468 fact * ref[x + 3 + idx + 2] + 16) >> 5;
471 for (x = 0; x < size; x += 4)
472 AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
// Mode 26, plane 0, size < 32: the first column becomes top[0] plus half the
// gradient of the left reference column, clipped to the pixel range.
475 if (mode == 26 && c_idx == 0 && size < 32) {
476 for (y = 0; y < size; y++)
477 POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
// Mirror image of the path above, with the roles of `left` and `top` swapped.
481 if (angle < 0 && last < -1) {
482 for (x = 0; x <= size; x += 4)
483 AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
484 for (x = last; x <= -1; x++)
485 ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
// Per column: same idx / fact split, but the block is written down each column.
489 for (x = 0; x < size; x++) {
490 int idx = ((x + 1) * angle) >> 5;
491 int fact = ((x + 1) * angle) & 31;
493 for (y = 0; y < size; y++) {
494 POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
495 fact * ref[y + idx + 2] + 16) >> 5;
498 for (y = 0; y < size; y++)
499 POS(x, y) = ref[y + idx + 1];
// Mode 10, plane 0, size < 32: the first row becomes left[0] plus half the
// gradient of the top reference row, clipped to the pixel range.
502 if (mode == 10 && c_idx == 0 && size < 32) {
503 for (x = 0; x < size; x += 4) {
504 POS(x, 0) = av_clip_pixel(left[0] + ((top[x ] - top[-1]) >> 1));
505 POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
506 POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
507 POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
/* Angular prediction for size 1 << 2 (4 samples per side): forwards all
 * arguments to pred_angular() with that fixed size. */
513 static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
515 ptrdiff_t stride, int c_idx, int mode)
517 FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 2);
/* Angular prediction for size 1 << 3 (8 samples per side): forwards all
 * arguments to pred_angular() with that fixed size. */
520 static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
522 ptrdiff_t stride, int c_idx, int mode)
524 FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 3);
/* Angular prediction for size 1 << 4 (16 samples per side): forwards all
 * arguments to pred_angular() with that fixed size. */
527 static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
529 ptrdiff_t stride, int c_idx, int mode)
531 FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 4);
/* Angular prediction for size 1 << 5 (32 samples per side): forwards all
 * arguments to pred_angular() with that fixed size. */
534 static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
536 ptrdiff_t stride, int c_idx, int mode)
538 FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 5);
/* Remove the helper macros defined for intra_pred() so they do not leak past
 * this template (presumably so it can be included again for another bit
 * depth -- confirm against the including file). */
541 #undef EXTEND_LEFT_CIP
542 #undef EXTEND_RIGHT_CIP
544 #undef EXTEND_DOWN_CIP
550 #undef MIN_TB_ADDR_ZS