4 * Copyright (C) 2012 - 2013 Guillaume Martres
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "bit_depth_template.c"
/* Writes PCM samples into dst. Each sample is read with
 * get_bits(gb, pcm_bit_depth) and left-shifted by
 * (BIT_DEPTH - pcm_bit_depth), i.e. scaled from pcm_bit_depth to BIT_DEPTH
 * bits, for a width x height area. */
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
31 GetBitContext *gb, int pcm_bit_depth)
34 pixel *dst = (pixel *)_dst;
/* rescale stride so it counts pixel elements rather than sizeof(pixel) units */
36 stride /= sizeof(pixel);
38 for (y = 0; y < height; y++) {
39 for (x = 0; x < width; x++)
40 dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
/* Adds coefficients to a size x size block of dst in place: the visible
 * store is dst[x] = av_clip_pixel(dst[x] + *coeffs), so every result is
 * clipped to the pixel range. Shared helper of the transform_addNxN
 * wrappers. */
45 static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coeffs,
46 ptrdiff_t stride, int size)
49 pixel *dst = (pixel *)_dst;
/* rescale stride so it counts pixel elements */
51 stride /= sizeof(pixel);
53 for (y = 0; y < size; y++) {
54 for (x = 0; x < size; x++) {
55 dst[x] = av_clip_pixel(dst[x] + *coeffs);
/* 4x4 entry point: forwards to transquant_bypass with size 4. */
62 static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
65 FUNC(transquant_bypass)(_dst, coeffs, stride, 4);
/* 8x8 entry point: forwards to transquant_bypass with size 8. */
68 static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
71 FUNC(transquant_bypass)(_dst, coeffs, stride, 8);
/* 16x16 entry point: forwards to transquant_bypass with size 16. */
74 static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
77 FUNC(transquant_bypass)(_dst, coeffs, stride, 16);
/* 32x32 entry point: forwards to transquant_bypass with size 32. */
80 static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
83 FUNC(transquant_bypass)(_dst, coeffs, stride, 32);
/* In-place accumulation over a (1 << log2_size) square block of
 * coefficients. Two passes are visible:
 *   - coeffs[x] += coeffs[x - size]  (adds the value one row above)
 *   - coeffs[x] += coeffs[x - 1]     (adds the value to the left)
 * NOTE(review): the test on `mode` that selects between the two passes is
 * not visible in this listing. */
87 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
89 int16_t *coeffs = (int16_t *) _coeffs;
91 int size = 1 << log2_size;
/* size - 1 iterations: the first row has no row above it */
95 for (y = 0; y < size - 1; y++) {
96 for (x = 0; x < size; x++)
97 coeffs[x] += coeffs[x - size];
101 for (y = 0; y < size; y++) {
/* x starts at 1: the first column has no left neighbour */
102 for (x = 1; x < size; x++)
103 coeffs[x] += coeffs[x - 1];
/* Rescales a (1 << log2_size) square block of coefficients in place by
 * shift = 15 - BIT_DEPTH - log2_size. Two variants are visible: a rounded
 * right shift by `shift`, and a left shift by `-shift`.
 * NOTE(review): the condition choosing between the variants is not visible
 * here; presumably it is the sign of `shift`. */
109 static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
111 int shift = 15 - BIT_DEPTH - log2_size;
113 int size = 1 << log2_size;
114 int16_t *coeffs = _coeffs;
/* rounding term: half of the divisor implied by the right shift */
118 int offset = 1 << (shift - 1);
119 for (y = 0; y < size; y++) {
120 for (x = 0; x < size; x++) {
121 *coeffs = (*coeffs + offset) >> shift;
126 for (y = 0; y < size; y++) {
127 for (x = 0; x < size; x++) {
/* NOTE(review): *coeffs is promoted to int; left-shifting a negative value
 * is undefined behaviour in ISO C. */
128 *coeffs = *coeffs << -shift;
/* Assignment policies handed to the TR_* macros as their `assign` argument.
 * SCALE and ADD_AND_SCALE use `add` and `shift` from the expansion scope. */
/* SET: plain store. */
135 #define SET(dst, x) (dst) = (x)
/* SCALE: round with `add`, shift right by `shift`, clip to int16. */
136 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
/* ADD_AND_SCALE: scale as SCALE does, add to the current dst, clip to pixel. */
137 #define ADD_AND_SCALE(dst, x) \
138 (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
/* 4-point transform kernel used by transform_4x4_luma. Reads src[0..3 * step],
 * forms the intermediate sums c0..c3 and writes four outputs through the
 * `assign` policy using the constants 29, 55 and 74. dst and src are
 * indexed with the same `step`. */
140 #define TR_4x4_LUMA(dst, src, step, assign) \
142 int c0 = src[0 * step] + src[2 * step]; \
143 int c1 = src[2 * step] + src[3 * step]; \
144 int c2 = src[0 * step] - src[3 * step]; \
145 int c3 = 74 * src[1 * step]; \
147 assign(dst[2 * step], 74 * (src[0 * step] - \
150 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
151 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
152 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
/* In-place 4x4 luma transform built from two passes of TR_4x4_LUMA with the
 * SCALE policy: the first pass uses step 4, the second uses step 1 after
 * shift is reset to 20 - BIT_DEPTH. `add` is recomputed as the rounding
 * term for each shift.
 * NOTE(review): the initial value of `shift` is not visible in this listing. */
155 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
159 int add = 1 << (shift - 1);
160 int16_t *src = coeffs;
/* first pass: elements 4 apart */
162 for (i = 0; i < 4; i++) {
163 TR_4x4_LUMA(src, src, 4, SCALE);
/* second pass: consecutive elements, shift depends on BIT_DEPTH */
167 shift = 20 - BIT_DEPTH;
168 add = 1 << (shift - 1);
169 for (i = 0; i < 4; i++) {
170 TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
/* 4-point butterfly. The even part (e0, e1) comes from src[0] and src[2]
 * with weight 64; the odd part (o0, o1) comes from src[1] and src[3] with
 * weights 83 and 36. Outputs are e +/- o, written through `assign`. The
 * `end` argument is not used in the visible lines. */
177 #define TR_4(dst, src, dstep, sstep, assign, end) \
179 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
180 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
181 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
182 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
184 assign(dst[0 * dstep], e0 + o0); \
185 assign(dst[1 * dstep], e1 + o1); \
186 assign(dst[2 * dstep], e1 - o1); \
187 assign(dst[3 * dstep], e0 - o0); \
/* 8-point transform. The odd part o_8[] accumulates transform[4 * j][i] *
 * src[j * sstep] over odd j below `end`; the even part e_8[] is produced by
 * TR_4 on every second input. Outputs are mirrored: dst[i] = e + o and
 * dst[7 - i] = e - o. */
190 #define TR_8(dst, src, dstep, sstep, assign, end) \
194 int o_8[4] = { 0 }; \
195 for (i = 0; i < 4; i++) \
196 for (j = 1; j < end; j += 2) \
197 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
198 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
200 for (i = 0; i < 4; i++) { \
201 assign(dst[i * dstep], e_8[i] + o_8[i]); \
202 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
/* 16-point transform. The odd part o_16[] accumulates transform[2 * j][i] *
 * src[j * sstep] over odd j below `end`; the even part e_16[] is produced
 * by TR_8 on every second input. Outputs are mirrored: dst[i] = e + o and
 * dst[15 - i] = e - o. */
206 #define TR_16(dst, src, dstep, sstep, assign, end) \
210 int o_16[8] = { 0 }; \
211 for (i = 0; i < 8; i++) \
212 for (j = 1; j < end; j += 2) \
213 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
214 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
216 for (i = 0; i < 8; i++) { \
217 assign(dst[i * dstep], e_16[i] + o_16[i]); \
218 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
/* 32-point transform. The odd part o_32[] accumulates transform[j][i] *
 * src[j * sstep] over odd j below `end`; the even part e_32[] is produced
 * by TR_16 on every second input, called with end/2. Outputs are mirrored:
 * dst[i] = e + o and dst[31 - i] = e - o. */
222 #define TR_32(dst, src, dstep, sstep, assign, end) \
226 int o_32[16] = { 0 }; \
227 for (i = 0; i < 16; i++) \
228 for (j = 1; j < end; j += 2) \
229 o_32[i] += transform[j][i] * src[j * sstep]; \
230 TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
232 for (i = 0; i < 16; i++) { \
233 assign(dst[i * dstep], e_32[i] + o_32[i]); \
234 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
/* Per-size local declarations for the idct_HxH generator. `limit` is
 * col_limit capped at H and `limit2` is col_limit + 4 capped at H. The 4x4
 * variant declares only limit2; the 16 and 32 variants reuse the 8 variant. */
238 #define IDCT_VAR4(H) \
239 int limit2 = FFMIN(col_limit + 4, H)
240 #define IDCT_VAR8(H) \
241 int limit = FFMIN(col_limit, H); \
242 int limit2 = FFMIN(col_limit + 4, H)
243 #define IDCT_VAR16(H) IDCT_VAR8(H)
244 #define IDCT_VAR32(H) IDCT_VAR8(H)
/* Generator body for idct_HxH(coeffs, col_limit): an in-place H x H
 * transform in two passes of TR_H with the SCALE policy. The first pass
 * uses steps of H and passes limit2 as `end`; the second pass uses steps of
 * 1, shift = 20 - BIT_DEPTH, and passes limit as `end`.
 * NOTE(review): the enclosing #define line, the initial `shift` and the
 * statement guarded by the limit2 test are not visible in this listing. */
247 static void FUNC(idct_##H ##x ##H )( \
248 int16_t *coeffs, int col_limit) { \
251 int add = 1 << (shift - 1); \
252 int16_t *src = coeffs; \
255 for (i = 0; i < H; i++) { \
256 TR_ ## H(src, src, H, H, SCALE, limit2); \
257 if (limit2 < H && i%4 == 0 && !!i) \
262 shift = 20 - BIT_DEPTH; \
263 add = 1 << (shift - 1); \
264 for (i = 0; i < H; i++) { \
265 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
/* Generator body for idct_HxH_dc: derives a single value from coeffs[0]
 * ((coeffs[0] + 1) >> 1, then a rounded shift by 14 - BIT_DEPTH) and stores
 * it into all H * H entries of coeffs.
 * NOTE(review): the enclosing #define line and the parameter list are not
 * visible in this listing. */
271 static void FUNC(idct_##H ##x ##H ##_dc)( \
274 int shift = 14 - BIT_DEPTH; \
275 int add = 1 << (shift - 1); \
276 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
278 for (j = 0; j < H; j++) { \
279 for (i = 0; i < H; i++) { \
280 coeffs[i+j*H] = coeff; \
/* SAO band filter. Builds a 32-entry offset table that is zero except for
 * four consecutive bands starting at sao_left_class (wrapping modulo 32),
 * which receive sao_offset_val[1..4]. Each source pixel selects its band
 * with src[x] >> (BIT_DEPTH - 5), and the band offset is added with
 * clipping to the pixel range. */
304 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
305 ptrdiff_t stride_dst, ptrdiff_t stride_src,
306 int16_t *sao_offset_val, int sao_left_class,
307 int width, int height)
309 pixel *dst = (pixel *)_dst;
310 pixel *src = (pixel *)_src;
311 int offset_table[32] = { 0 };
/* keeps the top 5 bits of a sample, i.e. one of 32 bands */
313 int shift = BIT_DEPTH - 5;
315 stride_dst /= sizeof(pixel);
316 stride_src /= sizeof(pixel);
/* sao_offset_val[0] is not used here; entries 1..4 map to the 4 bands */
318 for (k = 0; k < 4; k++)
319 offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
320 for (y = 0; y < height; y++) {
321 for (x = 0; x < width; x++)
322 dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
/* Three-way comparison: 1 if a > b, -1 if a < b, 0 if equal.
 * Both arguments are evaluated twice. */
328 #define CMP(a, b) (((a) > (b)) - ((a) < (b)))
/* SAO edge filter. For the edge class `eo`, pos[eo] gives the (x, y)
 * displacement of the two neighbours a and b. Each pixel is compared with
 * both neighbours using CMP; the sum of the two results (range -2..2) is
 * mapped through edge_idx to an index into sao_offset_val, and that offset
 * is added with clipping to the pixel range.
 * The source stride is not a parameter: it is fixed to
 * (2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel). */
330 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
331 int eo, int width, int height) {
/* maps (2 + diff0 + diff1) in 0..4 to an index into sao_offset_val */
333 static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
/* pos[eo][neighbour] = { dx, dy } */
334 static const int8_t pos[4][2][2] = {
335 { { -1, 0 }, { 1, 0 } }, // horizontal
336 { { 0, -1 }, { 0, 1 } }, // vertical
337 { { -1, -1 }, { 1, 1 } }, // 45 degree
338 { { 1, -1 }, { -1, 1 } }, // 135 degree
340 pixel *dst = (pixel *)_dst;
341 pixel *src = (pixel *)_src;
342 int a_stride, b_stride;
344 ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
345 stride_dst /= sizeof(pixel);
/* linear offsets from src[x] to neighbours a and b */
347 a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
348 b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
349 for (y = 0; y < height; y++) {
350 for (x = 0; x < width; x++) {
351 int diff0 = CMP(src[x], src[x + a_stride]);
352 int diff1 = CMP(src[x], src[x + b_stride]);
353 int offset_val = edge_idx[2 + diff0 + diff1];
354 dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
/* Rewrites the outer rows and columns of a width x height block as
 * src + sao_offset_val[0], clipped to the pixel range:
 *   - left column and right column when the class is not SAO_EO_VERT
 *   - top row and bottom row when the class is not SAO_EO_HORIZ
 * The offsets and the edge class are taken from sao for component c_idx.
 * NOTE(review): the per-side guards (presumably on borders[]) and any
 * adjustments to init_x / width / height are not visible in this listing. */
361 static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
362 ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
363 int *borders, int _width, int _height,
364 int c_idx, uint8_t *vert_edge,
365 uint8_t *horiz_edge, uint8_t *diag_edge)
368 pixel *dst = (pixel *)_dst;
369 pixel *src = (pixel *)_src;
370 int16_t *sao_offset_val = sao->offset_val[c_idx];
371 int sao_eo_class = sao->eo_class[c_idx];
372 int init_x = 0, width = _width, height = _height;
374 stride_dst /= sizeof(pixel);
375 stride_src /= sizeof(pixel);
377 if (sao_eo_class != SAO_EO_VERT) {
/* left column */
379 int offset_val = sao_offset_val[0];
380 for (y = 0; y < height; y++) {
381 dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
/* right column (x = width - 1); x is used as the row counter here */
386 int offset_val = sao_offset_val[0];
387 int offset = width - 1;
388 for (x = 0; x < height; x++) {
389 dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
394 if (sao_eo_class != SAO_EO_HORIZ) {
/* top row */
396 int offset_val = sao_offset_val[0];
397 for (x = init_x; x < width; x++)
398 dst[x] = av_clip_pixel(src[x] + offset_val);
/* bottom row (y = height - 1) */
401 int offset_val = sao_offset_val[0];
402 int y_stride_dst = stride_dst * (height - 1);
403 int y_stride_src = stride_src * (height - 1);
404 for (x = init_x; x < width; x++)
405 dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
/* Same outer row/column rewrite as sao_edge_restore_0 (src +
 * sao_offset_val[0], clipped), followed by a restore step that copies src
 * back into dst along sides and corners flagged by vert_edge[],
 * horiz_edge[] and diag_edge[].
 * The save_* flags are 0 or 1 and shorten the side copies by one pixel at
 * a corner that is to be kept.
 * NOTE(review): the per-side guards of the first part and the bodies of
 * two statements in the restore part (the top-row copy loop and the
 * diag_edge[0] case) are not visible in this listing. */
411 static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
412 ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
413 int *borders, int _width, int _height,
414 int c_idx, uint8_t *vert_edge,
415 uint8_t *horiz_edge, uint8_t *diag_edge)
418 pixel *dst = (pixel *)_dst;
419 pixel *src = (pixel *)_src;
420 int16_t *sao_offset_val = sao->offset_val[c_idx];
421 int sao_eo_class = sao->eo_class[c_idx];
422 int init_x = 0, init_y = 0, width = _width, height = _height;
424 stride_dst /= sizeof(pixel);
425 stride_src /= sizeof(pixel);
427 if (sao_eo_class != SAO_EO_VERT) {
/* left column */
429 int offset_val = sao_offset_val[0];
430 for (y = 0; y < height; y++) {
431 dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
/* right column (x = width - 1); x is used as the row counter here */
436 int offset_val = sao_offset_val[0];
437 int offset = width - 1;
438 for (x = 0; x < height; x++) {
439 dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
444 if (sao_eo_class != SAO_EO_HORIZ) {
/* top row */
446 int offset_val = sao_offset_val[0];
447 for (x = init_x; x < width; x++)
448 dst[x] = av_clip_pixel(src[x] + offset_val);
/* bottom row (y = height - 1) */
452 int offset_val = sao_offset_val[0];
453 int y_stride_dst = stride_dst * (height - 1);
454 int y_stride_src = stride_src * (height - 1);
455 for (x = init_x; x < width; x++)
456 dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
/* A corner is "saved" (left out of the side copies below) when its
 * diag_edge flag is clear, the class is the matching diagonal and neither
 * adjacent borders[] flag is set. */
462 int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
463 int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
464 int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
465 int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
467 // Restore pixels that can't be modified
/* left column */
468 if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
469 for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
470 dst[y*stride_dst] = src[y*stride_src];
/* right column */
472 if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
473 for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
474 dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
/* top row */
477 if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
478 for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
/* bottom row */
481 if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
482 for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
483 dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
/* individual corners */
485 if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
487 if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
488 dst[width-1] = src[width-1];
489 if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
490 dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
491 if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
492 dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
499 ////////////////////////////////////////////////////////////////////////////////
501 ////////////////////////////////////////////////////////////////////////////////
/* Full-sample copy into the 16-bit intermediate buffer: each source pixel
 * is left-shifted by (14 - BIT_DEPTH). mx and my are not used in the
 * visible lines. */
502 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
503 uint8_t *_src, ptrdiff_t _srcstride,
504 int height, intptr_t mx, intptr_t my, int width)
507 pixel *src = (pixel *)_src;
508 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
510 for (y = 0; y < height; y++) {
511 for (x = 0; x < width; x++)
512 dst[x] = src[x] << (14 - BIT_DEPTH);
/* Full-sample uni-prediction copy: each row is copied unchanged from src to
 * dst with memcpy of width * sizeof(pixel) bytes. */
518 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
519 int height, intptr_t mx, intptr_t my, int width)
522 pixel *src = (pixel *)_src;
523 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
524 pixel *dst = (pixel *)_dst;
525 ptrdiff_t dststride = _dststride / sizeof(pixel);
527 for (y = 0; y < height; y++) {
528 memcpy(dst, src, width * sizeof(pixel));
/* Full-sample bi-prediction: the source pixel, scaled by
 * << (14 - BIT_DEPTH), is added to src2[x]; the sum is rounded with
 * `offset`, shifted right by (14 + 1 - BIT_DEPTH) and clipped to the pixel
 * range.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
534 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
536 int height, intptr_t mx, intptr_t my, int width)
539 pixel *src = (pixel *)_src;
540 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
541 pixel *dst = (pixel *)_dst;
542 ptrdiff_t dststride = _dststride / sizeof(pixel);
/* one extra bit: the shift also halves the sum of the two predictions */
544 int shift = 14 + 1 - BIT_DEPTH;
546 int offset = 1 << (shift - 1);
551 for (y = 0; y < height; y++) {
552 for (x = 0; x < width; x++)
553 dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
/* Full-sample weighted uni-prediction: the source pixel, scaled by
 * << (14 - BIT_DEPTH), is multiplied by wx, rounded with `offset`, shifted
 * right by (denom + 14 - BIT_DEPTH), then ox is added and the result is
 * clipped to the pixel range. ox is first scaled by 1 << (BIT_DEPTH - 8). */
560 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
561 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
564 pixel *src = (pixel *)_src;
565 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
566 pixel *dst = (pixel *)_dst;
567 ptrdiff_t dststride = _dststride / sizeof(pixel);
568 int shift = denom + 14 - BIT_DEPTH;
570 int offset = 1 << (shift - 1);
575 ox = ox * (1 << (BIT_DEPTH - 8));
576 for (y = 0; y < height; y++) {
577 for (x = 0; x < width; x++)
578 dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
/* Full-sample weighted bi-prediction: the scaled source pixel times wx1
 * plus src2[x] times wx0, plus (ox0 + ox1 + 1) << log2Wd, shifted right by
 * (log2Wd + 1) and clipped to the pixel range, where
 * log2Wd = denom + (14 + 1 - BIT_DEPTH) - 1. ox0 and ox1 are first scaled
 * by 1 << (BIT_DEPTH - 8).
 * NOTE(review): the declaration of src2 is not visible in this listing;
 * (ox0 + ox1 + 1) << log2Wd left-shifts a negative value when the offsets
 * sum below -1, which is undefined behaviour in ISO C. */
584 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
586 int height, int denom, int wx0, int wx1,
587 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
590 pixel *src = (pixel *)_src;
591 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
592 pixel *dst = (pixel *)_dst;
593 ptrdiff_t dststride = _dststride / sizeof(pixel);
595 int shift = 14 + 1 - BIT_DEPTH;
596 int log2Wd = denom + shift - 1;
598 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
599 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
600 for (y = 0; y < height; y++) {
601 for (x = 0; x < width; x++) {
602 dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
610 ////////////////////////////////////////////////////////////////////////////////
612 ////////////////////////////////////////////////////////////////////////////////
/* 8-tap filter around position x: taps cover src[x - 3 * stride] through
 * src[x + 4 * stride], weighted by filter[0..7]. Uses `filter` and `x`
 * from the expansion scope; stride 1 filters horizontally, a row stride
 * filters vertically. */
613 #define QPEL_FILTER(src, stride) \
614 (filter[0] * src[x - 3 * stride] + \
615 filter[1] * src[x - 2 * stride] + \
616 filter[2] * src[x - stride] + \
617 filter[3] * src[x ] + \
618 filter[4] * src[x + stride] + \
619 filter[5] * src[x + 2 * stride] + \
620 filter[6] * src[x + 3 * stride] + \
621 filter[7] * src[x + 4 * stride])
/* Horizontal 8-tap interpolation into the 16-bit intermediate buffer. The
 * filter is ff_hevc_qpel_filters[mx - 1] and the filtered value is shifted
 * right by (BIT_DEPTH - 8). */
623 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
624 uint8_t *_src, ptrdiff_t _srcstride,
625 int height, intptr_t mx, intptr_t my, int width)
628 pixel *src = (pixel*)_src;
629 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
630 const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
631 for (y = 0; y < height; y++) {
632 for (x = 0; x < width; x++)
633 dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* Vertical 8-tap interpolation into the 16-bit intermediate buffer. The
 * filter is ff_hevc_qpel_filters[my - 1], taps are srcstride apart, and the
 * filtered value is shifted right by (BIT_DEPTH - 8). */
639 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
640 uint8_t *_src, ptrdiff_t _srcstride,
641 int height, intptr_t mx, intptr_t my, int width)
644 pixel *src = (pixel*)_src;
645 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
646 const int8_t *filter = ff_hevc_qpel_filters[my - 1];
647 for (y = 0; y < height; y++) {
648 for (x = 0; x < width; x++)
649 dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
/* Separable 2-D 8-tap interpolation into the 16-bit intermediate buffer.
 * Pass 1 filters horizontally (filter mx - 1) into tmp_array for
 * height + QPEL_EXTRA rows, starting QPEL_EXTRA_BEFORE rows above the
 * block. Pass 2 filters tmp vertically (filter my - 1, taps MAX_PB_SIZE
 * apart) and shifts the result right by 6. */
655 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
657 ptrdiff_t _srcstride,
658 int height, intptr_t mx,
659 intptr_t my, int width)
662 const int8_t *filter;
663 pixel *src = (pixel*)_src;
664 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
665 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
666 int16_t *tmp = tmp_array;
/* back up so the vertical taps above the first row are available */
668 src -= QPEL_EXTRA_BEFORE * srcstride;
669 filter = ff_hevc_qpel_filters[mx - 1];
670 for (y = 0; y < height + QPEL_EXTRA; y++) {
671 for (x = 0; x < width; x++)
672 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
677 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
678 filter = ff_hevc_qpel_filters[my - 1];
679 for (y = 0; y < height; y++) {
680 for (x = 0; x < width; x++)
681 dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
/* Horizontal 8-tap uni-prediction written as pixels: the filtered value
 * (filter mx - 1, >> (BIT_DEPTH - 8)) is rounded with `offset`, shifted
 * right by (14 - BIT_DEPTH) and clipped to the pixel range. */
687 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
688 uint8_t *_src, ptrdiff_t _srcstride,
689 int height, intptr_t mx, intptr_t my, int width)
692 pixel *src = (pixel*)_src;
693 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
694 pixel *dst = (pixel *)_dst;
695 ptrdiff_t dststride = _dststride / sizeof(pixel);
696 const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
697 int shift = 14 - BIT_DEPTH;
700 int offset = 1 << (shift - 1);
705 for (y = 0; y < height; y++) {
706 for (x = 0; x < width; x++)
707 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
/* Horizontal 8-tap bi-prediction: the filtered value (filter mx - 1,
 * >> (BIT_DEPTH - 8)) is added to src2[x], rounded with `offset`, shifted
 * right by (14 + 1 - BIT_DEPTH) and clipped to the pixel range.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
713 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
715 int height, intptr_t mx, intptr_t my, int width)
718 pixel *src = (pixel*)_src;
719 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
720 pixel *dst = (pixel *)_dst;
721 ptrdiff_t dststride = _dststride / sizeof(pixel);
723 const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
725 int shift = 14 + 1 - BIT_DEPTH;
727 int offset = 1 << (shift - 1);
732 for (y = 0; y < height; y++) {
733 for (x = 0; x < width; x++)
734 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
/* Vertical 8-tap uni-prediction written as pixels: the filtered value
 * (filter my - 1, taps srcstride apart, >> (BIT_DEPTH - 8)) is rounded with
 * `offset`, shifted right by (14 - BIT_DEPTH) and clipped to the pixel
 * range. */
741 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
742 uint8_t *_src, ptrdiff_t _srcstride,
743 int height, intptr_t mx, intptr_t my, int width)
746 pixel *src = (pixel*)_src;
747 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
748 pixel *dst = (pixel *)_dst;
749 ptrdiff_t dststride = _dststride / sizeof(pixel);
750 const int8_t *filter = ff_hevc_qpel_filters[my - 1];
751 int shift = 14 - BIT_DEPTH;
754 int offset = 1 << (shift - 1);
759 for (y = 0; y < height; y++) {
760 for (x = 0; x < width; x++)
761 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
/* Vertical 8-tap bi-prediction: the filtered value (filter my - 1, taps
 * srcstride apart, >> (BIT_DEPTH - 8)) is added to src2[x], rounded with
 * `offset`, shifted right by (14 + 1 - BIT_DEPTH) and clipped to the pixel
 * range.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
768 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
770 int height, intptr_t mx, intptr_t my, int width)
773 pixel *src = (pixel*)_src;
774 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
775 pixel *dst = (pixel *)_dst;
776 ptrdiff_t dststride = _dststride / sizeof(pixel);
778 const int8_t *filter = ff_hevc_qpel_filters[my - 1];
780 int shift = 14 + 1 - BIT_DEPTH;
782 int offset = 1 << (shift - 1);
787 for (y = 0; y < height; y++) {
788 for (x = 0; x < width; x++)
789 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
/* Separable 2-D 8-tap uni-prediction written as pixels. Pass 1 filters
 * horizontally (filter mx - 1) into tmp_array for height + QPEL_EXTRA rows,
 * starting QPEL_EXTRA_BEFORE rows above the block. Pass 2 filters tmp
 * vertically (filter my - 1, taps MAX_PB_SIZE apart, >> 6), rounds with
 * `offset`, shifts right by (14 - BIT_DEPTH) and clips to the pixel range. */
796 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
797 uint8_t *_src, ptrdiff_t _srcstride,
798 int height, intptr_t mx, intptr_t my, int width)
801 const int8_t *filter;
802 pixel *src = (pixel*)_src;
803 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
804 pixel *dst = (pixel *)_dst;
805 ptrdiff_t dststride = _dststride / sizeof(pixel);
806 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
807 int16_t *tmp = tmp_array;
808 int shift = 14 - BIT_DEPTH;
811 int offset = 1 << (shift - 1);
/* back up so the vertical taps above the first row are available */
816 src -= QPEL_EXTRA_BEFORE * srcstride;
817 filter = ff_hevc_qpel_filters[mx - 1];
818 for (y = 0; y < height + QPEL_EXTRA; y++) {
819 for (x = 0; x < width; x++)
820 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
825 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
826 filter = ff_hevc_qpel_filters[my - 1];
828 for (y = 0; y < height; y++) {
829 for (x = 0; x < width; x++)
830 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
/* Separable 2-D 8-tap bi-prediction. Pass 1 filters horizontally
 * (filter mx - 1) into tmp_array for height + QPEL_EXTRA rows, starting
 * QPEL_EXTRA_BEFORE rows above the block. Pass 2 filters tmp vertically
 * (filter my - 1, >> 6), adds src2[x], rounds with `offset`, shifts right
 * by (14 + 1 - BIT_DEPTH) and clips to the pixel range.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
836 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
838 int height, intptr_t mx, intptr_t my, int width)
841 const int8_t *filter;
842 pixel *src = (pixel*)_src;
843 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
844 pixel *dst = (pixel *)_dst;
845 ptrdiff_t dststride = _dststride / sizeof(pixel);
846 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
847 int16_t *tmp = tmp_array;
848 int shift = 14 + 1 - BIT_DEPTH;
850 int offset = 1 << (shift - 1);
/* back up so the vertical taps above the first row are available */
855 src -= QPEL_EXTRA_BEFORE * srcstride;
856 filter = ff_hevc_qpel_filters[mx - 1];
857 for (y = 0; y < height + QPEL_EXTRA; y++) {
858 for (x = 0; x < width; x++)
859 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
864 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
865 filter = ff_hevc_qpel_filters[my - 1];
867 for (y = 0; y < height; y++) {
868 for (x = 0; x < width; x++)
869 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
/* Horizontal 8-tap weighted uni-prediction: the filtered value
 * (filter mx - 1, >> (BIT_DEPTH - 8)) is multiplied by wx, rounded with
 * `offset`, shifted right by (denom + 14 - BIT_DEPTH), then ox is added and
 * the result is clipped to the pixel range. ox is first scaled by
 * 1 << (BIT_DEPTH - 8). */
876 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
877 uint8_t *_src, ptrdiff_t _srcstride,
878 int height, int denom, int wx, int ox,
879 intptr_t mx, intptr_t my, int width)
882 pixel *src = (pixel*)_src;
883 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
884 pixel *dst = (pixel *)_dst;
885 ptrdiff_t dststride = _dststride / sizeof(pixel);
886 const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
887 int shift = denom + 14 - BIT_DEPTH;
889 int offset = 1 << (shift - 1);
894 ox = ox * (1 << (BIT_DEPTH - 8));
895 for (y = 0; y < height; y++) {
896 for (x = 0; x < width; x++)
897 dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
/* Horizontal 8-tap weighted bi-prediction: the filtered value
 * (filter mx - 1, >> (BIT_DEPTH - 8)) times wx1 plus src2[x] times wx0,
 * plus (ox0 + ox1 + 1) << log2Wd, shifted right by (log2Wd + 1) and clipped
 * to the pixel range, with log2Wd = denom + (14 + 1 - BIT_DEPTH) - 1.
 * ox0 and ox1 are first scaled by 1 << (BIT_DEPTH - 8).
 * NOTE(review): the declaration of src2 is not visible in this listing. */
903 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
905 int height, int denom, int wx0, int wx1,
906 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
909 pixel *src = (pixel*)_src;
910 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
911 pixel *dst = (pixel *)_dst;
912 ptrdiff_t dststride = _dststride / sizeof(pixel);
914 const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
916 int shift = 14 + 1 - BIT_DEPTH;
917 int log2Wd = denom + shift - 1;
919 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
920 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
921 for (y = 0; y < height; y++) {
922 for (x = 0; x < width; x++)
923 dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
924 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
/* Vertical 8-tap weighted uni-prediction: the filtered value
 * (filter my - 1, taps srcstride apart, >> (BIT_DEPTH - 8)) is multiplied
 * by wx, rounded with `offset`, shifted right by (denom + 14 - BIT_DEPTH),
 * then ox is added and the result is clipped to the pixel range. ox is
 * first scaled by 1 << (BIT_DEPTH - 8). */
931 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
932 uint8_t *_src, ptrdiff_t _srcstride,
933 int height, int denom, int wx, int ox,
934 intptr_t mx, intptr_t my, int width)
937 pixel *src = (pixel*)_src;
938 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
939 pixel *dst = (pixel *)_dst;
940 ptrdiff_t dststride = _dststride / sizeof(pixel);
941 const int8_t *filter = ff_hevc_qpel_filters[my - 1];
942 int shift = denom + 14 - BIT_DEPTH;
944 int offset = 1 << (shift - 1);
949 ox = ox * (1 << (BIT_DEPTH - 8));
950 for (y = 0; y < height; y++) {
951 for (x = 0; x < width; x++)
952 dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
/* Vertical 8-tap weighted bi-prediction: the filtered value
 * (filter my - 1, taps srcstride apart, >> (BIT_DEPTH - 8)) times wx1 plus
 * src2[x] times wx0, plus (ox0 + ox1 + 1) << log2Wd, shifted right by
 * (log2Wd + 1) and clipped to the pixel range, with
 * log2Wd = denom + (14 + 1 - BIT_DEPTH) - 1. ox0 and ox1 are first scaled
 * by 1 << (BIT_DEPTH - 8).
 * NOTE(review): the declaration of src2 is not visible in this listing. */
958 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
960 int height, int denom, int wx0, int wx1,
961 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
964 pixel *src = (pixel*)_src;
965 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
966 pixel *dst = (pixel *)_dst;
967 ptrdiff_t dststride = _dststride / sizeof(pixel);
969 const int8_t *filter = ff_hevc_qpel_filters[my - 1];
971 int shift = 14 + 1 - BIT_DEPTH;
972 int log2Wd = denom + shift - 1;
974 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
975 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
976 for (y = 0; y < height; y++) {
977 for (x = 0; x < width; x++)
978 dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
979 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
/* Separable 2-D 8-tap weighted uni-prediction. Pass 1 filters horizontally
 * (filter mx - 1) into tmp_array for height + QPEL_EXTRA rows, starting
 * QPEL_EXTRA_BEFORE rows above the block. Pass 2 filters tmp vertically
 * (filter my - 1, >> 6), multiplies by wx, rounds with `offset`, shifts
 * right by (denom + 14 - BIT_DEPTH), adds ox and clips to the pixel range.
 * ox is first scaled by 1 << (BIT_DEPTH - 8). */
986 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
987 uint8_t *_src, ptrdiff_t _srcstride,
988 int height, int denom, int wx, int ox,
989 intptr_t mx, intptr_t my, int width)
992 const int8_t *filter;
993 pixel *src = (pixel*)_src;
994 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
995 pixel *dst = (pixel *)_dst;
996 ptrdiff_t dststride = _dststride / sizeof(pixel);
997 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
998 int16_t *tmp = tmp_array;
999 int shift = denom + 14 - BIT_DEPTH;
1001 int offset = 1 << (shift - 1);
/* back up so the vertical taps above the first row are available */
1006 src -= QPEL_EXTRA_BEFORE * srcstride;
1007 filter = ff_hevc_qpel_filters[mx - 1];
1008 for (y = 0; y < height + QPEL_EXTRA; y++) {
1009 for (x = 0; x < width; x++)
1010 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
1015 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1016 filter = ff_hevc_qpel_filters[my - 1];
1018 ox = ox * (1 << (BIT_DEPTH - 8));
1019 for (y = 0; y < height; y++) {
1020 for (x = 0; x < width; x++)
1021 dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
/* Separable 2-D 8-tap weighted bi-prediction. Pass 1 filters horizontally
 * (filter mx - 1) into tmp_array for height + QPEL_EXTRA rows, starting
 * QPEL_EXTRA_BEFORE rows above the block. Pass 2 filters tmp vertically
 * (filter my - 1, >> 6) and combines it with src2: filtered * wx1 +
 * src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd), shifted right by
 * (log2Wd + 1) and clipped to the pixel range. src2 advances by MAX_PB_SIZE.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
1027 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1029 int height, int denom, int wx0, int wx1,
1030 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1033 const int8_t *filter;
1034 pixel *src = (pixel*)_src;
1035 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1036 pixel *dst = (pixel *)_dst;
1037 ptrdiff_t dststride = _dststride / sizeof(pixel);
1038 int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1039 int16_t *tmp = tmp_array;
1040 int shift = 14 + 1 - BIT_DEPTH;
1041 int log2Wd = denom + shift - 1;
/* back up so the vertical taps above the first row are available */
1043 src -= QPEL_EXTRA_BEFORE * srcstride;
1044 filter = ff_hevc_qpel_filters[mx - 1];
1045 for (y = 0; y < height + QPEL_EXTRA; y++) {
1046 for (x = 0; x < width; x++)
1047 tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
1052 tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1053 filter = ff_hevc_qpel_filters[my - 1];
1055 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1056 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1057 for (y = 0; y < height; y++) {
1058 for (x = 0; x < width; x++)
1059 dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1060 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1063 src2 += MAX_PB_SIZE;
1067 ////////////////////////////////////////////////////////////////////////////////
1069 ////////////////////////////////////////////////////////////////////////////////
/* 4-tap filter around position x: taps cover src[x - stride] through
 * src[x + 2 * stride], weighted by filter[0..3]. Uses `filter` and `x`
 * from the expansion scope; stride 1 filters horizontally, a row stride
 * filters vertically. */
1070 #define EPEL_FILTER(src, stride) \
1071 (filter[0] * src[x - stride] + \
1072 filter[1] * src[x] + \
1073 filter[2] * src[x + stride] + \
1074 filter[3] * src[x + 2 * stride])
/* Horizontal 4-tap interpolation into the 16-bit intermediate buffer. The
 * filter is ff_hevc_epel_filters[mx - 1] and the filtered value is shifted
 * right by (BIT_DEPTH - 8). */
1076 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1077 uint8_t *_src, ptrdiff_t _srcstride,
1078 int height, intptr_t mx, intptr_t my, int width)
1081 pixel *src = (pixel *)_src;
1082 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1083 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1084 for (y = 0; y < height; y++) {
1085 for (x = 0; x < width; x++)
1086 dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* Vertical 4-tap interpolation into the 16-bit intermediate buffer. The
 * filter is ff_hevc_epel_filters[my - 1], taps are srcstride apart, and the
 * filtered value is shifted right by (BIT_DEPTH - 8). */
1092 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1093 uint8_t *_src, ptrdiff_t _srcstride,
1094 int height, intptr_t mx, intptr_t my, int width)
1097 pixel *src = (pixel *)_src;
1098 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1099 const int8_t *filter = ff_hevc_epel_filters[my - 1];
1101 for (y = 0; y < height; y++) {
1102 for (x = 0; x < width; x++)
1103 dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
/* Separable 2-D 4-tap interpolation into the 16-bit intermediate buffer.
 * Pass 1 filters horizontally (filter mx - 1) into tmp_array for
 * height + EPEL_EXTRA rows, starting EPEL_EXTRA_BEFORE rows above the
 * block. Pass 2 filters tmp vertically (filter my - 1, taps MAX_PB_SIZE
 * apart) and shifts the result right by 6. */
1109 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1110 uint8_t *_src, ptrdiff_t _srcstride,
1111 int height, intptr_t mx, intptr_t my, int width)
1114 pixel *src = (pixel *)_src;
1115 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1116 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1117 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1118 int16_t *tmp = tmp_array;
/* back up so the vertical tap above the first row is available */
1120 src -= EPEL_EXTRA_BEFORE * srcstride;
1122 for (y = 0; y < height + EPEL_EXTRA; y++) {
1123 for (x = 0; x < width; x++)
1124 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
1129 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1130 filter = ff_hevc_epel_filters[my - 1];
1132 for (y = 0; y < height; y++) {
1133 for (x = 0; x < width; x++)
1134 dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
/* Horizontal 4-tap uni-prediction written as pixels: the filtered value
 * (filter mx - 1, >> (BIT_DEPTH - 8)) is rounded with `offset`, shifted
 * right by (14 - BIT_DEPTH) and clipped to the pixel range. */
1140 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1141 int height, intptr_t mx, intptr_t my, int width)
1144 pixel *src = (pixel *)_src;
1145 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1146 pixel *dst = (pixel *)_dst;
1147 ptrdiff_t dststride = _dststride / sizeof(pixel);
1148 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1149 int shift = 14 - BIT_DEPTH;
1151 int offset = 1 << (shift - 1);
1156 for (y = 0; y < height; y++) {
1157 for (x = 0; x < width; x++)
1158 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
/* Horizontal 4-tap bi-prediction: the filtered value (filter mx - 1,
 * >> (BIT_DEPTH - 8)) is added to src2[x], rounded with `offset`, shifted
 * right by (14 + 1 - BIT_DEPTH) and clipped to the pixel range. src2
 * advances by MAX_PB_SIZE.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
1164 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1166 int height, intptr_t mx, intptr_t my, int width)
1169 pixel *src = (pixel *)_src;
1170 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1171 pixel *dst = (pixel *)_dst;
1172 ptrdiff_t dststride = _dststride / sizeof(pixel);
1173 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1174 int shift = 14 + 1 - BIT_DEPTH;
1176 int offset = 1 << (shift - 1);
1181 for (y = 0; y < height; y++) {
1182 for (x = 0; x < width; x++) {
1183 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1187 src2 += MAX_PB_SIZE;
/* Vertical 4-tap uni-prediction written as pixels: the filtered value
 * (filter my - 1, taps srcstride apart, >> (BIT_DEPTH - 8)) is rounded with
 * `offset`, shifted right by (14 - BIT_DEPTH) and clipped to the pixel
 * range. */
1191 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1192 int height, intptr_t mx, intptr_t my, int width)
1195 pixel *src = (pixel *)_src;
1196 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1197 pixel *dst = (pixel *)_dst;
1198 ptrdiff_t dststride = _dststride / sizeof(pixel);
1199 const int8_t *filter = ff_hevc_epel_filters[my - 1];
1200 int shift = 14 - BIT_DEPTH;
1202 int offset = 1 << (shift - 1);
1207 for (y = 0; y < height; y++) {
1208 for (x = 0; x < width; x++)
1209 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
/* Vertical 4-tap bi-prediction: the filtered value (filter my - 1, taps
 * srcstride apart, >> (BIT_DEPTH - 8)) is added to src2[x], rounded with
 * `offset`, shifted right by (14 + 1 - BIT_DEPTH) and clipped to the pixel
 * range. src2 advances by MAX_PB_SIZE.
 * NOTE(review): the declaration of src2 is not visible in this listing. */
1215 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1217 int height, intptr_t mx, intptr_t my, int width)
1220 pixel *src = (pixel *)_src;
1221 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1222 const int8_t *filter = ff_hevc_epel_filters[my - 1];
1223 pixel *dst = (pixel *)_dst;
1224 ptrdiff_t dststride = _dststride / sizeof(pixel);
1225 int shift = 14 + 1 - BIT_DEPTH;
1227 int offset = 1 << (shift - 1);
1232 for (y = 0; y < height; y++) {
1233 for (x = 0; x < width; x++)
1234 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1237 src2 += MAX_PB_SIZE;
/* Separable 2-D 4-tap uni-prediction written as pixels. Pass 1 filters
 * horizontally (filter mx - 1) into tmp_array for height + EPEL_EXTRA rows,
 * starting EPEL_EXTRA_BEFORE rows above the block. Pass 2 filters tmp
 * vertically (filter my - 1, taps MAX_PB_SIZE apart, >> 6), rounds with
 * `offset`, shifts right by (14 - BIT_DEPTH) and clips to the pixel range. */
1241 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1242 int height, intptr_t mx, intptr_t my, int width)
1245 pixel *src = (pixel *)_src;
1246 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1247 pixel *dst = (pixel *)_dst;
1248 ptrdiff_t dststride = _dststride / sizeof(pixel);
1249 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1250 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1251 int16_t *tmp = tmp_array;
1252 int shift = 14 - BIT_DEPTH;
1254 int offset = 1 << (shift - 1);
/* back up so the vertical tap above the first row is available */
1259 src -= EPEL_EXTRA_BEFORE * srcstride;
1261 for (y = 0; y < height + EPEL_EXTRA; y++) {
1262 for (x = 0; x < width; x++)
1263 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
/* skip the extra leading rows: tmp now points at the first output row */
1268 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1269 filter = ff_hevc_epel_filters[my - 1];
1271 for (y = 0; y < height; y++) {
1272 for (x = 0; x < width; x++)
1273 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1279 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1281 int height, intptr_t mx, intptr_t my, int width)
1284 pixel *src = (pixel *)_src;
1285 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1286 pixel *dst = (pixel *)_dst;
1287 ptrdiff_t dststride = _dststride / sizeof(pixel);
1288 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1289 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1290 int16_t *tmp = tmp_array;
1291 int shift = 14 + 1 - BIT_DEPTH;
1293 int offset = 1 << (shift - 1);
1298 src -= EPEL_EXTRA_BEFORE * srcstride;
1300 for (y = 0; y < height + EPEL_EXTRA; y++) {
1301 for (x = 0; x < width; x++)
1302 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1307 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1308 filter = ff_hevc_epel_filters[my - 1];
1310 for (y = 0; y < height; y++) {
1311 for (x = 0; x < width; x++)
1312 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1315 src2 += MAX_PB_SIZE;
1319 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1320 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1323 pixel *src = (pixel *)_src;
1324 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1325 pixel *dst = (pixel *)_dst;
1326 ptrdiff_t dststride = _dststride / sizeof(pixel);
1327 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1328 int shift = denom + 14 - BIT_DEPTH;
1330 int offset = 1 << (shift - 1);
1335 ox = ox * (1 << (BIT_DEPTH - 8));
1336 for (y = 0; y < height; y++) {
1337 for (x = 0; x < width; x++) {
1338 dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1345 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1347 int height, int denom, int wx0, int wx1,
1348 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1351 pixel *src = (pixel *)_src;
1352 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1353 pixel *dst = (pixel *)_dst;
1354 ptrdiff_t dststride = _dststride / sizeof(pixel);
1355 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1356 int shift = 14 + 1 - BIT_DEPTH;
1357 int log2Wd = denom + shift - 1;
1359 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1360 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1361 for (y = 0; y < height; y++) {
1362 for (x = 0; x < width; x++)
1363 dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1364 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1367 src2 += MAX_PB_SIZE;
1371 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1372 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1375 pixel *src = (pixel *)_src;
1376 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1377 pixel *dst = (pixel *)_dst;
1378 ptrdiff_t dststride = _dststride / sizeof(pixel);
1379 const int8_t *filter = ff_hevc_epel_filters[my - 1];
1380 int shift = denom + 14 - BIT_DEPTH;
1382 int offset = 1 << (shift - 1);
1387 ox = ox * (1 << (BIT_DEPTH - 8));
1388 for (y = 0; y < height; y++) {
1389 for (x = 0; x < width; x++) {
1390 dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1397 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1399 int height, int denom, int wx0, int wx1,
1400 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1403 pixel *src = (pixel *)_src;
1404 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1405 const int8_t *filter = ff_hevc_epel_filters[my - 1];
1406 pixel *dst = (pixel *)_dst;
1407 ptrdiff_t dststride = _dststride / sizeof(pixel);
1408 int shift = 14 + 1 - BIT_DEPTH;
1409 int log2Wd = denom + shift - 1;
1411 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1412 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1413 for (y = 0; y < height; y++) {
1414 for (x = 0; x < width; x++)
1415 dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1416 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1419 src2 += MAX_PB_SIZE;
1423 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1424 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1427 pixel *src = (pixel *)_src;
1428 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1429 pixel *dst = (pixel *)_dst;
1430 ptrdiff_t dststride = _dststride / sizeof(pixel);
1431 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1432 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1433 int16_t *tmp = tmp_array;
1434 int shift = denom + 14 - BIT_DEPTH;
1436 int offset = 1 << (shift - 1);
1441 src -= EPEL_EXTRA_BEFORE * srcstride;
1443 for (y = 0; y < height + EPEL_EXTRA; y++) {
1444 for (x = 0; x < width; x++)
1445 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1450 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1451 filter = ff_hevc_epel_filters[my - 1];
1453 ox = ox * (1 << (BIT_DEPTH - 8));
1454 for (y = 0; y < height; y++) {
1455 for (x = 0; x < width; x++)
1456 dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1462 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1464 int height, int denom, int wx0, int wx1,
1465 int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1468 pixel *src = (pixel *)_src;
1469 ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1470 pixel *dst = (pixel *)_dst;
1471 ptrdiff_t dststride = _dststride / sizeof(pixel);
1472 const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1473 int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1474 int16_t *tmp = tmp_array;
1475 int shift = 14 + 1 - BIT_DEPTH;
1476 int log2Wd = denom + shift - 1;
1478 src -= EPEL_EXTRA_BEFORE * srcstride;
1480 for (y = 0; y < height + EPEL_EXTRA; y++) {
1481 for (x = 0; x < width; x++)
1482 tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1487 tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1488 filter = ff_hevc_epel_filters[my - 1];
1490 ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1491 ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1492 for (y = 0; y < height; y++) {
1493 for (x = 0; x < width; x++)
1494 dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1495 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1498 src2 += MAX_PB_SIZE;
// Sample accessors for the deblocking filters. They expand in a scope that
// provides pix, xstride and ystride. Pn is the sample (n + 1) * xstride
// before pix, Qn the sample n * xstride after it.
#define P3 pix[-(4 * xstride)]
#define P2 pix[-(3 * xstride)]
#define P1 pix[-(2 * xstride)]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// Same samples on the line 3 * ystride further on (the last line of a
// four-line group); used only for the deblocking decision.
#define TP3 pix[3 * ystride - 4 * xstride]
#define TP2 pix[3 * ystride - 3 * xstride]
#define TP1 pix[3 * ystride - 2 * xstride]
#define TP0 pix[3 * ystride - xstride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[3 * ystride + xstride]
#define TQ2 pix[3 * ystride + 2 * xstride]
#define TQ3 pix[3 * ystride + 3 * xstride]
1520 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
1521 ptrdiff_t _xstride, ptrdiff_t _ystride,
1523 uint8_t *_no_p, uint8_t *_no_q)
1526 pixel *pix = (pixel *)_pix;
1527 ptrdiff_t xstride = _xstride / sizeof(pixel);
1528 ptrdiff_t ystride = _ystride / sizeof(pixel);
1530 beta <<= BIT_DEPTH - 8;
1532 for (j = 0; j < 2; j++) {
1533 const int dp0 = abs(P2 - 2 * P1 + P0);
1534 const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1535 const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1536 const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1537 const int d0 = dp0 + dq0;
1538 const int d3 = dp3 + dq3;
1539 const int tc = _tc[j] << (BIT_DEPTH - 8);
1540 const int no_p = _no_p[j];
1541 const int no_q = _no_q[j];
1543 if (d0 + d3 >= beta) {
1547 const int beta_3 = beta >> 3;
1548 const int beta_2 = beta >> 2;
1549 const int tc25 = ((tc * 5 + 1) >> 1);
1551 if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1552 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1553 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1555 const int tc2 = tc << 1;
1556 for (d = 0; d < 4; d++) {
1566 P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1567 P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1568 P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1571 Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1572 Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1573 Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1577 } else { // normal filtering
1580 const int tc_2 = tc >> 1;
1581 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1583 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1586 for (d = 0; d < 4; d++) {
1593 int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1594 if (abs(delta0) < 10 * tc) {
1595 delta0 = av_clip(delta0, -tc, tc);
1597 P0 = av_clip_pixel(p0 + delta0);
1599 Q0 = av_clip_pixel(q0 - delta0);
1600 if (!no_p && nd_p > 1) {
1601 const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1602 P1 = av_clip_pixel(p1 + deltap1);
1604 if (!no_q && nd_q > 1) {
1605 const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1606 Q1 = av_clip_pixel(q1 + deltaq1);
1616 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1617 ptrdiff_t _ystride, int *_tc,
1618 uint8_t *_no_p, uint8_t *_no_q)
1620 int d, j, no_p, no_q;
1621 pixel *pix = (pixel *)_pix;
1622 ptrdiff_t xstride = _xstride / sizeof(pixel);
1623 ptrdiff_t ystride = _ystride / sizeof(pixel);
1625 for (j = 0; j < 2; j++) {
1626 const int tc = _tc[j] << (BIT_DEPTH - 8);
1634 for (d = 0; d < 4; d++) {
1640 delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1642 P0 = av_clip_pixel(p0 + delta0);
1644 Q0 = av_clip_pixel(q0 - delta0);
1650 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1651 int32_t *tc, uint8_t *no_p,
1654 FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1657 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1658 int32_t *tc, uint8_t *no_p,
1661 FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1664 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1665 int beta, int32_t *tc, uint8_t *no_p,
1668 FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1669 beta, tc, no_p, no_q);
1672 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1673 int beta, int32_t *tc, uint8_t *no_p,
1676 FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1677 beta, tc, no_p, no_q);