1 /*****************************************************************************
2 * postprocessing_mmxext.c: Post Processing plugin MMXEXT
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: postprocessing_mmxext.c,v 1.2 2002/08/08 22:28:22 sam Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
24 #include <vlc/vlc.h> /* only use u8, u32 .... */
26 #include "postprocessing.h"
27 #include "postprocessing_common.h"
29 /*****************************************************************************
31 * Internal functions common to pp_deblock_V and pp_deblock_H
33 *****************************************************************************/
35 /*****************************************************************************
37 *****************************************************************************/
40 /* XXX PP_THR1 needs to be defined as ULL */
42 /* Use the same trick as in the idct code, but how does it work? */
/* Declare a file-local 64-bit constant whose assembler-level symbol name is
 * exactly `foo` (via the __asm__ label), so that inline asm text can refer to
 * it by name.  The `unused` attribute is there because C code never reads the
 * object directly. */
43 #define UNUSED_LONGLONG( foo ) \
44 static const unsigned long long foo __asm__ (#foo) __attribute__((unused))
46 /* to calculate isDC_mode for mmx */
/* ( 127 - PP_THR1 ) replicated into each of the 8 bytes; added byte-wise
 * (paddb) to the pixel differences in pp_deblock_isDC_mode. */
47 UNUSED_LONGLONG( mmx_127_thr1 ) = ( ( 127ULL - PP_THR1 ) << 56 )|
48 ( ( 127ULL - PP_THR1 ) << 48 )|
49 ( ( 127ULL - PP_THR1 ) << 40 )|
50 ( ( 127ULL - PP_THR1 ) << 32 )|
51 ( ( 127ULL - PP_THR1 ) << 24 )|
52 ( ( 127ULL - PP_THR1 ) << 16 )|
53 ( ( 127ULL - PP_THR1 ) << 8 )|
54 ( ( 127ULL - PP_THR1 ) );
/* ( 127 - PP_2xTHR1 - 1 ) replicated into each of the 8 bytes; the per-byte
 * threshold of the signed compare (pcmpgtb) in pp_deblock_isDC_mode. */
56 UNUSED_LONGLONG( mmx_127_2xthr1_1 ) = ( ( 127ULL - PP_2xTHR1 -1) << 56 )|
57 ( ( 127ULL - PP_2xTHR1 -1 ) << 48 )|
58 ( ( 127ULL - PP_2xTHR1 -1 ) << 40 )|
59 ( ( 127ULL - PP_2xTHR1 -1 ) << 32 )|
60 ( ( 127ULL - PP_2xTHR1 -1 ) << 24 )|
61 ( ( 127ULL - PP_2xTHR1 -1 ) << 16 )|
62 ( ( 127ULL - PP_2xTHR1 -1 ) << 8 )|
63 ( ( 127ULL - PP_2xTHR1 -1 ) );
/* Four signed 16-bit lanes, lowest first: 2, -5, 5, -2.  Loaded into mm6 and
 * used as pmaddwd coefficients in pp_deblock_DefaultMode. */
65 UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;
68 /* Find the minimum byte of r and set it in r; t is destroyed.
 * r and t are MMX register operands pasted into the asm text.
 * pshufw $0xf5 copies word 1 over word 0 (and word 3 over word 2) and
 * pshufw $0xfe copies word 2 over word 0, so each following pminub folds
 * higher lanes onto the lowest one.
 * NOTE(review): as visible here the first pminub directly follows a plain
 * movq copy; a shift of t between them is presumably missing from this
 * view -- confirm against the full file. */
69 #define MMXEXT_GET_PMIN( r, t ) \
70 "movq " #r ", " #t " \n\
72 pminub " #t ", " #r " \n\
73 pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n\
74 pminub " #t ", " #r " \n\
75 pshufw $0xfe, " #r ", " #t " \n\
76 pminub " #t ", " #r " \n"
78 /* Find the maximum byte of r and set it in r; t is destroyed.
 * Same reduction as MMXEXT_GET_PMIN with pmaxub in place of pminub:
 * pshufw $0xf5 copies word 1 over word 0 (and word 3 over word 2) and
 * pshufw $0xfe copies word 2 over word 0 before each pmaxub.
 * NOTE(review): as visible here the first pmaxub directly follows a plain
 * movq copy; a shift of t between them is presumably missing from this
 * view -- confirm against the full file. */
79 #define MMXEXT_GET_PMAX( r, t ) \
80 "movq " #r ", " #t " \n\
82 pmaxub " #t ", " #r " \n\
83 pshufw $0xf5, " #r ", " #t " \n\
84 pmaxub " #t ", " #r " \n\
85 pshufw $0xfe, " #r ", " #t " \n\
86 pmaxub " #t ", " #r " \n"
/* Fold one 8-byte operand s into a running per-byte minimum m and a running
 * per-byte maximum M.  s is loaded into the scratch register t first, so t
 * is destroyed. */
90 #define MMXEXT_GET_LMINMAX( s, m, M, t ) \
91 "movq " #s ", " #t " \n\
92 pminub " #t ", " #m " \n\
93 pmaxub " #t ", " #M " \n"
98 d1 = a - b with unsigned saturate
105 /****************************************************************************
106 * pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
107 ****************************************************************************
108 * Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
110 * Called for each pixel on a block boundary when doing deblocking,
111 * so it needs to be fast ...
113 ****************************************************************************/
/* Decide between DC mode and default mode for the 10 pixels p_v[0..9].
 * Counts the neighbour pairs ( p_v[i], p_v[i+1] ), i = 0..8, whose absolute
 * difference is at most PP_THR1 (see the "algo" comment in the body).
 * Both an MMXEXT asm variant (8 pairs in asm, the 9th pair in C) and a plain
 * C loop are visible; how one of them is selected is not visible in this view.
 *   p_v : pointer to 10 consecutive pixels (p_v[9] is read).
 * Returns 1 when the count reaches PP_THR2, otherwise 0. */
114 static inline int pp_deblock_isDC_mode( u8 *p_v )
120 x = v[i] - v[i+1] without signed saturation
121 ( XXX see if there is'nt problem, but can't be with signed
122 sat because pixel will be saturate :(
123 so x within [-128, 127] and we have to test if it fit in [-M, M]
124 we add 127-M with wrap around -> good value fit in [ 127-2*M, 127]
125 and if x >= 127 - 2 * M ie x > 127 -2*M - 1 value is good
127 __asm__ __volatile__ (" \n\
128 #* Do (v0-v1) to (v7-v8) \n\
129 movq (%1), %%mm1 # load v0->v7 \n\
130 movq 1(%1), %%mm2 # load v1->v8 \n\
131 psubb %%mm2, %%mm1 # v[i]-v[i+1] \n\
132 paddb mmx_127_thr1, %%mm1 # + 127-THR1 with wrap \n\
133 pcmpgtb mmx_127_2xthr1_1, %%mm1 # > 127 -2*thr1 - 1 \n\
134 pxor %%mm0, %%mm0 # mm0 = 0 \n\
135 psadbw %%mm1, %%mm0 \n\
140 : "=r"(i_eq_cnt) : "r" (p_v) );
142 /* last test, hey, 9 don't fit in MMX */
143 if(( ( p_v[8] - p_v[9] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
149 /* algo : if ( | v[i] -v[i+1] | <= PP_THR1 ) { i_eq_cnt++; } */
152 for( i =0; i < 9; i++ )
154 if(( ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
161 return( (i_eq_cnt >= PP_THR2 ) ? 1 : 0 );
/* Check that the spread of the 8 inner pixels is small relative to the
 * quantiser.
 *   p_v  : pointer to the pixel line; only p_v[1]..p_v[8] are examined.
 *   i_QP : quantiser value for this position.
 * Returns 1 when ( max - min ) < 2 * i_QP, otherwise 0.
 * The asm variant reduces the 8 bytes with MMXEXT_GET_PMIN / MMXEXT_GET_PMAX
 * and masks the result to its low byte (andl $255). */
164 static inline int pp_deblock_isMinMaxOk( u8 *p_v, int i_QP )
168 __asm__ __volatile__ (
169 "movq 1(%1), %%mm0 # 8 bytes \n"
170 "movq %%mm0, %%mm1 \n"
171 MMXEXT_GET_PMIN( %%mm0, %%mm7 )
172 MMXEXT_GET_PMAX( %%mm1, %%mm7 )
173 "psubd %%mm0, %%mm1 # max - min \n\
175 andl $255, %0" : "=r"(i_range) : "r"(p_v) );
181 i_min = i_max = p_v[1];
182 for( i = 2; i < 9; i++ )
184 if( i_max < p_v[i] ) i_max = p_v[i];
185 if( i_min > p_v[i] ) i_min = p_v[i];
187 i_range = i_max - i_min;
190 return( i_range< 2*i_QP ? 1 : 0 );
/* Default-mode deblocking on the 10-pixel line i_v.
 * Computes a3x0 = 2*( v3 - v6 ) + 5*( v5 - v4 ).  When 0 < |a3x0| < 8*i_QP it
 * also computes a3x1 (from v1..v4) and a3x2 (from v5..v8), takes the smallest
 * magnitude of the three, and derives d = 5*( |a3x0| - min ) / 8, which is
 * then clipped against i_delta = ( v4 - v5 ) / 2.
 * The asm variants evaluate the same sums with pmaddwd against the
 * ( 2, -5, 5, -2 ) coefficients held in mm6.
 *   i_v      : the 10-pixel line.
 *   i_stride : not referenced in the lines visible here.
 *   i_QP     : quantiser value (used in the |a3x0| < 8*i_QP test).
 * NOTE(review): how d is finally applied to the pixels is not visible in
 * this view. */
194 static inline void pp_deblock_DefaultMode( u8 i_v[10], int i_stride,
198 int a3x0, a3x0_, a3x1, a3x2;
201 /* d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP ) */
203 /* First calculate a3x0 */
204 __asm__ __volatile__ ( " \n\
205 pxor %%mm7, %%mm7 # mm7 = 0 \n\
206 movq mmx_m2_5_m5_2, %%mm6 # mm6 =(2,-5,5,-2) \n\
207 movd 3(%1), %%mm0 \n\
208 punpcklbw %%mm7,%%mm0 \n\
209 pmaddwd %%mm6, %%mm0 \n"
210 "pshufw $0xfe, %%mm0, %%mm1 \n"
211 "paddd %%mm1, %%mm0 \n\
212 movd %%mm0, %0" : "=r"(a3x0) :"r"(i_v) );
214 a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 *( i_v[5] - i_v[4] );
226 /* XXX Now a3x0 is abs( a3x0 ) */
227 if( ( a3x0 < 8 * i_QP )&&( a3x0 != 0 ) ) /* |a3x0| < 8*i_QP */
229 /* calculate a3x1 et a3x2 */
230 __asm__ __volatile__ ( " \n\
232 # mm6 = ( 2, -5, 5, -2 ) \n\
233 movd 1(%2), %%mm0 \n\
234 movd 5(%2), %%mm2 \n\
235 punpcklbw %%mm7,%%mm0 \n\
236 punpcklbw %%mm7,%%mm2 \n\
237 pmaddwd %%mm6, %%mm0 \n\
238 pmaddwd %%mm6, %%mm2 \n\
239 pshufw $0xfe, %%mm0, %%mm1 \n\
240 paddd %%mm1, %%mm0 # mm0 = a3x1 \n\
242 pshufw $0xfe, %%mm2, %%mm1 \n\
243 paddd %%mm1, %%mm2 # mm2 = a3x2 \n\
245 " : "=r"(a3x1), "=r"(a3x2) : "r"(i_v) );
247 a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
248 a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
251 if( a3x1 < 0) a3x1 = -a3x1; /* abs( a3x1 ) */
252 if( a3x2 < 0) a3x2 = -a3x2; /* abs( a3x2 ) */
254 a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );
256 d = 5 *( a3x0 - a3x0_ ) / 8; /* always > 0 */
258 i_delta = ( i_v[4] - i_v[5] ) / 2;
259 /* clip into [0, i_delta] or [i_delta, 0] */
262 if( !b_neg ) /* since true d has sgn(d) = - sgn( a3x0 ) */
265 if( d < i_delta ) d = i_delta;
274 if( d > i_delta ) d = i_delta;
/* DC-mode deblocking: replace p_v[1..8] with a low-pass filtered version of
 * the line.
 * The line is padded at both ends with i_p0 / i_p9: each takes the outer
 * pixel ( p_v[0] / p_v[9] ) when it differs from its inner neighbour by less
 * than i_QP, and the inner neighbour ( p_v[1] / p_v[8] ) otherwise.
 * Every output is a weighted sum whose weights total 16, shifted right by 4
 * (see the C expressions in the body); the asm variant accumulates the same
 * sums as 16-bit words in mm2 (first four pixels) and mm3 (last four).
 *   p_v  : 10-pixel line, modified in place.
 *   i_QP : quantiser value (the parameter itself is on a signature line not
 *          visible in this view; it is used in the i_p0 / i_p9 tests).
 * NOTE(review): the final visible expression (p_v[8]) is cut off in this
 * view. */
284 static inline void pp_deblock_DCMode( u8 *p_v, /* = int i_v[10] */
288 i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
289 i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];
291 /* mm0 = 8 pix unmodified
292 -We will process first 4 pixel
293 mm0 = 8 pix unmodified
294 mm1 = for the first part of the 4 first pix
295 (v1) -> (p0) -> ... ( word )
300 = for the commoin part between first and last pix
301 (v2) -> (v3) -> ... ( word )
306 = for the last part of the 4 last pix
307 (v5) -> (v6) -> ... ( word )
312 mm2 = acu for first new pix
313 mm3 = acu for last pix
318 __asm__ __volatile__ (
319 "pxor %%mm7, %%mm7 \n\
320 movq 1(%0), %%mm0 # get 8 pix \n\
321 # unpack into mm1 \n\
322 movq %%mm0, %%mm1 \n\
323 punpcklbw %%mm7, %%mm1 \n\
324 # get p_0 and i_p9 \n\
329 movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !! \n\
330 movq %%mm1, %%mm2 \n\
331 psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4] \n\
334 por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3)\n\
336 paddw %%mm1, %%mm2 \n\
337 paddw %%mm1, %%mm2 \n\
339 pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, v1, v2)\n\
340 paddw %%mm1, %%mm2 \n\
341 paddw %%mm1, %%mm2 \n\
343 pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, v2)\n\
344 paddw %%mm1, %%mm2 \n\
346 pshufw $0x90,%%mm1,%%mm1 # mm1 =( p0, p0, p0, p0)\n\
347 paddw %%mm1, %%mm2 \n\
348 # Now last part a little borring\n\
349 # last part for mm2, beginig for mm3
350 movq %%mm0, %%mm1 \n\
352 punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 )\n\
353 paddw %%mm1, %%mm2 \n\
354 paddw %%mm1, %%mm2 \n\
355 paddw %%mm1, %%mm3 \n\
358 movq %%mm0, %%mm1 \n\
360 punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 )\n\
362 paddw %%mm1, %%mm2 \n\
363 paddw %%mm1, %%mm3 \n\
365 movq %%mm0, %%mm1 \n\
367 punpcklbw %%mm7, %%mm1 # mm1 =( v4, v5, v6, v7) \n\
368 paddw %%mm1, %%mm2 \n\
369 paddw %%mm1, %%mm3 \n\
370 paddw %%mm1, %%mm3 \n\
372 movq %%mm0, %%mm1 \n\
374 punpcklbw %%mm7, %%mm1 # mm1 =( v5, v6, v7, v8) \n\
375 paddw %%mm1, %%mm2 \n\
377 paddw %%mm1, %%mm3 \n\
378 # Now last part for last 4 pix \n\
380 movq %%mm0, %%mm1 \n\
381 punpckhbw %%mm7, %%mm1 # mm1 = ( v5, v6, v7, v8) \n\
384 por %%mm6, %%mm1 # mm1 =( v6, v7, v8, p9 )\n\
386 paddw %%mm1, %%mm3 \n\
387 paddw %%mm1, %%mm3 \n\
389 pshufw $0xf9,%%mm1,%%mm1 # mm1 =( v7, v8, p9, p9)\n\
390 paddw %%mm1, %%mm3 \n\
391 paddw %%mm1, %%mm3 \n\
393 pshufw $0xf9,%%mm1,%%mm1 # mm1 =( v8, p9, p9, p9)\n\
394 paddw %%mm1, %%mm3 \n\
396 pshufw $0xf9,%%mm1,%%mm1 # mm1 =( p9, p9, p9, p9)\n\
397 paddw %%mm1, %%mm3 \n\
401 packuswb %%mm3, %%mm2 \n\
402 movq %%mm2, 1(%0) \n\
404 ": : "r"(p_v), "r"(i_p0), "r"(i_p9) : "memory" );
407 for( i = 1; i < 9; i++ )
409 v[i] = p_v[i]; /* save 8 pix that will be modified */
412 p_v[1] = ( 6 * i_p0 + 4 * v[1]
413 + 2 *( v[2] + v[3]) + v[4] + v[5]) >> 4;
415 p_v[2] = ( 4 * i_p0 + 2 * v[1] + 4 * v[2]
416 + 2 *( v[3] + v[4]) + v[5] + v[6]) >> 4;
418 p_v[3] = ( 2 * i_p0 + 2 * (v[1] + v[2]) + 4 * v[3]
419 + 2 *( v[4] + v[5]) + v[6] + v[7]) >> 4;
421 p_v[4] = ( i_p0 + v[1] + 2 * (v[2] + v[3]) + 4 * v[4]
422 + 2 *( v[5] + v[6]) + v[7] + v[8]) >> 4;
424 p_v[5] = ( v[1] + v[2] + 2 * (v[3] + v[4]) + 4 * v[5]
425 + 2 *( v[6] + v[7]) + v[8] + i_p9) >> 4;
427 p_v[6] = ( v[2] + v[3] + 2 * (v[4] + v[5]) + 4 * v[6]
428 + 2 *( v[7] + v[8]) + 2 * i_p9) >> 4;
430 p_v[7] = ( v[3] + v[4] + 2 * (v[5] + v[6]) + 4 * v[7]
431 + 2 * v[8] + 4 * i_p9) >> 4;
433 p_v[8] = ( v[4] + v[5] + 2 * (v[6] + v[7]) + 4 * v[8]
441 /*****************************************************************************/
442 /*---------------------------------------------------------------------------*/
444 /* ---------- filter Vertical lines so follow horizontal edges -------- */
446 /*---------------------------------------------------------------------------*/
447 /*****************************************************************************/
/* Deblock across horizontal block edges of one plane.
 * For every row y = 8, 16, ... below i_height - 4 and every column x, the 10
 * vertically adjacent pixels starting 5 rows above y are copied into the
 * local buffer i_v, filtered, and pixels 1..8 are written back to the plane.
 * Filtering: when pp_deblock_isDC_mode() accepts the line, pp_deblock_DCMode()
 * runs if pp_deblock_isMinMaxOk() also passes; pp_deblock_DefaultMode() is
 * the other path (the exact branch structure is not fully visible here).
 *   p_plane                 : plane to filter, modified in place.
 *   i_width, i_height       : plane dimensions in pixels.
 *   i_stride                : distance between two rows of p_plane.
 *   p_QP_store, i_QP_stride : quantiser table and its row stride; the row is
 *                             selected with y >> i_QP_scale.
 *   b_chroma                : selects i_QP_scale ( 5 when set, else 4 ); the
 *                             parameter line itself is not visible here. */
449 void E_( pp_deblock_V )( u8 *p_plane,
450 int i_width, int i_height, int i_stride,
451 QT_STORE_T *p_QP_store, int i_QP_stride,
456 int i_QP_scale; /* use to do ( ? >> i_QP_scale ) */
461 i_QP_scale = b_chroma ? 5 : 4 ;
463 for( y = 8; y < i_height - 4; y += 8 )
465 p_v = p_plane + ( y - 5 )* i_stride;
466 for( x = 0; x < i_width; x++ )
468 /* First get 10 vert pix to use them without i_stride */
469 for( i = 0; i < 10; i++ )
471 i_v[i] = p_v[i*i_stride + x];
474 i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+
476 /* XXX QP is for v5 */
477 if( pp_deblock_isDC_mode( i_v ) )
479 if( pp_deblock_isMinMaxOk( i_v, i_QP ) )
481 pp_deblock_DCMode( i_v, i_QP );
486 pp_deblock_DefaultMode( i_v, i_stride, i_QP );
489 /* Copy back, XXX only 1-8 were modified */
490 for( i = 1; i < 9; i++ )
492 p_v[i*i_stride + x] = i_v[i];
500 /*****************************************************************************/
501 /*---------------------------------------------------------------------------*/
503 /* --------- filter Horizontal lines so follow vertical edges -------- */
505 /*---------------------------------------------------------------------------*/
506 /*****************************************************************************/
/* Deblock across vertical block edges of one plane.
 * For every row y and every column x = 8, 16, ... below i_width - 4, the 10
 * horizontally adjacent pixels starting 5 pixels before x are filtered
 * directly in the plane (no temporary copy, unlike pp_deblock_V).
 * Filtering: when pp_deblock_isDC_mode() accepts the line, pp_deblock_DCMode()
 * runs if pp_deblock_isMinMaxOk() also passes; pp_deblock_DefaultMode() is
 * the other path (the exact branch structure is not fully visible here).
 *   p_plane                 : plane to filter, modified in place.
 *   i_width, i_height       : plane dimensions in pixels.
 *   i_stride                : distance between two rows of p_plane.
 *   p_QP_store, i_QP_stride : quantiser table and its row stride; the row is
 *                             selected with y >> i_QP_scale.
 *   b_chroma                : selects i_QP_scale ( 5 when set, else 4 ); the
 *                             parameter line itself is not visible here. */
508 void E_( pp_deblock_H )( u8 *p_plane,
509 int i_width, int i_height, int i_stride,
510 QT_STORE_T *p_QP_store, int i_QP_stride,
518 i_QP_scale = b_chroma ? 5 : 4 ;
520 for( y = 0; y < i_height; y++ )
522 p_v = p_plane + y * i_stride - 5;
523 for( x = 8; x < i_width - 4; x += 8 )
525 /* p_v point 5 pix before a block boundary */
526 /* XXX QP is for v5 */
527 i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+
529 if( pp_deblock_isDC_mode( p_v + x ) )
531 if( pp_deblock_isMinMaxOk( p_v+ x, i_QP ) )
533 pp_deblock_DCMode( p_v+x, i_QP );
538 pp_deblock_DefaultMode( p_v+x, i_stride, i_QP );
547 /*****************************************************************************
549 * Internal functions common to pp_dering_Y and pp_dering_C
551 *****************************************************************************/
/* Find the smallest and largest pixel value of the 8x8 block at p_block.
 * The asm variant keeps a per-column running min (mm0) and max (mm1) over the
 * 8 rows with MMXEXT_GET_LMINMAX, then reduces each to a single byte with
 * MMXEXT_GET_PMIN / MMXEXT_GET_PMAX; the C variant scans all 64 pixels.
 *   p_block        : top-left pixel of the block.
 *   i_stride       : distance between two rows.
 *   pi_min, pi_max : output locations; callers read the results back through
 *                    them (the stores themselves are not visible here). */
553 static inline void pp_dering_MinMax( u8 *p_block, int i_stride,
554 int *pi_min, int *pi_max )
557 /* First we will extract min/max for each pix on vertical line
558 and next extract global min/max */
559 __asm__ __volatile__(
561 "leal (%2,%3), %%eax \n\
562 movq (%2), %%mm0 #load line \n\
563 movq %%mm0, %%mm1 \n"
565 MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
566 MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
567 MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
568 MMXEXT_GET_LMINMAX( (%2, %3, 4), %%mm0, %%mm1, %%mm7 )
569 "leal (%%eax,%3,4), %%eax \n"
571 MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
572 MMXEXT_GET_LMINMAX( (%%eax, %3), %%mm0, %%mm1, %%mm7 )
573 MMXEXT_GET_LMINMAX( (%%eax, %3,2), %%mm0, %%mm1, %%mm7 )
574 MMXEXT_GET_PMIN( %%mm0, %%mm7 )
575 MMXEXT_GET_PMAX( %%mm1, %%mm7 )
576 "movd %%mm0, %%eax \n\
579 movd %%mm1, %%eax \n\
583 : : "r"(pi_min), "r"(pi_max), "r"(p_block), "r"(i_stride) : "%eax", "memory" );
586 i_min = 255; i_max = 0;
588 for( y = 0; y < 8; y++ )
590 for( x = 0; x < 8; x++ )
592 if( i_min > p_block[x] ) i_min = p_block[x];
593 if( i_max < p_block[x] ) i_max = p_block[x];
/* Build one bit mask per row for the 10 rows starting at p_block.
 * Bit x of the row mask records the comparison of p_block[x] with i_thr for
 * x = 0..9; the complement of the mask is then placed 16 bits higher so that
 * runs of three equal bits can be detected for both 1s and 0s, and the result
 * is stored through p_bin.
 * The asm variant does 8 pixels with psubusb/pcmpeqb/pmovmskb and the last
 * two in C; a plain C variant is also visible further down.
 *   p_block  : top-left pixel of the 10-pixel-wide region.
 *   i_stride : distance between two rows (its use is not visible here).
 *   i_thr    : threshold the pixels are compared with.
 *   p_bin    : output masks (the parameter line itself is not visible here). */
604 static inline void pp_dering_BinIndex( u8 *p_block, int i_stride, int i_thr,
610 /* first create mm7 with all bytes set to thr and mm6 = 0 */
611 __asm__ __volatile__(
614 "movd %%eax, %%mm7 \n"
615 "pshufw $0x00, %%mm7, %%mm7 \n"
616 "pxor %%mm6, %%mm6 \n"
617 : : "r"(i_thr) : "%eax" );
619 for( y = 0; y < 10; y++ )
621 __asm__ __volatile__(
622 "movq (%1), %%mm0 \n"
623 "psubusb %%mm7, %%mm0 \n" /* sat makes that x <= thr --> 0 */
624 "pcmpeqb %%mm6, %%mm0 \n" /* p_block <= i_thr ? -1 : 0 */
625 "pmovmskb %%mm0, %0 \n" /* i_bin msb of each bytes */
626 : "=r"(i_bin) :"r"(p_block) );
627 /* Now last 2 tests */
628 if( p_block[8] <= i_thr ) i_bin |= 1 << 8;
629 if( p_block[9] <= i_thr ) i_bin |= 1 << 9;
631 i_bin |= (~i_bin) << 16; /* for detect three 1 or three 0*/
/* NOTE(review): the next statement combines the three terms with logical &&,
 * so *p_bin can only become 0 or 1, whereas the plain C variant below uses
 * bitwise & on the same three terms and keeps a per-position mask.  Bitwise &
 * looks intended -- confirm and fix. */
632 *p_bin = ( i_bin >> 1 )&&( i_bin )&&( i_bin << 1 );
640 for( y = 0; y < 10; y++ )
643 for( x = 0; x < 10; x++ )
645 if( p_block[x] > i_thr )
650 i_bin |= (~i_bin) << 16; /* for detect also three 0 */
651 *p_bin = i_bin&( i_bin >> 1 )&( i_bin << 1 );
/* Adaptive smoothing of one 8x8 block, driven by the row masks in p_bin.
 * For each of the 8 rows the masks of rows y, y+1 and y+2 are ANDed and the
 * upper 16 bits are folded onto the lower ones.  Where bit 0x02 of that mask
 * is set, the pixel is replaced by a rounded 3x3 weighted average ( weights
 * 1 2 1 / 2 4 2 / 1 2 1, sum 16, hence "( 8 + i_f ) >> 4" ); otherwise the
 * pixel is kept.  Results go to the temporary i_flt.
 * A second pass clamps every filtered value to within i_QP/2 of the pixel
 * read through p_sav ( psubusb / paddusb give the saturated bounds, pmaxub /
 * pminub apply them ) and stores it back through p_sav.
 *   p_block  : top-left pixel of the block; neighbours one row/column outside
 *              the block are read.
 *   i_stride : distance between two rows.
 * NOTE(review): the remaining parameters, the per-pixel shift of i_bin and
 * the setup of p_sav are on lines not visible in this view. */
660 static inline void pp_dering_Filter( u8 *p_block, int i_stride,
674 for( y = 0; y < 8; y++ )
676 i_bin = p_bin[y] & p_bin[y+1] & p_bin[y+2]; /* To be optimised */
677 i_bin |= i_bin >> 16; /* detect 0 or 1 */
679 for( x = 0; x < 8; x++ )
681 if( i_bin&0x02 ) /* 0x02 since 10 index but want 1-9 */
687 i_f = p_block[x - i_stride - 1] +
688 ( p_block[x - i_stride ] << 1)+
689 p_block[x - i_stride + 1] +
691 ( p_block[x - 1] << 1 )+
692 ( p_block[x ] << 2 )+
693 ( p_block[x + 1] << 1 )+
695 p_block[x + i_stride - 1] +
696 ( p_block[x + i_stride ] << 1 ) +
697 p_block[x + i_stride + 1];
699 i_flt[y][x] = ( 8 + i_f ) >> 4;
703 i_flt[y][x] = p_block[x];
712 /* Create mm7 with all bytes set to QP/2 */
713 __asm__ __volatile__(
715 "shrl $1, %%eax \n" /* i_QP/2 */
717 "movd %%eax, %%mm7 \n"
718 "pshufw $0x00, %%mm7, %%mm7 \n"
719 : : "r"(i_QP) : "%eax" );
721 for( y = 0; y < 8; y++ )
723 /* clamp those values and copy them */
724 __asm__ __volatile__(
725 "movq (%0), %%mm0 \n" /* mm0 = i_ftl[y][0] ... i_ftl[y][7] */
726 "movq (%1), %%mm1 \n" /* mm1 = p_sav[0] ... p_sav[7] */
727 "movq %%mm1, %%mm2 \n"
728 "psubusb %%mm7, %%mm1 \n" /* mm1 = psav - i_QP/2 ( >= 0 ) */
729 "paddusb %%mm7, %%mm2 \n" /* mm2 = psav + i_QP/2 ( <= 255 ) */
730 "pmaxub %%mm1, %%mm0 \n" /* psav - i_QP/2 <= mm0 */
731 "pminub %%mm2, %%mm0 \n" /* mm0 <= psav + i_QP/2 */
732 "movq %%mm0, (%1) \n"
733 : :"r"(i_flt[y]), "r"(p_sav) : "memory" );
740 /*****************************************************************************/
741 /*---------------------------------------------------------------------------*/
743 /* ----------------- Dering filter on Y and C blocks ----------------- */
745 /*---------------------------------------------------------------------------*/
746 /*****************************************************************************/
/* Dering filter for a luma plane, four 8x8 blocks (one 16x16 area) per step.
 * Starting at (8,8) and advancing by 16 in x and y while staying 8 pixels
 * away from the right/bottom border, each step:
 *   1. gets min/max of the four blocks (pp_dering_MinMax) and derives a range
 *      and a threshold ( max + min + 1 ) / 2 per block, then rearranges the
 *      thresholds depending on the ranges;
 *   2. builds the bit masks of the 10x10 region around each block
 *      (pp_dering_BinIndex, starting one row and one column before the block);
 *   3. smooths each block (pp_dering_Filter) with that block's own QP taken
 *      from p_QP_store at ( y >> 4, x >> 4 ) and its right/lower neighbours.
 *   p_plane                 : plane to filter, modified in place.
 *   i_width, i_height       : plane dimensions in pixels.
 *   i_stride                : distance between two rows of p_plane.
 *   p_QP_store, i_QP_stride : quantiser table and its row stride. */
748 void E_( pp_dering_Y )( u8 *p_plane,
749 int i_width, int i_height, int i_stride,
750 QT_STORE_T *p_QP_store, int i_QP_stride )
753 int i_max[4], i_min[4], i_range[4];
755 int i_max_range, i_kmax;
760 /* We process 4 blocks/loop*/
761 for( y = 8; y < i_height-8; y += 16 )
769 p_block[0] = p_plane + y * i_stride + 8;
770 p_block[1] = p_block[0] + 8;
771 p_block[2] = p_block[0] + ( i_stride << 3 );
772 p_block[3] = p_block[2] + 8;
774 for( x = 8; x < i_width-8; x += 16 )
776 /* 1: Calculate threshold */
777 /* Calculate max/min for each block */
778 pp_dering_MinMax( p_block[0], i_stride, &i_min[0], &i_max[0] );
779 pp_dering_MinMax( p_block[1], i_stride, &i_min[1], &i_max[1] );
780 pp_dering_MinMax( p_block[2], i_stride, &i_min[2], &i_max[2] );
781 pp_dering_MinMax( p_block[3], i_stride, &i_min[3], &i_max[3] );
782 /* Calculate range, max_range and thr */
783 i_max_range = 0; i_kmax = 0;
784 for( k = 0; k < 4; k++ )
786 i_range[k] = i_max[k] - i_min[k];
787 i_thr[k] = ( i_max[k] + i_min[k] + 1 )/2;
/* NOTE(review): i_max_range is compared with and set from i_max[k], although
 * i_range[k] was computed just above and the variable is named "max range"
 * -- confirm whether i_range[k] was intended. */
788 if( i_max_range < i_max[k])
790 i_max_range = i_max[k];
794 /* Now rearrange thr */
795 if( i_max_range > 64 )
/* NOTE(review): this loop runs k = 1..4, so k == 4 indexes i_range[4], one
 * past the end of the 4-element array declared above, and block 0 is never
 * visited.  "k = 0; k < 4" looks intended -- confirm and fix. */
797 for( k = 1; k < 5; k++ )
799 if( i_range[k] < 16 )
804 if( i_range[k] < 32 )
806 i_thr[k] = i_thr[i_kmax];
/* NOTE(review): same k = 1..4 bounds as the loop above -- i_range[4] is out
 * of bounds and block 0 is skipped. */
812 for( k = 1; k < 5; k++ )
814 if( i_range[k] < 16 )
820 /* 2: Index acquisition 10x10 ! so " -i_stride - 1"*/
821 pp_dering_BinIndex( p_block[0] - i_stride - 1, i_stride,
822 i_thr[0], i_bin[0] );
823 pp_dering_BinIndex( p_block[1] - i_stride - 1, i_stride,
824 i_thr[1], i_bin[1] );
825 pp_dering_BinIndex( p_block[2] - i_stride - 1, i_stride,
826 i_thr[2], i_bin[2] );
827 pp_dering_BinIndex( p_block[3] - i_stride - 1, i_stride,
828 i_thr[3], i_bin[3] );
831 /* 3: adaptive smoothing */
832 /* since we begin at (8,8) QP can be different for each block */
833 p_QP = &( p_QP_store[( y >> 4) * i_QP_stride + (x >> 4)] );
835 pp_dering_Filter( p_block[0], i_stride,
838 pp_dering_Filter( p_block[1], i_stride,
841 pp_dering_Filter( p_block[2], i_stride,
842 i_bin[2], p_QP[i_QP_stride] );
844 pp_dering_Filter( p_block[3], i_stride,
845 i_bin[3], p_QP[i_QP_stride+1] );
/* Dering filter for a chroma plane, one 8x8 block per step.
 * Starting at (8,8) and advancing by 8 in x and y while staying 8 pixels away
 * from the right/bottom border, each step gets the block's min/max
 * (pp_dering_MinMax), uses ( max + min + 1 ) / 2 as threshold, builds the bit
 * masks of the surrounding 10x10 region (pp_dering_BinIndex) and smooths the
 * block (pp_dering_Filter) with the QP read from p_QP_store at
 * ( y >> 5, x >> 5 ).
 *   p_plane                 : plane to filter, modified in place.
 *   i_width, i_height       : plane dimensions in pixels.
 *   i_stride                : distance between two rows of p_plane.
 *   p_QP_store, i_QP_stride : quantiser table and its row stride. */
856 void E_( pp_dering_C )( u8 *p_plane,
857 int i_width, int i_height, int i_stride,
858 QT_STORE_T *p_QP_store, int i_QP_stride )
868 for( y = 8; y < i_height-8; y += 8 )
871 p_block = p_plane + y * i_stride + 8;
872 for( x = 8; x < i_width-8; x += 8 )
875 /* 1: Calculate threshold */
876 /* Calculate max/min for each block */
877 pp_dering_MinMax( p_block, i_stride,
880 i_thr = ( i_max + i_min + 1 )/2;
882 /* 2: Index acquisition 10x10 */
883 /* point on 10x10 in wich we have our 8x8 block */
884 pp_dering_BinIndex( p_block - i_stride -1, i_stride,
888 /* 3: adaptive smoothing */
889 pp_dering_Filter( p_block, i_stride,
891 p_QP_store[(y>>5)*i_QP_stride+ (x>>5)]);