2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
93 #include "mangle.h" //FIXME should be supressed
99 #define GET_MODE_BUFFER_SIZE 500
100 #define OPTIONS_ARRAY_SIZE 10
102 #define TEMP_STRIDE 8
103 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants that are only compiled when ARCH_X86 is defined.
 * Going by the literals, each wNN value repeats one 16-bit pattern four times
 * and each bNN value repeats one byte eight times. All are declared 8-byte
 * aligned and tagged attribute_used. Nothing in this part of the file reads
 * them; presumably the inline assembly in the included templates does
 * (NOTE(review): confirm against postprocess_template.c). */
105 #if defined(ARCH_X86)
106 static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
107 static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
108 static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
109 static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
110 static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
111 static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
112 static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
113 static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
/* Backing storage for a 3*256 byte lookup table; filled by global_init(). */
116 static uint8_t clip_table[3*256];
/* Points 256 entries into clip_table, so indices -256..511 stay inside the array. */
117 static uint8_t * const clip_tab= clip_table + 256;
/* Threshold constant, presumably for the dering filter; it is not referenced
 * in this part of the file (NOTE(review): confirm where it is used). */
119 static const int attribute_used deringThreshold= 20;
/* Table of the filters that pp_get_mode_by_name_and_quality() recognises.
 * That parser matches a requested name against both .shortName and .longName
 * and then uses .mask, .minLumQuality, .minChromQuality and .chromDefault.
 * The initializer order is presumably
 *   { shortName, longName, chromDefault, minLumQuality, minChromQuality, mask }
 * (NOTE(review): confirm against struct PPFilter in postprocess_internal.h).
 * The entry whose shortName is NULL terminates the table. */
122 static struct PPFilter filters[]=
124 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
125 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
126 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
127 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
128 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
129 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
130 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
131 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
132 {"dr", "dering", 1, 5, 6, DERING},
133 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
134 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
138 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
139 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
140 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
141 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
142 {NULL, NULL,0,0,0,0} //End Marker
/* Alias table stored as flat (name, expansion) pairs: element 2*i is the alias
 * and element 2*i+1 is the filter string spliced in for it by
 * pp_get_mode_by_name_and_quality(). That function stops at the first NULL
 * name, so the table must end with a NULL entry (the terminator line is not
 * visible in this view). */
145 static const char *replaceTable[]=
147 "default", "hdeblock:a,vdeblock:a,dering:a",
148 "de", "hdeblock:a,vdeblock:a,dering:a",
149 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a",
150 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a",
151 "ac", "ha:a:128:7,va:a,dering:a",
/* x86-only helpers (guarded by ARCH_X86). Each one wraps a single inline-asm
 * prefetch instruction -- prefetchnta, prefetcht0, prefetcht1, prefetcht2 --
 * applied to the address p. The asm operand lists are not visible in this
 * view. */
156 #if defined(ARCH_X86)
157 static inline void prefetchnta(void *p)
159 asm volatile( "prefetchnta (%0)\n\t"
164 static inline void prefetcht0(void *p)
166 asm volatile( "prefetcht0 (%0)\n\t"
171 static inline void prefetcht1(void *p)
173 asm volatile( "prefetcht1 (%0)\n\t"
178 static inline void prefetcht2(void *p)
180 asm volatile( "prefetcht2 (%0)\n\t"
186 // The horizontal Functions exist only in C because the MMX code is faster with vertical filters and transposing
189 * Check if the given 8x8 Block is mostly "flat"
/* For each of BLOCK_SIZE rows, the 7 horizontally adjacent sample pairs
 * src[0..7] are compared; a pair counts as "equal" when its difference d
 * satisfies -dcOffset <= d <= dcOffset. The unsigned cast folds both bounds
 * into the single comparison (unsigned)(d + dcOffset) < 2*dcOffset + 1.
 * dcOffset is derived from c->nonBQP and c->ppMode.baseDcDiff.
 * Returns non-zero when the number of "equal" pairs exceeds
 * c->ppMode.flatnessThreshold.
 * The counter declaration and the per-row advance of src are not visible in
 * this view (presumably src += stride each row). */
191 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
195 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
196 const int dcThreshold= dcOffset*2 + 1;
198 for(y=0; y<BLOCK_SIZE; y++)
200 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
209 return numEq > c->ppMode.flatnessThreshold;
213 * Check if the middle 8x8 Block in the given 8x16 block is flat
/* Vertical counterpart of isHorizDC_C: after skipping 4 rows, each of
 * BLOCK_SIZE-1 row steps compares the 8 samples of a row with the samples
 * directly below them (offset +stride). A pair counts as "equal" when its
 * difference lies in [-dcOffset, dcOffset].
 * Returns non-zero when the count exceeds c->ppMode.flatnessThreshold.
 * The per-row advance of src is not visible in this view. */
215 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
218 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
219 const int dcThreshold= dcOffset*2 + 1;
221 src+= stride*4; // src points to begin of the 8x8 Block
222 for(y=0; y<BLOCK_SIZE-1; y++)
224 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
234 return numEq > c->ppMode.flatnessThreshold;
/* Range check on horizontally separated samples. Every visible test returns 0
 * as soon as the difference of two samples falls outside [-2*QP, 2*QP]; the
 * unsigned cast turns that two-sided test into the single comparison
 * (unsigned)(d + 2*QP) > 4*QP.
 * The first four tests sample pairs that are 5 or 3 columns apart, the last
 * one compares columns 0 and 7. The loop structure, the row advance and the
 * preprocessor lines that presumably select between these two variants are
 * not visible in this view, nor is the success return value. */
237 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
242 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
244 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
246 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
248 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
253 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
/* Vertical range check. Three alternative implementations are visible; the
 * preprocessor lines choosing between them are not shown in this view:
 *  - a sparse test stepping x by 4 and comparing samples 5 or 3 rows apart
 *    in four neighbouring columns,
 *  - a per-column test comparing the row at +stride with the row at +8*stride,
 *  - a per-column min/max scan that fails when max-min > 2*QP.
 * The first two return 0 when a sample difference is outside
 * [-2*QP, 2*QP]. */
260 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
266 for(x=0; x<BLOCK_SIZE; x+=4)
268 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
271 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
276 for(x=0; x<BLOCK_SIZE; x++)
278 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
285 for(x=0; x<BLOCK_SIZE; x++)
291 int v= src[x + y*stride];
295 if(max-min > 2*QP) return 0;
/* Classifies a block for horizontal filtering: first tests flatness with
 * isHorizDC_C(), and for flat blocks additionally tests the sample range with
 * isHorizMinMaxOk_C() using c->QP. The values returned for each outcome are
 * not visible in this view. */
301 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
302 if( isHorizDC_C(src, stride, c) ){
303 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/* Classifies a block for vertical filtering: first tests flatness with
 * isVertDC_C(), and for flat blocks additionally tests the sample range with
 * isVertMinMaxOk_C() using c->QP. The values returned for each outcome are
 * not visible in this view. */
312 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
313 if( isVertDC_C(src, stride, c) ){
314 if( isVertMinMaxOk_C(src, stride, c->QP) )
/* Default horizontal deblocking filter, applied to BLOCK_SIZE rows of dst.
 * For each row it computes a "middle energy" across the boundary between
 * dst[3] and dst[4]; only when its magnitude is below 8*c->QP does it go on
 * to compute the left/right energies and a correction d, which is given the
 * sign opposite to middleEnergy.
 * q is half the step dst[3]-dst[4]. The lines that clamp d, apply it to dst
 * and advance to the next row are not visible in this view. */
323 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
326 for(y=0; y<BLOCK_SIZE; y++)
328 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
330 if(FFABS(middleEnergy) < 8*c->QP)
332 const int q=(dst[3] - dst[4])/2;
333 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
334 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
336 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
340 d*= FFSIGN(-middleEnergy);
361 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
362 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/* Per row: "first"/"last" are the outer neighbours dst[-1]/dst[8] when they
 * differ from the edge sample by less than c->QP, otherwise the edge sample
 * itself is reused as padding. sums[] holds running sums built incrementally
 * (each entry drops one term and adds one), with the rounding constant 4
 * folded into sums[0]. Every output is
 *   (sums[k] + sums[k+2] + 2*dst[k]) >> 4.
 * Reads dst[-1]..dst[8] and overwrites dst[0]..dst[7].
 * All sums are computed before the first store, so later outputs still use
 * the original samples. The declaration of sums[] and the row advance are
 * not visible in this view. */
364 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
367 for(y=0; y<BLOCK_SIZE; y++)
369 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
370 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
373 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
374 sums[1] = sums[0] - first + dst[3];
375 sums[2] = sums[1] - first + dst[4];
376 sums[3] = sums[2] - first + dst[5];
377 sums[4] = sums[3] - first + dst[6];
378 sums[5] = sums[4] - dst[0] + dst[7];
379 sums[6] = sums[5] - dst[1] + last;
380 sums[7] = sums[6] - dst[2] + last;
381 sums[8] = sums[7] - dst[3] + last;
382 sums[9] = sums[8] - dst[4] + last;
384 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
385 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
386 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
387 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
388 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
389 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
390 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
391 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
398 * Experimental Filter 1 (Horizontal)
399 * will not damage linear gradients
400 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
401 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
402 * MMX2 version does correct clipping C version does not
403 * not identical with the vertical one
405 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
408 static uint64_t *lut= NULL;
412 lut = av_malloc(256*8);
415 int v= i < 128 ? 2*i : 2*(i-256);
417 //Simulate 112242211 9-Tap filter
418 uint64_t a= (v/16) & 0xFF;
419 uint64_t b= (v/8) & 0xFF;
420 uint64_t c= (v/4) & 0xFF;
421 uint64_t d= (3*v/8) & 0xFF;
423 //Simulate piecewise linear interpolation
424 uint64_t a= (v/16) & 0xFF;
425 uint64_t b= (v*3/16) & 0xFF;
426 uint64_t c= (v*5/16) & 0xFF;
427 uint64_t d= (7*v/16) & 0xFF;
428 uint64_t A= (0x100 - a)&0xFF;
429 uint64_t B= (0x100 - b)&0xFF;
430 uint64_t C= (0x100 - c)&0xFF;
431 uint64_t D= (0x100 - c)&0xFF;
433 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
434 (D<<24) | (C<<16) | (B<<8) | (A);
435 //lut[i] = (v<<32) | (v<<24);
439 for(y=0; y<BLOCK_SIZE; y++)
441 int a= src[1] - src[2];
442 int b= src[3] - src[4];
443 int c= src[5] - src[6];
445 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449 int v = d * FFSIGN(-b);
464 * accurate deblock filter
/* Direction-agnostic deblocking: "step" is the distance between consecutive
 * samples along the filtered direction and "stride" presumably the distance
 * between the lines processed (the loop over lines is not visible in this
 * view). Visible stages per line:
 *  1. After skipping 4 samples, count how many of the 9 neighbouring pairs
 *     src[-1*step]..src[8*step] differ by at most dcOffset (numEq).
 *  2. If numEq exceeds c->ppMode.flatnessThreshold the line is treated as
 *     flat: min/max of the samples are gathered, then the same running-sum
 *     9-tap low pass as doHorizLowPass_C is applied (the condition on max-min
 *     that gates it is not visible).
 *  3. Otherwise the default filter is used: middle/left/right energies and a
 *     correction d signed opposite to middleEnergy, as in doHorizDefFilter_C.
 * QP is a local whose definition is not visible here (presumably c->QP). */
466 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
469 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
470 const int dcThreshold= dcOffset*2 + 1;
472 src+= step*4; // src points to begin of the 8x8 Block
476 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
485 if(numEq > c->ppMode.flatnessThreshold){
488 if(src[0] > src[step]){
/* Pairwise min/max scan: each comparison orders two neighbours first so that
 * only one max test and one min test are needed per pair. */
496 if(src[x*step] > src[(x+1)*step]){
497 if(src[x *step] > max) max= src[ x *step];
498 if(src[(x+1)*step] < min) min= src[(x+1)*step];
500 if(src[(x+1)*step] > max) max= src[(x+1)*step];
501 if(src[ x *step] < min) min= src[ x *step];
/* Edge padding: use the outer neighbour only if it is within QP of the edge sample. */
505 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
506 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
509 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
510 sums[1] = sums[0] - first + src[3*step];
511 sums[2] = sums[1] - first + src[4*step];
512 sums[3] = sums[2] - first + src[5*step];
513 sums[4] = sums[3] - first + src[6*step];
514 sums[5] = sums[4] - src[0*step] + src[7*step];
515 sums[6] = sums[5] - src[1*step] + last;
516 sums[7] = sums[6] - src[2*step] + last;
517 sums[8] = sums[7] - src[3*step] + last;
518 sums[9] = sums[8] - src[4*step] + last;
520 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
521 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
522 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
523 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
524 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
525 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
526 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
527 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Non-flat path: default filter across the boundary between samples 3 and 4. */
530 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
532 if(FFABS(middleEnergy) < 8*QP)
534 const int q=(src[3*step] - src[4*step])/2;
535 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
538 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
542 d*= FFSIGN(-middleEnergy);
/* Build configuration for the per-CPU variants. The HAVE_xxx, ARCH_xxx and
 * RUNTIME_CPUDETECT macros decide which COMPILE_xxx switches get defined.
 * postprocess_template.c is then included once per variant, with RENAME()
 * redefined each time so that every templated function gets a distinct suffix
 * (_C, _altivec, _MMX, _MMX2, _3DNow). With RUNTIME_CPUDETECT every variant
 * for the architecture is compiled.
 * Most of the COMPILE_xxx defines, the matching #endif lines and the
 * per-variant HAVE_xxx adjustments are not visible in this view. */
569 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
571 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
577 #define COMPILE_ALTIVEC
578 #endif //HAVE_ALTIVEC
579 #endif //ARCH_POWERPC
581 #if defined(ARCH_X86)
583 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
591 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
592 #define COMPILE_3DNOW
594 #endif /* defined(ARCH_X86) */
/* Plain C instantiation. */
605 #define RENAME(a) a ## _C
606 #include "postprocess_template.c"
/* AltiVec instantiation; it pulls in the AltiVec-specific template first. */
610 #ifdef COMPILE_ALTIVEC
613 #define RENAME(a) a ## _altivec
614 #include "postprocess_altivec_template.c"
615 #include "postprocess_template.c"
617 #endif //ARCH_POWERPC
/* x86 instantiations. */
625 #define RENAME(a) a ## _MMX
626 #include "postprocess_template.c"
635 #define RENAME(a) a ## _MMX2
636 #include "postprocess_template.c"
645 #define RENAME(a) a ## _3DNow
646 #include "postprocess_template.c"
649 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/* Dispatches one plane to the postProcess_<variant>() instantiation generated
 * from the template.
 * It first copies *vm into c->ppMode, so the variant reads the mode from the
 * context. With RUNTIME_CPUDETECT the variant is picked from c->cpuCaps at
 * run time (MMX2, then 3DNow, then MMX, else C on x86; AltiVec else C
 * otherwise). Without it the choice is fixed at compile time by the HAVE_xxx
 * macros. All arguments except vm are forwarded unchanged.
 * Several #else / #ifdef / #endif lines of the selection logic are not
 * visible in this view. */
651 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
652 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
654 PPContext *c= (PPContext *)vc;
655 PPMode *ppMode= (PPMode *)vm;
656 c->ppMode= *ppMode; //FIXME
658 // Using ifs here as they are faster than function pointers although the
659 // difference would not be measurable here but it is much better because
660 // someone might exchange the CPU without restarting MPlayer ;)
661 #ifdef RUNTIME_CPUDETECT
662 #if defined(ARCH_X86)
663 // ordered per speed fastest first
664 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
665 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
667 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
669 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
676 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682 #else //RUNTIME_CPUDETECT
684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 #elif defined (HAVE_3DNOW)
686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 #elif defined (HAVE_MMX)
688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 #elif defined (HAVE_ALTIVEC)
690 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
694 #endif //!RUNTIME_CPUDETECT
697 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
698 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
/* -pp Command line Help
 *
 * Help text describing the filter names and options understood by
 * pp_get_mode_by_name_and_quality(). The long names listed here must match
 * the longName column of filters[] exactly, because the parser compares them
 * with a case-sensitive strcmp(). Earlier text advertised "hadeblock",
 * "vadeblock" and "forceQuant", none of which the parser accepts; they now
 * read "ahdeblock", "avdeblock" and "forcequant".
 *
 * NOTE(review): the declarator and the terminating ';' were not visible in
 * this view and are restored here; the declared type is assumed to be the
 * public "char *pp_help" -- confirm against postprocess.h. Any short string
 * lines that were dropped from this view are not re-created.
 */
char *pp_help=
"Available postprocessing filters:\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
;
/* Parses a postprocessing description string such as "hb:a,vb:a,dr:a" into a
 * newly allocated PPMode.
 *
 * name    - list of filters separated by ',' or '/'; each filter may carry
 *           ':'-separated options and may be prefixed with '-' to disable it.
 * quality - value substituted for the per-filter quality when the "autoq"/"a"
 *           option is given; without that option the quality is 1000000, so
 *           every quality threshold passes.
 *
 * The result starts from the defaults set below; ppMode->error counts unknown
 * filter names and leftover (unconsumed) options. What is returned when
 * errors were found is not visible in this view.
 *
 * NOTE(review): tokenising uses strtok(), which keeps hidden static state, so
 * this function is not reentrant. */
746 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
748 char temp[GET_MODE_BUFFER_SIZE];
750 const char *filterDelimiters= ",/";
751 const char *optionDelimiters= ":";
752 struct PPMode *ppMode;
/* NOTE(review): no NULL check of this allocation is visible before the
 * stores below. */
755 ppMode= av_malloc(sizeof(PPMode));
758 ppMode->chromMode= 0;
759 ppMode->maxTmpNoise[0]= 700;
760 ppMode->maxTmpNoise[1]= 1500;
761 ppMode->maxTmpNoise[2]= 3000;
762 ppMode->maxAllowedY= 234;
763 ppMode->minAllowedY= 16;
764 ppMode->baseDcDiff= 256/8;
765 ppMode->flatnessThreshold= 56-16-1;
766 ppMode->maxClippedThreshold= 0.01;
/* Work on a private copy because strtok() writes into its input.
 * NOTE(review): strncpy() leaves temp without a terminating NUL when name has
 * GET_MODE_BUFFER_SIZE or more characters; no explicit terminator is visible
 * here. */
769 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
771 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
775 int q= 1000000; //PP_QUALITY_MAX;
/* Options that are not one of the generic flags are collected here,
 * NULL-terminated, for the filter-specific handling further down. */
779 char *options[OPTIONS_ARRAY_SIZE];
782 int numOfUnknownOptions=0;
783 int enable=1; //does the user want us to enabled or disabled the filter
/* Split off the next filter, then restart strtok() on that filter alone to
 * separate its name from its options. p is advanced manually past the token
 * so the outer scan can resume after the inner tokenisation. */
785 filterToken= strtok(p, filterDelimiters);
786 if(filterToken == NULL) break;
787 p+= strlen(filterToken) + 1; // p points to next filterToken
788 filterName= strtok(filterToken, optionDelimiters);
789 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
791 if(*filterName == '-')
797 for(;;){ //for all options
798 option= strtok(NULL, optionDelimiters);
799 if(option == NULL) break;
801 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
802 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
803 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
804 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
805 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
808 options[numOfUnknownOptions] = option;
809 numOfUnknownOptions++;
/* Stop one slot early so the NULL terminator written below always fits. */
811 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
813 options[numOfUnknownOptions] = NULL;
815 /* replace stuff from the replace Table */
816 for(i=0; replaceTable[2*i]!=NULL; i++)
818 if(!strcmp(replaceTable[2*i], filterName))
/* An alias matched: splice its expansion into temp in front of the
 * not-yet-parsed remainder, so it is tokenised on the next iteration. */
820 int newlen= strlen(replaceTable[2*i + 1]);
824 if(p==NULL) p= temp, *p=0; //last filter
825 else p--, *p=','; //not last filter
/* Refuse the splice if the expanded string would not fit into temp. */
828 spaceLeft= p - temp + plen;
829 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
834 memmove(p + newlen, p, plen+1);
835 memcpy(p, replaceTable[2*i + 1], newlen);
840 for(i=0; filters[i].shortName!=NULL; i++)
842 if( !strcmp(filters[i].longName, filterName)
843 || !strcmp(filters[i].shortName, filterName))
/* Clear the filter's bit first; it is set again below only if enabled and
 * the quality thresholds are met. */
845 ppMode->lumMode &= ~filters[i].mask;
846 ppMode->chromMode &= ~filters[i].mask;
849 if(!enable) break; // user wants to disable it
851 if(q >= filters[i].minLumQuality && luma)
852 ppMode->lumMode|= filters[i].mask;
853 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
854 if(q >= filters[i].minChromQuality)
855 ppMode->chromMode|= filters[i].mask;
/* Filter-specific options. Every option consumed here decrements
 * numOfUnknownOptions so it is not reported as an error later. */
857 if(filters[i].mask == LEVEL_FIX)
860 ppMode->minAllowedY= 16;
861 ppMode->maxAllowedY= 234;
862 for(o=0; options[o]!=NULL; o++)
864 if( !strcmp(options[o],"fullyrange")
865 ||!strcmp(options[o],"f"))
867 ppMode->minAllowedY= 0;
868 ppMode->maxAllowedY= 255;
869 numOfUnknownOptions--;
873 else if(filters[i].mask == TEMP_NOISE_FILTER)
/* Up to three numeric thresholds, stored in maxTmpNoise[] in order. */
878 for(o=0; options[o]!=NULL; o++)
881 ppMode->maxTmpNoise[numOfNoises]=
882 strtol(options[o], &tail, 0);
886 numOfUnknownOptions--;
887 if(numOfNoises >= 3) break;
891 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
892 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
/* First numeric option sets baseDcDiff, the second flatnessThreshold. */
896 for(o=0; options[o]!=NULL && o<2; o++)
899 int val= strtol(options[o], &tail, 0);
900 if(tail==options[o]) break;
902 numOfUnknownOptions--;
903 if(o==0) ppMode->baseDcDiff= val;
904 else ppMode->flatnessThreshold= val;
907 else if(filters[i].mask == FORCE_QUANT)
/* Forced quantizer defaults to 15 unless a numeric option overrides it. */
910 ppMode->forcedQuant= 15;
912 for(o=0; options[o]!=NULL && o<1; o++)
915 int val= strtol(options[o], &tail, 0);
916 if(tail==options[o]) break;
918 numOfUnknownOptions--;
919 ppMode->forcedQuant= val;
924 if(!filterNameOk) ppMode->error++;
925 ppMode->error += numOfUnknownOptions;
928 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
931 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Releases a mode object; presumably one obtained from
 * pp_get_mode_by_name_and_quality() (the body is not visible in this view). */
938 void pp_free_mode(pp_mode_t *mode){
/* Replaces *p with a fresh zero-filled allocation of "size" bytes obtained
 * from av_mallocz(). The "alignment" argument is not used by the visible
 * code. Old contents are not preserved; the release of the previous buffer is
 * presumably on the line missing from this view.
 * NOTE(review): the result is not checked here, so *p may become NULL. */
942 static void reallocAlign(void **p, int alignment, int size){
944 *p= av_mallocz(size);
/* (Re)allocates every work buffer of the context for the given picture size
 * and strides, and records qpStride in the context. Each buffer goes through
 * reallocAlign(), so it comes back zero-filled.
 * mbWidth/mbHeight are the dimensions rounded up to whole 16-pixel units.
 * NOTE(review): none of the allocations is checked in the visible code. */
947 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
948 int mbWidth = (width+15)>>4;
949 int mbHeight= (height+15)>>4;
953 c->qpStride= qpStride;
955 reallocAlign((void **)&c->tempDst, 8, stride*24);
956 reallocAlign((void **)&c->tempSrc, 8, stride*24);
957 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
958 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
/* Seeds each histogram entry with the same value derived from the picture
 * area (the enclosing loop is not visible in this view). */
960 c->yHistogram[i]= width*height/64*15/256;
964 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
965 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
966 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
969 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
/* QP tables: two sized qpStride x mbHeight, plus a single row of mbWidth
 * entries for the forced-quantizer case. */
970 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
971 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
972 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Fills the file-level clip_table: entries 0..255 and 512..767 are set to 0,
 * and the middle range 256..511 is filled by the loop whose body is not
 * visible in this view.
 * NOTE(review): a clipping table would normally saturate its upper range to
 * 255 rather than 0 -- confirm whether the zero fill of entries 512..767 is
 * intended. */
975 static void global_init(void){
977 memset(clip_table, 0, 256);
978 for(i=256; i<512; i++)
980 memset(clip_table+512, 0, 256);
/* Name callback stored in av_codec_context_class; presumably returns a fixed
 * name for any context (the body is not visible in this view). */
983 static const char * context_to_name(void * ptr) {
/* AVClass attached to every PPContext by pp_get_context(); it carries the
 * class name "Postproc" and the context_to_name callback. pp_postprocess()
 * passes the context to av_log(). */
987 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
989 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
990 PPContext *c= av_malloc(sizeof(PPContext));
991 int stride= (width+15)&(~15); //assumed / will realloc if needed
992 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
996 memset(c, 0, sizeof(PPContext));
997 c->av_class = &av_codec_context_class;
999 if(cpuCaps&PP_FORMAT){
1000 c->hChromaSubSample= cpuCaps&0x3;
1001 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1003 c->hChromaSubSample= 1;
1004 c->vChromaSubSample= 1;
1007 reallocBuffers(c, width, height, stride, qpStride);
1014 void pp_free_context(void *vc){
1015 PPContext *c = (PPContext*)vc;
1018 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1019 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1021 av_free(c->tempBlocks);
1022 av_free(c->yHistogram);
1023 av_free(c->tempDst);
1024 av_free(c->tempSrc);
1025 av_free(c->deintTemp);
1026 av_free(c->stdQPTable);
1027 av_free(c->nonBQPTable);
1028 av_free(c->forcedQPTable);
1030 memset(c, 0, sizeof(PPContext));
/* Public entry point: postprocesses one picture (three planes).
 *
 * src/srcStride, dst/dstStride - per-plane pointers and line strides
 * width, height                - size of plane 0; the other planes are shifted
 *                                down by the context's chroma subsampling
 * QP_store, QPStride           - per-macroblock quantizers; may be NULL
 * vm, vc                       - mode (PPMode) and context (PPContext)
 * pict_type                    - picture type, optionally with
 *                                PP_PICT_TYPE_QP2 set
 *
 * Several braces, declarations and conditions of this function are not
 * visible in this view. */
1035 void pp_postprocess(uint8_t * src[3], int srcStride[3],
1036 uint8_t * dst[3], int dstStride[3],
1037 int width, int height,
1038 QP_STORE_T *QP_store, int QPStride,
1039 pp_mode_t *vm, void *vc, int pict_type)
1041 int mbWidth = (width+15)>>4;
1042 int mbHeight= (height+15)>>4;
1043 PPMode *mode = (PPMode*)vm;
1044 PPContext *c = (PPContext*)vc;
1045 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1046 int absQPStride = FFABS(QPStride);
/* Grow the work buffers if this picture needs larger strides than the ones
 * the context was sized for. */
1048 // c->stride and c->qpStride are always positive
1049 if(c->stride < minStride || c->qpStride < absQPStride)
1050 reallocBuffers(c, width, height,
1051 FFMAX(minStride, c->stride),
1052 FFMAX(c->qpStride, absQPStride));
/* No QP table supplied, or the quantizer is forced: substitute one row of
 * constant QPs (forcedQuant, else 1) and use a stride of 0 so that every
 * macroblock row reads the same values. */
1054 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1057 QP_store= c->forcedQPTable;
1058 absQPStride = QPStride = 0;
1059 if(mode->lumMode & FORCE_QUANT)
1060 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1062 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
/* PP_PICT_TYPE_QP2: halve every QP into stdQPTable, four bytes per step
 * where possible, then a byte-wise tail.
 * NOTE(review): the uint32_t casts assume suitably aligned tables and
 * byte-sized QP_STORE_T -- confirm. */
1065 if(pict_type & PP_PICT_TYPE_QP2){
1067 const int count= mbHeight * absQPStride;
1068 for(i=0; i<(count>>2); i++){
1069 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1071 for(i<<=2; i<count; i++){
1072 c->stdQPTable[i] = QP_store[i]>>1;
1074 QP_store= c->stdQPTable;
1075 QPStride= absQPStride;
/* Dump of the QP table to the log (the guard around it is not visible in
 * this view). */
1080 for(y=0; y<mbHeight; y++){
1081 for(x=0; x<mbWidth; x++){
1082 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1084 av_log(c, AV_LOG_INFO, "\n");
1086 av_log(c, AV_LOG_INFO, "\n");
/* For every picture type except 3 (presumably B-frames, going by the table
 * name), remember the QPs masked to 6 bits in nonBQPTable. A non-negative
 * stride allows a flat word-wise copy; otherwise the copy runs row by row. */
1089 if((pict_type&7)!=3)
1091 if (QPStride >= 0) {
1093 const int count= mbHeight * QPStride;
1094 for(i=0; i<(count>>2); i++){
1095 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1097 for(i<<=2; i<count; i++){
1098 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102 for(i=0; i<mbHeight; i++) {
1103 for(j=0; j<absQPStride; j++) {
1104 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1110 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1111 mode->lumMode, mode->chromMode);
/* Plane 0 (isColor=0) at full resolution. */
1113 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1114 width, height, QP_store, QPStride, 0, mode, c);
1116 width = (width )>>c->hChromaSubSample;
1117 height = (height)>>c->vChromaSubSample;
/* Planes 1 and 2: either filtered (isColor=1 and 2) or, per the branches
 * below, copied unchanged -- in one call per plane when source and
 * destination strides match, else line by line. The condition selecting the
 * filtered path is not visible in this view. */
1121 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1122 width, height, QP_store, QPStride, 1, mode, c);
1123 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1124 width, height, QP_store, QPStride, 2, mode, c);
1126 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1128 linecpy(dst[1], src[1], height, srcStride[1]);
1129 linecpy(dst[2], src[2], height, srcStride[2]);
1134 for(y=0; y<height; y++)
1136 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1137 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);