2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work so it seems to work
51 # more or less self-invented filters so the exactness isn't too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
89 //#define DEBUG_BRIGHTNESS
91 #include "libvo/fastmemcpy.h"
93 #include "postprocess.h"
94 #include "postprocess_internal.h"
96 #include "mangle.h" //FIXME should be supressed
102 #define GET_MODE_BUFFER_SIZE 500
103 #define OPTIONS_ARRAY_SIZE 10
105 #define TEMP_STRIDE 8
106 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
108 #if defined(ARCH_X86) || defined(ARCH_X86_64)
109 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
110 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
111 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
112 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
113 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
114 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
115 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
116 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
119 static uint8_t clip_table[3*256];
120 static uint8_t * const clip_tab= clip_table + 256;
122 static const int attribute_used deringThreshold= 20;
/* Filters that can be selected by name in a postprocessing mode string.
 * The mode parser compares the parsed filter name against both the short and
 * the long name of every entry, and sets or clears the entry's mask bit in
 * lumMode / chromMode. The scan stops at the entry whose shortName is NULL.
 * The three integers presumably map to chromDefault, minLumQuality and
 * minChromQuality -- struct PPFilter is declared elsewhere, confirm the
 * field order there. */
125 static struct PPFilter filters[]=
127 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
128 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
129 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
130 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
131 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
132 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
133 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
134 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
135 {"dr", "dering", 1, 5, 6, DERING},
136 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
137 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
138 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
139 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
140 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
141 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
142 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
143 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
144 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
145 {NULL, NULL,0,0,0,0} //End Marker
/* Flat list of (alias, expansion) string pairs used by the mode parser:
 * element 2*i is compared with the parsed filter name and, on a match,
 * element 2*i+1 is spliced into the remaining mode string.
 * The lookup loop stops at a NULL alias, so the list has to be
 * NULL-terminated (the terminator is not visible in this excerpt). */
148 static const char *replaceTable[]=
150 "default", "hdeblock:a,vdeblock:a,dering:a",
151 "de", "hdeblock:a,vdeblock:a,dering:a",
152 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a",
153 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a",
154 "ac", "ha:a:128:7,va:a,dering:a",
159 #if defined(ARCH_X86) || defined(ARCH_X86_64)
/* Emits the x86 "prefetchnta" instruction on the address held in asm
 * operand %0 -- presumably bound to p; the operand list is not visible here. */
160 static inline void prefetchnta(void *p)
162 asm volatile( "prefetchnta (%0)\n\t"
/* Emits the x86 "prefetcht0" instruction on the address held in asm
 * operand %0 -- presumably bound to p; the operand list is not visible here. */
167 static inline void prefetcht0(void *p)
169 asm volatile( "prefetcht0 (%0)\n\t"
/* Emits the x86 "prefetcht1" instruction on the address held in asm
 * operand %0 -- presumably bound to p; the operand list is not visible here. */
174 static inline void prefetcht1(void *p)
176 asm volatile( "prefetcht1 (%0)\n\t"
/* Emits the x86 "prefetcht2" instruction on the address held in asm
 * operand %0 -- presumably bound to p; the operand list is not visible here. */
181 static inline void prefetcht2(void *p)
183 asm volatile( "prefetcht2 (%0)\n\t"
189 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
192 * Check if the given 8x8 Block is mostly "flat"
/* Counts horizontally adjacent pixel pairs that are "nearly equal" and
 * reports whether the count exceeds c->ppMode.flatnessThreshold.
 *
 * src    first pixel of the row being examined
 * stride not used in the visible lines; presumably advances src per row
 *        in a line missing from this excerpt -- confirm
 * c      supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 *
 * Returns nonzero when numEq > flatnessThreshold. */
194 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
// allowed absolute difference between neighbours, scaled by the QP
198 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
199 const int dcThreshold= dcOffset*2 + 1;
201 for(y=0; y<BLOCK_SIZE; y++)
// (unsigned)(a - b + dcOffset) < 2*dcOffset+1  <=>  -dcOffset <= a-b <= dcOffset,
// i.e. one unsigned compare replaces an abs() and a signed compare
203 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
207 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
208 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
212 return numEq > c->ppMode.flatnessThreshold;
216 * Check if the middle 8x8 Block in the given 8x16 block is flat
/* Vertical counterpart of the flatness test: counts pixels that are nearly
 * equal to the pixel one stride below them, over 8 columns, and reports
 * whether the count exceeds c->ppMode.flatnessThreshold.
 *
 * src    points 4 rows above the 8x8 block (the function skips 4*stride)
 * stride distance in bytes between vertically adjacent pixels
 * c      supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 *
 * Returns nonzero when numEq > flatnessThreshold. */
218 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
221 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
222 const int dcThreshold= dcOffset*2 + 1;
224 src+= stride*4; // src points to begin of the 8x8 Block
// BLOCK_SIZE-1 iterations: each one compares a row with the row below it
225 for(y=0; y<BLOCK_SIZE-1; y++)
// unsigned compare accepts differences in [-dcOffset, dcOffset]
227 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
232 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
237 return numEq > c->ppMode.flatnessThreshold;
/* Range check for a horizontally filtered block: returns 0 as soon as one of
 * the sampled pixel pairs differs by more than 2*QP in absolute value.
 * (unsigned)(a - b + 2*QP) > 4*QP is true exactly when a-b lies outside
 * [-2*QP, 2*QP].
 * NOTE(review): the sparse pairs (0,5) (2,7) (4,1) (6,3) and the single pair
 * (0,7) look like two alternative variants selected by preprocessor lines
 * that are not visible in this excerpt -- confirm. */
240 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
245 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
247 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
249 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
251 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
256 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
/* Range check for a vertically filtered block: returns 0 when sampled pixels
 * of a column differ by more than 2*QP.
 * NOTE(review): three strategies are visible (sparse diagonal sampling four
 * columns at a time, row 1 versus row 8 for every column, and an explicit
 * min/max scan); presumably only one is compiled in, chosen by preprocessor
 * lines missing from this excerpt -- confirm. */
263 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269 for(x=0; x<BLOCK_SIZE; x+=4)
// the unsigned compare rejects differences outside [-2*QP, 2*QP]
271 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279 for(x=0; x<BLOCK_SIZE; x++)
// stride<<3 is 8*stride: compares row 1 with row 8 of the same column
281 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
288 for(x=0; x<BLOCK_SIZE; x++)
294 int v= src[x + y*stride];
298 if(max-min > 2*QP) return 0;
/* Classifies a block for the horizontal deblocking pass: first tests flatness
 * with isHorizDC_C and, for flat blocks, the value range with
 * isHorizMinMaxOk_C using c->QP. The returned codes are not visible in this
 * excerpt. */
304 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
305 if( isHorizDC_C(src, stride, c) ){
306 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/* Classifies a block for the vertical deblocking pass: first tests flatness
 * with isVertDC_C and, for flat blocks, the value range with
 * isVertMinMaxOk_C using c->QP. The returned codes are not visible in this
 * excerpt. */
315 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
316 if( isVertDC_C(src, stride, c) ){
317 if( isVertMinMaxOk_C(src, stride, c->QP) )
/* Default (non-flat) horizontal deblocking filter, applied row by row to the
 * 8 pixels dst[0..7]; the edge being smoothed lies between dst[3] and dst[4].
 * Only rows whose |middleEnergy| is below 8*c->QP are considered.
 * The lines that clamp d and write the pixels back are not visible in this
 * excerpt. */
326 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
329 for(y=0; y<BLOCK_SIZE; y++)
// weighted difference across the edge (taps 2,-5,5,-2 on dst[2..5])
331 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
333 if(ABS(middleEnergy) < 8*c->QP)
// half of the step across the edge
335 const int q=(dst[3] - dst[4])/2;
// the same 4-tap measure, evaluated on each side of the edge
336 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
337 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
// how much the edge activity exceeds the weaker of the two neighbours
339 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
// give the correction the sign opposite to middleEnergy
343 d*= FFSIGN(-middleEnergy);
364 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/* Horizontal low-pass over dst[0..7] of each row, using dst[-1] and dst[8]
 * as outside neighbours.
 * "first"/"last" replace the outside neighbour by the border pixel itself when
 * the two differ by c->QP or more, so strong edges next to the block are not
 * blurred into it.
 * sums[] holds running window sums; every output is
 * (sums[i] + sums[i+2] + 2*dst[i]) >> 4, whose weights add up to 16. The +4 in
 * sums[0] is carried into every sums[] entry, so the two added sums contribute
 * +8, i.e. rounding for the >>4. */
367 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370 for(y=0; y<BLOCK_SIZE; y++)
372 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
373 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
// all sums are computed from the unmodified pixels before any pixel is written
376 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
377 sums[1] = sums[0] - first + dst[3];
378 sums[2] = sums[1] - first + dst[4];
379 sums[3] = sums[2] - first + dst[5];
380 sums[4] = sums[3] - first + dst[6];
381 sums[5] = sums[4] - dst[0] + dst[7];
382 sums[6] = sums[5] - dst[1] + last;
383 sums[7] = sums[6] - dst[2] + last;
384 sums[8] = sums[7] - dst[3] + last;
385 sums[9] = sums[8] - dst[4] + last;
387 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
388 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
389 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
390 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
391 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
392 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
393 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
394 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
401 * Experimental Filter 1 (Horizontal)
402 * will not damage linear gradients
403 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404 * can only smooth blocks at the expected locations (it can't smooth them if they did move)
405 * MMX2 version does correct clipping, C version doesn't
406 * not identical with the vertical one
/* Experimental horizontal deblocking filter 1 (C version).
 *
 * src    first pixel of the row being filtered
 * stride distance in bytes between rows
 * QP     quantiser; its use is in lines not visible in this excerpt
 *
 * On first use a 256-entry table of packed 8-byte correction patterns is
 * built: bytes 7..4 hold the positive fractions a,b,c,d of the step v and
 * bytes 3..0 hold their negations D,C,B,A (two's complement, modulo 256).
 * The visible C loop computes the correction directly from the pixel
 * differences. */
408 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
411 static uint64_t *lut= NULL;
415 lut = av_malloc(256*8);
// table index i is an 8-bit two's complement value; v is twice its signed value
418 int v= i < 128 ? 2*i : 2*(i-256);
420 //Simulate 112242211 9-Tap filter
421 uint64_t a= (v/16) & 0xFF;
422 uint64_t b= (v/8) & 0xFF;
423 uint64_t c= (v/4) & 0xFF;
424 uint64_t d= (3*v/8) & 0xFF;
426 //Simulate piecewise linear interpolation
427 uint64_t a= (v/16) & 0xFF;
428 uint64_t b= (v*3/16) & 0xFF;
429 uint64_t c= (v*5/16) & 0xFF;
430 uint64_t d= (7*v/16) & 0xFF;
431 uint64_t A= (0x100 - a)&0xFF;
432 uint64_t B= (0x100 - b)&0xFF;
433 uint64_t C= (0x100 - c)&0xFF;
// D is the negation of d (it was computed from c, duplicating C)
434 uint64_t D= (0x100 - d)&0xFF;
436 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437 (D<<24) | (C<<16) | (B<<8) | (A);
438 //lut[i] = (v<<32) | (v<<24);
442 for(y=0; y<BLOCK_SIZE; y++)
// differences left of, across and right of the edge between src[3] and src[4]
444 int a= src[1] - src[2];
445 int b= src[3] - src[4];
446 int c= src[5] - src[6];
// edge step minus the average neighbouring activity, never negative
448 int d= FFMAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
// correction carries the sign opposite to b
452 int v = d * FFSIGN(-b);
467 * accurate deblock filter
/* "Accurate" deblocking filter working along an arbitrary direction.
 *
 * src    points 4 steps before the 8 pixels to be filtered (the function
 *        advances by 4*step first)
 * step   distance between consecutive pixels of one filtered line
 * stride presumably the distance between successive filtered lines; its use
 *        is in lines not visible in this excerpt -- confirm
 * c      supplies nonBQP, ppMode.baseDcDiff and ppMode.flatnessThreshold
 *
 * Per line: count nearly-equal neighbours over src[-1]..src[8]; if the count
 * exceeds the flatness threshold a min/max scan and the 9-tap low-pass
 * follow, otherwise the default edge filter on src[0..7] is used. Several
 * control-flow lines (loop header, else branches, clamping and write-back of
 * the default filter) are missing from this excerpt. */
469 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
472 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
473 const int dcThreshold= dcOffset*2 + 1;
475 src+= step*4; // src points to begin of the 8x8 Block
// unsigned compare accepts neighbour differences in [-dcOffset, dcOffset]
479 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
487 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
488 if(numEq > c->ppMode.flatnessThreshold){
491 if(src[0] > src[step]){
// pairwise min/max scan: order each pair first, then one compare against max and one against min
499 if(src[x*step] > src[(x+1)*step]){
500 if(src[x *step] > max) max= src[ x *step];
501 if(src[(x+1)*step] < min) min= src[(x+1)*step];
503 if(src[(x+1)*step] > max) max= src[(x+1)*step];
504 if(src[ x *step] < min) min= src[ x *step];
// outside neighbours are only used when they differ from the border pixel by less than QP
508 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
509 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
// running window sums, all taken from unmodified pixels; +4 provides rounding
512 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
513 sums[1] = sums[0] - first + src[3*step];
514 sums[2] = sums[1] - first + src[4*step];
515 sums[3] = sums[2] - first + src[5*step];
516 sums[4] = sums[3] - first + src[6*step];
517 sums[5] = sums[4] - src[0*step] + src[7*step];
518 sums[6] = sums[5] - src[1*step] + last;
519 sums[7] = sums[6] - src[2*step] + last;
520 sums[8] = sums[7] - src[3*step] + last;
521 sums[9] = sums[8] - src[4*step] + last;
523 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
524 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
525 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
526 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
527 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
528 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
529 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
530 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
// default filter: weighted difference across the edge between src[3*step] and src[4*step]
533 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
535 if(ABS(middleEnergy) < 8*QP)
537 const int q=(src[3*step] - src[4*step])/2;
538 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
539 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
541 int d= ABS(middleEnergy) - FFMIN( ABS(leftEnergy), ABS(rightEnergy) );
545 d*= FFSIGN(-middleEnergy);
572 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
574 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
580 #define COMPILE_ALTIVEC
581 #endif //HAVE_ALTIVEC
582 #endif //ARCH_POWERPC
584 #if defined(ARCH_X86) || defined(ARCH_X86_64)
586 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
590 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
594 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
595 #define COMPILE_3DNOW
608 #define RENAME(a) a ## _C
609 #include "postprocess_template.c"
613 #ifdef COMPILE_ALTIVEC
616 #define RENAME(a) a ## _altivec
617 #include "postprocess_altivec_template.c"
618 #include "postprocess_template.c"
620 #endif //ARCH_POWERPC
628 #define RENAME(a) a ## _MMX
629 #include "postprocess_template.c"
638 #define RENAME(a) a ## _MMX2
639 #include "postprocess_template.c"
648 #define RENAME(a) a ## _3DNow
649 #include "postprocess_template.c"
652 // minor note: the HAVE_xyz is messed up after that line so don't use it
/* Filters one plane by forwarding to one of the postProcess_<variant>
 * functions generated from the template.
 * The mode *vm is copied into c->ppMode before dispatch.
 * With RUNTIME_CPUDETECT the variant is picked from the bits of c->cpuCaps,
 * otherwise from the HAVE_* macros at compile time (the condition guarding
 * the first compile-time branch is not visible in this excerpt). */
654 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
655 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
657 PPContext *c= (PPContext *)vc;
658 PPMode *ppMode= (PPMode *)vm;
659 c->ppMode= *ppMode; //FIXME
661 // using ifs here as they are faster than function pointers although the
662 // difference wouldn't be measurable here, but it's much better because
663 // someone might exchange the CPU without restarting mplayer ;)
664 #ifdef RUNTIME_CPUDETECT
665 #if defined(ARCH_X86) || defined(ARCH_X86_64)
666 // ordered by speed, fastest first
667 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
679 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 #else //RUNTIME_CPUDETECT
687 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688 #elif defined (HAVE_3DNOW)
689 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690 #elif defined (HAVE_MMX)
691 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692 #elif defined (HAVE_ALTIVEC)
693 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
695 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
697 #endif //!RUNTIME_CPUDETECT
700 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
701 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
703 /* -pp Command line Help
706 "Available postprocessing filters:\n"
708 "short long name short long option Description\n"
709 "* * a autoq CPU power dependent enabler\n"
710 " c chrom chrominance filtering enabled\n"
711 " y nochrom chrominance filtering disabled\n"
712 " n noluma luma filtering disabled\n"
713 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
714 " 1. difference factor: default=32, higher -> more deblocking\n"
715 " 2. flatness threshold: default=39, lower -> more deblocking\n"
716 " the h & v deblocking filters share these\n"
717 " so you can't set different thresholds for h / v\n"
718 "vb vdeblock (2 threshold) vertical deblocking filter\n"
719 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
720 "va vadeblock (2 threshold) vertical deblocking filter\n"
721 "h1 x1hdeblock experimental h deblock filter 1\n"
722 "v1 x1vdeblock experimental v deblock filter 1\n"
723 "dr dering deringing filter\n"
724 "al autolevels automatic brightness / contrast\n"
725 " f fullyrange stretch luminance to (0..255)\n"
726 "lb linblenddeint linear blend deinterlacer\n"
727 "li linipoldeint linear interpolating deinterlace\n"
728 "ci cubicipoldeint cubic interpolating deinterlacer\n"
729 "md mediandeint median deinterlacer\n"
730 "fd ffmpegdeint ffmpeg deinterlacer\n"
731 "l5 lowpass5 FIR lowpass deinterlacer\n"
732 "de default hb:a,vb:a,dr:a\n"
733 "fa fast h1:a,v1:a,dr:a\n"
734 "ac ha:a:128:7,va:a,dr:a\n"
735 "tn tmpnoise (3 threshold) temporal noise reducer\n"
736 " 1. <= 2. <= 3. larger -> stronger filtering\n"
737 "fq forceQuant <quantizer> force quantizer\n"
739 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
740 "long form example:\n"
741 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
742 "short form example:\n"
743 "vb:a/hb:a/lb de,-vb\n"
749 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
751 char temp[GET_MODE_BUFFER_SIZE];
753 const char *filterDelimiters= ",/";
754 const char *optionDelimiters= ":";
755 struct PPMode *ppMode;
758 ppMode= av_malloc(sizeof(PPMode));
761 ppMode->chromMode= 0;
762 ppMode->maxTmpNoise[0]= 700;
763 ppMode->maxTmpNoise[1]= 1500;
764 ppMode->maxTmpNoise[2]= 3000;
765 ppMode->maxAllowedY= 234;
766 ppMode->minAllowedY= 16;
767 ppMode->baseDcDiff= 256/8;
768 ppMode->flatnessThreshold= 56-16-1;
769 ppMode->maxClippedThreshold= 0.01;
772 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
774 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
778 int q= 1000000; //PP_QUALITY_MAX;
782 char *options[OPTIONS_ARRAY_SIZE];
785 int numOfUnknownOptions=0;
786 int enable=1; //does the user want us to enabled or disabled the filter
788 filterToken= strtok(p, filterDelimiters);
789 if(filterToken == NULL) break;
790 p+= strlen(filterToken) + 1; // p points to next filterToken
791 filterName= strtok(filterToken, optionDelimiters);
792 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
794 if(*filterName == '-')
800 for(;;){ //for all options
801 option= strtok(NULL, optionDelimiters);
802 if(option == NULL) break;
804 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
805 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
811 options[numOfUnknownOptions] = option;
812 numOfUnknownOptions++;
814 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
816 options[numOfUnknownOptions] = NULL;
818 /* replace stuff from the replace Table */
819 for(i=0; replaceTable[2*i]!=NULL; i++)
821 if(!strcmp(replaceTable[2*i], filterName))
823 int newlen= strlen(replaceTable[2*i + 1]);
827 if(p==NULL) p= temp, *p=0; //last filter
828 else p--, *p=','; //not last filter
831 spaceLeft= p - temp + plen;
832 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
837 memmove(p + newlen, p, plen+1);
838 memcpy(p, replaceTable[2*i + 1], newlen);
843 for(i=0; filters[i].shortName!=NULL; i++)
845 if( !strcmp(filters[i].longName, filterName)
846 || !strcmp(filters[i].shortName, filterName))
848 ppMode->lumMode &= ~filters[i].mask;
849 ppMode->chromMode &= ~filters[i].mask;
852 if(!enable) break; // user wants to disable it
854 if(q >= filters[i].minLumQuality && luma)
855 ppMode->lumMode|= filters[i].mask;
856 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857 if(q >= filters[i].minChromQuality)
858 ppMode->chromMode|= filters[i].mask;
860 if(filters[i].mask == LEVEL_FIX)
863 ppMode->minAllowedY= 16;
864 ppMode->maxAllowedY= 234;
865 for(o=0; options[o]!=NULL; o++)
867 if( !strcmp(options[o],"fullyrange")
868 ||!strcmp(options[o],"f"))
870 ppMode->minAllowedY= 0;
871 ppMode->maxAllowedY= 255;
872 numOfUnknownOptions--;
876 else if(filters[i].mask == TEMP_NOISE_FILTER)
881 for(o=0; options[o]!=NULL; o++)
884 ppMode->maxTmpNoise[numOfNoises]=
885 strtol(options[o], &tail, 0);
889 numOfUnknownOptions--;
890 if(numOfNoises >= 3) break;
894 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
895 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
899 for(o=0; options[o]!=NULL && o<2; o++)
902 int val= strtol(options[o], &tail, 0);
903 if(tail==options[o]) break;
905 numOfUnknownOptions--;
906 if(o==0) ppMode->baseDcDiff= val;
907 else ppMode->flatnessThreshold= val;
910 else if(filters[i].mask == FORCE_QUANT)
913 ppMode->forcedQuant= 15;
915 for(o=0; options[o]!=NULL && o<1; o++)
918 int val= strtol(options[o], &tail, 0);
919 if(tail==options[o]) break;
921 numOfUnknownOptions--;
922 ppMode->forcedQuant= val;
927 if(!filterNameOk) ppMode->error++;
928 ppMode->error += numOfUnknownOptions;
931 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
934 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Presumably releases a mode obtained from pp_get_mode_by_name_and_quality();
 * the body is not visible in this excerpt -- confirm. */
941 void pp_free_mode(pp_mode_t *mode){
/* Replaces *p with a freshly allocated, zero-filled buffer of size bytes.
 * The alignment argument is not used in the visible lines.
 * NOTE(review): whether the previous *p is released first depends on a line
 * missing from this excerpt -- confirm. */
945 static void reallocAlign(void **p, int alignment, int size){
947 *p= av_mallocz(size);
/* (Re)allocates every work buffer of the context for the given picture size
 * and strides, and records qpStride in the context.
 * Sizes are derived from the macroblock grid (16x16 pixels, rounded up).
 * All buffers come from reallocAlign() and therefore start out zero-filled;
 * yHistogram is then seeded with a uniform value derived from the picture
 * area. The loop headers over i are not visible in this excerpt. */
950 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
951 int mbWidth = (width+15)>>4;
952 int mbHeight= (height+15)>>4;
956 c->qpStride= qpStride;
958 reallocAlign((void **)&c->tempDst, 8, stride*24);
959 reallocAlign((void **)&c->tempSrc, 8, stride*24);
960 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
961 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
963 c->yHistogram[i]= width*height/64*15/256;
967 //Note: the +17*1024 is just there so I don't have to worry about r/w over the end
968 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
969 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
972 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
973 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
974 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
// only one row of macroblock QPs: the forced table is read with a QP stride of 0
975 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Fills the static clip_table (3*256 bytes; clip_tab points at its middle).
 * The first 256 entries are zeroed; the loop over 256..511 presumably stores
 * the identity range (its body is not visible in this excerpt).
 * NOTE(review): the last 256 entries are zeroed as well rather than set to
 * 255 -- confirm that this is intended for a clipping table. */
978 static void global_init(void){
980 memset(clip_table, 0, 256);
981 for(i=256; i<512; i++)
983 memset(clip_table+512, 0, 256);
/* Name callback stored in the AVClass below; its body is not visible in this
 * excerpt. */
986 static const char * context_to_name(void * ptr) {
/* Logging class attached to every PPContext (see c->av_class), labelled
 * "Postproc". */
990 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/* Allocates and initialises a postprocessing context.
 *
 * width, height  picture size used to size the work buffers
 * cpuCaps        capability flags, stored in the context; when the PP_FORMAT
 *                bit is set the horizontal / vertical chroma subsampling
 *                shifts are taken from bits 0-1 and 4-5, otherwise both
 *                default to 1
 *
 * The initial strides are estimates derived from width; pp_postprocess()
 * reallocates the buffers when the real strides turn out larger.
 * Returns NULL when the context itself cannot be allocated. */
992 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
993 PPContext *c= av_malloc(sizeof(PPContext));
994 int stride= (width+15)&(~15); //assumed / will realloc if needed
995 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
// do not memset() or initialise a failed allocation
if(c == NULL) return NULL;
999 memset(c, 0, sizeof(PPContext));
1000 c->av_class = &av_codec_context_class;
1001 c->cpuCaps= cpuCaps;
1002 if(cpuCaps&PP_FORMAT){
1003 c->hChromaSubSample= cpuCaps&0x3;
1004 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1006 c->hChromaSubSample= 1;
1007 c->vChromaSubSample= 1;
1010 reallocBuffers(c, width, height, stride, qpStride);
/* Releases every work buffer owned by the context and then wipes the context
 * structure.
 * vc is dereferenced unconditionally, so it must not be NULL.
 * NOTE(review): the release of the PPContext itself is not visible in this
 * excerpt -- confirm it follows the memset. */
1017 void pp_free_context(void *vc){
1018 PPContext *c = (PPContext*)vc;
1021 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1024 av_free(c->tempBlocks);
1025 av_free(c->yHistogram);
1026 av_free(c->tempDst);
1027 av_free(c->tempSrc);
1028 av_free(c->deintTemp);
1029 av_free(c->stdQPTable);
1030 av_free(c->nonBQPTable);
1031 av_free(c->forcedQPTable);
// wipe the struct so no stale buffer pointers remain in it
1033 memset(c, 0, sizeof(PPContext));
/* Public entry point: postprocesses one picture made of three planes.
 *
 * src/srcStride, dst/dstStride  plane pointers and strides; index 0 is
 *                               filtered with isColor=0, indices 1 and 2 as
 *                               colour planes
 * width, height                 size of plane 0; the other planes are shifted
 *                               by the context's chroma subsampling factors
 * QP_store, QPStride            per-macroblock quantisers, may be NULL
 * vm, vc                        mode and context
 * pict_type                     picture type plus flags (PP_PICT_TYPE_QP2)
 *
 * Steps visible here: grow the context buffers when a stride exceeds the
 * stored one; substitute a forced QP row when no QPs are given or FORCE_QUANT
 * is set; halve the QPs when PP_PICT_TYPE_QP2 is set; refresh nonBQPTable
 * unless (pict_type&7)==3; filter plane 0; then filter or plainly copy the
 * other two planes. */
1038 void pp_postprocess(uint8_t * src[3], int srcStride[3],
1039 uint8_t * dst[3], int dstStride[3],
1040 int width, int height,
1041 QP_STORE_T *QP_store, int QPStride,
1042 pp_mode_t *vm, void *vc, int pict_type)
1044 int mbWidth = (width+15)>>4;
1045 int mbHeight= (height+15)>>4;
1046 PPMode *mode = (PPMode*)vm;
1047 PPContext *c = (PPContext*)vc;
1048 int minStride= FFMAX(ABS(srcStride[0]), ABS(dstStride[0]));
1049 int absQPStride = ABS(QPStride);
1051 // c->stride and c->QPStride are always positive
1052 if(c->stride < minStride || c->qpStride < absQPStride)
1053 reallocBuffers(c, width, height,
1054 FFMAX(minStride, c->stride),
1055 FFMAX(c->qpStride, absQPStride));
1057 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
// a single row of QPs read with stride 0 serves every macroblock row
1060 QP_store= c->forcedQPTable;
1061 absQPStride = QPStride = 0;
1062 if(mode->lumMode & FORCE_QUANT)
1063 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1065 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1068 if(pict_type & PP_PICT_TYPE_QP2){
1070 const int count= mbHeight * absQPStride;
// halve four QPs per 32-bit word; the mask drops the bit shifted in from the neighbouring byte
1071 for(i=0; i<(count>>2); i++){
1072 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
// remaining (count % 4) entries one at a time
1074 for(i<<=2; i<count; i++){
1075 c->stdQPTable[i] = QP_store[i]>>1;
1077 QP_store= c->stdQPTable;
1078 QPStride= absQPStride;
// dump of the QP table, one text row per macroblock row
1083 for(y=0; y<mbHeight; y++){
1084 for(x=0; x<mbWidth; x++){
1085 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1087 av_log(c, AV_LOG_INFO, "\n");
1089 av_log(c, AV_LOG_INFO, "\n");
// presumably type 3 denotes a B-picture (the table is named nonBQPTable) -- confirm
1092 if((pict_type&7)!=3)
1094 if (QPStride >= 0) {
1096 const int count= mbHeight * QPStride;
1097 for(i=0; i<(count>>2); i++){
1098 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1100 for(i<<=2; i<count; i++){
1101 c->nonBQPTable[i] = QP_store[i] & 0x3F;
// negative QP stride: copy row by row into a table laid out with the positive stride
1105 for(i=0; i<mbHeight; i++) {
1106 for(j=0; j<absQPStride; j++) {
1107 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1113 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114 mode->lumMode, mode->chromMode);
1116 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117 width, height, QP_store, QPStride, 0, mode, c);
// from here on width/height describe planes 1 and 2
1119 width = (width )>>c->hChromaSubSample;
1120 height = (height)>>c->vChromaSubSample;
1124 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125 width, height, QP_store, QPStride, 1, mode, c);
1126 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127 width, height, QP_store, QPStride, 2, mode, c);
// unfiltered planes: bulk copy when the strides match, otherwise line by line
1129 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1131 linecpy(dst[1], src[1], height, srcStride[1]);
1132 linecpy(dst[2], src[2], height, srcStride[2]);
1137 for(y=0; y<height; y++)
1139 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);