2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libpostproc/postprocess.c
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once)
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
77 #include "libavutil/avutil.h"
83 //#define HAVE_AMD3DNOW
86 //#define DEBUG_BRIGHTNESS
87 #include "postprocess.h"
88 #include "postprocess_internal.h"
/* Returns the library version as the LIBPOSTPROC_VERSION_INT constant. */
90 unsigned postproc_version(void)
92 return LIBPOSTPROC_VERSION_INT;
/* Returns the FFMPEG_CONFIGURATION string this library was built with. */
95 const char * postproc_configuration(void)
97 return FFMPEG_CONFIGURATION;
/*
 * Returns the license string.
 * The literal is LICENSE_PREFIX immediately followed by FFMPEG_LICENSE;
 * adding sizeof(LICENSE_PREFIX) - 1 skips the prefix again, so the returned
 * pointer starts at the FFMPEG_LICENSE text.
 */
100 const char * postproc_license(void)
102 #define LICENSE_PREFIX "libpostproc license: "
103 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
// Size in bytes of the scratch buffer the mode string is copied into for parsing.
110 #define GET_MODE_BUFFER_SIZE 500
// Capacity of the per-filter array that collects unrecognized option strings.
111 #define OPTIONS_ARRAY_SIZE 10
113 #define TEMP_STRIDE 8
114 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
// Constants declared through DECLARE_ASM_CONST with a first argument of 8.
// wNN repeats the 16-bit value 0x00NN in each of four lanes of a 64-bit word;
// bNN repeats the byte 0xNN in each of eight lanes.
117 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
118 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
119 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
120 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
121 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
122 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
123 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
124 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
// NOTE(review): presumably the threshold used by the dering filter — confirm at its use site.
127 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
// Table of filters that can be named in a mode string; the all-zero/NULL
// entry terminates it. The mode parser in this file reads the members
// shortName, longName, chromDefault, minLumQuality, minChromQuality and mask.
// NOTE(review): the initializers presumably follow that member order —
// confirm against the struct PPFilter declaration.
130 static struct PPFilter filters[]=
132 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
133 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
134 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
135 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
136 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
137 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
138 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
139 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
140 {"dr", "dering", 1, 5, 6, DERING},
141 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
142 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
143 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
144 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
145 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
146 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
147 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
148 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
149 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
150 {NULL, NULL,0,0,0,0} //End Marker
// Alias table stored as consecutive (name, expansion) string pairs: entry 2*i
// is the alias a mode string may use, entry 2*i + 1 is the filter list the
// mode parser splices in for it.
153 static const char *replaceTable[]=
155 "default", "hb:a,vb:a,dr:a",
156 "de", "hb:a,vb:a,dr:a",
157 "fast", "h1:a,v1:a,dr:a",
158 "fa", "h1:a,v1:a,dr:a",
159 "ac", "ha:a:128:7,va:a,dr:a",
/* Emits the x86 "prefetchnta" instruction; presumably %0 is bound to p (operand list not shown here — confirm). */
165 static inline void prefetchnta(void *p)
167 __asm__ volatile( "prefetchnta (%0)\n\t"
/* Emits the x86 "prefetcht0" instruction; presumably %0 is bound to p (operand list not shown here — confirm). */
172 static inline void prefetcht0(void *p)
174 __asm__ volatile( "prefetcht0 (%0)\n\t"
/* Emits the x86 "prefetcht1" instruction; presumably %0 is bound to p (operand list not shown here — confirm). */
179 static inline void prefetcht1(void *p)
181 __asm__ volatile( "prefetcht1 (%0)\n\t"
/* Emits the x86 "prefetcht2" instruction; presumably %0 is bound to p (operand list not shown here — confirm). */
186 static inline void prefetcht2(void *p)
188 __asm__ volatile( "prefetcht2 (%0)\n\t"
194 /* The horizontal functions exist only in C because the MMX
195 * code is faster with vertical filters and transposing. */
198 * Check if the given 8x8 Block is mostly "flat"
/*
 * Flatness test along the rows of a block.
 * On every pass of the y loop the seven horizontally adjacent pixel pairs
 * src[k], src[k+1] (k = 0..6) are compared, and numEq is incremented for each
 * pair whose difference is at most dcOffset in magnitude.
 * Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
 */
200 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
// Tolerance derived from c->nonBQP and c->ppMode.baseDcDiff; always at least 1.
204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
// With dcThreshold == 2*dcOffset + 1, the test
//   (unsigned)(a - b + dcOffset) < dcThreshold
// is a single-comparison form of |a - b| <= dcOffset: a negative sum wraps
// to a large unsigned value and fails the test.
205 const int dcThreshold= dcOffset*2 + 1;
207 for(y=0; y<BLOCK_SIZE; y++){
208 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
217 return numEq > c->ppMode.flatnessThreshold;
221 * Check if the middle 8x8 Block in the given 8x16 block is flat
/*
 * Flatness test along the columns of a block.
 * After skipping four rows, every pass of the y loop compares each of the
 * eight pixels src[k] (k = 0..7) with the pixel one stride below it, and
 * numEq is incremented for each pair whose difference is at most dcOffset in
 * magnitude. The loop runs BLOCK_SIZE-1 times.
 * Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
 */
223 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
// Tolerance derived from c->nonBQP and c->ppMode.baseDcDiff; always at least 1.
227 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
// (unsigned)(a - b + dcOffset) < dcThreshold  <=>  |a - b| <= dcOffset
228 const int dcThreshold= dcOffset*2 + 1;
230 src+= stride*4; // src points to begin of the 8x8 Block
231 for(y=0; y<BLOCK_SIZE-1; y++){
232 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
242 return numEq > c->ppMode.flatnessThreshold;
/*
 * Range test on pixel pairs taken from within a row.
 * Each check returns 0 as soon as the difference of the two sampled pixels
 * lies outside [-2*QP, 2*QP]; the unsigned cast folds both bounds into the
 * single comparison (unsigned)(a - b + 2*QP) > 4*QP.
 * NOTE(review): the checks below appear to belong to alternative code paths
 * (a sparse four-pair sampling and a plain src[0]/src[7] comparison); which
 * one is compiled depends on directives to be confirmed in the full file.
 */
245 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
250 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
252 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
254 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
256 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
261 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
/*
 * Range test on pixel pairs taken from within a column.
 * Each check returns 0 as soon as the difference of the two sampled pixels
 * lies outside [-2*QP, 2*QP]; the unsigned cast folds both bounds into the
 * single comparison (unsigned)(a - b + 2*QP) > 4*QP.
 * NOTE(review): the loops below appear to be alternative implementations
 * (sparse sampling four columns at a time, a per-column row-1/row-8
 * comparison, and a per-column min/max scan); which one is compiled depends
 * on directives to be confirmed in the full file.
 */
268 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
274 for(x=0; x<BLOCK_SIZE; x+=4){
275 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
276 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
283 for(x=0; x<BLOCK_SIZE; x++){
284 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
291 for(x=0; x<BLOCK_SIZE; x++){
296 int v= src[x + y*stride];
300 if(max-min > 2*QP) return 0;
/*
 * Classifies a block for horizontal filtering: runs the flatness test
 * isHorizDC_C first and, for blocks it reports as flat, the range test
 * isHorizMinMaxOk_C with c->QP.
 */
306 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
308 if( isHorizDC_C(src, stride, c) ){
309 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/*
 * Classifies a block for vertical filtering: runs the flatness test
 * isVertDC_C first and, for blocks it reports as flat, the range test
 * isVertMinMaxOk_C with c->QP.
 */
318 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
320 if( isVertDC_C(src, stride, c) ){
321 if( isVertMinMaxOk_C(src, stride, c->QP) )
/*
 * Default horizontal deblocking filter (C version), applied once per pass of
 * the y loop to the eight pixels dst[0..7].
 * An "energy" is computed across the dst[3]/dst[4] boundary and across the
 * two neighbouring positions; a correction d is derived only when the middle
 * energy is below 8*c->QP in magnitude.
 */
330 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
333 for(y=0; y<BLOCK_SIZE; y++){
// Weighted second difference centred on the dst[3]/dst[4] boundary.
334 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
336 if(FFABS(middleEnergy) < 8*c->QP){
// Half of the step across the boundary.
337 const int q=(dst[3] - dst[4])/2;
// The same weighted difference evaluated two pixels to the left and right.
338 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
339 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
// Amount by which the middle energy exceeds the smaller neighbouring energy.
341 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
// Give the correction the sign opposite to middleEnergy.
345 d*= FFSIGN(-middleEnergy);
366 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/*
 * Horizontal low-pass over dst[0..7], applied once per pass of the y loop.
 * sums[k] is a running sum of seven consecutive samples plus the rounding
 * constant 4; positions left of dst[0] are padded with `first` and positions
 * right of dst[7] with `last`. Each output pixel combines two of these sums
 * with twice the original pixel and divides by 16.
 * All sums are computed before any pixel is overwritten.
 */
369 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
372 for(y=0; y<BLOCK_SIZE; y++){
// Use the neighbouring pixel outside the block as padding only when it is
// within c->QP of the edge pixel; otherwise repeat the edge pixel.
373 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
374 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
// Sliding window: each step drops the oldest sample and adds the next one.
377 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
378 sums[1] = sums[0] - first + dst[3];
379 sums[2] = sums[1] - first + dst[4];
380 sums[3] = sums[2] - first + dst[5];
381 sums[4] = sums[3] - first + dst[6];
382 sums[5] = sums[4] - dst[0] + dst[7];
383 sums[6] = sums[5] - dst[1] + last;
384 sums[7] = sums[6] - dst[2] + last;
385 sums[8] = sums[7] - dst[3] + last;
386 sums[9] = sums[8] - dst[4] + last;
388 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
389 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
390 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
391 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
392 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
393 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
394 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
395 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
402 * Experimental Filter 1 (Horizontal)
403 * will not damage linear gradients
404 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
405 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
406 * The MMX2 version does correct clipping; the C version does not.
407 * not identical with the vertical one
409 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
412 static uint64_t *lut= NULL;
416 lut = av_malloc(256*8);
419 int v= i < 128 ? 2*i : 2*(i-256);
421 //Simulate 112242211 9-Tap filter
422 uint64_t a= (v/16) & 0xFF;
423 uint64_t b= (v/8) & 0xFF;
424 uint64_t c= (v/4) & 0xFF;
425 uint64_t d= (3*v/8) & 0xFF;
427 //Simulate piecewise linear interpolation
428 uint64_t a= (v/16) & 0xFF;
429 uint64_t b= (v*3/16) & 0xFF;
430 uint64_t c= (v*5/16) & 0xFF;
431 uint64_t d= (7*v/16) & 0xFF;
432 uint64_t A= (0x100 - a)&0xFF;
433 uint64_t B= (0x100 - b)&0xFF;
434 uint64_t C= (0x100 - c)&0xFF;
435 uint64_t D= (0x100 - c)&0xFF;
437 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
438 (D<<24) | (C<<16) | (B<<8) | (A);
439 //lut[i] = (v<<32) | (v<<24);
443 for(y=0; y<BLOCK_SIZE; y++){
444 int a= src[1] - src[2];
445 int b= src[3] - src[4];
446 int c= src[5] - src[6];
448 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
451 int v = d * FFSIGN(-b);
465 * accurate deblock filter
/*
 * "Accurate" deblocking filter (C version).
 * Pixels of one line are addressed as src[k*step], so the same code can walk
 * along either image direction depending on the step/stride the caller
 * passes. For each line the number of neighbouring pairs that differ by at
 * most dcOffset is counted. Lines whose count exceeds
 * c->ppMode.flatnessThreshold take the low-pass branch (running sums, same
 * scheme as doHorizLowPass_C); the other lines take the energy-based branch
 * (same scheme as doHorizDefFilter_C).
 */
467 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
// Tolerance derived from c->nonBQP and c->ppMode.baseDcDiff; always at least 1.
470 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
// (unsigned)(a - b + dcOffset) < dcThreshold  <=>  |a - b| <= dcOffset
471 const int dcThreshold= dcOffset*2 + 1;
473 src+= step*4; // src points to begin of the 8x8 Block
// Nine neighbouring pairs are tested, from src[-1*step] up to src[8*step].
477 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
478 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
486 if(numEq > c->ppMode.flatnessThreshold){
// Track the minimum and maximum of the line, handling the pixels in pairs.
489 if(src[0] > src[step]){
497 if(src[x*step] > src[(x+1)*step]){
498 if(src[x *step] > max) max= src[ x *step];
499 if(src[(x+1)*step] < min) min= src[(x+1)*step];
501 if(src[(x+1)*step] > max) max= src[(x+1)*step];
502 if(src[ x *step] < min) min= src[ x *step];
// Edge padding: take the pixel outside the block only if it is within QP of
// the edge pixel, otherwise repeat the edge pixel.
506 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
507 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
// Running sums of seven consecutive samples plus the rounding constant 4.
510 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
511 sums[1] = sums[0] - first + src[3*step];
512 sums[2] = sums[1] - first + src[4*step];
513 sums[3] = sums[2] - first + src[5*step];
514 sums[4] = sums[3] - first + src[6*step];
515 sums[5] = sums[4] - src[0*step] + src[7*step];
516 sums[6] = sums[5] - src[1*step] + last;
517 sums[7] = sums[6] - src[2*step] + last;
518 sums[8] = sums[7] - src[3*step] + last;
519 sums[9] = sums[8] - src[4*step] + last;
521 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
522 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
523 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
524 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
525 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
526 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
527 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
528 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
// Weighted second difference centred on the src[3*step]/src[4*step] boundary.
531 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
533 if(FFABS(middleEnergy) < 8*QP){
534 const int q=(src[3*step] - src[4*step])/2;
535 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
// Amount by which the middle energy exceeds the smaller neighbouring energy.
538 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
// Give the correction the sign opposite to middleEnergy.
542 d*= FFSIGN(-middleEnergy);
// This section compiles postprocess_template.c several times. Before each
// inclusion RENAME() is redefined to append a per-variant suffix
// (_C, _altivec, _MMX, _MMX2, _3DNow), so every inclusion yields a separately
// named set of functions. The COMPILE_* macros select which variants are
// built from the HAVE_* feature macros and CONFIG_RUNTIME_CPUDETECT.
566 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
568 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
573 #define COMPILE_ALTIVEC
574 #endif //HAVE_ALTIVEC
578 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
582 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
586 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
587 #define COMPILE_3DNOW
589 #endif /* ARCH_X86 */
// From here on the HAVE_* macros are redefined per variant to steer the
// template; they no longer describe the build configuration.
596 #define HAVE_AMD3DNOW 0
598 #define HAVE_ALTIVEC 0
601 #define RENAME(a) a ## _C
602 #include "postprocess_template.c"
605 #ifdef COMPILE_ALTIVEC
608 #define HAVE_ALTIVEC 1
609 #define RENAME(a) a ## _altivec
610 #include "postprocess_altivec_template.c"
611 #include "postprocess_template.c"
619 #define RENAME(a) a ## _MMX
620 #include "postprocess_template.c"
630 #define RENAME(a) a ## _MMX2
631 #include "postprocess_template.c"
642 #define HAVE_AMD3DNOW 1
643 #define RENAME(a) a ## _3DNow
644 #include "postprocess_template.c"
647 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/*
 * Dispatches one plane to a variant of the postProcess_* implementation
 * generated from postprocess_template.c.
 * The mode passed in vm is copied into the context first. With
 * CONFIG_RUNTIME_CPUDETECT the variant is chosen from the flags in
 * c->cpuCaps, otherwise the choice is fixed at compile time. All variants
 * receive the same arguments.
 */
649 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
652 PPContext *c= (PPContext *)vc;
653 PPMode *ppMode= (PPMode *)vm;
654 c->ppMode= *ppMode; //FIXME
656 // Using ifs here as they are faster than function pointers although the
657 // difference would not be measurable here but it is much better because
658 // someone might exchange the CPU without restarting MPlayer ;)
659 #if CONFIG_RUNTIME_CPUDETECT
661 // ordered per speed fastest first
662 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
673 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 #else //CONFIG_RUNTIME_CPUDETECT
680 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690 #endif //!CONFIG_RUNTIME_CPUDETECT
693 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
694 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
696 /* -pp Command line Help
// Help text listing the filters and options accepted in a mode string.
// The declaration is a pointer for library versions below 52 and an array
// otherwise.
// NOTE(review): the long names printed for "ha" and "va" below ("hadeblock",
// "vadeblock") differ from the long names in the filters[] table
// ("ahdeblock", "avdeblock"), which are the ones the mode parser compares
// against. One of the two should be corrected.
698 #if LIBPOSTPROC_VERSION_INT < (52<<16)
699 const char *const pp_help=
701 const char pp_help[] =
703 "Available postprocessing filters:\n"
705 "short long name short long option Description\n"
706 "* * a autoq CPU power dependent enabler\n"
707 " c chrom chrominance filtering enabled\n"
708 " y nochrom chrominance filtering disabled\n"
709 " n noluma luma filtering disabled\n"
710 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
711 " 1. difference factor: default=32, higher -> more deblocking\n"
712 " 2. flatness threshold: default=39, lower -> more deblocking\n"
713 " the h & v deblocking filters share these\n"
714 " so you can't set different thresholds for h / v\n"
715 "vb vdeblock (2 threshold) vertical deblocking filter\n"
716 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
717 "va vadeblock (2 threshold) vertical deblocking filter\n"
718 "h1 x1hdeblock experimental h deblock filter 1\n"
719 "v1 x1vdeblock experimental v deblock filter 1\n"
720 "dr dering deringing filter\n"
721 "al autolevels automatic brightness / contrast\n"
722 " f fullyrange stretch luminance to (0..255)\n"
723 "lb linblenddeint linear blend deinterlacer\n"
724 "li linipoldeint linear interpolating deinterlace\n"
725 "ci cubicipoldeint cubic interpolating deinterlacer\n"
726 "md mediandeint median deinterlacer\n"
727 "fd ffmpegdeint ffmpeg deinterlacer\n"
728 "l5 lowpass5 FIR lowpass deinterlacer\n"
729 "de default hb:a,vb:a,dr:a\n"
730 "fa fast h1:a,v1:a,dr:a\n"
731 "ac ha:a:128:7,va:a,dr:a\n"
732 "tn tmpnoise (3 threshold) temporal noise reducer\n"
733 " 1. <= 2. <= 3. larger -> stronger filtering\n"
734 "fq forceQuant <quantizer> force quantizer\n"
736 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
737 "long form example:\n"
738 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
739 "short form example:\n"
740 "vb:a/hb:a/lb de,-vb\n"
746 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
748 char temp[GET_MODE_BUFFER_SIZE];
750 static const char filterDelimiters[] = ",/";
751 static const char optionDelimiters[] = ":";
752 struct PPMode *ppMode;
755 ppMode= av_malloc(sizeof(PPMode));
758 ppMode->chromMode= 0;
759 ppMode->maxTmpNoise[0]= 700;
760 ppMode->maxTmpNoise[1]= 1500;
761 ppMode->maxTmpNoise[2]= 3000;
762 ppMode->maxAllowedY= 234;
763 ppMode->minAllowedY= 16;
764 ppMode->baseDcDiff= 256/8;
765 ppMode->flatnessThreshold= 56-16-1;
766 ppMode->maxClippedThreshold= 0.01;
769 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
771 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
775 int q= 1000000; //PP_QUALITY_MAX;
779 char *options[OPTIONS_ARRAY_SIZE];
782 int numOfUnknownOptions=0;
783 int enable=1; //does the user want us to enabled or disabled the filter
785 filterToken= strtok(p, filterDelimiters);
786 if(filterToken == NULL) break;
787 p+= strlen(filterToken) + 1; // p points to next filterToken
788 filterName= strtok(filterToken, optionDelimiters);
789 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
791 if(*filterName == '-'){
796 for(;;){ //for all options
797 option= strtok(NULL, optionDelimiters);
798 if(option == NULL) break;
800 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
801 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
802 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
803 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
804 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
806 options[numOfUnknownOptions] = option;
807 numOfUnknownOptions++;
809 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
811 options[numOfUnknownOptions] = NULL;
813 /* replace stuff from the replace Table */
814 for(i=0; replaceTable[2*i]!=NULL; i++){
815 if(!strcmp(replaceTable[2*i], filterName)){
816 int newlen= strlen(replaceTable[2*i + 1]);
820 if(p==NULL) p= temp, *p=0; //last filter
821 else p--, *p=','; //not last filter
824 spaceLeft= p - temp + plen;
825 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
829 memmove(p + newlen, p, plen+1);
830 memcpy(p, replaceTable[2*i + 1], newlen);
835 for(i=0; filters[i].shortName!=NULL; i++){
836 if( !strcmp(filters[i].longName, filterName)
837 || !strcmp(filters[i].shortName, filterName)){
838 ppMode->lumMode &= ~filters[i].mask;
839 ppMode->chromMode &= ~filters[i].mask;
842 if(!enable) break; // user wants to disable it
844 if(q >= filters[i].minLumQuality && luma)
845 ppMode->lumMode|= filters[i].mask;
846 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847 if(q >= filters[i].minChromQuality)
848 ppMode->chromMode|= filters[i].mask;
850 if(filters[i].mask == LEVEL_FIX){
852 ppMode->minAllowedY= 16;
853 ppMode->maxAllowedY= 234;
854 for(o=0; options[o]!=NULL; o++){
855 if( !strcmp(options[o],"fullyrange")
856 ||!strcmp(options[o],"f")){
857 ppMode->minAllowedY= 0;
858 ppMode->maxAllowedY= 255;
859 numOfUnknownOptions--;
863 else if(filters[i].mask == TEMP_NOISE_FILTER)
868 for(o=0; options[o]!=NULL; o++){
870 ppMode->maxTmpNoise[numOfNoises]=
871 strtol(options[o], &tail, 0);
872 if(tail!=options[o]){
874 numOfUnknownOptions--;
875 if(numOfNoises >= 3) break;
879 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
880 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
883 for(o=0; options[o]!=NULL && o<2; o++){
885 int val= strtol(options[o], &tail, 0);
886 if(tail==options[o]) break;
888 numOfUnknownOptions--;
889 if(o==0) ppMode->baseDcDiff= val;
890 else ppMode->flatnessThreshold= val;
893 else if(filters[i].mask == FORCE_QUANT){
895 ppMode->forcedQuant= 15;
897 for(o=0; options[o]!=NULL && o<1; o++){
899 int val= strtol(options[o], &tail, 0);
900 if(tail==options[o]) break;
902 numOfUnknownOptions--;
903 ppMode->forcedQuant= val;
908 if(!filterNameOk) ppMode->error++;
909 ppMode->error += numOfUnknownOptions;
912 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
914 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Releases a mode; intended for modes returned by pp_get_mode_by_name_and_quality(). */
921 void pp_free_mode(pp_mode *mode){
/*
 * Replaces *p with a fresh buffer of `size` bytes obtained from av_mallocz().
 * Old contents are not carried over.
 * NOTE(review): `alignment` does not appear to be used; alignment is
 * presumably whatever av_mallocz() guarantees — confirm.
 */
925 static void reallocAlign(void **p, int alignment, int size){
927 *p= av_mallocz(size);
/*
 * (Re)allocates the context's working buffers for the given picture
 * dimensions and strides, and records qpStride in the context.
 * NOTE(review): the results of reallocAlign() are not checked; yHistogram is
 * written immediately after its allocation, so an allocation failure would
 * presumably be dereferenced — confirm and consider propagating an error.
 */
930 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
// Picture size in 16x16 macroblocks, rounded up.
931 int mbWidth = (width+15)>>4;
932 int mbHeight= (height+15)>>4;
936 c->qpStride= qpStride;
938 reallocAlign((void **)&c->tempDst, 8, stride*24);
939 reallocAlign((void **)&c->tempSrc, 8, stride*24);
940 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
941 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
// Every histogram bin starts from the same value derived from the picture area.
943 c->yHistogram[i]= width*height/64*15/256;
946 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
947 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
948 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
951 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
// The two QP tables hold qpStride entries per macroblock row; the forced-QP
// table holds a single row of mbWidth entries.
952 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
953 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
954 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Name callback stored in av_codec_context_class as its second initializer. */
957 static const char * context_to_name(void * ptr) {
/* AVClass assigned to every context's av_class member; contexts are passed to av_log() under the class name "Postproc". */
961 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
963 pp_context *pp_get_context(int width, int height, int cpuCaps){
964 PPContext *c= av_malloc(sizeof(PPContext));
965 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
966 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
968 memset(c, 0, sizeof(PPContext));
969 c->av_class = &av_codec_context_class;
971 if(cpuCaps&PP_FORMAT){
972 c->hChromaSubSample= cpuCaps&0x3;
973 c->vChromaSubSample= (cpuCaps>>4)&0x3;
975 c->hChromaSubSample= 1;
976 c->vChromaSubSample= 1;
979 reallocBuffers(c, width, height, stride, qpStride);
986 void pp_free_context(void *vc){
987 PPContext *c = (PPContext*)vc;
990 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
991 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
993 av_free(c->tempBlocks);
994 av_free(c->yHistogram);
997 av_free(c->deintTemp);
998 av_free(c->stdQPTable);
999 av_free(c->nonBQPTable);
1000 av_free(c->forcedQPTable);
1002 memset(c, 0, sizeof(PPContext));
1007 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1008 uint8_t * dst[3], const int dstStride[3],
1009 int width, int height,
1010 const QP_STORE_T *QP_store, int QPStride,
1011 pp_mode *vm, void *vc, int pict_type)
1013 int mbWidth = (width+15)>>4;
1014 int mbHeight= (height+15)>>4;
1015 PPMode *mode = (PPMode*)vm;
1016 PPContext *c = (PPContext*)vc;
1017 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1018 int absQPStride = FFABS(QPStride);
1020 // c->stride and c->QPStride are always positive
1021 if(c->stride < minStride || c->qpStride < absQPStride)
1022 reallocBuffers(c, width, height,
1023 FFMAX(minStride, c->stride),
1024 FFMAX(c->qpStride, absQPStride));
1026 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1028 QP_store= c->forcedQPTable;
1029 absQPStride = QPStride = 0;
1030 if(mode->lumMode & FORCE_QUANT)
1031 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1033 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1036 if(pict_type & PP_PICT_TYPE_QP2){
1038 const int count= mbHeight * absQPStride;
1039 for(i=0; i<(count>>2); i++){
1040 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1042 for(i<<=2; i<count; i++){
1043 c->stdQPTable[i] = QP_store[i]>>1;
1045 QP_store= c->stdQPTable;
1046 QPStride= absQPStride;
1051 for(y=0; y<mbHeight; y++){
1052 for(x=0; x<mbWidth; x++){
1053 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1055 av_log(c, AV_LOG_INFO, "\n");
1057 av_log(c, AV_LOG_INFO, "\n");
1060 if((pict_type&7)!=3){
1063 const int count= mbHeight * QPStride;
1064 for(i=0; i<(count>>2); i++){
1065 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1067 for(i<<=2; i<count; i++){
1068 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1072 for(i=0; i<mbHeight; i++) {
1073 for(j=0; j<absQPStride; j++) {
1074 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1080 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1081 mode->lumMode, mode->chromMode);
1083 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1084 width, height, QP_store, QPStride, 0, mode, c);
1086 width = (width )>>c->hChromaSubSample;
1087 height = (height)>>c->vChromaSubSample;
1089 if(mode->chromMode){
1090 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1091 width, height, QP_store, QPStride, 1, mode, c);
1092 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1093 width, height, QP_store, QPStride, 2, mode, c);
1095 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1096 linecpy(dst[1], src[1], height, srcStride[1]);
1097 linecpy(dst[2], src[2], height, srcStride[2]);
1100 for(y=0; y<height; y++){
1101 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1102 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);