2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once)
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
83 //#define HAVE_AMD3DNOW
86 //#define DEBUG_BRIGHTNESS
87 #include "postprocess.h"
88 #include "postprocess_internal.h"
89 #include "libavutil/avstring.h"
91 unsigned postproc_version(void)
93 return LIBPOSTPROC_VERSION_INT;
96 const char *postproc_configuration(void)
98 return FFMPEG_CONFIGURATION;
101 const char *postproc_license(void)
103 #define LICENSE_PREFIX "libpostproc license: "
104 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
111 #define GET_MODE_BUFFER_SIZE 500
112 #define OPTIONS_ARRAY_SIZE 10
114 #define TEMP_STRIDE 8
115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131 static struct PPFilter filters[]=
133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141 {"dr", "dering", 1, 5, 6, DERING},
142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151 {NULL, NULL,0,0,0,0} //End Marker
154 static const char *replaceTable[]=
156 "default", "hb:a,vb:a,dr:a",
157 "de", "hb:a,vb:a,dr:a",
158 "fast", "h1:a,v1:a,dr:a",
159 "fa", "h1:a,v1:a,dr:a",
160 "ac", "ha:a:128:7,va:a,dr:a",
166 static inline void prefetchnta(void *p)
168 __asm__ volatile( "prefetchnta (%0)\n\t"
173 static inline void prefetcht0(void *p)
175 __asm__ volatile( "prefetcht0 (%0)\n\t"
180 static inline void prefetcht1(void *p)
182 __asm__ volatile( "prefetcht1 (%0)\n\t"
187 static inline void prefetcht2(void *p)
189 __asm__ volatile( "prefetcht2 (%0)\n\t"
195 /* The horizontal functions exist only in C because the MMX
196 * code is faster with vertical filters and transposing. */
199 * Check if the given 8x8 Block is mostly "flat"
201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206 const int dcThreshold= dcOffset*2 + 1;
208 for(y=0; y<BLOCK_SIZE; y++){
209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
218 return numEq > c->ppMode.flatnessThreshold;
222 * Check if the middle 8x8 Block in the given 8x16 block is flat
224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229 const int dcThreshold= dcOffset*2 + 1;
231 src+= stride*4; // src points to begin of the 8x8 Block
232 for(y=0; y<BLOCK_SIZE-1; y++){
233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
243 return numEq > c->ppMode.flatnessThreshold;
/* Min/max test used by horizClassify_C(). Each check below rejects the block
 * (returns 0) when the signed difference of the two sampled pixels lies
 * outside [-2*QP, 2*QP]; adding 2*QP and casting to unsigned folds the lower
 * and the upper bound into a single comparison (assumes QP >= 0). */
246 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
250 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
252 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
254 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
256 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
/* Min/max test used by vertClassify_C(). Handles four neighbouring columns
 * per loop iteration; in each column two pixels from different rows are
 * compared and the block is rejected (returns 0) when their signed difference
 * lies outside [-2*QP, 2*QP]. The unsigned cast folds both bounds into one
 * comparison (assumes QP >= 0). */
262 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
266 for(x=0; x<BLOCK_SIZE; x+=4){
267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
277 if( isHorizDC_C(src, stride, c) ){
278 if( isHorizMinMaxOk_C(src, stride, c->QP) )
287 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
289 if( isVertDC_C(src, stride, c) ){
290 if( isVertMinMaxOk_C(src, stride, c->QP) )
299 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
302 for(y=0; y<BLOCK_SIZE; y++){
303 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
305 if(FFABS(middleEnergy) < 8*c->QP){
306 const int q=(dst[3] - dst[4])/2;
307 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
308 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
310 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
314 d*= FFSIGN(-middleEnergy);
335 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
336 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
338 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
341 for(y=0; y<BLOCK_SIZE; y++){
342 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
343 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
346 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
347 sums[1] = sums[0] - first + dst[3];
348 sums[2] = sums[1] - first + dst[4];
349 sums[3] = sums[2] - first + dst[5];
350 sums[4] = sums[3] - first + dst[6];
351 sums[5] = sums[4] - dst[0] + dst[7];
352 sums[6] = sums[5] - dst[1] + last;
353 sums[7] = sums[6] - dst[2] + last;
354 sums[8] = sums[7] - dst[3] + last;
355 sums[9] = sums[8] - dst[4] + last;
357 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
358 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
359 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
360 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
361 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
362 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
363 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
364 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
371 * Experimental Filter 1 (Horizontal)
372 * will not damage linear gradients
373 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
374 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
375 * MMX2 version does correct clipping, C version does not
376 * not identical with the vertical one
// Experimental horizontal filter: for every row it compares the step across
// the middle of the block (src[3]-src[4]) with the two neighbouring steps
// (src[1]-src[2] and src[5]-src[6]) and derives a signed correction v from it.
378 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
// Table of 256 packed 64-bit entries, filled by the lut[i] assignment below.
// NOTE(review): the av_malloc() result is used without a visible NULL check - confirm.
381 static uint64_t *lut= NULL;
385 lut = av_malloc(256*8);
388 int v= i < 128 ? 2*i : 2*(i-256);
390 //Simulate 112242211 9-Tap filter
391 uint64_t a= (v/16) & 0xFF;
392 uint64_t b= (v/8) & 0xFF;
393 uint64_t c= (v/4) & 0xFF;
394 uint64_t d= (3*v/8) & 0xFF;
396 //Simulate piecewise linear interpolation
397 uint64_t a= (v/16) & 0xFF;
398 uint64_t b= (v*3/16) & 0xFF;
399 uint64_t c= (v*5/16) & 0xFF;
400 uint64_t d= (7*v/16) & 0xFF;
// A..D are the byte-wise negations (mod 256) of a..d.
401 uint64_t A= (0x100 - a)&0xFF;
402 uint64_t B= (0x100 - b)&0xFF;
403 uint64_t C= (0x100 - c)&0xFF;
// Fixed: D was computed from c, which duplicated C in the D byte lane.
404 uint64_t D= (0x100 - d)&0xFF;
406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407 (D<<24) | (C<<16) | (B<<8) | (A);
408 //lut[i] = (v<<32) | (v<<24);
412 for(y=0; y<BLOCK_SIZE; y++){
413 int a= src[1] - src[2];
414 int b= src[3] - src[4];
415 int c= src[5] - src[6];
// How much the middle step exceeds the average of its neighbours, floored at 0.
417 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
// The correction has the opposite sign of the middle step.
420 int v = d * FFSIGN(-b);
434 * accurate deblock filter
436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
439 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
440 const int dcThreshold= dcOffset*2 + 1;
442 src+= step*4; // src points to begin of the 8x8 Block
446 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
447 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
448 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
449 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
450 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
451 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
452 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
453 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
454 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
455 if(numEq > c->ppMode.flatnessThreshold){
458 if(src[0] > src[step]){
466 if(src[x*step] > src[(x+1)*step]){
467 if(src[x *step] > max) max= src[ x *step];
468 if(src[(x+1)*step] < min) min= src[(x+1)*step];
470 if(src[(x+1)*step] > max) max= src[(x+1)*step];
471 if(src[ x *step] < min) min= src[ x *step];
475 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
476 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
479 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
480 sums[1] = sums[0] - first + src[3*step];
481 sums[2] = sums[1] - first + src[4*step];
482 sums[3] = sums[2] - first + src[5*step];
483 sums[4] = sums[3] - first + src[6*step];
484 sums[5] = sums[4] - src[0*step] + src[7*step];
485 sums[6] = sums[5] - src[1*step] + last;
486 sums[7] = sums[6] - src[2*step] + last;
487 sums[8] = sums[7] - src[3*step] + last;
488 sums[9] = sums[8] - src[4*step] + last;
490 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
491 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
492 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
493 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
494 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
495 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
496 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
497 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
500 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
502 if(FFABS(middleEnergy) < 8*QP){
503 const int q=(src[3*step] - src[4*step])/2;
504 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
505 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
507 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
511 d*= FFSIGN(-middleEnergy);
535 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
537 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
542 #define COMPILE_ALTIVEC
543 #endif //HAVE_ALTIVEC
547 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
551 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
555 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
556 #define COMPILE_3DNOW
558 #endif /* ARCH_X86 */
565 #define HAVE_AMD3DNOW 0
567 #define HAVE_ALTIVEC 0
570 #define RENAME(a) a ## _C
571 #include "postprocess_template.c"
574 #ifdef COMPILE_ALTIVEC
577 #define HAVE_ALTIVEC 1
578 #define RENAME(a) a ## _altivec
579 #include "postprocess_altivec_template.c"
580 #include "postprocess_template.c"
588 #define RENAME(a) a ## _MMX
589 #include "postprocess_template.c"
599 #define RENAME(a) a ## _MMX2
600 #include "postprocess_template.c"
611 #define HAVE_AMD3DNOW 1
612 #define RENAME(a) a ## _3DNow
613 #include "postprocess_template.c"
616 // Note: the HAVE_xyz macros are redefined for the template includes above, so do not rely on them past this point.
618 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
619 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
621 PPContext *c= (PPContext *)vc;
622 PPMode *ppMode= (PPMode *)vm;
623 c->ppMode= *ppMode; //FIXME
625 // Using ifs here as they are faster than function pointers although the
626 // difference would not be measurable here but it is much better because
627 // someone might exchange the CPU without restarting MPlayer ;)
628 #if CONFIG_RUNTIME_CPUDETECT
630 // ordered per speed fastest first
631 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
632 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
633 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
634 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
635 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
636 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
638 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
641 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
642 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
647 #else //CONFIG_RUNTIME_CPUDETECT
649 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 #endif //!CONFIG_RUNTIME_CPUDETECT
662 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
663 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
665 /* -pp Command line Help
// User-visible help text for the postprocessing mode string.
// The short/long filter names listed here must match the entries of filters[]
// exactly: the parser compares names with strcmp(), i.e. case-sensitively.
667 #if LIBPOSTPROC_VERSION_INT < (52<<16)
668 const char *const pp_help=
670 const char pp_help[] =
672 "Available postprocessing filters:\n"
674 "short long name short long option Description\n"
675 "* * a autoq CPU power dependent enabler\n"
676 " c chrom chrominance filtering enabled\n"
677 " y nochrom chrominance filtering disabled\n"
678 " n noluma luma filtering disabled\n"
679 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
680 " 1. difference factor: default=32, higher -> more deblocking\n"
681 " 2. flatness threshold: default=39, lower -> more deblocking\n"
682 " the h & v deblocking filters share these\n"
683 " so you can't set different thresholds for h / v\n"
684 "vb vdeblock (2 threshold) vertical deblocking filter\n"
685 "ha ahdeblock (2 threshold) horizontal deblocking filter\n"
686 "va avdeblock (2 threshold) vertical deblocking filter\n"
687 "h1 x1hdeblock experimental h deblock filter 1\n"
688 "v1 x1vdeblock experimental v deblock filter 1\n"
689 "dr dering deringing filter\n"
690 "al autolevels automatic brightness / contrast\n"
691 " f fullyrange stretch luminance to (0..255)\n"
692 "lb linblenddeint linear blend deinterlacer\n"
693 "li linipoldeint linear interpolating deinterlace\n"
694 "ci cubicipoldeint cubic interpolating deinterlacer\n"
695 "md mediandeint median deinterlacer\n"
696 "fd ffmpegdeint ffmpeg deinterlacer\n"
697 "l5 lowpass5 FIR lowpass deinterlacer\n"
698 "de default hb:a,vb:a,dr:a\n"
699 "fa fast h1:a,v1:a,dr:a\n"
700 "ac ha:a:128:7,va:a,dr:a\n"
701 "tn tmpnoise (3 threshold) temporal noise reducer\n"
702 " 1. <= 2. <= 3. larger -> stronger filtering\n"
703 "fq forcequant <quantizer> force quantizer\n"
705 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
706 "long form example:\n"
707 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
708 "short form example:\n"
709 "vb:a/hb:a/lb de,-vb\n"
715 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
717 char temp[GET_MODE_BUFFER_SIZE];
719 static const char filterDelimiters[] = ",/";
720 static const char optionDelimiters[] = ":";
721 struct PPMode *ppMode;
724 ppMode= av_malloc(sizeof(PPMode));
727 ppMode->chromMode= 0;
728 ppMode->maxTmpNoise[0]= 700;
729 ppMode->maxTmpNoise[1]= 1500;
730 ppMode->maxTmpNoise[2]= 3000;
731 ppMode->maxAllowedY= 234;
732 ppMode->minAllowedY= 16;
733 ppMode->baseDcDiff= 256/8;
734 ppMode->flatnessThreshold= 56-16-1;
735 ppMode->maxClippedThreshold= 0.01;
738 memset(temp, 0, GET_MODE_BUFFER_SIZE);
739 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
741 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
745 int q= 1000000; //PP_QUALITY_MAX;
749 char *options[OPTIONS_ARRAY_SIZE];
752 int numOfUnknownOptions=0;
753 int enable=1; //does the user want us to enabled or disabled the filter
755 filterToken= strtok(p, filterDelimiters);
756 if(filterToken == NULL) break;
757 p+= strlen(filterToken) + 1; // p points to next filterToken
758 filterName= strtok(filterToken, optionDelimiters);
759 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
761 if(*filterName == '-'){
766 for(;;){ //for all options
767 option= strtok(NULL, optionDelimiters);
768 if(option == NULL) break;
770 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
771 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
772 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
773 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
774 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
776 options[numOfUnknownOptions] = option;
777 numOfUnknownOptions++;
779 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
781 options[numOfUnknownOptions] = NULL;
783 /* replace stuff from the replace Table */
784 for(i=0; replaceTable[2*i]!=NULL; i++){
785 if(!strcmp(replaceTable[2*i], filterName)){
786 int newlen= strlen(replaceTable[2*i + 1]);
793 spaceLeft= p - temp + plen;
794 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
798 memmove(p + newlen, p, plen+1);
799 memcpy(p, replaceTable[2*i + 1], newlen);
804 for(i=0; filters[i].shortName!=NULL; i++){
805 if( !strcmp(filters[i].longName, filterName)
806 || !strcmp(filters[i].shortName, filterName)){
807 ppMode->lumMode &= ~filters[i].mask;
808 ppMode->chromMode &= ~filters[i].mask;
811 if(!enable) break; // user wants to disable it
813 if(q >= filters[i].minLumQuality && luma)
814 ppMode->lumMode|= filters[i].mask;
815 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
816 if(q >= filters[i].minChromQuality)
817 ppMode->chromMode|= filters[i].mask;
819 if(filters[i].mask == LEVEL_FIX){
821 ppMode->minAllowedY= 16;
822 ppMode->maxAllowedY= 234;
823 for(o=0; options[o]!=NULL; o++){
824 if( !strcmp(options[o],"fullyrange")
825 ||!strcmp(options[o],"f")){
826 ppMode->minAllowedY= 0;
827 ppMode->maxAllowedY= 255;
828 numOfUnknownOptions--;
832 else if(filters[i].mask == TEMP_NOISE_FILTER)
837 for(o=0; options[o]!=NULL; o++){
839 ppMode->maxTmpNoise[numOfNoises]=
840 strtol(options[o], &tail, 0);
841 if(tail!=options[o]){
843 numOfUnknownOptions--;
844 if(numOfNoises >= 3) break;
848 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
849 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
852 for(o=0; options[o]!=NULL && o<2; o++){
854 int val= strtol(options[o], &tail, 0);
855 if(tail==options[o]) break;
857 numOfUnknownOptions--;
858 if(o==0) ppMode->baseDcDiff= val;
859 else ppMode->flatnessThreshold= val;
862 else if(filters[i].mask == FORCE_QUANT){
864 ppMode->forcedQuant= 15;
866 for(o=0; options[o]!=NULL && o<1; o++){
868 int val= strtol(options[o], &tail, 0);
869 if(tail==options[o]) break;
871 numOfUnknownOptions--;
872 ppMode->forcedQuant= val;
877 if(!filterNameOk) ppMode->error++;
878 ppMode->error += numOfUnknownOptions;
881 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
883 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
890 void pp_free_mode(pp_mode *mode){
894 static void reallocAlign(void **p, int alignment, int size){
896 *p= av_mallocz(size);
899 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
900 int mbWidth = (width+15)>>4;
901 int mbHeight= (height+15)>>4;
905 c->qpStride= qpStride;
907 reallocAlign((void **)&c->tempDst, 8, stride*24);
908 reallocAlign((void **)&c->tempSrc, 8, stride*24);
909 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
910 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
912 c->yHistogram[i]= width*height/64*15/256;
915 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
916 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
917 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
920 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
921 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
922 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
923 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
926 static const char * context_to_name(void * ptr) {
930 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
// Allocates a zero-initialized PPContext and sizes its work buffers for
// frames of width x height via reallocBuffers().
// When cpuCaps has PP_FORMAT set, bits 0-1 and bits 4-5 of cpuCaps give the
// horizontal and vertical chroma subsampling; the value 1 is also assigned
// to both in the lines below.
// Returns NULL if the context cannot be allocated.
932 pp_context *pp_get_context(int width, int height, int cpuCaps){
933 PPContext *c= av_malloc(sizeof(PPContext));
934 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
935 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
// av_malloc() may fail; bail out before memset() dereferences the pointer.
if(!c) return NULL;
937 memset(c, 0, sizeof(PPContext));
938 c->av_class = &av_codec_context_class;
940 if(cpuCaps&PP_FORMAT){
941 c->hChromaSubSample= cpuCaps&0x3;
942 c->vChromaSubSample= (cpuCaps>>4)&0x3;
944 c->hChromaSubSample= 1;
945 c->vChromaSubSample= 1;
948 reallocBuffers(c, width, height, stride, qpStride);
955 void pp_free_context(void *vc){
956 PPContext *c = (PPContext*)vc;
959 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
960 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
962 av_free(c->tempBlocks);
963 av_free(c->yHistogram);
966 av_free(c->deintTemp);
967 av_free(c->stdQPTable);
968 av_free(c->nonBQPTable);
969 av_free(c->forcedQPTable);
971 memset(c, 0, sizeof(PPContext));
976 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
977 uint8_t * dst[3], const int dstStride[3],
978 int width, int height,
979 const QP_STORE_T *QP_store, int QPStride,
980 pp_mode *vm, void *vc, int pict_type)
982 int mbWidth = (width+15)>>4;
983 int mbHeight= (height+15)>>4;
984 PPMode *mode = (PPMode*)vm;
985 PPContext *c = (PPContext*)vc;
986 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
987 int absQPStride = FFABS(QPStride);
989 // c->stride and c->QPStride are always positive
990 if(c->stride < minStride || c->qpStride < absQPStride)
991 reallocBuffers(c, width, height,
992 FFMAX(minStride, c->stride),
993 FFMAX(c->qpStride, absQPStride));
995 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
997 QP_store= c->forcedQPTable;
998 absQPStride = QPStride = 0;
999 if(mode->lumMode & FORCE_QUANT)
1000 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1002 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1005 if(pict_type & PP_PICT_TYPE_QP2){
1007 const int count= mbHeight * absQPStride;
1008 for(i=0; i<(count>>2); i++){
1009 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1011 for(i<<=2; i<count; i++){
1012 c->stdQPTable[i] = QP_store[i]>>1;
1014 QP_store= c->stdQPTable;
1015 QPStride= absQPStride;
1020 for(y=0; y<mbHeight; y++){
1021 for(x=0; x<mbWidth; x++){
1022 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1024 av_log(c, AV_LOG_INFO, "\n");
1026 av_log(c, AV_LOG_INFO, "\n");
1029 if((pict_type&7)!=3){
1032 const int count= mbHeight * QPStride;
1033 for(i=0; i<(count>>2); i++){
1034 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1036 for(i<<=2; i<count; i++){
1037 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1041 for(i=0; i<mbHeight; i++) {
1042 for(j=0; j<absQPStride; j++) {
1043 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1049 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1050 mode->lumMode, mode->chromMode);
1052 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1053 width, height, QP_store, QPStride, 0, mode, c);
1055 width = (width )>>c->hChromaSubSample;
1056 height = (height)>>c->vChromaSubSample;
1058 if(mode->chromMode){
1059 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1060 width, height, QP_store, QPStride, 1, mode, c);
1061 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1062 width, height, QP_store, QPStride, 2, mode, c);
1064 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1065 linecpy(dst[1], src[1], height, srcStride[1]);
1066 linecpy(dst[2], src[2], height, srcStride[2]);
1069 for(y=0; y<height; y++){
1070 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1071 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);