2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU, so that code is untested; no one has reported that it fails, so it presumably works
51 # more or less self-invented filters, so exactness is not very meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down))
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
92 #include "libavutil/ffversion.h"
/* Version banner: the fixed text "FFmpeg version " followed by FFMPEG_VERSION. */
93 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
/**
 * Return the library version number (LIBPOSTPROC_VERSION_INT).
 */
95 unsigned postproc_version(void)
/* Sanity check (av_assert0) that the micro version is at least 100. */
97 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
98 return LIBPOSTPROC_VERSION_INT;
/**
 * Return the build configuration string (FFMPEG_CONFIGURATION).
 */
101 const char *postproc_configuration(void)
103 return FFMPEG_CONFIGURATION;
/**
 * Return the license string (FFMPEG_LICENSE).
 */
106 const char *postproc_license(void)
108 #define LICENSE_PREFIX "libpostproc license: "
/* The literal stored in the binary is LICENSE_PREFIX immediately followed by
 * FFMPEG_LICENSE; the returned pointer is advanced by the prefix length
 * (sizeof - 1 excludes the NUL), so callers see only the license text. */
109 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Size in bytes of the scratch buffer used when parsing a mode string. */
116 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognised option strings
 * (one slot is kept for the terminating NULL). */
117 #define OPTIONS_ARRAY_SIZE 10
119 #define TEMP_STRIDE 8
120 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants for the x86 inline-asm code paths. The names encode the
 * repeated value: wNN repeats a 16-bit word, bNN repeats a byte. */
122 #if ARCH_X86 && HAVE_INLINE_ASM
123 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
124 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
125 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
126 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
127 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
128 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
129 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
130 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* Threshold constant for the deringing filter (consumer not in this part of the file). */
133 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/* Table of all filters selectable by name in a mode string.
 * The mode parser matches the first two fields (short and long name) with
 * strcmp and ORs the last field (the mask) into lumMode / chromMode.
 * NOTE(review): the three integers are presumably chromDefault,
 * minLumQuality and minChromQuality in that order -- confirm against the
 * struct PPFilter declaration, which is not part of this file.
 * The table is terminated by an entry whose names are NULL. */
136 static const struct PPFilter filters[]=
138 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
139 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
140 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
141 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
142 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
143 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
144 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
145 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
146 {"dr", "dering", 1, 5, 6, DERING},
147 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
148 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
149 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
150 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
151 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
152 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
153 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
154 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
155 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
156 {"be", "bitexact", 1, 0, 0, BITEXACT},
157 {"vi", "visualize", 1, 0, 0, VISUALIZE},
158 {NULL, NULL,0,0,0,0} //End Marker
/* Alias table for the mode parser, stored as consecutive pairs:
 * entry 2*i is the alias name, entry 2*i+1 is the filter list that is
 * substituted for it. The parser stops at the first NULL alias name. */
161 static const char * const replaceTable[]=
163 "default", "hb:a,vb:a,dr:a",
164 "de", "hb:a,vb:a,dr:a",
165 "fast", "h1:a,v1:a,dr:a",
166 "fa", "h1:a,v1:a,dr:a",
167 "ac", "ha:a:128:7,va:a,dr:a",
171 /* The horizontal functions exist only in C because the MMX
172 * code is faster with vertical filters and transposing. */
175 * Check if the given 8x8 Block is mostly "flat"
/* Flatness test along rows.
 * In each of BLOCK_SIZE iterations the seven horizontally adjacent pairs
 * src[k], src[k+1] (k = 0..6) are examined and numEq counts the pairs whose
 * difference lies within [-dcOffset, +dcOffset].
 * Returns nonzero when that count exceeds c->ppMode.flatnessThreshold. */
177 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
/* Tolerance derived from the non-B QP and the configured baseDcDiff. */
181 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
/* Width of the accepted range: with dcThreshold = 2*dcOffset + 1, the single
 * unsigned comparison below is equivalent to -dcOffset <= a-b <= dcOffset
 * (negative values wrap to large unsigned numbers and fail the test). */
182 const int dcThreshold= dcOffset*2 + 1;
184 for(y=0; y<BLOCK_SIZE; y++){
185 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
186 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
187 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
188 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
189 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
190 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
191 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
194 return numEq > c->ppMode.flatnessThreshold;
198 * Check if the middle 8x8 Block in the given 8x16 block is flat
/* Flatness test along columns.
 * After skipping four lines, BLOCK_SIZE-1 iterations each compare the eight
 * samples src[x] with the sample one line below (src[x+stride], x = 0..7);
 * numEq counts the pairs whose difference lies within [-dcOffset, +dcOffset].
 * Returns nonzero when that count exceeds c->ppMode.flatnessThreshold. */
200 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
/* Same tolerance computation and unsigned range-check trick as isHorizDC_C. */
204 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205 const int dcThreshold= dcOffset*2 + 1;
207 src+= stride*4; // src points to begin of the 8x8 Block
208 for(y=0; y<BLOCK_SIZE-1; y++){
209 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
210 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
211 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
212 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
213 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
215 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
216 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
219 return numEq > c->ppMode.flatnessThreshold;
/* Range check on sampled pixel pairs within a row.
 * Each test returns 0 as soon as a pair differs by more than 2*QP in
 * magnitude: (unsigned)(a - b + 2*QP) > 4*QP holds exactly when a-b is
 * outside [-2*QP, +2*QP]. The pairs sampled are (0,5), (2,7), (4,1), (6,3). */
222 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
226 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
228 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
230 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
232 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
/* Range check on sampled pixel pairs within columns.
 * Columns are visited four at a time; in each group one pair per column is
 * tested, taken from lines (0,5), (2,7), (4,1) and (6,3) respectively.
 * Returns 0 as soon as a pair differs by more than 2*QP in magnitude
 * (same unsigned comparison trick as isHorizMinMaxOk_C). */
238 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
242 for(x=0; x<BLOCK_SIZE; x+=4){
243 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
244 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
245 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
246 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/* Classify a block for horizontal filtering: when isHorizDC_C reports the
 * block as flat, the result of the min/max range check (using c->QP) is
 * returned. */
251 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
253 if( isHorizDC_C(src, stride, c) ){
254 return isHorizMinMaxOk_C(src, stride, c->QP);
/* Classify a block for vertical filtering: when isVertDC_C reports the
 * block as flat, the result of the min/max range check (using c->QP) is
 * returned. */
260 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
262 if( isVertDC_C(src, stride, c) ){
263 return isVertMinMaxOk_C(src, stride, c->QP);
/* Default horizontal deblocking filter (C version), working on eight
 * consecutive samples dst[0..7] per iteration; the edge being examined is
 * the one between dst[3] and dst[4]. */
269 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
272 for(y=0; y<BLOCK_SIZE; y++){
/* Weighted step measure across the dst[3] | dst[4] edge. */
273 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
/* Only steps smaller than 8*QP are treated as candidates for correction. */
275 if(FFABS(middleEnergy) < 8*c->QP){
/* Half of the difference across the edge (integer division). */
276 const int q=(dst[3] - dst[4])/2;
/* The same measure evaluated on the left samples (0..3) and right samples (4..7). */
277 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
278 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* Excess of the edge step over the smaller neighbouring activity. */
280 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give d the sign opposite to middleEnergy. */
284 d*= FFSIGN(-middleEnergy);
305 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
306 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/* Horizontal low-pass over dst[0..7], reading dst[-1] and dst[8] as
 * neighbours. Each output is (sums[i] + sums[i+2] + 2*dst[i]) >> 4; expanding
 * the two overlapping 7-sample window sums plus the doubled centre gives the
 * weights (1,1,2,2,4,2,2,1,1)/16. */
308 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
311 for(y=0; y<BLOCK_SIZE; y++){
/* Padding values for samples beyond the block: the outside neighbour is used
 * only when it differs from the border sample by less than QP, otherwise the
 * border sample itself is repeated. */
312 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
313 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
/* sums[k] is a sliding sum of seven samples (padded with first/last); each
 * step drops the oldest sample and adds the next one. The +4 in sums[0] is
 * carried through all sums, so a pair of them contributes +8, i.e. rounding
 * for the final >>4. */
316 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
317 sums[1] = sums[0] - first + dst[3];
318 sums[2] = sums[1] - first + dst[4];
319 sums[3] = sums[2] - first + dst[5];
320 sums[4] = sums[3] - first + dst[6];
321 sums[5] = sums[4] - dst[0] + dst[7];
322 sums[6] = sums[5] - dst[1] + last;
323 sums[7] = sums[6] - dst[2] + last;
324 sums[8] = sums[7] - dst[3] + last;
325 sums[9] = sums[8] - dst[4] + last;
/* All sums were computed from the unmodified samples above, so writing the
 * outputs in place here does not feed filtered values back into the sums. */
327 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
328 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
329 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
330 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
331 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
332 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
333 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
334 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
341 * Experimental Filter 1 (Horizontal)
342 * will not damage linear gradients
343 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
344 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
345 * The MMX2 version clips correctly; the C version does not
346 * not identical with the vertical one
/* Experimental horizontal deblocking filter 1 (C version).
 * src:    first sample of the row to filter (src[1]..src[6] are read below)
 * stride: line stride in bytes
 * QP:     quantiser (its use is not in the lines shown here)
 *
 * A 256-entry table `lut` is built for signed byte values: each 64-bit entry
 * packs eight correction bytes, a, b, c, d in the upper half and their
 * negations D, C, B, A (mod 256) in the lower half.
 * Fix: D is now the negation of d; it was previously computed from c, which
 * made the D byte a duplicate of C. */
348 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
351 static uint64_t lut[256];
/* Map the table index to a signed value and double it. */
357 int v= i < 128 ? 2*i : 2*(i-256);
/* NOTE(review): a, b, c, d appear twice below (9-tap variant and piecewise
 * linear variant); presumably only one set is active in the real file and
 * the other is commented out -- confirm. */
359 //Simulate 112242211 9-Tap filter
360 uint64_t a= (v/16) & 0xFF;
361 uint64_t b= (v/8) & 0xFF;
362 uint64_t c= (v/4) & 0xFF;
363 uint64_t d= (3*v/8) & 0xFF;
365 //Simulate piecewise linear interpolation
366 uint64_t a= (v/16) & 0xFF;
367 uint64_t b= (v*3/16) & 0xFF;
368 uint64_t c= (v*5/16) & 0xFF;
369 uint64_t d= (7*v/16) & 0xFF;
/* Two's-complement negations of a..d, reduced to one byte each. */
370 uint64_t A= (0x100 - a)&0xFF;
371 uint64_t B= (0x100 - b)&0xFF;
372 uint64_t C= (0x100 - c)&0xFF;
373 uint64_t D= (0x100 - d)&0xFF;
/* Pack as bytes a b c d D C B A, most significant first. */
375 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
376 (D<<24) | (C<<16) | (B<<8) | (A);
377 //lut[i] = (v<<32) | (v<<24);
381 for(y=0; y<BLOCK_SIZE; y++){
/* Differences of three sample pairs; b is the middle pair src[3] | src[4]. */
382 int a= src[1] - src[2];
383 int b= src[3] - src[4];
384 int c= src[5] - src[6];
/* Amount by which the middle step exceeds the mean of its neighbours' steps,
 * clamped at 0. */
386 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
/* Signed correction pointing against the direction of the middle step. */
389 int v = d * FFSIGN(-b);
403 * accurate deblock filter
/* "Accurate" deblocking filter, direction-agnostic C version.
 * src:    sample pointer; advanced by 4*step before filtering
 * step:   distance between consecutive samples along the filtered direction
 *         (every access below is of the form src[k*step])
 * stride: NOTE(review): presumably the distance between successive lines
 *         being filtered -- it is not used in the lines shown; confirm
 * c:      context supplying nonBQP and the ppMode thresholds
 * mode:   filter flags; the VISUALIZE bit is tested below
 *
 * A line is first classified as flat or not by counting near-equal
 * neighbours; the code after that either applies the 9-tap low-pass
 * (the sums[] section) or the default edge filter (the middleEnergy section). */
405 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
406 int stride, const PPContext *c, int mode)
/* Same tolerance and unsigned range-check trick as the isHorizDC_C/isVertDC_C tests. */
410 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
411 const int dcThreshold= dcOffset*2 + 1;
413 src+= step*4; // src points to begin of the 8x8 Block
/* Count adjacent pairs from src[-1*step] through src[8*step] whose
 * difference lies within [-dcOffset, +dcOffset]. */
417 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
418 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
419 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
420 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
421 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
422 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
423 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
424 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
425 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
426 if(numEq > c->ppMode.flatnessThreshold){
/* Track the minimum and maximum sample value. Samples are handled in pairs:
 * the pair is ordered first, so only the larger one is compared against max
 * and only the smaller one against min. */
429 if(src[0] > src[step]){
437 if(src[x*step] > src[(x+1)*step]){
438 if(src[x *step] > max) max= src[ x *step];
439 if(src[(x+1)*step] < min) min= src[(x+1)*step];
441 if(src[(x+1)*step] > max) max= src[(x+1)*step];
442 if(src[ x *step] < min) min= src[ x *step];
/* Padding values for the low-pass: use the outside neighbour only when it is
 * within QP of the border sample, otherwise repeat the border sample. */
446 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
447 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Sliding 7-sample sums (with +4 rounding bias), as in doHorizLowPass_C. */
450 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
451 sums[1] = sums[0] - first + src[3*step];
452 sums[2] = sums[1] - first + src[4*step];
453 sums[3] = sums[2] - first + src[5*step];
454 sums[4] = sums[3] - first + src[6*step];
455 sums[5] = sums[4] - src[0*step] + src[7*step];
456 sums[6] = sums[5] - src[1*step] + last;
457 sums[7] = sums[6] - src[2*step] + last;
458 sums[8] = sums[7] - src[3*step] + last;
459 sums[9] = sums[8] - src[4*step] + last;
461 if (mode & VISUALIZE) {
/* Two window sums two positions apart plus the doubled centre sample give
 * the (1,1,2,2,4,2,2,1,1)/16 kernel. */
471 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
472 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
473 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
474 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
475 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
476 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
477 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
478 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Default edge filter on the src[3*step] | src[4*step] edge; same quantities
 * as in doHorizDefFilter_C, expressed with step-based addressing. */
481 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
483 if(FFABS(middleEnergy) < 8*QP){
484 const int q=(src[3*step] - src[4*step])/2;
485 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
486 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
488 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
492 d*= FFSIGN(-middleEnergy);
/* Visualisation: replace a nonzero correction by a fixed magnitude of 32 with
 * inverted sign, then apply it with clipping to 0..255. */
502 if ((mode & VISUALIZE) && d) {
503 d= (d < 0) ? 32 : -32;
504 src[3*step]= av_clip_uint8(src[3*step] - d);
505 src[4*step]= av_clip_uint8(src[4*step] + d);
/* Template instantiation: postprocess_template.c is included once per
 * instruction-set variant, each time with a different TEMPLATE_PP_* macro
 * defined beforehand. With CONFIG_RUNTIME_CPUDETECT all x86 variants are
 * built; otherwise one variant is chosen by the HAVE_*_INLINE cascade
 * (SSE2, then MMXEXT, then 3DNow, then MMX). */
523 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
525 //we always compile C for testing which needs bitexactness
526 #define TEMPLATE_PP_C 1
527 #include "postprocess_template.c"
530 # define TEMPLATE_PP_ALTIVEC 1
531 # include "postprocess_altivec_template.c"
532 # include "postprocess_template.c"
535 #if ARCH_X86 && HAVE_INLINE_ASM
536 # if CONFIG_RUNTIME_CPUDETECT
537 # define TEMPLATE_PP_MMX 1
538 # include "postprocess_template.c"
539 # define TEMPLATE_PP_MMXEXT 1
540 # include "postprocess_template.c"
541 # define TEMPLATE_PP_3DNOW 1
542 # include "postprocess_template.c"
543 # define TEMPLATE_PP_SSE2 1
544 # include "postprocess_template.c"
546 # if HAVE_SSE2_INLINE
547 # define TEMPLATE_PP_SSE2 1
548 # include "postprocess_template.c"
549 # elif HAVE_MMXEXT_INLINE
550 # define TEMPLATE_PP_MMXEXT 1
551 # include "postprocess_template.c"
552 # elif HAVE_AMD3DNOW_INLINE
553 # define TEMPLATE_PP_3DNOW 1
554 # include "postprocess_template.c"
555 # elif HAVE_MMX_INLINE
556 # define TEMPLATE_PP_MMX 1
557 # include "postprocess_template.c"
/* Signature shared by all per-plane postprocessing implementations
 * (postProcess_C and the SIMD variants selected in postProcess()). */
562 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
563 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
/* Process one plane: pick an implementation and call it.
 * The C implementation is the default. Unless the BITEXACT flag is set in
 * ppMode->lumMode, a SIMD implementation may replace it, chosen at run time
 * from c->cpuCaps or at compile time from the HAVE_*_INLINE macros.
 * vm / vc are the opaque mode and context handles; they are cast to PPMode
 * and PPContext here. All other arguments are forwarded unchanged. */
565 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
566 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
568 pp_fn pp = postProcess_C;
569 PPContext *c= (PPContext *)vc;
570 PPMode *ppMode= (PPMode *)vm;
/* The mode is copied by value into the context on every call. */
571 c->ppMode= *ppMode; //FIXME
573 if (!(ppMode->lumMode & BITEXACT)) {
574 #if CONFIG_RUNTIME_CPUDETECT
575 #if ARCH_X86 && HAVE_INLINE_ASM
576 // ordered per speed fastest first
577 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
578 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
579 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
580 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
582 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
584 #else /* CONFIG_RUNTIME_CPUDETECT */
586 pp = postProcess_SSE2;
587 #elif HAVE_MMXEXT_INLINE
588 pp = postProcess_MMX2;
589 #elif HAVE_AMD3DNOW_INLINE
590 pp = postProcess_3DNow;
591 #elif HAVE_MMX_INLINE
592 pp = postProcess_MMX;
594 pp = postProcess_altivec;
596 #endif /* !CONFIG_RUNTIME_CPUDETECT */
599 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
602 /* -pp Command line Help
/* Help text printed line by line when the mode name is "help".
 * The filter names listed here must be spelled exactly as in filters[],
 * because the parser matches them with a case-sensitive strcmp.
 * Fix: "hadeblock", "vadeblock" and "forceQuant" were advertised although the
 * parser only accepts "ahdeblock", "avdeblock" and "forcequant". */
604 const char pp_help[] =
605 "Available postprocessing filters:\n"
607 "short long name short long option Description\n"
608 "* * a autoq CPU power dependent enabler\n"
609 " c chrom chrominance filtering enabled\n"
610 " y nochrom chrominance filtering disabled\n"
611 " n noluma luma filtering disabled\n"
612 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
613 " 1. difference factor: default=32, higher -> more deblocking\n"
614 " 2. flatness threshold: default=39, lower -> more deblocking\n"
615 " the h & v deblocking filters share these\n"
616 " so you can't set different thresholds for h / v\n"
617 "vb vdeblock (2 threshold) vertical deblocking filter\n"
618 "ha ahdeblock (2 threshold) horizontal deblocking filter\n"
619 "va avdeblock (2 threshold) vertical deblocking filter\n"
620 "h1 x1hdeblock experimental h deblock filter 1\n"
621 "v1 x1vdeblock experimental v deblock filter 1\n"
622 "dr dering deringing filter\n"
623 "al autolevels automatic brightness / contrast\n"
624 " f fullyrange stretch luminance to (0..255)\n"
625 "lb linblenddeint linear blend deinterlacer\n"
626 "li linipoldeint linear interpolating deinterlace\n"
627 "ci cubicipoldeint cubic interpolating deinterlacer\n"
628 "md mediandeint median deinterlacer\n"
629 "fd ffmpegdeint ffmpeg deinterlacer\n"
630 "l5 lowpass5 FIR lowpass deinterlacer\n"
631 "de default hb:a,vb:a,dr:a\n"
632 "fa fast h1:a,v1:a,dr:a\n"
633 "ac ha:a:128:7,va:a,dr:a\n"
634 "tn tmpnoise (3 threshold) temporal noise reducer\n"
635 " 1. <= 2. <= 3. larger -> stronger filtering\n"
636 "fq forcequant <quantizer> force quantizer\n"
638 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
639 "long form example:\n"
640 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
641 "short form example:\n"
642 "vb:a/hb:a/lb de,-vb\n"
/**
 * Parse a textual filter description into a newly allocated PPMode.
 *
 * @param name    filter list: filters separated by ',' or '/', options
 *                separated by ':' or '|'; the special name "help" prints
 *                pp_help to the log instead
 * @param quality value substituted for the filter quality when a filter
 *                carries the "autoq" / "a" option
 *
 * Unknown filter names and unconsumed options are counted in ppMode->error.
 */
648 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
/* Working copy of the mode string; tokenised and expanded in place. */
650 char temp[GET_MODE_BUFFER_SIZE];
652 static const char filterDelimiters[] = ",/";
653 static const char optionDelimiters[] = ":|";
654 struct PPMode *ppMode;
658 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
662 if (!strcmp(name, "help")) {
/* Log pp_help one line at a time. The copy size is the line length plus 2:
 * one for the '\n' and one for the terminating NUL that av_strlcpy writes. */
664 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
665 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
666 av_log(NULL, AV_LOG_INFO, "%s", temp);
671 ppMode= av_malloc(sizeof(PPMode));
/* Defaults used when the string does not override them. */
676 ppMode->chromMode= 0;
677 ppMode->maxTmpNoise[0]= 700;
678 ppMode->maxTmpNoise[1]= 1500;
679 ppMode->maxTmpNoise[2]= 3000;
680 ppMode->maxAllowedY= 234;
681 ppMode->minAllowedY= 16;
682 ppMode->baseDcDiff= 256/8;
683 ppMode->flatnessThreshold= 56-16-1;
684 ppMode->maxClippedThreshold= (AVRational){1,100};
/* Zero the whole buffer, then copy the input with a limit one below the
 * buffer size; longer input is truncated by av_strlcpy. */
687 memset(temp, 0, GET_MODE_BUFFER_SIZE);
688 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
690 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
693 const char *filterName;
/* Quality assumed for a filter without the autoq option: a large value, so
 * the minimum-quality comparisons below succeed. */
694 int q= 1000000; //PP_QUALITY_MAX;
/* Options not recognised generically; interpreted per filter further down. */
698 const char *options[OPTIONS_ARRAY_SIZE];
701 int numOfUnknownOptions=0;
702 int enable=1; // whether the user wants the filter enabled or disabled
/* Take the next filter token and step p past it and its delimiter. */
705 filterToken= av_strtok(p, filterDelimiters, &tokstate);
706 if(!filterToken) break;
707 p+= strlen(filterToken) + 1; // p points to next filterToken
/* The first option-delimited piece of the token is the filter name. */
708 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
713 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
/* NOTE(review): a leading '-' presumably requests disabling the filter
 * (see `enable` and the "[-]<filterName>" syntax in pp_help) -- confirm. */
715 if(*filterName == '-'){
720 for(;;){ //for all options
721 option= av_strtok(NULL, optionDelimiters, &tokstate);
724 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
/* Generic options, valid for every filter. */
725 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
726 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
727 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
728 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
/* Anything else is stored for the filter-specific handling below. */
730 options[numOfUnknownOptions] = option;
731 numOfUnknownOptions++;
/* Stop one short of the capacity so the NULL terminator always fits. */
733 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
735 options[numOfUnknownOptions] = NULL;
737 /* replace stuff from the replace Table */
738 for(i=0; replaceTable[2*i]; i++){
739 if(!strcmp(replaceTable[2*i], filterName)){
740 size_t newlen = strlen(replaceTable[2*i + 1]);
/* Check that the expansion fits, then insert it at p: the remaining text is
 * moved up by newlen (including its NUL) and the expansion copied in front. */
747 spaceLeft= p - temp + plen;
748 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
752 memmove(p + newlen, p, plen+1);
753 memcpy(p, replaceTable[2*i + 1], newlen);
/* Look the name up in filters[] (long or short form). */
758 for(i=0; filters[i].shortName; i++){
759 if( !strcmp(filters[i].longName, filterName)
760 || !strcmp(filters[i].shortName, filterName)){
/* Clear the filter's bits first; they are set again below only if enabled. */
761 ppMode->lumMode &= ~filters[i].mask;
762 ppMode->chromMode &= ~filters[i].mask;
765 if(!enable) break; // user wants to disable it
/* Enable for luma if quality suffices and luma was not switched off; enable
 * for chroma if requested explicitly (chrom==1) or, when unspecified
 * (chrom==-1), if the filter defaults to chroma. */
767 if(q >= filters[i].minLumQuality && luma)
768 ppMode->lumMode|= filters[i].mask;
769 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
770 if(q >= filters[i].minChromQuality)
771 ppMode->chromMode|= filters[i].mask;
/* autolevels: limited range 16..234 unless "fullyrange"/"f" is given. */
773 if(filters[i].mask == LEVEL_FIX){
775 ppMode->minAllowedY= 16;
776 ppMode->maxAllowedY= 234;
777 for(o=0; options[o]; o++){
778 if( !strcmp(options[o],"fullyrange")
779 ||!strcmp(options[o],"f")){
780 ppMode->minAllowedY= 0;
781 ppMode->maxAllowedY= 255;
782 numOfUnknownOptions--;
/* tmpnoise: numeric options fill maxTmpNoise[]; at most three are taken. */
786 else if(filters[i].mask == TEMP_NOISE_FILTER)
791 for(o=0; options[o]; o++){
/* NOTE(review): the strtol result is stored before `tail` is checked, so a
 * non-numeric option writes 0 into the current maxTmpNoise slot. */
793 ppMode->maxTmpNoise[numOfNoises]=
794 strtol(options[o], &tail, 0);
795 if(tail!=options[o]){
797 numOfUnknownOptions--;
798 if(numOfNoises >= 3) break;
/* Deblocking filters: up to two numeric options, first the difference factor
 * (baseDcDiff), then the flatness threshold. Parsing stops at the first
 * non-numeric option. */
802 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
803 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
806 for(o=0; options[o] && o<2; o++){
808 int val= strtol(options[o], &tail, 0);
809 if(tail==options[o]) break;
811 numOfUnknownOptions--;
812 if(o==0) ppMode->baseDcDiff= val;
813 else ppMode->flatnessThreshold= val;
/* forcequant: quantiser defaults to 15, optionally overridden by one number. */
816 else if(filters[i].mask == FORCE_QUANT){
818 ppMode->forcedQuant= 15;
820 for(o=0; options[o] && o<1; o++){
822 int val= strtol(options[o], &tail, 0);
823 if(tail==options[o]) break;
825 numOfUnknownOptions--;
826 ppMode->forcedQuant= val;
/* An unknown filter name and every option nobody consumed count as errors. */
831 if(!filterNameOk) ppMode->error++;
832 ppMode->error += numOfUnknownOptions;
835 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
837 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Release a mode object.
 * NOTE(review): presumably the counterpart of
 * pp_get_mode_by_name_and_quality() -- confirm against the function body. */
844 void pp_free_mode(pp_mode *mode){
/* Point *p at a fresh zero-initialised buffer of `size` bytes (av_mallocz).
 * The previous contents are not carried over. *p is NULL afterwards if the
 * allocation fails. */
848 static void reallocAlign(void **p, int size){
850 *p= av_mallocz(size);
/* (Re)allocate the context's working buffers for the given picture geometry.
 * width/height are in pixels, stride is the line stride the temporary planes
 * are sized for, and qpStride is the stride of the QP tables.
 * NOTE(review): none of the allocations visible here is checked for failure
 * before use (c->yHistogram is written right after being allocated), and the
 * size expressions are plain int arithmetic -- confirm callers bound the
 * dimensions. */
853 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
/* Picture size in 16x16 macroblocks, rounded up. */
854 int mbWidth = (width+15)>>4;
855 int mbHeight= (height+15)>>4;
859 c->qpStride= qpStride;
861 reallocAlign((void **)&c->tempDst, stride*24+32);
862 reallocAlign((void **)&c->tempSrc, stride*24);
863 reallocAlign((void **)&c->tempBlocks, 2*16*8);
864 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
/* Initial histogram value derived from the picture area. */
866 c->yHistogram[i]= width*height/64*15/256;
869 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
870 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
871 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
874 reallocAlign((void **)&c->deintTemp, 2*width+32);
/* QP tables: two of qpStride x mbHeight entries, plus one row of mbWidth
 * entries for the forced-quantiser case. */
875 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
876 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
877 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
/* Name callback referenced by av_codec_context_class. */
880 static const char * context_to_name(void * ptr) {
/* AVClass attached to every PPContext so av_log() can be called with the
 * context as its first argument; the class is named "Postproc". */
884 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/**
 * Allocate and initialise a postprocessing context.
 *
 * @param width   picture width in pixels
 * @param height  picture height in pixels
 * @param cpuCaps bit field: PP_CPU_CAPS_* flags select the CPU features, and
 *                when PP_FORMAT is set it also carries the chroma
 *                subsampling shifts
 */
886 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
887 PPContext *c= av_mallocz(sizeof(PPContext));
/* Initial guesses; pp_postprocess() reallocates if the real strides are larger. */
888 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
889 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
894 c->av_class = &av_codec_context_class;
/* With PP_FORMAT: horizontal shift in bits 0-1, vertical shift in bits 4-5. */
895 if(cpuCaps&PP_FORMAT){
896 c->hChromaSubSample= cpuCaps&0x3;
897 c->vChromaSubSample= (cpuCaps>>4)&0x3;
/* Subsampling shifts of 1 (presumably the branch taken without PP_FORMAT). */
899 c->hChromaSubSample= 1;
900 c->vChromaSubSample= 1;
/* Either query the CPU flags from libavutil or translate the caller's
 * PP_CPU_CAPS_* flags into AV_CPU_FLAG_* bits. */
902 if (cpuCaps & PP_CPU_CAPS_AUTO) {
903 c->cpuCaps = av_get_cpu_flags();
906 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
907 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
908 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
909 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
912 reallocBuffers(c, width, height, stride, qpStride);
/**
 * Free the buffers owned by a context and clear the context structure.
 *
 * @param vc context handle; cast to PPContext
 */
919 av_cold void pp_free_context(void *vc){
920 PPContext *c = (PPContext*)vc;
923 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
924 av_free(c->tempBlurred[i]);
925 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
926 av_free(c->tempBlurredPast[i]);
928 av_free(c->tempBlocks);
929 av_free(c->yHistogram);
932 av_free(c->deintTemp);
933 av_free(c->stdQPTable);
934 av_free(c->nonBQPTable);
935 av_free(c->forcedQPTable);
/* Wipe the structure so no freed pointer remains stored in it. */
937 memset(c, 0, sizeof(PPContext));
/**
 * Postprocess one picture.
 *
 * @param src, srcStride  source planes and their line strides (index 0 is
 *                        processed with isColor 0, indices 1 and 2 as chroma)
 * @param dst, dstStride  destination planes and their line strides
 * @param width, height   size of plane 0 in pixels
 * @param QP_store        per-macroblock quantisers, or NULL
 * @param QPStride        stride of QP_store (its absolute value is used for sizing)
 * @param vm              mode handle
 * @param vc              context handle
 * @param pict_type       picture type; the PP_PICT_TYPE_QP2 bit and the low
 *                        three bits are examined below
 */
942 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
943 uint8_t * dst[3], const int dstStride[3],
944 int width, int height,
945 const QP_STORE_T *QP_store, int QPStride,
946 pp_mode *vm, void *vc, int pict_type)
/* Picture size in 16x16 macroblocks, rounded up. */
948 int mbWidth = (width+15)>>4;
949 int mbHeight= (height+15)>>4;
952 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
953 int absQPStride = FFABS(QPStride);
955 // c->stride and c->QPStride are always positive
/* Grow the working buffers when the caller's strides exceed the stored ones. */
956 if(c->stride < minStride || c->qpStride < absQPStride)
957 reallocBuffers(c, width, height,
958 FFMAX(minStride, c->stride),
959 FFMAX(c->qpStride, absQPStride));
/* No QP table supplied, or forced quantiser requested: use a single row
 * (stride 0) filled with the forced value, or with 1 otherwise. */
961 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
963 QP_store= c->forcedQPTable;
964 absQPStride = QPStride = 0;
965 if(mode->lumMode & FORCE_QUANT)
966 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
968 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
/* PP_PICT_TYPE_QP2: store the QP values halved in stdQPTable and use that
 * table from here on. Four entries are halved at once through a 32-bit
 * word; the 0x7F7F7F7F mask discards the bit shifted in from the
 * neighbouring byte. Leftover entries are halved one at a time.
 * NOTE(review): the word-wise path presumably relies on QP_STORE_T being
 * one byte wide and on suitable alignment -- confirm. */
971 if(pict_type & PP_PICT_TYPE_QP2){
973 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
974 for(i=0; i<(count>>2); i++){
975 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
977 for(i<<=2; i<count; i++){
978 c->stdQPTable[i] = QP_store[i]>>1;
980 QP_store= c->stdQPTable;
981 QPStride= absQPStride;
/* Dump of the QP table to the log. */
986 for(y=0; y<mbHeight; y++){
987 for(x=0; x<mbWidth; x++){
988 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
990 av_log(c, AV_LOG_INFO, "\n");
992 av_log(c, AV_LOG_INFO, "\n");
/* Unless the low three bits of pict_type equal 3, refresh nonBQPTable with
 * the current QP values masked to six bits.
 * NOTE(review): 3 presumably denotes a B picture, matching the table's
 * name -- confirm. */
995 if((pict_type&7)!=3){
/* Bulk copy, four entries per 32-bit word, then the remainder. */
998 const int count= FFMAX(mbHeight * QPStride, mbWidth);
999 for(i=0; i<(count>>2); i++){
1000 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1002 for(i<<=2; i<count; i++){
1003 c->nonBQPTable[i] = QP_store[i] & 0x3F;
/* Row-by-row copy: the source is addressed with QPStride, the destination
 * with absQPStride. */
1007 for(i=0; i<mbHeight; i++) {
1008 for(j=0; j<absQPStride; j++) {
1009 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1015 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1016 mode->lumMode, mode->chromMode);
/* Plane 0 (isColor = 0). */
1018 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1019 width, height, QP_store, QPStride, 0, mode, c);
/* Test whether any of the chroma plane pointers is missing. */
1021 if (!(src[1] && src[2] && dst[1] && dst[2]))
/* Chroma plane size from the context's subsampling shifts. */
1024 width = (width )>>c->hChromaSubSample;
1025 height = (height)>>c->vChromaSubSample;
/* Chroma: filter both planes when any chroma filter is enabled; otherwise
 * copy them, in one call per plane when source and destination strides
 * match, else line by line. */
1027 if(mode->chromMode){
1028 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1029 width, height, QP_store, QPStride, 1, mode, c);
1030 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1031 width, height, QP_store, QPStride, 2, mode, c);
1033 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1034 linecpy(dst[1], src[1], height, srcStride[1]);
1035 linecpy(dst[2], src[2], height, srcStride[2]);
1038 for(y=0; y<height; y++){
1039 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1040 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);