2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once;
66 the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include "libavutil/intreadwrite.h"
84 //#undef HAVE_MMXEXT_INLINE
85 //#define HAVE_AMD3DNOW_INLINE
86 //#undef HAVE_MMX_INLINE
88 //#define DEBUG_BRIGHTNESS
89 #include "postprocess.h"
90 #include "postprocess_internal.h"
91 #include "libavutil/avstring.h"
93 #include "libavutil/ffversion.h"
/* Version banner: the literal "FFmpeg version " followed by FFMPEG_VERSION. */
94 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
96 unsigned postproc_version(void)
98 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
99 return LIBPOSTPROC_VERSION_INT;
102 const char *postproc_configuration(void)
104 return FFMPEG_CONFIGURATION;
107 const char *postproc_license(void)
109 #define LICENSE_PREFIX "libpostproc license: "
110 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Sizes used by the mode-string parser below: length of its working buffer
 * and capacity of the per-filter array of unrecognised options. */
117 #define GET_MODE_BUFFER_SIZE 500
118 #define OPTIONS_ARRAY_SIZE 10
120 #define TEMP_STRIDE 8
121 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants declared only for x86 builds with inline assembly:
 * wNN hold a 16-bit value repeated four times, bNN a byte repeated eight times. */
123 #if ARCH_X86 && HAVE_INLINE_ASM
124 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
125 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
126 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
127 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
128 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
129 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
130 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
131 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* NOTE(review): no #endif closing the #if above is visible in this listing — confirm it was not lost. */
134 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
137 static const struct PPFilter filters[]=
139 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
140 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
141 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
142 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
143 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
144 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
145 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
146 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
147 {"dr", "dering", 1, 5, 6, DERING},
148 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
149 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
150 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
151 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
152 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
153 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
154 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
155 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
156 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
157 {"be", "bitexact", 1, 0, 0, BITEXACT},
158 {"vi", "visualize", 1, 0, 0, VISUALIZE},
159 {NULL, NULL,0,0,0,0} //End Marker
/**
 * Alias table for the mode-string parser, stored as consecutive
 * (alias, expansion) pairs: element 2*i is the alias name and element
 * 2*i + 1 the filter string it is replaced with.
 *
 * The parser walks the table until replaceTable[2*i] is NULL, so the list
 * must end with a NULL entry at an even index.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
172 /* The horizontal functions exist only in C because the MMX
173 * code is faster with vertical filters and transposing. */
176 * Check if the given 8x8 Block is mostly "flat"
178 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
182 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
183 const int dcThreshold= dcOffset*2 + 1;
185 for(y=0; y<BLOCK_SIZE; y++){
186 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
187 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
188 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
189 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
190 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
191 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
192 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
195 return numEq > c->ppMode.flatnessThreshold;
199 * Check if the middle 8x8 Block in the given 8x16 block is flat
201 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206 const int dcThreshold= dcOffset*2 + 1;
208 src+= stride*4; // src points to begin of the 8x8 Block
209 for(y=0; y<BLOCK_SIZE-1; y++){
210 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
211 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
212 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
213 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
215 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
216 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
220 return numEq > c->ppMode.flatnessThreshold;
/**
 * Cheap horizontal range check over an 8-row block.
 *
 * One sample pair is tested per row, cycling through four pairs
 * (0,5), (2,7), (4,1), (6,3) twice. A pair passes when its difference lies
 * within [-2*QP, 2*QP]; the unsigned cast folds both bounds into a single
 * comparison.
 *
 * @param src    first sample of the block's first row
 * @param stride distance between the starts of consecutive rows
 * @param QP     quantizer controlling the allowed difference
 * @return 1 if every tested pair passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    int i;
    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
/*
 * Vertical range check: columns are visited in groups of four, and in each
 * group one pair of samples from the same column but different rows is
 * tested per column. A pair fails when its difference lies outside
 * [-2*QP, 2*QP]; the function returns 0 on the first failing pair.
 * NOTE(review): the declaration of x, any initial offset applied to src, the
 * closing braces and the success return are not visible in this listing —
 * confirm against the complete file.
 */
239 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
243 for(x=0; x<BLOCK_SIZE; x+=4){
244 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
245 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
246 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
247 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/*
 * Classify a block for horizontal filtering: when isHorizDC_C() reports the
 * block as flat, the result is whatever isHorizMinMaxOk_C() returns for c->QP.
 * NOTE(review): the branch taken for non-flat blocks is not visible in this
 * listing.
 */
252 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
254 if( isHorizDC_C(src, stride, c) ){
255 return isHorizMinMaxOk_C(src, stride, c->QP);
/*
 * Classify a block for vertical filtering: when isVertDC_C() reports the
 * block as flat, the result is whatever isVertMinMaxOk_C() returns for c->QP.
 * NOTE(review): the branch taken for non-flat blocks is not visible in this
 * listing.
 */
261 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
263 if( isVertDC_C(src, stride, c) ){
264 return isVertMinMaxOk_C(src, stride, c->QP);
/*
 * Default horizontal deblocking step, evaluated for each of BLOCK_SIZE rows.
 * A correction d is only computed when |middleEnergy| < 8*c->QP.
 * NOTE(review): the declaration of y, the further scaling/clamping of d, the
 * write-back to dst[] and the per-row pointer advance are not visible in this
 * listing — confirm against the complete file.
 */
270 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
273 for(y=0; y<BLOCK_SIZE; y++){
/* weighted difference centred on the dst[3]/dst[4] pair */
274 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
276 if(FFABS(middleEnergy) < 8*c->QP){
/* half of the step between dst[3] and dst[4] */
277 const int q=(dst[3] - dst[4])/2;
/* the same weighted difference, shifted two samples to the left and to the right */
278 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
279 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* how much the middle measure exceeds the smaller of its two neighbours */
281 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* give d the sign opposite to middleEnergy */
285 d*= FFSIGN(-middleEnergy);
306 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
307 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
309 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
312 for(y=0; y<BLOCK_SIZE; y++){
313 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
314 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
317 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
318 sums[1] = sums[0] - first + dst[3];
319 sums[2] = sums[1] - first + dst[4];
320 sums[3] = sums[2] - first + dst[5];
321 sums[4] = sums[3] - first + dst[6];
322 sums[5] = sums[4] - dst[0] + dst[7];
323 sums[6] = sums[5] - dst[1] + last;
324 sums[7] = sums[6] - dst[2] + last;
325 sums[8] = sums[7] - dst[3] + last;
326 sums[9] = sums[8] - dst[4] + last;
328 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
329 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
330 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
331 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
332 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
333 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
334 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
335 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
342 * Experimental Filter 1 (Horizontal)
343 * will not damage linear gradients
344 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
345 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
346 * the MMX2 version clips correctly; the C version does not
347 * not identical with the vertical one
/*
 * Experimental horizontal filter working on BLOCK_SIZE rows of src.
 * The first part fills a static 256-entry table (lut); the second part
 * derives a per-row correction value v from three neighbouring differences.
 * NOTE(review): the guard around the table initialisation, the loop over i,
 * the use of v to modify src[] and the per-row pointer advance are not
 * visible in this listing. No read of lut is visible here either.
 */
349 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
352 static uint64_t lut[256];
/* map table index i (0..255) to a signed, doubled value in -256..254 */
358 int v= i < 128 ? 2*i : 2*(i-256);
/* NOTE(review): a, b, c and d are declared twice in the visible text (this
 * group and the next); presumably this first group sat inside a comment whose
 * delimiters were lost — confirm. */
360 //Simulate 112242211 9-Tap filter
361 uint64_t a= (v/16) & 0xFF;
362 uint64_t b= (v/8) & 0xFF;
363 uint64_t c= (v/4) & 0xFF;
364 uint64_t d= (3*v/8) & 0xFF;
366 //Simulate piecewise linear interpolation
367 uint64_t a= (v/16) & 0xFF;
368 uint64_t b= (v*3/16) & 0xFF;
369 uint64_t c= (v*5/16) & 0xFF;
370 uint64_t d= (7*v/16) & 0xFF;
/* A..D: byte-wise negations (mod 256) of the values above */
371 uint64_t A= (0x100 - a)&0xFF;
372 uint64_t B= (0x100 - b)&0xFF;
373 uint64_t C= (0x100 - c)&0xFF;
/* NOTE(review): D is computed from c, not d, unlike A/B/C which each use
 * their lower-case counterpart — looks like a typo; confirm intent. */
374 uint64_t D= (0x100 - c)&0xFF;
/* pack the eight bytes into one 64-bit entry, a in the top byte, A in the lowest */
376 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
377 (D<<24) | (C<<16) | (B<<8) | (A);
378 //lut[i] = (v<<32) | (v<<24);
382 for(y=0; y<BLOCK_SIZE; y++){
/* differences of the sample pairs (1,2), (3,4) and (5,6) of the row */
383 int a= src[1] - src[2];
384 int b= src[3] - src[4];
385 int c= src[5] - src[6];
/* amount by which the middle step exceeds the average of its neighbours, floored at 0 */
387 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
/* d with the sign opposite to b */
390 int v = d * FFSIGN(-b);
404 * accurate deblock filter
/*
 * "Accurate" deblocking filter working on one line of samples at a time;
 * consecutive samples of a line are `step` apart.
 * For each line, numEq counts neighbouring pairs whose difference lies within
 * [-dcOffset, dcOffset]. If numEq exceeds c->ppMode.flatnessThreshold, the
 * min/max of the line is gathered and a 9-tap style low pass (built from
 * running sums) rewrites src[0..7*step]; the code starting at middleEnergy
 * instead adjusts only src[3*step] and src[4*step].
 * With VISUALIZE set in mode, the visible code replaces the correction by a
 * fixed +/-32 so that filtered positions stand out.
 * NOTE(review): local declarations (y, QP, numEq, min, max, x, sums), the
 * per-line loop, the min/max range test, the else that presumably separates
 * the two paths, the clamping of d and the final write-back are not visible
 * in this listing — confirm against the complete file.
 */
406 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
407 int stride, const PPContext *c, int mode)
411 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
412 const int dcThreshold= dcOffset*2 + 1;
414 src+= step*4; // src points to begin of the 8x8 Block
/* nine neighbour comparisons, from the sample before the block to the one after it */
418 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
419 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
420 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
421 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
422 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
423 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
424 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
425 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
426 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
427 if(numEq > c->ppMode.flatnessThreshold){
/* min/max search handled in pairs: the larger of each pair can only raise max,
 * the smaller can only lower min */
430 if(src[0] > src[step]){
438 if(src[x*step] > src[(x+1)*step]){
439 if(src[x *step] > max) max= src[ x *step];
440 if(src[(x+1)*step] < min) min= src[(x+1)*step];
442 if(src[(x+1)*step] > max) max= src[(x+1)*step];
443 if(src[ x *step] < min) min= src[ x *step];
/* edge values: use the sample outside the block only if it is within QP of its inner neighbour */
447 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
448 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* running sums; sums[0] carries the +4 rounding term */
451 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
452 sums[1] = sums[0] - first + src[3*step];
453 sums[2] = sums[1] - first + src[4*step];
454 sums[3] = sums[2] - first + src[5*step];
455 sums[4] = sums[3] - first + src[6*step];
456 sums[5] = sums[4] - src[0*step] + src[7*step];
457 sums[6] = sums[5] - src[1*step] + last;
458 sums[7] = sums[6] - src[2*step] + last;
459 sums[8] = sums[7] - src[3*step] + last;
460 sums[9] = sums[8] - src[4*step] + last;
462 if (mode & VISUALIZE) {
472 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
473 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
474 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
475 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
476 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
477 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
478 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
479 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* weighted difference centred on the src[3*step]/src[4*step] pair */
482 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
484 if(FFABS(middleEnergy) < 8*QP){
485 const int q=(src[3*step] - src[4*step])/2;
486 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
487 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
489 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
493 d*= FFSIGN(-middleEnergy);
/* visualisation: replace any non-zero correction by a fixed, clipped +/-32 offset */
503 if ((mode & VISUALIZE) && d) {
504 d= (d < 0) ? 32 : -32;
505 src[3*step]= av_clip_uint8(src[3*step] - d);
506 src[4*step]= av_clip_uint8(src[4*step] + d);
/*
 * postprocess_template.c is included several times, each time after defining
 * a different TEMPLATE_PP_* macro. The variants referenced later in this file
 * are postProcess_C, postProcess_MMX, postProcess_MMX2, postProcess_3DNow,
 * postProcess_SSE2 and postProcess_altivec.
 * NOTE(review): the #if guarding the AltiVec section and the #else/#endif
 * lines of the x86 section are not visible in this listing — confirm against
 * the complete file.
 */
524 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
526 //we always compile C for testing which needs bitexactness
527 #define TEMPLATE_PP_C 1
528 #include "postprocess_template.c"
531 # define TEMPLATE_PP_ALTIVEC 1
532 # include "postprocess_altivec_template.c"
533 # include "postprocess_template.c"
536 #if ARCH_X86 && HAVE_INLINE_ASM
/* with runtime CPU detection every x86 variant is compiled */
537 # if CONFIG_RUNTIME_CPUDETECT
538 # define TEMPLATE_PP_MMX 1
539 # include "postprocess_template.c"
540 # define TEMPLATE_PP_MMXEXT 1
541 # include "postprocess_template.c"
542 # define TEMPLATE_PP_3DNOW 1
543 # include "postprocess_template.c"
544 # define TEMPLATE_PP_SSE2 1
545 # include "postprocess_template.c"
/* otherwise a single variant is chosen at compile time, first match wins */
547 # if HAVE_SSE2_INLINE
548 # define TEMPLATE_PP_SSE2 1
549 # include "postprocess_template.c"
550 # elif HAVE_MMXEXT_INLINE
551 # define TEMPLATE_PP_MMXEXT 1
552 # include "postprocess_template.c"
553 # elif HAVE_AMD3DNOW_INLINE
554 # define TEMPLATE_PP_3DNOW 1
555 # include "postprocess_template.c"
556 # elif HAVE_MMX_INLINE
557 # define TEMPLATE_PP_MMX 1
558 # include "postprocess_template.c"
/* Signature shared by all postProcess_* variants, so that postProcess() can
 * select one through a function pointer. */
563 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
564 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
/*
 * Process one plane: copy the requested mode into the context, pick an
 * implementation and call it.
 * The default is postProcess_C. Unless BITEXACT is set in ppMode->lumMode, a
 * CPU-specific variant replaces it: chosen from c->cpuCaps when
 * CONFIG_RUNTIME_CPUDETECT is set, otherwise fixed at compile time.
 * NOTE(review): several preprocessor lines (#if/#elif/#endif) and the closing
 * braces are not visible in this listing.
 */
566 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
567 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
569 pp_fn pp = postProcess_C;
/* the public opaque handles are the internal PPContext / PPMode structures */
570 PPContext *c= (PPContext *)vc;
571 PPMode *ppMode= (PPMode *)vm;
572 c->ppMode= *ppMode; //FIXME
574 if (!(ppMode->lumMode & BITEXACT)) {
575 #if CONFIG_RUNTIME_CPUDETECT
576 #if ARCH_X86 && HAVE_INLINE_ASM
577 // ordered per speed fastest first
578 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
579 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
580 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
581 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
583 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
585 #else /* CONFIG_RUNTIME_CPUDETECT */
587 pp = postProcess_SSE2;
588 #elif HAVE_MMXEXT_INLINE
589 pp = postProcess_MMX2;
590 #elif HAVE_AMD3DNOW_INLINE
591 pp = postProcess_3DNow;
592 #elif HAVE_MMX_INLINE
593 pp = postProcess_MMX;
595 pp = postProcess_altivec;
597 #endif /* !CONFIG_RUNTIME_CPUDETECT */
600 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
603 /* -pp Command line Help
// Help text for the postprocessing mode string; it is printed line by line
// when the mode name "help" is requested.
// NOTE(review): three long names shown here do not match the filters table,
// which the parser compares with strcmp(): the table has "ahdeblock",
// "avdeblock" and "forcequant", the text says "hadeblock", "vadeblock" and
// "forceQuant". Confirm which spelling is intended.
// NOTE(review): the terminating ';' and possibly some short text lines are
// not visible in this listing.
605 const char pp_help[] =
606 "Available postprocessing filters:\n"
608 "short long name short long option Description\n"
609 "* * a autoq CPU power dependent enabler\n"
610 " c chrom chrominance filtering enabled\n"
611 " y nochrom chrominance filtering disabled\n"
612 " n noluma luma filtering disabled\n"
613 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
614 " 1. difference factor: default=32, higher -> more deblocking\n"
615 " 2. flatness threshold: default=39, lower -> more deblocking\n"
616 " the h & v deblocking filters share these\n"
617 " so you can't set different thresholds for h / v\n"
618 "vb vdeblock (2 threshold) vertical deblocking filter\n"
619 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
620 "va vadeblock (2 threshold) vertical deblocking filter\n"
621 "h1 x1hdeblock experimental h deblock filter 1\n"
622 "v1 x1vdeblock experimental v deblock filter 1\n"
623 "dr dering deringing filter\n"
624 "al autolevels automatic brightness / contrast\n"
625 " f fullyrange stretch luminance to (0..255)\n"
626 "lb linblenddeint linear blend deinterlacer\n"
627 "li linipoldeint linear interpolating deinterlace\n"
628 "ci cubicipoldeint cubic interpolating deinterlacer\n"
629 "md mediandeint median deinterlacer\n"
630 "fd ffmpegdeint ffmpeg deinterlacer\n"
631 "l5 lowpass5 FIR lowpass deinterlacer\n"
632 "de default hb:a,vb:a,dr:a\n"
633 "fa fast h1:a,v1:a,dr:a\n"
634 "ac ha:a:128:7,va:a,dr:a\n"
635 "tn tmpnoise (3 threshold) temporal noise reducer\n"
636 " 1. <= 2. <= 3. larger -> stronger filtering\n"
637 "fq forceQuant <quantizer> force quantizer\n"
639 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
640 "long form example:\n"
641 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
642 "short form example:\n"
643 "vb:a/hb:a/lb de,-vb\n"
// Parse a postprocessing mode string into a newly allocated PPMode.
//
// name    filter list: filters separated by ',' or '/', options of a filter
//         separated by ':' or '|'; the special name "help" prints pp_help.
// quality value substituted for the per-filter quality when the "autoq"/"a"
//         option is given; otherwise a very large default is used, so every
//         filter passes its quality threshold.
//
// Unknown filter names and unconsumed options are counted in ppMode->error.
// NOTE(review): many short lines (local declarations, NULL checks, braces,
// return statements) are not visible in this listing, so what is returned on
// error cannot be stated from here.
649 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
651 char temp[GET_MODE_BUFFER_SIZE];
653 static const char filterDelimiters[] = ",/";
654 static const char optionDelimiters[] = ":|";
655 struct PPMode *ppMode;
659 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
663 if (!strcmp(name, "help")) {
// print the help text one line at a time, copying each line (including its
// '\n') into temp first; a line longer than temp is truncated
665 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
666 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
667 av_log(NULL, AV_LOG_INFO, "%s", temp);
672 ppMode= av_malloc(sizeof(PPMode));
// defaults before any filter is parsed
677 ppMode->chromMode= 0;
678 ppMode->maxTmpNoise[0]= 700;
679 ppMode->maxTmpNoise[1]= 1500;
680 ppMode->maxTmpNoise[2]= 3000;
681 ppMode->maxAllowedY= 234;
682 ppMode->minAllowedY= 16;
683 ppMode->baseDcDiff= 256/8;
684 ppMode->flatnessThreshold= 56-16-1;
685 ppMode->maxClippedThreshold= (AVRational){1,100};
// work on a zero-filled private copy of name, since tokenising and alias
// replacement below modify the buffer; longer input is truncated
688 memset(temp, 0, GET_MODE_BUFFER_SIZE);
689 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
691 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
694 const char *filterName;
695 int q= 1000000; //PP_QUALITY_MAX;
699 const char *options[OPTIONS_ARRAY_SIZE];
702 int numOfUnknownOptions=0;
703 int enable=1; //does the user want us to enable or disable the filter
// split off the next filter token, then its name (text before the first option delimiter)
706 filterToken= av_strtok(p, filterDelimiters, &tokstate);
707 if(!filterToken) break;
708 p+= strlen(filterToken) + 1; // p points to next filterToken
709 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
714 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
// a leading '-' marks the filter as one to be switched off
716 if(*filterName == '-'){
721 for(;;){ //for all options
722 option= av_strtok(NULL, optionDelimiters, &tokstate);
725 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
// generic options are handled here; anything else is stored for the
// filter-specific handling further down
726 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
727 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
728 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
729 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
731 options[numOfUnknownOptions] = option;
732 numOfUnknownOptions++;
// keep one slot free for the NULL terminator written after the loop
734 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
736 options[numOfUnknownOptions] = NULL;
738 /* replace stuff from the replace Table */
739 for(i=0; replaceTable[2*i]; i++){
740 if(!strcmp(replaceTable[2*i], filterName)){
741 size_t newlen = strlen(replaceTable[2*i + 1]);
// make sure the expansion still fits into temp before inserting it at p
748 spaceLeft= p - temp + plen;
749 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
// shift the unparsed remainder right and copy the expansion in front of it,
// so the expansion is parsed as the next filter tokens
753 memmove(p + newlen, p, plen+1);
754 memcpy(p, replaceTable[2*i + 1], newlen);
// look the name up in the filter table (long or short form)
759 for(i=0; filters[i].shortName; i++){
760 if( !strcmp(filters[i].longName, filterName)
761 || !strcmp(filters[i].shortName, filterName)){
// always clear the filter first; it is re-enabled below unless disabled
762 ppMode->lumMode &= ~filters[i].mask;
763 ppMode->chromMode &= ~filters[i].mask;
766 if(!enable) break; // user wants to disable it
// enable for luma / chroma only if the quality reaches the filter's minimum
768 if(q >= filters[i].minLumQuality && luma)
769 ppMode->lumMode|= filters[i].mask;
770 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
771 if(q >= filters[i].minChromQuality)
772 ppMode->chromMode|= filters[i].mask;
// autolevels: "fullyrange"/"f" widens the allowed luma range to 0..255
774 if(filters[i].mask == LEVEL_FIX){
776 ppMode->minAllowedY= 16;
777 ppMode->maxAllowedY= 234;
778 for(o=0; options[o]; o++){
779 if( !strcmp(options[o],"fullyrange")
780 ||!strcmp(options[o],"f")){
781 ppMode->minAllowedY= 0;
782 ppMode->maxAllowedY= 255;
783 numOfUnknownOptions--;
// temporal noise reducer: up to three numeric thresholds
787 else if(filters[i].mask == TEMP_NOISE_FILTER)
792 for(o=0; options[o]; o++){
794 ppMode->maxTmpNoise[numOfNoises]=
795 strtol(options[o], &tail, 0);
796 if(tail!=options[o]){
798 numOfUnknownOptions--;
799 if(numOfNoises >= 3) break;
// deblocking filters: first number is the difference factor, second the flatness threshold
803 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
804 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
807 for(o=0; options[o] && o<2; o++){
809 int val= strtol(options[o], &tail, 0);
810 if(tail==options[o]) break;
812 numOfUnknownOptions--;
813 if(o==0) ppMode->baseDcDiff= val;
814 else ppMode->flatnessThreshold= val;
// forced quantizer: defaults to 15, overridden by one numeric option
817 else if(filters[i].mask == FORCE_QUANT){
819 ppMode->forcedQuant= 15;
821 for(o=0; options[o] && o<1; o++){
823 int val= strtol(options[o], &tail, 0);
824 if(tail==options[o]) break;
826 numOfUnknownOptions--;
827 ppMode->forcedQuant= val;
// an unknown filter name and every option nobody consumed count as errors
832 if(!filterNameOk) ppMode->error++;
833 ppMode->error += numOfUnknownOptions;
836 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
838 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Release a mode object.
 * NOTE(review): the function body is not visible in this listing; presumably it
 * frees the PPMode allocated by pp_get_mode_by_name_and_quality() — confirm. */
845 void pp_free_mode(pp_mode *mode){
/* Point *p at a fresh zero-initialised buffer of `size` bytes.
 * NOTE(review): no release of the previous *p is visible in this listing; if
 * none exists, every call leaks the old buffer — confirm against the complete
 * file. The result of av_mallocz() is not checked here. */
849 static void reallocAlign(void **p, int size){
851 *p= av_mallocz(size);
/*
 * (Re)allocate the work buffers of a context for the given picture size and
 * strides, via reallocAlign().
 * NOTE(review): the loops around the histogram and tempBlurred lines and the
 * assignment of c->stride are not visible in this listing. All sizes are
 * computed in int, and no allocation result is checked in the visible lines.
 */
854 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
/* picture size in 16x16 macroblocks, rounded up */
855 int mbWidth = (width+15)>>4;
856 int mbHeight= (height+15)>>4;
860 c->qpStride= qpStride;
862 reallocAlign((void **)&c->tempDst, stride*24+32);
863 reallocAlign((void **)&c->tempSrc, stride*24);
864 reallocAlign((void **)&c->tempBlocks, 2*16*8);
865 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
/* initial histogram value derived from the picture area */
867 c->yHistogram[i]= width*height/64*15/256;
870 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
871 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
872 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
875 reallocAlign((void **)&c->deintTemp, 2*width+32);
/* QP tables: two sized by qpStride * mbHeight, the forced one by mbWidth only */
876 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
877 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
878 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
/* Name callback for the AVClass below.
 * NOTE(review): its body is not visible in this listing. */
881 static const char * context_to_name(void * ptr) {
/* AVClass attached to every PPContext (see pp_get_context), so a context can
 * be passed to av_log(); the class name is "Postproc". */
885 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/*
 * Create a postprocessing context for pictures of the given size.
 * cpuCaps is a bit field: with PP_FORMAT set it also carries the chroma
 * subsampling shifts; PP_CPU_CAPS_AUTO requests auto-detection, otherwise the
 * individual PP_CPU_CAPS_* bits are translated to AV_CPU_FLAG_* bits.
 * NOTE(review): the check of the av_mallocz() result, the else lines and the
 * return statement are not visible in this listing.
 */
887 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
888 PPContext *c= av_mallocz(sizeof(PPContext));
889 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
890 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
895 c->av_class = &av_codec_context_class;
/* with PP_FORMAT: horizontal shift in bits 0-1, vertical shift in bits 4-5 */
896 if(cpuCaps&PP_FORMAT){
897 c->hChromaSubSample= cpuCaps&0x3;
898 c->vChromaSubSample= (cpuCaps>>4)&0x3;
/* otherwise both shifts are set to 1 */
900 c->hChromaSubSample= 1;
901 c->vChromaSubSample= 1;
903 if (cpuCaps & PP_CPU_CAPS_AUTO) {
904 c->cpuCaps = av_get_cpu_flags();
907 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
908 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
909 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
910 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
913 reallocBuffers(c, width, height, stride, qpStride);
/*
 * Free the buffers owned by a context and zero the context structure.
 * NOTE(review): the frees of tempDst/tempSrc and of the context itself are
 * not visible in this listing — confirm against the complete file.
 */
920 av_cold void pp_free_context(void *vc){
921 PPContext *c = (PPContext*)vc;
924 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
925 av_free(c->tempBlurred[i]);
926 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
927 av_free(c->tempBlurredPast[i]);
929 av_free(c->tempBlocks);
930 av_free(c->yHistogram);
933 av_free(c->deintTemp);
934 av_free(c->stdQPTable);
935 av_free(c->nonBQPTable);
936 av_free(c->forcedQPTable);
/* wipe the structure so no freed pointer remains in it */
938 memset(c, 0, sizeof(PPContext));
943 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
944 uint8_t * dst[3], const int dstStride[3],
945 int width, int height,
946 const QP_STORE_T *QP_store, int QPStride,
947 pp_mode *vm, void *vc, int pict_type)
949 int mbWidth = (width+15)>>4;
950 int mbHeight= (height+15)>>4;
953 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
954 int absQPStride = FFABS(QPStride);
956 // c->stride and c->QPStride are always positive
957 if(c->stride < minStride || c->qpStride < absQPStride)
958 reallocBuffers(c, width, height,
959 FFMAX(minStride, c->stride),
960 FFMAX(c->qpStride, absQPStride));
962 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
964 QP_store= c->forcedQPTable;
965 absQPStride = QPStride = 0;
966 if(mode->lumMode & FORCE_QUANT)
967 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
969 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
972 if(pict_type & PP_PICT_TYPE_QP2){
974 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
975 for(i=0; i<(count>>2); i++){
976 AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
978 for(i<<=2; i<count; i++){
979 c->stdQPTable[i] = QP_store[i]>>1;
981 QP_store= c->stdQPTable;
982 QPStride= absQPStride;
987 for(y=0; y<mbHeight; y++){
988 for(x=0; x<mbWidth; x++){
989 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
991 av_log(c, AV_LOG_INFO, "\n");
993 av_log(c, AV_LOG_INFO, "\n");
996 if((pict_type&7)!=3){
999 const int count= FFMAX(mbHeight * QPStride, mbWidth);
1000 for(i=0; i<(count>>2); i++){
1001 AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
1003 for(i<<=2; i<count; i++){
1004 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1008 for(i=0; i<mbHeight; i++) {
1009 for(j=0; j<absQPStride; j++) {
1010 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1016 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1017 mode->lumMode, mode->chromMode);
1019 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1020 width, height, QP_store, QPStride, 0, mode, c);
1022 if (!(src[1] && src[2] && dst[1] && dst[2]))
1025 width = (width )>>c->hChromaSubSample;
1026 height = (height)>>c->vChromaSubSample;
1028 if(mode->chromMode){
1029 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1030 width, height, QP_store, QPStride, 1, mode, c);
1031 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1032 width, height, QP_store, QPStride, 2, mode, c);
1034 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1035 linecpy(dst[1], src[1], height, srcStride[1]);
1036 linecpy(dst[2], src[2], height, srcStride[2]);
1039 for(y=0; y<height; y++){
1040 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1041 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);