2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the main loop more flexible (variable number of blocks at once;
66 the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include "libavutil/intreadwrite.h"
84 //#undef HAVE_MMXEXT_INLINE
85 //#define HAVE_AMD3DNOW_INLINE
86 //#undef HAVE_MMX_INLINE
88 //#define DEBUG_BRIGHTNESS
89 #include "postprocess.h"
90 #include "postprocess_internal.h"
91 #include "libavutil/avstring.h"
92 #include "libavutil/ppc/util_altivec.h"
94 #include "libavutil/ffversion.h"
95 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
/**
 * Return LIBPOSTPROC_VERSION_INT.
 * Asserts beforehand that LIBPOSTPROC_VERSION_MICRO is at least 100.
 */
97 unsigned postproc_version(void)
99 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
100 return LIBPOSTPROC_VERSION_INT;
/**
 * Return the FFMPEG_CONFIGURATION string this library was built with.
 */
103 const char *postproc_configuration(void)
105 return FFMPEG_CONFIGURATION;
/**
 * Return the license string (FFMPEG_LICENSE).
 *
 * The literal actually stored is LICENSE_PREFIX immediately followed by
 * FFMPEG_LICENSE; the returned pointer is offset by the prefix length
 * (sizeof minus the terminating NUL), so callers see only the license text.
 */
108 const char *postproc_license(void)
110 #define LICENSE_PREFIX "libpostproc license: "
111 return &LICENSE_PREFIX FFMPEG_LICENSE[sizeof(LICENSE_PREFIX) - 1];
/* Size of the working buffer the mode string is copied into by
 * pp_get_mode_by_name_and_quality(). */
114 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array that collects unrecognized option strings
 * (one slot is kept for the NULL terminator). */
115 #define OPTIONS_ARRAY_SIZE 10
117 #define TEMP_STRIDE 8
118 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants, only compiled when ARCH_X86 && HAVE_INLINE_ASM:
 * wXX repeats 0x00XX in each of the four 16-bit words,
 * bXX repeats 0xXX in each of the eight bytes. */
120 #if ARCH_X86 && HAVE_INLINE_ASM
121 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
122 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
123 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
124 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
125 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
126 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
127 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
128 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
131 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/**
 * Table of the filters a mode string may name, terminated by an all-NULL entry.
 *
 * pp_get_mode_by_name_and_quality() matches a filter token against either of
 * the two strings of an entry and then ORs the entry's mask (last field) into
 * lumMode / chromMode.
 * NOTE(review): the three integers are presumably chromDefault, minLumQuality
 * and minChromQuality, in that order -- confirm against struct PPFilter.
 */
134 static const struct PPFilter filters[]=
136 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
137 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
138 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
139 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
140 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
141 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
142 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
143 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
144 {"dr", "dering", 1, 5, 6, DERING},
145 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
146 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
147 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
148 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
149 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
150 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
151 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
152 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
153 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
154 {"be", "bitexact", 1, 0, 0, BITEXACT},
155 {"vi", "visualize", 1, 0, 0, VISUALIZE},
156 {NULL, NULL,0,0,0,0} //End Marker
/**
 * Alias table used by pp_get_mode_by_name_and_quality().
 *
 * Entries come in pairs: element 2*i is an alias name, element 2*i+1 is the
 * filter list it is replaced by in the working copy of the mode string.
 * The parser stops scanning at the first NULL alias.
 */
159 static const char * const replaceTable[]=
161 "default", "hb:a,vb:a,dr:a",
162 "de", "hb:a,vb:a,dr:a",
163 "fast", "h1:a,v1:a,dr:a",
164 "fa", "h1:a,v1:a,dr:a",
165 "ac", "ha:a:128:7,va:a,dr:a",
169 /* The horizontal functions exist only in C because the MMX
170 * code is faster with vertical filters and transposing. */
173 * Check if the given 8x8 Block is mostly "flat"
/**
 * Flatness test along rows: count horizontally adjacent pixel pairs that are
 * nearly equal and compare the count with ppMode.flatnessThreshold.
 *
 * A pair (a, b) is counted when |a - b| <= dcOffset; the single unsigned
 * comparison against dcThreshold = 2*dcOffset + 1 implements both bounds.
 *
 * @param src    first pixel of the block
 * @param stride row distance in bytes
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @return nonzero if more pairs than flatnessThreshold were nearly equal
 */
175 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
// tolerance scales with the quantizer: (nonBQP * baseDcDiff) / 256, plus 1
179 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
180 const int dcThreshold= dcOffset*2 + 1;
182 for(y=0; y<BLOCK_SIZE; y++){
// the 7 neighbouring pairs of one 8-pixel row
183 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
184 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
185 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
186 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
187 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
188 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
189 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
192 return numEq > c->ppMode.flatnessThreshold;
196 * Check if the middle 8x8 Block in the given 8x16 block is flat
/**
 * Flatness test along columns: count vertically adjacent pixel pairs that are
 * nearly equal and compare the count with ppMode.flatnessThreshold.
 *
 * A pair (a, b) is counted when |a - b| <= dcOffset (one unsigned comparison
 * against dcThreshold = 2*dcOffset + 1).
 *
 * @param src    pointer 4 rows above the 8x8 block that is tested
 * @param stride row distance in bytes
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @return nonzero if more pairs than flatnessThreshold were nearly equal
 */
198 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
// tolerance scales with the quantizer: (nonBQP * baseDcDiff) / 256, plus 1
202 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
203 const int dcThreshold= dcOffset*2 + 1;
205 src+= stride*4; // src points to begin of the 8x8 Block
// BLOCK_SIZE-1 row pairs, each compared across 8 columns
206 for(y=0; y<BLOCK_SIZE-1; y++){
207 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
208 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
209 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
210 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
211 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
212 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
213 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
217 return numEq > c->ppMode.flatnessThreshold;
/**
 * Check that an 8x8 block has no horizontal step larger than the quantizer
 * allows.
 *
 * One pixel pair is sampled per row, using the column pairs (0,5), (2,7),
 * (4,1) and (6,3) on four consecutive rows; the pattern is applied twice so
 * that all 8 rows are covered. A pair (p, q) passes when
 * -2*QP <= p - q <= 2*QP, which the single unsigned comparison below tests
 * (a negative sum wraps to a huge unsigned value and fails).
 *
 * @param src    first pixel of the block; 8 rows of at least 8 pixels are read
 * @param stride row distance in bytes
 * @param QP     quantizer of the block
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    const unsigned limit = (unsigned)(4*QP);
    int i;

    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > limit) return 0;
        src += stride;
    }
    return 1;
}
/**
 * Check that the block has no vertical step larger than the quantizer allows.
 *
 * For each group of four columns one pixel pair is sampled per column, using
 * the row pairs (0,5), (2,7), (4,1) and (6,3). A pair (p, q) fails the test
 * when p - q lies outside [-2*QP, 2*QP] (single unsigned comparison).
 *
 * @param src    block pointer
 * @param stride row distance in bytes
 * @param QP     quantizer of the block
 * @return 0 as soon as one sampled pair is out of range
 */
236 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
240 for(x=0; x<BLOCK_SIZE; x+=4){
241 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
242 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
243 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
244 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/**
 * Classify a block for horizontal deblocking.
 *
 * When isHorizDC_C() reports the block as flat, the result of
 * isHorizMinMaxOk_C() for the block quantizer c->QP is returned.
 * NOTE(review): the value returned for non-flat blocks is not visible in this
 * view -- confirm.
 */
249 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
251 if( isHorizDC_C(src, stride, c) ){
252 return isHorizMinMaxOk_C(src, stride, c->QP);
/**
 * Classify a block for vertical deblocking.
 *
 * When isVertDC_C() reports the block as flat, the result of
 * isVertMinMaxOk_C() for the block quantizer c->QP is returned.
 * NOTE(review): the value returned for non-flat blocks is not visible in this
 * view -- confirm.
 */
258 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
260 if( isVertDC_C(src, stride, c) ){
261 return isVertMinMaxOk_C(src, stride, c->QP);
/**
 * Default horizontal deblocking filter (C version), applied row by row.
 *
 * For each row an "energy" is computed across the pixel pair dst[3]/dst[4];
 * the row is only processed when that energy is below 8*QP. The correction d
 * starts as the amount by which the middle energy exceeds the smaller of the
 * energies measured on either side, and takes the sign of -middleEnergy.
 * NOTE(review): the clamping of d and the write-back to dst[3]/dst[4] are not
 * visible in this view.
 */
267 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
270 for(y=0; y<BLOCK_SIZE; y++){
271 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
273 if(FFABS(middleEnergy) < 8*c->QP){
// q: half of the step between the two pixels next to the edge
274 const int q=(dst[3] - dst[4])/2;
275 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
276 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
278 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
282 d*= FFSIGN(-middleEnergy);
303 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
304 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/**
 * Horizontal 9-tap low-pass filter, (1,1,2,2,4,2,2,1,1)/16, C version.
 *
 * Each row reads dst[-1]..dst[8] and rewrites dst[0]..dst[7]. The filter is
 * evaluated through running sums: sums[k] is a sliding sum of neighbouring
 * pixels (with +4 folded in for rounding), and every output pixel is
 * (sums[i] + sums[i+2] + 2*dst[i]) >> 4.
 */
306 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
309 for(y=0; y<BLOCK_SIZE; y++){
// use the pixel outside the block as padding only if it differs from the
// edge pixel by less than QP; otherwise repeat the edge pixel
310 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
311 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
// each sum is derived from the previous one by dropping one sample and adding one
314 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
315 sums[1] = sums[0] - first + dst[3];
316 sums[2] = sums[1] - first + dst[4];
317 sums[3] = sums[2] - first + dst[5];
318 sums[4] = sums[3] - first + dst[6];
319 sums[5] = sums[4] - dst[0] + dst[7];
320 sums[6] = sums[5] - dst[1] + last;
321 sums[7] = sums[6] - dst[2] + last;
322 sums[8] = sums[7] - dst[3] + last;
323 sums[9] = sums[8] - dst[4] + last;
// the sums were taken from the unmodified row, so in-place writes are safe for the
// sums; the 2*dst[i] term still reads the original dst[i] because writes go left to right
325 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
326 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
327 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
328 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
329 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
330 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
331 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
332 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
339 * Experimental Filter 1 (Horizontal)
340 * will not damage linear gradients
341 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
342 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
343 * MMX2 version does correct clipping C version does not
344 * not identical with the vertical one
346 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
349 static uint64_t lut[256];
355 int v= i < 128 ? 2*i : 2*(i-256);
357 //Simulate 112242211 9-Tap filter
358 uint64_t a= (v/16) & 0xFF;
359 uint64_t b= (v/8) & 0xFF;
360 uint64_t c= (v/4) & 0xFF;
361 uint64_t d= (3*v/8) & 0xFF;
363 //Simulate piecewise linear interpolation
364 uint64_t a= (v/16) & 0xFF;
365 uint64_t b= (v*3/16) & 0xFF;
366 uint64_t c= (v*5/16) & 0xFF;
367 uint64_t d= (7*v/16) & 0xFF;
368 uint64_t A= (0x100 - a)&0xFF;
369 uint64_t B= (0x100 - b)&0xFF;
370 uint64_t C= (0x100 - c)&0xFF;
371 uint64_t D= (0x100 - c)&0xFF;
373 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
374 (D<<24) | (C<<16) | (B<<8) | (A);
375 //lut[i] = (v<<32) | (v<<24);
379 for(y=0; y<BLOCK_SIZE; y++){
380 int a= src[1] - src[2];
381 int b= src[3] - src[4];
382 int c= src[5] - src[6];
384 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
387 int v = d * FFSIGN(-b);
401 * accurate deblock filter
/**
 * "Accurate" deblocking filter, C version, usable in either direction.
 *
 * @param src    pointer 4 steps before the block that is filtered
 * @param step   distance between consecutive pixels ACROSS the edge being
 *               filtered (all filter taps are addressed as src[k*step])
 * @param stride NOTE(review): presumably the distance between the lines that
 *               are filtered one after another -- its use is not visible here
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @param mode   filter mode bits; VISUALIZE selects a debug rendering
 *
 * Per line, the nearly-equal neighbour pairs are counted as in the DC tests.
 * Flat lines get the 9-tap low-pass treatment, other lines the default filter.
 */
403 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
404 int stride, const PPContext *c, int mode)
// a pair counts as equal when it differs by at most dcOffset
408 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
409 const int dcThreshold= dcOffset*2 + 1;
411 src+= step*4; // src points to begin of the 8x8 Block
// flatness count over the 9 neighbouring pairs src[-1*step] .. src[8*step]
415 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
416 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
417 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
418 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
419 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
420 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
421 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
422 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
423 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
// flat line: low-pass branch
424 if(numEq > c->ppMode.flatnessThreshold){
// min/max of the line are gathered two pixels at a time: compare the pair
// first, then update max from the larger and min from the smaller
427 if(src[0] > src[step]){
435 if(src[x*step] > src[(x+1)*step]){
436 if(src[x *step] > max) max= src[ x *step];
437 if(src[(x+1)*step] < min) min= src[(x+1)*step];
439 if(src[(x+1)*step] > max) max= src[(x+1)*step];
440 if(src[ x *step] < min) min= src[ x *step];
// padding values: use the outside pixel only if it is within QP of the edge pixel
444 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
445 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
// sliding sums for the 9-tap filter, +4 for rounding
448 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
449 sums[1] = sums[0] - first + src[3*step];
450 sums[2] = sums[1] - first + src[4*step];
451 sums[3] = sums[2] - first + src[5*step];
452 sums[4] = sums[3] - first + src[6*step];
453 sums[5] = sums[4] - src[0*step] + src[7*step];
454 sums[6] = sums[5] - src[1*step] + last;
455 sums[7] = sums[6] - src[2*step] + last;
456 sums[8] = sums[7] - src[3*step] + last;
457 sums[9] = sums[8] - src[4*step] + last;
// NOTE(review): the statements of the VISUALIZE branch are not visible in this view
459 if (mode & VISUALIZE) {
469 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
470 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
471 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
472 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
473 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
474 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
475 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
476 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
// non-flat line: default filter around the pair src[3*step]/src[4*step]
479 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
481 if(FFABS(middleEnergy) < 8*QP){
482 const int q=(src[3*step] - src[4*step])/2;
483 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
484 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
486 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
490 d*= FFSIGN(-middleEnergy);
// debug rendering: replace any non-zero correction by a fixed +/-32 step
500 if ((mode & VISUALIZE) && d) {
501 d= (d < 0) ? 32 : -32;
502 src[3*step]= av_clip_uint8(src[3*step] - d);
503 src[4*step]= av_clip_uint8(src[4*step] + d);
/*
 * postprocess_template.c is included once per instruction-set variant; the
 * TEMPLATE_PP_* macro defined immediately before each include selects which
 * variant that inclusion produces.
 */
516 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
518 //we always compile C for testing which needs bitexactness
519 #define TEMPLATE_PP_C 1
520 #include "postprocess_template.c"
// AltiVec variant (NOTE(review): its enclosing #if is not visible in this view)
523 # define TEMPLATE_PP_ALTIVEC 1
524 # include "postprocess_altivec_template.c"
525 # include "postprocess_template.c"
528 #if ARCH_X86 && HAVE_INLINE_ASM
// with runtime CPU detection every x86 variant is compiled ...
529 # if CONFIG_RUNTIME_CPUDETECT
530 # define TEMPLATE_PP_MMX 1
531 # include "postprocess_template.c"
532 # define TEMPLATE_PP_MMXEXT 1
533 # include "postprocess_template.c"
534 # define TEMPLATE_PP_3DNOW 1
535 # include "postprocess_template.c"
536 # define TEMPLATE_PP_SSE2 1
537 # include "postprocess_template.c"
// ... otherwise only the first variant whose HAVE_*_INLINE macro is set
539 # if HAVE_SSE2_INLINE
540 # define TEMPLATE_PP_SSE2 1
541 # include "postprocess_template.c"
542 # elif HAVE_MMXEXT_INLINE
543 # define TEMPLATE_PP_MMXEXT 1
544 # include "postprocess_template.c"
545 # elif HAVE_AMD3DNOW_INLINE
546 # define TEMPLATE_PP_3DNOW 1
547 # include "postprocess_template.c"
548 # elif HAVE_MMX_INLINE
549 # define TEMPLATE_PP_MMX 1
550 # include "postprocess_template.c"
/** Signature shared by all per-plane postProcess_* implementations. */
555 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
556 const int8_t QPs[], int QPStride, int isColor, PPContext *c2);
/**
 * Filter one plane: copy the mode into the context, pick an implementation
 * and call it.
 *
 * postProcess_C is the default. Unless the mode's lumMode has BITEXACT set,
 * an optimized variant replaces it: chosen from c->cpuCaps when
 * CONFIG_RUNTIME_CPUDETECT is enabled, otherwise fixed at compile time by the
 * HAVE_*_INLINE macros.
 *
 * @param isColor passed through to the implementation (pp_postprocess() uses
 *                0 for the first plane and 1 / 2 for the other two)
 * @param vm      mode, used as PPMode
 * @param vc      context, used as PPContext
 */
558 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
559 const int8_t QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
561 pp_fn pp = postProcess_C;
562 PPContext *c= (PPContext *)vc;
563 PPMode *ppMode= (PPMode *)vm;
564 c->ppMode= *ppMode; //FIXME
566 if (!(ppMode->lumMode & BITEXACT)) {
567 #if CONFIG_RUNTIME_CPUDETECT
568 #if ARCH_X86 && HAVE_INLINE_ASM
569 // ordered per speed fastest first
570 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
571 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
572 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
573 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
575 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
577 #else /* CONFIG_RUNTIME_CPUDETECT */
579 pp = postProcess_SSE2;
580 #elif HAVE_MMXEXT_INLINE
581 pp = postProcess_MMX2;
582 #elif HAVE_AMD3DNOW_INLINE
583 pp = postProcess_3DNow;
584 #elif HAVE_MMX_INLINE
585 pp = postProcess_MMX;
587 pp = postProcess_altivec;
589 #endif /* !CONFIG_RUNTIME_CPUDETECT */
592 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
595 /* -pp Command line Help
/*
 * Help text; pp_get_mode_by_name_and_quality() logs it one line at a time when
 * the mode name is "help".
 *
 * NOTE(review): three long names listed here do not match the strings in
 * filters[], which are compared with strcmp(): the text says "hadeblock",
 * "vadeblock" and "forceQuant" while the table has "ahdeblock", "avdeblock"
 * and "forcequant". The long names as printed here would be rejected.
 */
597 const char pp_help[] =
598 "Available postprocessing filters:\n"
600 "short long name short long option Description\n"
601 "* * a autoq CPU power dependent enabler\n"
602 " c chrom chrominance filtering enabled\n"
603 " y nochrom chrominance filtering disabled\n"
604 " n noluma luma filtering disabled\n"
605 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
606 " 1. difference factor: default=32, higher -> more deblocking\n"
607 " 2. flatness threshold: default=39, lower -> more deblocking\n"
608 " the h & v deblocking filters share these\n"
609 " so you can't set different thresholds for h / v\n"
610 "vb vdeblock (2 threshold) vertical deblocking filter\n"
611 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
612 "va vadeblock (2 threshold) vertical deblocking filter\n"
613 "h1 x1hdeblock experimental h deblock filter 1\n"
614 "v1 x1vdeblock experimental v deblock filter 1\n"
615 "dr dering deringing filter\n"
616 "al autolevels automatic brightness / contrast\n"
617 " f fullyrange stretch luminance to (0..255)\n"
618 "lb linblenddeint linear blend deinterlacer\n"
619 "li linipoldeint linear interpolating deinterlace\n"
620 "ci cubicipoldeint cubic interpolating deinterlacer\n"
621 "md mediandeint median deinterlacer\n"
622 "fd ffmpegdeint ffmpeg deinterlacer\n"
623 "l5 lowpass5 FIR lowpass deinterlacer\n"
624 "de default hb:a,vb:a,dr:a\n"
625 "fa fast h1:a,v1:a,dr:a\n"
626 "ac ha:a:128:7,va:a,dr:a\n"
627 "tn tmpnoise (3 threshold) temporal noise reducer\n"
628 " 1. <= 2. <= 3. larger -> stronger filtering\n"
629 "fq forceQuant <quantizer> force quantizer\n"
631 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
632 "long form example:\n"
633 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
634 "short form example:\n"
635 "vb:a/hb:a/lb de,-vb\n"
/**
 * Parse a postprocessing mode string into a newly allocated PPMode.
 *
 * The string is a list of filter tokens separated by ',' or '/'. Each token is
 * a filter name followed by options separated by ':' or '|'; a leading '-' on
 * the name is tested for (disable request). Names found in replaceTable[] are
 * expanded in place inside the working buffer; names found in filters[] clear
 * and then conditionally set the filter's mask bit in lumMode / chromMode.
 * The special name "help" logs pp_help line by line instead.
 * Unknown filter names and unrecognized options are counted in ppMode->error.
 *
 * @param name    mode string
 * @param quality value used as the filter quality q when a filter carries the
 *                "autoq" / "a" option (q is otherwise 1000000)
 * @return NOTE(review): the return statements are not visible in this view
 */
641 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
643 char temp[GET_MODE_BUFFER_SIZE];
645 static const char filterDelimiters[] = ",/";
646 static const char optionDelimiters[] = ":|";
647 struct PPMode *ppMode;
651 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
655 if (!strcmp(name, "help")) {
// print the help text one '\n'-terminated line per av_log() call
657 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
// size = line length + newline + NUL, capped at the buffer size
658 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
659 av_log(NULL, AV_LOG_INFO, "%s", temp);
664 ppMode= av_malloc(sizeof(PPMode));
// defaults for a fresh mode
669 ppMode->chromMode= 0;
670 ppMode->maxTmpNoise[0]= 700;
671 ppMode->maxTmpNoise[1]= 1500;
672 ppMode->maxTmpNoise[2]= 3000;
673 ppMode->maxAllowedY= 234;
674 ppMode->minAllowedY= 16;
675 ppMode->baseDcDiff= 256/8;
676 ppMode->flatnessThreshold= 56-16-1;
677 ppMode->maxClippedThreshold= (AVRational){1,100};
// work on a zero-filled private copy: tokenizing and alias expansion modify it
680 memset(temp, 0, GET_MODE_BUFFER_SIZE);
681 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
683 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
686 const char *filterName;
687 int q= 1000000; //PP_QUALITY_MAX;
691 const char *options[OPTIONS_ARRAY_SIZE];
694 int numOfUnknownOptions=0;
695 int enable=1; //does the user want us to enabled or disabled the filter
698 filterToken= av_strtok(p, filterDelimiters, &tokstate);
699 if(!filterToken) break;
700 p+= strlen(filterToken) + 1; // p points to next filterToken
// the same tokstate is reused to split the token into name and options
701 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
706 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
708 if(*filterName == '-'){
713 for(;;){ //for all options
714 option= av_strtok(NULL, optionDelimiters, &tokstate);
717 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
// generic options are consumed here; anything else is kept for the filter-specific code below
718 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
719 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
720 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
721 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
723 options[numOfUnknownOptions] = option;
724 numOfUnknownOptions++;
// stop while one slot is still free for the NULL terminator
726 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
728 options[numOfUnknownOptions] = NULL;
730 /* replace stuff from the replace Table */
731 for(i=0; replaceTable[2*i]; i++){
732 if(!strcmp(replaceTable[2*i], filterName)){
733 size_t newlen = strlen(replaceTable[2*i + 1]);
// bytes of temp in use once the unparsed remainder (plen) is included
740 spaceLeft= p - temp + plen;
741 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
// shift the unparsed remainder right and splice the expansion in at p,
// so the next loop iteration tokenizes the expansion
745 memmove(p + newlen, p, plen+1);
746 memcpy(p, replaceTable[2*i + 1], newlen);
751 for(i=0; filters[i].shortName; i++){
752 if( !strcmp(filters[i].longName, filterName)
753 || !strcmp(filters[i].shortName, filterName)){
// always clear the filter first; it is re-enabled below unless disabled
754 ppMode->lumMode &= ~filters[i].mask;
755 ppMode->chromMode &= ~filters[i].mask;
758 if(!enable) break; // user wants to disable it
760 if(q >= filters[i].minLumQuality && luma)
761 ppMode->lumMode|= filters[i].mask;
// chroma: explicit "chrom", or no chroma option given and the filter defaults to chroma
762 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
763 if(q >= filters[i].minChromQuality)
764 ppMode->chromMode|= filters[i].mask;
// filter-specific options; every option understood here is removed from the unknown count
766 if(filters[i].mask == LEVEL_FIX){
768 ppMode->minAllowedY= 16;
769 ppMode->maxAllowedY= 234;
770 for(o=0; options[o]; o++){
771 if( !strcmp(options[o],"fullyrange")
772 ||!strcmp(options[o],"f")){
773 ppMode->minAllowedY= 0;
774 ppMode->maxAllowedY= 255;
775 numOfUnknownOptions--;
779 else if(filters[i].mask == TEMP_NOISE_FILTER)
784 for(o=0; options[o]; o++){
// note: the slot is written before the parse result is checked, so an
// unparsable option stores strtol()'s 0 into maxTmpNoise[numOfNoises]
786 ppMode->maxTmpNoise[numOfNoises]=
787 strtol(options[o], &tail, 0);
788 if(tail!=options[o]){
790 numOfUnknownOptions--;
791 if(numOfNoises >= 3) break;
795 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
796 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
// first numeric option -> baseDcDiff, second -> flatnessThreshold
799 for(o=0; options[o] && o<2; o++){
801 int val= strtol(options[o], &tail, 0);
802 if(tail==options[o]) break;
804 numOfUnknownOptions--;
805 if(o==0) ppMode->baseDcDiff= val;
806 else ppMode->flatnessThreshold= val;
809 else if(filters[i].mask == FORCE_QUANT){
// default forced quantizer, overridden by the first numeric option
811 ppMode->forcedQuant= 15;
813 for(o=0; options[o] && o<1; o++){
815 int val= strtol(options[o], &tail, 0);
816 if(tail==options[o]) break;
818 numOfUnknownOptions--;
819 ppMode->forcedQuant= val;
824 if(!filterNameOk) ppMode->error++;
825 ppMode->error += numOfUnknownOptions;
828 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
830 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/**
 * NOTE(review): presumably releases a mode returned by
 * pp_get_mode_by_name_and_quality(); the body is not visible in this view.
 */
837 void pp_free_mode(pp_mode *mode){
/**
 * Point *p at a fresh zero-initialized buffer of the given size, obtained
 * from av_mallocz(). The result is not checked here.
 * NOTE(review): release of the previous *p is not visible in this view.
 */
841 static void reallocAlign(void **p, int size){
843 *p= av_mallocz(size);
/**
 * (Re)allocate every work buffer of the context for the given geometry.
 *
 * All buffers go through reallocAlign(), so they start out zeroed; no
 * allocation result is checked in the visible code.
 *
 * @param width, height plane size in pixels
 * @param stride        line size the temp buffers are dimensioned for
 * @param qpStride      QP-table line size, stored in c->qpStride
 */
846 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
// size in 16x16 macroblocks, rounded up
847 int mbWidth = (width+15)>>4;
848 int mbHeight= (height+15)>>4;
852 c->qpStride= qpStride;
854 reallocAlign((void **)&c->tempDst, stride*24+32);
855 reallocAlign((void **)&c->tempSrc, stride*24);
856 reallocAlign((void **)&c->tempBlocks, 2*16*8);
857 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
// initial histogram value, derived from the picture area
859 c->yHistogram[i]= width*height/64*15/256;
862 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
863 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
864 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
867 reallocAlign((void **)&c->deintTemp, 2*width+32);
// QP tables: qpStride entries per macroblock row; the forced table holds a single row
868 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(int8_t));
869 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(int8_t));
870 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(int8_t));
/* Name callback referenced by the AVClass below (body not visible in this view). */
873 static const char * context_to_name(void * ptr) {
/* Class descriptor that pp_get_context() stores in c->av_class; it carries the
 * "Postproc" label and the name callback above. */
877 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/**
 * Allocate and initialize a postprocessing context.
 *
 * @param width, height picture size used to dimension the work buffers
 * @param cpuCaps       bit field combining PP_CPU_CAPS_* flags and, when
 *                      PP_FORMAT is set, the chroma subsampling shifts
 *                      (horizontal in bits 0-1, vertical in bits 4-5)
 *
 * Without PP_FORMAT both chroma shifts are set to 1. With PP_CPU_CAPS_AUTO
 * the CPU flags come from av_get_cpu_flags(); the individual PP_CPU_CAPS_*
 * bits are otherwise translated to AV_CPU_FLAG_* bits.
 * NOTE(review): handling of a failed av_mallocz() and the return statement
 * are not visible in this view.
 */
879 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
880 PPContext *c= av_mallocz(sizeof(PPContext));
881 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
882 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
887 c->av_class = &av_codec_context_class;
888 if(cpuCaps&PP_FORMAT){
889 c->hChromaSubSample= cpuCaps&0x3;
890 c->vChromaSubSample= (cpuCaps>>4)&0x3;
892 c->hChromaSubSample= 1;
893 c->vChromaSubSample= 1;
895 if (cpuCaps & PP_CPU_CAPS_AUTO) {
896 c->cpuCaps = av_get_cpu_flags();
899 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
900 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
901 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
902 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
905 reallocBuffers(c, width, height, stride, qpStride);
912 av_cold void pp_free_context(void *vc){
913 PPContext *c = (PPContext*)vc;
916 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
917 av_free(c->tempBlurred[i]);
918 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
919 av_free(c->tempBlurredPast[i]);
921 av_free(c->tempBlocks);
922 av_free(c->yHistogram);
925 av_free(c->deintTemp);
926 av_free(c->stdQPTable);
927 av_free(c->nonBQPTable);
928 av_free(c->forcedQPTable);
930 memset(c, 0, sizeof(PPContext));
/**
 * Postprocess one picture: prepare the QP tables, filter the first plane and
 * then filter or copy the two remaining planes.
 *
 * @param src, srcStride   source planes and their line sizes
 * @param dst, dstStride   destination planes and their line sizes
 * @param width, height    size of the first plane in pixels
 * @param QP_store         per-macroblock quantizers, may be NULL
 * @param QPStride         line size of QP_store
 * @param vm               mode
 * @param vc               context
 * @param pict_type        picture type; PP_PICT_TYPE_QP2 requests halving of
 *                         the QP values, and (pict_type & 7) == 3 skips the
 *                         update of nonBQPTable
 */
935 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
936 uint8_t * dst[3], const int dstStride[3],
937 int width, int height,
938 const int8_t *QP_store, int QPStride,
939 pp_mode *vm, void *vc, int pict_type)
// picture size in 16x16 macroblocks, rounded up
941 int mbWidth = (width+15)>>4;
942 int mbHeight= (height+15)>>4;
945 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
946 int absQPStride = FFABS(QPStride);
948 // c->stride and c->QPStride are always positive
// grow the work buffers if this picture needs larger strides than the context has
949 if(c->stride < minStride || c->qpStride < absQPStride)
950 reallocBuffers(c, width, height,
951 FFMAX(minStride, c->stride),
952 FFMAX(c->qpStride, absQPStride));
// no QP table given, or a forced quantizer requested: use a single row
// (stride 0) filled with the forced quantizer or with 1
954 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
956 QP_store= c->forcedQPTable;
957 absQPStride = QPStride = 0;
958 if(mode->lumMode & FORCE_QUANT)
959 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
961 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
// halve every QP into stdQPTable: four bytes per step, with the mask dropping
// the bit shifted in from the neighbouring byte, then the remaining bytes singly
964 if(pict_type & PP_PICT_TYPE_QP2){
966 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
967 for(i=0; i<(count>>2); i++){
968 AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
970 for(i<<=2; i<count; i++){
971 c->stdQPTable[i] = QP_store[i]>>1;
973 QP_store= c->stdQPTable;
974 QPStride= absQPStride;
// dump of the QP table to the log (NOTE(review): the condition guarding this dump is not visible in this view)
979 for(y=0; y<mbHeight; y++){
980 for(x=0; x<mbWidth; x++){
981 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
983 av_log(c, AV_LOG_INFO, "\n");
985 av_log(c, AV_LOG_INFO, "\n");
// remember the QPs (low 6 bits) of this picture in nonBQPTable
988 if((pict_type&7)!=3){
// bulk copy, four bytes per step plus tail
// (NOTE(review): the condition choosing between this and the row-wise copy below is not visible in this view)
991 const int count= FFMAX(mbHeight * QPStride, mbWidth);
992 for(i=0; i<(count>>2); i++){
993 AV_WN32(c->nonBQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) & 0x3F3F3F3F);
995 for(i<<=2; i<count; i++){
996 c->nonBQPTable[i] = QP_store[i] & 0x3F;
// row-wise copy: source rows addressed with QPStride, destination rows with absQPStride
1000 for(i=0; i<mbHeight; i++) {
1001 for(j=0; j<absQPStride; j++) {
1002 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1008 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1009 mode->lumMode, mode->chromMode);
// first plane (isColor = 0)
1011 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1012 width, height, QP_store, QPStride, 0, mode, c);
1014 if (!(src[1] && src[2] && dst[1] && dst[2]))
// size of the two remaining planes
1017 width = (width )>>c->hChromaSubSample;
1018 height = (height)>>c->vChromaSubSample;
// filter them if any chroma filter is enabled, otherwise just copy them
1020 if(mode->chromMode){
1021 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1022 width, height, QP_store, QPStride, 1, mode, c);
1023 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1024 width, height, QP_store, QPStride, 2, mode, c);
// equal strides: one linecpy() call per plane
1026 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1027 linecpy(dst[1], src[1], height, srcStride[1]);
1028 linecpy(dst[2], src[2], height, srcStride[2]);
// different strides: copy row by row
1031 for(y=0; y<height; y++){
1032 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1033 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);