2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once)
(the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
92 #include "libavutil/ffversion.h"
93 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
95 unsigned postproc_version(void)
97 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
98 return LIBPOSTPROC_VERSION_INT;
101 const char *postproc_configuration(void)
103 return FFMPEG_CONFIGURATION;
106 const char *postproc_license(void)
108 #define LICENSE_PREFIX "libpostproc license: "
109 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
116 #define GET_MODE_BUFFER_SIZE 500
117 #define OPTIONS_ARRAY_SIZE 10
119 #define TEMP_STRIDE 8
120 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
122 #if ARCH_X86 && HAVE_INLINE_ASM
123 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
124 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
125 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
126 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
127 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
128 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
129 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
130 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
133 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
136 static const struct PPFilter filters[]=
138 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
139 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
140 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
141 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
142 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
143 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
144 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
145 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
146 {"dr", "dering", 1, 5, 6, DERING},
147 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
148 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
149 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
150 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
151 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
152 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
153 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
154 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
155 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
156 {"be", "bitexact", 1, 0, 0, BITEXACT},
157 {"vi", "visualize", 1, 0, 0, VISUALIZE},
158 {NULL, NULL,0,0,0,0} //End Marker
161 static const char * const replaceTable[]=
163 "default", "hb:a,vb:a,dr:a",
164 "de", "hb:a,vb:a,dr:a",
165 "fast", "h1:a,v1:a,dr:a",
166 "fa", "h1:a,v1:a,dr:a",
167 "ac", "ha:a:128:7,va:a,dr:a",
172 #if ARCH_X86 && HAVE_INLINE_ASM
173 static inline void prefetchnta(const void *p)
175 __asm__ volatile( "prefetchnta (%0)\n\t"
180 static inline void prefetcht0(const void *p)
182 __asm__ volatile( "prefetcht0 (%0)\n\t"
187 static inline void prefetcht1(const void *p)
189 __asm__ volatile( "prefetcht1 (%0)\n\t"
194 static inline void prefetcht2(const void *p)
196 __asm__ volatile( "prefetcht2 (%0)\n\t"
202 /* The horizontal functions exist only in C because the MMX
203 * code is faster with vertical filters and transposing. */
206 * Check if the given 8x8 Block is mostly "flat"
208 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
212 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
213 const int dcThreshold= dcOffset*2 + 1;
215 for(y=0; y<BLOCK_SIZE; y++){
216 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
218 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
219 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
220 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
221 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
222 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
225 return numEq > c->ppMode.flatnessThreshold;
229 * Check if the middle 8x8 Block in the given 8x16 block is flat
231 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
235 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
236 const int dcThreshold= dcOffset*2 + 1;
238 src+= stride*4; // src points to begin of the 8x8 Block
239 for(y=0; y<BLOCK_SIZE-1; y++){
240 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
241 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
242 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
243 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
244 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
245 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
246 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
247 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
250 return numEq > c->ppMode.flatnessThreshold;
/**
 * Sparse range check of an 8x8 block along the horizontal direction.
 *
 * One pair of columns is probed per row: (0,5), (2,7), (4,1), (6,3) on
 * rows 0..3, and the same pattern again on rows 4..7. A probe fails when the
 * difference of the two pixels lies outside [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer controlling the allowed difference
 * @return 1 if every probe passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t probe[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row*stride;
        const int lhs = line[probe[row & 3][0]];
        const int rhs = line[probe[row & 3][1]];

        /* the unsigned cast folds the lower and upper bound into one compare */
        if ((unsigned)(lhs - rhs + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
269 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
273 for(x=0; x<BLOCK_SIZE; x+=4){
274 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
275 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
276 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
277 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
282 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
284 if( isHorizDC_C(src, stride, c) ){
285 return isHorizMinMaxOk_C(src, stride, c->QP);
291 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
293 if( isVertDC_C(src, stride, c) ){
294 return isVertMinMaxOk_C(src, stride, c->QP);
300 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
303 for(y=0; y<BLOCK_SIZE; y++){
304 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
306 if(FFABS(middleEnergy) < 8*c->QP){
307 const int q=(dst[3] - dst[4])/2;
308 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
309 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
311 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
315 d*= FFSIGN(-middleEnergy);
336 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
337 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
339 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
342 for(y=0; y<BLOCK_SIZE; y++){
343 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
344 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
347 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
348 sums[1] = sums[0] - first + dst[3];
349 sums[2] = sums[1] - first + dst[4];
350 sums[3] = sums[2] - first + dst[5];
351 sums[4] = sums[3] - first + dst[6];
352 sums[5] = sums[4] - dst[0] + dst[7];
353 sums[6] = sums[5] - dst[1] + last;
354 sums[7] = sums[6] - dst[2] + last;
355 sums[8] = sums[7] - dst[3] + last;
356 sums[9] = sums[8] - dst[4] + last;
358 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
359 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
360 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
361 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
362 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
363 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
364 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
365 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
372 * Experimental Filter 1 (Horizontal)
373 * will not damage linear gradients
374 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
375 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
376 * MMX2 version does correct clipping C version does not
377 * not identical with the vertical one
379 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
382 static uint64_t lut[256];
388 int v= i < 128 ? 2*i : 2*(i-256);
390 //Simulate 112242211 9-Tap filter
391 uint64_t a= (v/16) & 0xFF;
392 uint64_t b= (v/8) & 0xFF;
393 uint64_t c= (v/4) & 0xFF;
394 uint64_t d= (3*v/8) & 0xFF;
396 //Simulate piecewise linear interpolation
397 uint64_t a= (v/16) & 0xFF;
398 uint64_t b= (v*3/16) & 0xFF;
399 uint64_t c= (v*5/16) & 0xFF;
400 uint64_t d= (7*v/16) & 0xFF;
401 uint64_t A= (0x100 - a)&0xFF;
402 uint64_t B= (0x100 - b)&0xFF;
403 uint64_t C= (0x100 - c)&0xFF;
404 uint64_t D= (0x100 - c)&0xFF;
406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407 (D<<24) | (C<<16) | (B<<8) | (A);
408 //lut[i] = (v<<32) | (v<<24);
412 for(y=0; y<BLOCK_SIZE; y++){
413 int a= src[1] - src[2];
414 int b= src[3] - src[4];
415 int c= src[5] - src[6];
417 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
420 int v = d * FFSIGN(-b);
434 * accurate deblock filter
436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
437 int stride, const PPContext *c, int mode)
441 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
442 const int dcThreshold= dcOffset*2 + 1;
444 src+= step*4; // src points to begin of the 8x8 Block
448 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
449 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
450 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
451 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
452 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
453 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
454 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
455 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
456 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
457 if(numEq > c->ppMode.flatnessThreshold){
460 if(src[0] > src[step]){
468 if(src[x*step] > src[(x+1)*step]){
469 if(src[x *step] > max) max= src[ x *step];
470 if(src[(x+1)*step] < min) min= src[(x+1)*step];
472 if(src[(x+1)*step] > max) max= src[(x+1)*step];
473 if(src[ x *step] < min) min= src[ x *step];
477 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
478 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
481 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
482 sums[1] = sums[0] - first + src[3*step];
483 sums[2] = sums[1] - first + src[4*step];
484 sums[3] = sums[2] - first + src[5*step];
485 sums[4] = sums[3] - first + src[6*step];
486 sums[5] = sums[4] - src[0*step] + src[7*step];
487 sums[6] = sums[5] - src[1*step] + last;
488 sums[7] = sums[6] - src[2*step] + last;
489 sums[8] = sums[7] - src[3*step] + last;
490 sums[9] = sums[8] - src[4*step] + last;
492 if (mode & VISUALIZE) {
502 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
503 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
504 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
505 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
506 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
507 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
508 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
509 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
512 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
514 if(FFABS(middleEnergy) < 8*QP){
515 const int q=(src[3*step] - src[4*step])/2;
516 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
517 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
519 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
523 d*= FFSIGN(-middleEnergy);
533 if ((mode & VISUALIZE) && d) {
534 d= (d < 0) ? 32 : -32;
535 src[3*step]= av_clip_uint8(src[3*step] - d);
536 src[4*step]= av_clip_uint8(src[4*step] + d);
554 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
556 //we always compile C for testing which needs bitexactness
557 #define TEMPLATE_PP_C 1
558 #include "postprocess_template.c"
561 # define TEMPLATE_PP_ALTIVEC 1
562 # include "postprocess_altivec_template.c"
563 # include "postprocess_template.c"
566 #if ARCH_X86 && HAVE_INLINE_ASM
567 # if CONFIG_RUNTIME_CPUDETECT
568 # define TEMPLATE_PP_MMX 1
569 # include "postprocess_template.c"
570 # define TEMPLATE_PP_MMXEXT 1
571 # include "postprocess_template.c"
572 # define TEMPLATE_PP_3DNOW 1
573 # include "postprocess_template.c"
574 # define TEMPLATE_PP_SSE2 1
575 # include "postprocess_template.c"
577 # if HAVE_SSE2_INLINE
578 # define TEMPLATE_PP_SSE2 1
579 # include "postprocess_template.c"
580 # elif HAVE_MMXEXT_INLINE
581 # define TEMPLATE_PP_MMXEXT 1
582 # include "postprocess_template.c"
583 # elif HAVE_AMD3DNOW_INLINE
584 # define TEMPLATE_PP_3DNOW 1
585 # include "postprocess_template.c"
586 # elif HAVE_MMX_INLINE
587 # define TEMPLATE_PP_MMX 1
588 # include "postprocess_template.c"
593 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
594 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
596 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
597 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
599 pp_fn pp = postProcess_C;
600 PPContext *c= (PPContext *)vc;
601 PPMode *ppMode= (PPMode *)vm;
602 c->ppMode= *ppMode; //FIXME
604 if (!(ppMode->lumMode & BITEXACT)) {
605 #if CONFIG_RUNTIME_CPUDETECT
606 #if ARCH_X86 && HAVE_INLINE_ASM
607 // ordered per speed fastest first
608 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
609 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
610 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
611 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
613 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
615 #else /* CONFIG_RUNTIME_CPUDETECT */
617 pp = postProcess_SSE2;
618 #elif HAVE_MMXEXT_INLINE
619 pp = postProcess_MMX2;
620 #elif HAVE_AMD3DNOW_INLINE
621 pp = postProcess_3DNow;
622 #elif HAVE_MMX_INLINE
623 pp = postProcess_MMX;
625 pp = postProcess_altivec;
627 #endif /* !CONFIG_RUNTIME_CPUDETECT */
630 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
633 /* -pp Command line Help
635 const char pp_help[] =
636 "Available postprocessing filters:\n"
638 "short long name short long option Description\n"
639 "* * a autoq CPU power dependent enabler\n"
640 " c chrom chrominance filtering enabled\n"
641 " y nochrom chrominance filtering disabled\n"
642 " n noluma luma filtering disabled\n"
643 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
644 " 1. difference factor: default=32, higher -> more deblocking\n"
645 " 2. flatness threshold: default=39, lower -> more deblocking\n"
646 " the h & v deblocking filters share these\n"
647 " so you can't set different thresholds for h / v\n"
648 "vb vdeblock (2 threshold) vertical deblocking filter\n"
649 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
650 "va vadeblock (2 threshold) vertical deblocking filter\n"
651 "h1 x1hdeblock experimental h deblock filter 1\n"
652 "v1 x1vdeblock experimental v deblock filter 1\n"
653 "dr dering deringing filter\n"
654 "al autolevels automatic brightness / contrast\n"
655 " f fullyrange stretch luminance to (0..255)\n"
656 "lb linblenddeint linear blend deinterlacer\n"
657 "li linipoldeint linear interpolating deinterlace\n"
658 "ci cubicipoldeint cubic interpolating deinterlacer\n"
659 "md mediandeint median deinterlacer\n"
660 "fd ffmpegdeint ffmpeg deinterlacer\n"
661 "l5 lowpass5 FIR lowpass deinterlacer\n"
662 "de default hb:a,vb:a,dr:a\n"
663 "fa fast h1:a,v1:a,dr:a\n"
664 "ac ha:a:128:7,va:a,dr:a\n"
665 "tn tmpnoise (3 threshold) temporal noise reducer\n"
666 " 1. <= 2. <= 3. larger -> stronger filtering\n"
667 "fq forceQuant <quantizer> force quantizer\n"
669 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
670 "long form example:\n"
671 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
672 "short form example:\n"
673 "vb:a/hb:a/lb de,-vb\n"
679 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
681 char temp[GET_MODE_BUFFER_SIZE];
683 static const char filterDelimiters[] = ",/";
684 static const char optionDelimiters[] = ":|";
685 struct PPMode *ppMode;
689 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
693 if (!strcmp(name, "help")) {
695 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
696 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
697 av_log(NULL, AV_LOG_INFO, "%s", temp);
702 ppMode= av_malloc(sizeof(PPMode));
705 ppMode->chromMode= 0;
706 ppMode->maxTmpNoise[0]= 700;
707 ppMode->maxTmpNoise[1]= 1500;
708 ppMode->maxTmpNoise[2]= 3000;
709 ppMode->maxAllowedY= 234;
710 ppMode->minAllowedY= 16;
711 ppMode->baseDcDiff= 256/8;
712 ppMode->flatnessThreshold= 56-16-1;
713 ppMode->maxClippedThreshold= 0.01;
716 memset(temp, 0, GET_MODE_BUFFER_SIZE);
717 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
719 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
722 const char *filterName;
723 int q= 1000000; //PP_QUALITY_MAX;
727 const char *options[OPTIONS_ARRAY_SIZE];
730 int numOfUnknownOptions=0;
731 int enable=1; //does the user want us to enabled or disabled the filter
734 filterToken= av_strtok(p, filterDelimiters, &tokstate);
735 if(!filterToken) break;
736 p+= strlen(filterToken) + 1; // p points to next filterToken
737 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
742 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
744 if(*filterName == '-'){
749 for(;;){ //for all options
750 option= av_strtok(NULL, optionDelimiters, &tokstate);
753 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
754 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
755 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
756 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
757 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
759 options[numOfUnknownOptions] = option;
760 numOfUnknownOptions++;
762 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
764 options[numOfUnknownOptions] = NULL;
766 /* replace stuff from the replace Table */
767 for(i=0; replaceTable[2*i]; i++){
768 if(!strcmp(replaceTable[2*i], filterName)){
769 int newlen= strlen(replaceTable[2*i + 1]);
776 spaceLeft= p - temp + plen;
777 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
781 memmove(p + newlen, p, plen+1);
782 memcpy(p, replaceTable[2*i + 1], newlen);
787 for(i=0; filters[i].shortName; i++){
788 if( !strcmp(filters[i].longName, filterName)
789 || !strcmp(filters[i].shortName, filterName)){
790 ppMode->lumMode &= ~filters[i].mask;
791 ppMode->chromMode &= ~filters[i].mask;
794 if(!enable) break; // user wants to disable it
796 if(q >= filters[i].minLumQuality && luma)
797 ppMode->lumMode|= filters[i].mask;
798 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
799 if(q >= filters[i].minChromQuality)
800 ppMode->chromMode|= filters[i].mask;
802 if(filters[i].mask == LEVEL_FIX){
804 ppMode->minAllowedY= 16;
805 ppMode->maxAllowedY= 234;
806 for(o=0; options[o]; o++){
807 if( !strcmp(options[o],"fullyrange")
808 ||!strcmp(options[o],"f")){
809 ppMode->minAllowedY= 0;
810 ppMode->maxAllowedY= 255;
811 numOfUnknownOptions--;
815 else if(filters[i].mask == TEMP_NOISE_FILTER)
820 for(o=0; options[o]; o++){
822 ppMode->maxTmpNoise[numOfNoises]=
823 strtol(options[o], &tail, 0);
824 if(tail!=options[o]){
826 numOfUnknownOptions--;
827 if(numOfNoises >= 3) break;
831 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
832 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
835 for(o=0; options[o] && o<2; o++){
837 int val= strtol(options[o], &tail, 0);
838 if(tail==options[o]) break;
840 numOfUnknownOptions--;
841 if(o==0) ppMode->baseDcDiff= val;
842 else ppMode->flatnessThreshold= val;
845 else if(filters[i].mask == FORCE_QUANT){
847 ppMode->forcedQuant= 15;
849 for(o=0; options[o] && o<1; o++){
851 int val= strtol(options[o], &tail, 0);
852 if(tail==options[o]) break;
854 numOfUnknownOptions--;
855 ppMode->forcedQuant= val;
860 if(!filterNameOk) ppMode->error++;
861 ppMode->error += numOfUnknownOptions;
864 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
866 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
873 void pp_free_mode(pp_mode *mode){
877 static void reallocAlign(void **p, int size){
879 *p= av_mallocz(size);
882 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
883 int mbWidth = (width+15)>>4;
884 int mbHeight= (height+15)>>4;
888 c->qpStride= qpStride;
890 reallocAlign((void **)&c->tempDst, stride*24+32);
891 reallocAlign((void **)&c->tempSrc, stride*24);
892 reallocAlign((void **)&c->tempBlocks, 2*16*8);
893 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
895 c->yHistogram[i]= width*height/64*15/256;
898 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
899 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
900 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
903 reallocAlign((void **)&c->deintTemp, 2*width+32);
904 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
905 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
906 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
909 static const char * context_to_name(void * ptr) {
913 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
915 pp_context *pp_get_context(int width, int height, int cpuCaps){
916 PPContext *c= av_malloc(sizeof(PPContext));
917 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
918 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
920 memset(c, 0, sizeof(PPContext));
921 c->av_class = &av_codec_context_class;
922 if(cpuCaps&PP_FORMAT){
923 c->hChromaSubSample= cpuCaps&0x3;
924 c->vChromaSubSample= (cpuCaps>>4)&0x3;
926 c->hChromaSubSample= 1;
927 c->vChromaSubSample= 1;
929 if (cpuCaps & PP_CPU_CAPS_AUTO) {
930 c->cpuCaps = av_get_cpu_flags();
933 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
934 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
935 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
936 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
939 reallocBuffers(c, width, height, stride, qpStride);
946 void pp_free_context(void *vc){
947 PPContext *c = (PPContext*)vc;
950 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
951 av_free(c->tempBlurred[i]);
952 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
953 av_free(c->tempBlurredPast[i]);
955 av_free(c->tempBlocks);
956 av_free(c->yHistogram);
959 av_free(c->deintTemp);
960 av_free(c->stdQPTable);
961 av_free(c->nonBQPTable);
962 av_free(c->forcedQPTable);
964 memset(c, 0, sizeof(PPContext));
969 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
970 uint8_t * dst[3], const int dstStride[3],
971 int width, int height,
972 const QP_STORE_T *QP_store, int QPStride,
973 pp_mode *vm, void *vc, int pict_type)
975 int mbWidth = (width+15)>>4;
976 int mbHeight= (height+15)>>4;
979 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
980 int absQPStride = FFABS(QPStride);
982 // c->stride and c->QPStride are always positive
983 if(c->stride < minStride || c->qpStride < absQPStride)
984 reallocBuffers(c, width, height,
985 FFMAX(minStride, c->stride),
986 FFMAX(c->qpStride, absQPStride));
988 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
990 QP_store= c->forcedQPTable;
991 absQPStride = QPStride = 0;
992 if(mode->lumMode & FORCE_QUANT)
993 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
995 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
998 if(pict_type & PP_PICT_TYPE_QP2){
1000 const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1001 for(i=0; i<(count>>2); i++){
1002 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1004 for(i<<=2; i<count; i++){
1005 c->stdQPTable[i] = QP_store[i]>>1;
1007 QP_store= c->stdQPTable;
1008 QPStride= absQPStride;
1013 for(y=0; y<mbHeight; y++){
1014 for(x=0; x<mbWidth; x++){
1015 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1017 av_log(c, AV_LOG_INFO, "\n");
1019 av_log(c, AV_LOG_INFO, "\n");
1022 if((pict_type&7)!=3){
1025 const int count= FFMAX(mbHeight * QPStride, mbWidth);
1026 for(i=0; i<(count>>2); i++){
1027 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1029 for(i<<=2; i<count; i++){
1030 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1034 for(i=0; i<mbHeight; i++) {
1035 for(j=0; j<absQPStride; j++) {
1036 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1042 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1043 mode->lumMode, mode->chromMode);
1045 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1046 width, height, QP_store, QPStride, 0, mode, c);
1048 if (!(src[1] && src[2] && dst[1] && dst[2]))
1051 width = (width )>>c->hChromaSubSample;
1052 height = (height)>>c->vChromaSubSample;
1054 if(mode->chromMode){
1055 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1056 width, height, QP_store, QPStride, 1, mode, c);
1057 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1058 width, height, QP_store, QPStride, 2, mode, c);
1060 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1061 linecpy(dst[1], src[1], height, srcStride[1]);
1062 linecpy(dst[2], src[2], height, srcStride[2]);
1065 for(y=0; y<height; y++){
1066 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1067 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);