2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU, so the 3DNow! code is untested; nobody has reported it failing, so it presumably works
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once;
66 the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
/**
 * Return the library version number, LIBPOSTPROC_VERSION_INT.
 * Asserts (av_assert0) that LIBPOSTPROC_VERSION_MICRO is at least 100.
 */
92 unsigned postproc_version(void)
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 return LIBPOSTPROC_VERSION_INT;
/**
 * Return FFMPEG_CONFIGURATION, the build configuration string.
 */
98 const char *postproc_configuration(void)
100 return FFMPEG_CONFIGURATION;
/**
 * Return the license string of the library.
 *
 * LICENSE_PREFIX and FFMPEG_LICENSE are written next to each other so that they
 * form a single string (presumably FFMPEG_LICENSE expands to a string literal);
 * the returned pointer is advanced by sizeof(LICENSE_PREFIX) - 1, i.e. past the
 * prefix, so the caller sees only the FFMPEG_LICENSE part.
 */
103 const char *postproc_license(void)
105 #define LICENSE_PREFIX "libpostproc license: "
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Size in bytes of the scratch buffer into which pp_get_mode_by_name_and_quality()
 * copies the mode string before tokenizing it. */
113 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the options[] array that pp_get_mode_by_name_and_quality() fills
 * for each filter (the last slot is kept for the NULL terminator). */
114 #define OPTIONS_ARRAY_SIZE 10
/* NOTE(review): no use of TEMP_STRIDE is visible in this part of the file;
 * presumably it is consumed by the included templates - confirm. */
116 #define TEMP_STRIDE 8
117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants for the x86 inline-assembly code paths.
 * wNN holds the 16-bit value 0x00NN four times, bNN holds the byte 0xNN eight times.
 * NOTE(review): DECLARE_ASM_CONST is defined outside this file; its first
 * argument is presumably the required alignment in bytes - confirm. */
119 #if ARCH_X86 && HAVE_INLINE_ASM
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* Threshold constant named after the dering filter; its consumer is not visible
 * in this part of the file (presumably the included templates). */
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/**
 * Table of the filters that can be named in a postprocessing mode string.
 *
 * pp_get_mode_by_name_and_quality() compares each filter token with the
 * shortName and longName of every entry and, on a match, uses the entry's
 * chromDefault, minLumQuality, minChromQuality and mask. Its search loop stops
 * at the first entry whose shortName is NULL, which is what the end marker
 * below provides.
 *
 * NOTE(review): struct PPFilter is declared elsewhere; the initializers
 * presumably follow the order shortName, longName, chromDefault,
 * minLumQuality, minChromQuality, mask - confirm against the declaration.
 * NOTE(review): the long names "ahdeblock"/"avdeblock" below do not match the
 * "hadeblock"/"vadeblock" spelling printed by pp_help - confirm which is intended.
 */
133 static const struct PPFilter filters[]=
135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143 {"dr", "dering", 1, 5, 6, DERING},
144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153 {"be", "bitexact", 1, 0, 0, BITEXACT},
154 {"vi", "visualize", 1, 0, 0, VISUALIZE},
155 {NULL, NULL,0,0,0,0} //End Marker
/**
 * Alias table for mode strings, stored as consecutive (name, expansion) pairs.
 * pp_get_mode_by_name_and_quality() reads the name at index 2*i and, when it
 * equals the current filter name, splices the expansion at index 2*i + 1 into
 * the string being parsed.
 * NOTE(review): that lookup loop stops at a NULL name; the terminating entry is
 * not visible in this listing - confirm it is present.
 */
158 static const char * const replaceTable[]=
160 "default", "hb:a,vb:a,dr:a",
161 "de", "hb:a,vb:a,dr:a",
162 "fast", "h1:a,v1:a,dr:a",
163 "fa", "h1:a,v1:a,dr:a",
164 "ac", "ha:a:128:7,va:a,dr:a",
/* Cache prefetch helpers for the x86 inline-assembly code paths. Each helper
 * wraps the x86 instruction of the same name, applied to the address in asm
 * operand %0.
 * NOTE(review): the asm operand lists are not visible in this listing; %0 is
 * presumably bound to the parameter p - confirm. */
169 #if ARCH_X86 && HAVE_INLINE_ASM
170 static inline void prefetchnta(const void *p)
172 __asm__ volatile( "prefetchnta (%0)\n\t"
177 static inline void prefetcht0(const void *p)
179 __asm__ volatile( "prefetcht0 (%0)\n\t"
184 static inline void prefetcht1(const void *p)
186 __asm__ volatile( "prefetcht1 (%0)\n\t"
191 static inline void prefetcht2(const void *p)
193 __asm__ volatile( "prefetcht2 (%0)\n\t"
199 /* The horizontal functions exist only in C because the MMX
200 * code is faster with vertical filters and transposing. */
203 * Check if the given 8x8 Block is mostly "flat"
/**
 * Flatness test along rows: count horizontally adjacent pixel pairs that are
 * nearly equal and compare the count with the configured threshold.
 *
 * @param src    block to examine; pixels 0..7 of each examined row are read
 * @param stride NOTE(review): presumably the distance between rows; the per-row
 *               advance of src is not visible in this listing
 * @param c      context supplying nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 * @return nonzero if more than c->ppMode.flatnessThreshold pairs are nearly equal
 */
205 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
// Allowed difference between neighbours, scaled by the non-B-frame QP.
209 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
// A pair with difference d counts when -dcOffset <= d <= dcOffset: adding
// dcOffset and comparing as unsigned against 2*dcOffset+1 tests both bounds at once.
210 const int dcThreshold= dcOffset*2 + 1;
212 for(y=0; y<BLOCK_SIZE; y++){
213 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
214 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
215 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
216 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
218 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
219 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
222 return numEq > c->ppMode.flatnessThreshold;
226 * Check if the middle 8x8 Block in the given 8x16 block is flat
/**
 * Flatness test along columns: count vertically adjacent pixel pairs that are
 * nearly equal and compare the count with the configured threshold.
 *
 * @param src    start of the area to examine; the test begins 4 rows below it
 * @param stride distance in bytes between vertically adjacent pixels
 * @param c      context supplying nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 * @return nonzero if more than c->ppMode.flatnessThreshold pairs are nearly equal
 */
228 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
// Same tolerance computation as the horizontal test: a difference d counts
// when -dcOffset <= d <= dcOffset, checked with a single unsigned compare.
232 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
233 const int dcThreshold= dcOffset*2 + 1;
235 src+= stride*4; // src points to begin of the 8x8 Block
// BLOCK_SIZE-1 iterations; each compares 8 pixels with the pixels one row below.
236 for(y=0; y<BLOCK_SIZE-1; y++){
237 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
238 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
239 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
240 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
241 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
242 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
243 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
244 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
247 return numEq > c->ppMode.flatnessThreshold;
/**
 * Range check for the horizontal filters: sample pixel pairs that lie five
 * columns apart ((0,5), (2,7)) or three columns apart ((4,1), (6,3)) and
 * reject the block if any sampled difference is too large.
 *
 * For QP >= 0, (unsigned)(d + 2*QP) > 4*QP is true exactly when the difference
 * d lies outside [-2*QP, 2*QP].
 *
 * @param src    block to examine
 * @param stride NOTE(review): presumably the row distance; the statements that
 *               advance src between the checks are not visible in this listing
 * @param QP     quantizer that scales the tolerance
 * @return 0 as soon as one check fails (the success value is returned by a
 *         statement not visible here)
 */
250 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
254 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
256 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
258 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
260 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
/**
 * Range check for the vertical filters: for every group of four columns,
 * sample one row pair per column ((0,5), (2,7), (4,1), (6,3)) and reject the
 * block if any sampled difference is too large.
 *
 * For QP >= 0, (unsigned)(d + 2*QP) > 4*QP is true exactly when the difference
 * d lies outside [-2*QP, 2*QP].
 *
 * @param src    block to examine
 * @param stride distance in bytes between vertically adjacent pixels
 * @param QP     quantizer that scales the tolerance
 * @return 0 as soon as one check fails (the success value is returned by a
 *         statement not visible here)
 */
266 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
// x advances by 4 because each pass covers columns x .. x+3.
270 for(x=0; x<BLOCK_SIZE; x+=4){
271 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/**
 * Classify a block for the horizontal filters.
 * When isHorizDC_C() reports the block as flat, the result is that of
 * isHorizMinMaxOk_C() evaluated with c->QP.
 * NOTE(review): the value returned for a non-flat block is not visible in this
 * listing.
 */
279 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
281 if( isHorizDC_C(src, stride, c) ){
282 return isHorizMinMaxOk_C(src, stride, c->QP);
/**
 * Classify a block for the vertical filters.
 * When isVertDC_C() reports the block as flat, the result is that of
 * isVertMinMaxOk_C() evaluated with c->QP.
 * NOTE(review): the value returned for a non-flat block is not visible in this
 * listing.
 */
288 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
290 if( isVertDC_C(src, stride, c) ){
291 return isVertMinMaxOk_C(src, stride, c->QP);
/**
 * Default horizontal deblocking filter (C version), applied row by row.
 *
 * @param dst    pixels to filter; dst[0]..dst[7] of each row are read, the
 *               edge being filtered lies between dst[3] and dst[4]
 * @param stride NOTE(review): presumably the row distance; the per-row advance
 *               of dst is not visible in this listing
 * @param c      context supplying QP
 *
 * NOTE(review): the statements that clamp d and write the result back to dst
 * are not visible in this listing.
 */
297 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
300 for(y=0; y<BLOCK_SIZE; y++){
// Weighted difference across the edge (taps 2, -5, 5, -2 on dst[2..5]).
301 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
// Only edges whose energy is small relative to QP are filtered.
303 if(FFABS(middleEnergy) < 8*c->QP){
304 const int q=(dst[3] - dst[4])/2;
// The same measure taken inside the left (dst[0..3]) and right (dst[4..7]) neighbourhoods.
305 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
306 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
// Correction magnitude: edge energy in excess of the weaker neighbouring energy.
308 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
// Give the correction the sign opposite to the edge energy.
312 d*= FFSIGN(-middleEnergy);
333 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
334 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/**
 * Horizontal low pass filter (C version): replaces dst[0]..dst[7] of each row
 * with a 9-tap (1,1,2,2,4,2,2,1,1)/16 average.
 *
 * @param dst    points at the first of the 8 pixels to filter in each row;
 *               dst[-1] and dst[8] are read as neighbours
 * @param stride NOTE(review): presumably the row distance; the per-row advance
 *               of dst is not visible in this listing
 * @param c      context supplying QP
 */
336 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
339 for(y=0; y<BLOCK_SIZE; y++){
// Padding values for taps that fall outside the 8 pixels: the outer neighbour
// is used only if it differs from the border pixel by less than QP.
340 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
341 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
// sums[k] is a running sum over 7 consecutive (padded) samples; each step drops
// the oldest sample and adds the next one. The +4 enters every sum, so adding
// two sums contributes the +8 rounding term for the >>4 below.
344 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
345 sums[1] = sums[0] - first + dst[3];
346 sums[2] = sums[1] - first + dst[4];
347 sums[3] = sums[2] - first + dst[5];
348 sums[4] = sums[3] - first + dst[6];
349 sums[5] = sums[4] - dst[0] + dst[7];
350 sums[6] = sums[5] - dst[1] + last;
351 sums[7] = sums[6] - dst[2] + last;
352 sums[8] = sums[7] - dst[3] + last;
353 sums[9] = sums[8] - dst[4] + last;
// Two 7-sample sums offset by two positions plus twice the centre pixel give
// the 9-tap kernel; the original dst values are still intact in sums[] terms.
355 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
356 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
357 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
358 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
359 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
360 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
361 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
362 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
369 * Experimental Filter 1 (Horizontal)
370 * will not damage linear gradients
371 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
372 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
373 * the MMX2 version clips correctly, the C version does not
374 * not identical with the vertical one
/**
 * Experimental horizontal deblocking filter 1 (C version).
 *
 * @param src    pixels to filter; src[1]..src[6] of each row are read
 * @param stride NOTE(review): presumably the row distance; the per-row advance
 *               of src is not visible in this listing
 * @param QP     quantizer
 *
 * NOTE(review): the statements that apply v to the pixels, and the condition
 * under which they run, are not visible in this listing.
 */
376 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
// Lookup table with one packed 8-byte entry per index; static, so it is shared
// by all calls. NOTE(review): no reader of lut is visible in this listing.
379 static uint64_t lut[256];
// Map the table index to a signed even value: 0..254 for i < 128, -256..-2 above.
385 int v= i < 128 ? 2*i : 2*(i-256);
// NOTE(review): a, b, c, d are declared twice below; the first set is
// presumably inside a disabled (commented-out) region whose delimiters are not
// visible here - confirm.
387 //Simulate 112242211 9-Tap filter
388 uint64_t a= (v/16) & 0xFF;
389 uint64_t b= (v/8) & 0xFF;
390 uint64_t c= (v/4) & 0xFF;
391 uint64_t d= (3*v/8) & 0xFF;
393 //Simulate piecewise linear interpolation
394 uint64_t a= (v/16) & 0xFF;
395 uint64_t b= (v*3/16) & 0xFF;
396 uint64_t c= (v*5/16) & 0xFF;
397 uint64_t d= (7*v/16) & 0xFF;
// A..D are the byte-wise negations (0x100 - x, kept to 8 bits) of a..d.
398 uint64_t A= (0x100 - a)&0xFF;
399 uint64_t B= (0x100 - b)&0xFF;
400 uint64_t C= (0x100 - c)&0xFF;
// NOTE(review): D is derived from c, not d, unlike the A/B/C pattern above;
// this looks like a copy-paste slip ((0x100 - d) expected) - confirm and fix.
401 uint64_t D= (0x100 - c)&0xFF;
// Pack a..d into the upper four bytes and D..A into the lower four.
403 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
404 (D<<24) | (C<<16) | (B<<8) | (A);
405 //lut[i] = (v<<32) | (v<<24);
409 for(y=0; y<BLOCK_SIZE; y++){
// Steps between neighbouring pixels left of, across and right of the edge.
410 int a= src[1] - src[2];
411 int b= src[3] - src[4];
412 int c= src[5] - src[6];
// Amount by which the edge step exceeds the mean of its neighbours, floored at 0.
414 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
// Signed correction, directed against the edge step b.
417 int v = d * FFSIGN(-b);
431 * accurate deblock filter
/**
 * Accurate deblocking filter (C version) working on lines of pixels that are
 * `step` bytes apart, so the same code serves both filter directions.
 *
 * Each line is first tested for flatness; a flat line whose value range is
 * small enough gets the 9-tap low pass, otherwise the default edge filter is
 * applied to the two pixels next to the edge (src[3*step], src[4*step]).
 *
 * @param src    start of the area; the filtered pixels begin 4*step after it
 * @param step   distance in bytes between consecutive pixels of one line
 * @param stride NOTE(review): presumably the distance between the lines that
 *               are processed; the loop using it is not visible in this listing
 * @param c      context supplying nonBQP, QP and the ppMode thresholds
 * @param mode   filter mode flags; VISUALIZE switches to a debug rendering
 */
433 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
434 int stride, const PPContext *c, int mode)
// Same tolerance as in the DC tests: a difference d counts as "equal" when
// -dcOffset <= d <= dcOffset, checked with one unsigned compare.
438 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
439 const int dcThreshold= dcOffset*2 + 1;
441 src+= step*4; // src points to begin of the 8x8 Block
// Count nearly equal neighbours among the 10 pixels src[-1*step]..src[8*step].
445 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
446 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
447 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
448 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
449 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
450 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
451 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
452 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
453 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
// Flat line: determine the minimum and maximum of the pixels, two at a time.
454 if(numEq > c->ppMode.flatnessThreshold){
457 if(src[0] > src[step]){
// Ordering the pair first means only one comparison against max and one
// against min is needed per pixel.
465 if(src[x*step] > src[(x+1)*step]){
466 if(src[x *step] > max) max= src[ x *step];
467 if(src[(x+1)*step] < min) min= src[(x+1)*step];
469 if(src[(x+1)*step] > max) max= src[(x+1)*step];
470 if(src[ x *step] < min) min= src[ x *step];
// Padding values for the low pass: the outer neighbour is used only if it
// differs from the border pixel by less than QP.
474 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
475 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
// Running sums over 7 consecutive (padded) samples, including a +4 rounding term.
478 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
479 sums[1] = sums[0] - first + src[3*step];
480 sums[2] = sums[1] - first + src[4*step];
481 sums[3] = sums[2] - first + src[5*step];
482 sums[4] = sums[3] - first + src[6*step];
483 sums[5] = sums[4] - src[0*step] + src[7*step];
484 sums[6] = sums[5] - src[1*step] + last;
485 sums[7] = sums[6] - src[2*step] + last;
486 sums[8] = sums[7] - src[3*step] + last;
487 sums[9] = sums[8] - src[4*step] + last;
// NOTE(review): the body of the VISUALIZE branch is not visible in this listing.
489 if (mode & VISUALIZE) {
// Two sums plus twice the centre pixel form the (1,1,2,2,4,2,2,1,1)/16 kernel.
499 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
500 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
501 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
502 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
503 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
504 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
505 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
506 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
// Default edge filter (presumably the branch taken for non-flat lines; the
// branch structure is not visible in this listing): weighted difference
// across the edge between src[3*step] and src[4*step].
509 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
511 if(FFABS(middleEnergy) < 8*QP){
512 const int q=(src[3*step] - src[4*step])/2;
513 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
514 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
// Edge energy in excess of the weaker neighbouring energy, signed against the edge.
516 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
520 d*= FFSIGN(-middleEnergy);
// Debug rendering: replace any nonzero correction by a fixed +/-32 so that
// filtered edges become visible; av_clip_uint8 keeps the result in 0..255.
530 if ((mode & VISUALIZE) && d) {
531 d= (d < 0) ? 32 : -32;
532 src[3*step]= av_clip_uint8(src[3*step] - d);
533 src[4*step]= av_clip_uint8(src[4*step] + d);
// Instantiate the filter templates. postprocess_template.c is included once per
// instruction set, each time with a different TEMPLATE_PP_* macro defined;
// presumably every inclusion produces one postProcess_<variant>() function
// (the names postProcess_C, postProcess_MMX, postProcess_MMX2,
// postProcess_3DNow, postProcess_SSE2 and postProcess_altivec are referenced by
// postProcess() below).
551 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
553 //we always compile C for testing which needs bitexactness
554 #define TEMPLATE_PP_C 1
555 #include "postprocess_template.c"
// AltiVec variant. NOTE(review): the preprocessor condition guarding these
// three lines is not visible in this listing.
558 # define TEMPLATE_PP_ALTIVEC 1
559 # include "postprocess_altivec_template.c"
560 # include "postprocess_template.c"
// x86: with runtime CPU detection every variant is compiled ...
563 #if ARCH_X86 && HAVE_INLINE_ASM
564 # if CONFIG_RUNTIME_CPUDETECT
565 # define TEMPLATE_PP_MMX 1
566 # include "postprocess_template.c"
567 # define TEMPLATE_PP_MMXEXT 1
568 # include "postprocess_template.c"
569 # define TEMPLATE_PP_3DNOW 1
570 # include "postprocess_template.c"
571 # define TEMPLATE_PP_SSE2 1
572 # include "postprocess_template.c"
// ... otherwise only the first variant enabled at build time, tested in the
// order SSE2, MMXEXT, 3DNow, MMX.
574 # if HAVE_SSE2_INLINE
575 # define TEMPLATE_PP_SSE2 1
576 # include "postprocess_template.c"
577 # elif HAVE_MMXEXT_INLINE
578 # define TEMPLATE_PP_MMXEXT 1
579 # include "postprocess_template.c"
580 # elif HAVE_AMD3DNOW_INLINE
581 # define TEMPLATE_PP_3DNOW 1
582 # include "postprocess_template.c"
583 # elif HAVE_MMX_INLINE
584 # define TEMPLATE_PP_MMX 1
585 # include "postprocess_template.c"
/**
 * Signature shared by all postProcess_<variant>() implementations, so that
 * postProcess() can select one of them through a function pointer.
 */
590 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
591 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
/**
 * Process one plane with the best available implementation.
 *
 * Copies *vm into the context, selects an implementation and forwards all
 * plane arguments to it. The C implementation is the default and is always
 * used when BITEXACT is set in the luma mode.
 *
 * @param src, srcStride  source plane and its line size
 * @param dst, dstStride  destination plane and its line size
 * @param width, height   plane dimensions
 * @param QPs, QPStride   quantizer table and its stride
 * @param isColor         plane selector passed through to the implementation
 *                        (pp_postprocess() passes 0 for luma, 1 and 2 for chroma)
 * @param vm              mode (a PPMode)
 * @param vc              context (a PPContext)
 */
593 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
594 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
596 pp_fn pp = postProcess_C;
597 PPContext *c= (PPContext *)vc;
598 PPMode *ppMode= (PPMode *)vm;
599 c->ppMode= *ppMode; //FIXME
// Optimized variants are not required to be bit-exact, so they are considered
// only when BITEXACT is not requested.
601 if (!(ppMode->lumMode & BITEXACT)) {
// Runtime detection: choose by the capability flags stored in the context.
602 #if CONFIG_RUNTIME_CPUDETECT
603 #if ARCH_X86 && HAVE_INLINE_ASM
604 // ordered per speed fastest first
605 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
606 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
607 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
608 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
610 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
// No runtime detection: use the single variant that was compiled in.
612 #else /* CONFIG_RUNTIME_CPUDETECT */
614 pp = postProcess_SSE2;
615 #elif HAVE_MMXEXT_INLINE
616 pp = postProcess_MMX2;
617 #elif HAVE_AMD3DNOW_INLINE
618 pp = postProcess_3DNow;
619 #elif HAVE_MMX_INLINE
620 pp = postProcess_MMX;
622 pp = postProcess_altivec;
624 #endif /* !CONFIG_RUNTIME_CPUDETECT */
627 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
630 /* -pp Command line Help
/**
 * Help text describing the filters and the syntax of the mode string.
 * pp_get_mode_by_name_and_quality() logs it line by line when the mode name is
 * "help".
 * NOTE(review): the long names listed here for "ha"/"va" (hadeblock, vadeblock)
 * differ from the longName strings in filters[] (ahdeblock, avdeblock), so the
 * documented long forms are not the ones the parser matches - confirm which
 * spelling is intended.
 */
632 const char pp_help[] =
633 "Available postprocessing filters:\n"
635 "short long name short long option Description\n"
636 "* * a autoq CPU power dependent enabler\n"
637 " c chrom chrominance filtering enabled\n"
638 " y nochrom chrominance filtering disabled\n"
639 " n noluma luma filtering disabled\n"
640 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
641 " 1. difference factor: default=32, higher -> more deblocking\n"
642 " 2. flatness threshold: default=39, lower -> more deblocking\n"
643 " the h & v deblocking filters share these\n"
644 " so you can't set different thresholds for h / v\n"
645 "vb vdeblock (2 threshold) vertical deblocking filter\n"
646 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
647 "va vadeblock (2 threshold) vertical deblocking filter\n"
648 "h1 x1hdeblock experimental h deblock filter 1\n"
649 "v1 x1vdeblock experimental v deblock filter 1\n"
650 "dr dering deringing filter\n"
651 "al autolevels automatic brightness / contrast\n"
652 " f fullyrange stretch luminance to (0..255)\n"
653 "lb linblenddeint linear blend deinterlacer\n"
654 "li linipoldeint linear interpolating deinterlace\n"
655 "ci cubicipoldeint cubic interpolating deinterlacer\n"
656 "md mediandeint median deinterlacer\n"
657 "fd ffmpegdeint ffmpeg deinterlacer\n"
658 "l5 lowpass5 FIR lowpass deinterlacer\n"
659 "de default hb:a,vb:a,dr:a\n"
660 "fa fast h1:a,v1:a,dr:a\n"
661 "ac ha:a:128:7,va:a,dr:a\n"
662 "tn tmpnoise (3 threshold) temporal noise reducer\n"
663 " 1. <= 2. <= 3. larger -> stronger filtering\n"
664 "fq forceQuant <quantizer> force quantizer\n"
666 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
667 "long form example:\n"
668 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
669 "short form example:\n"
670 "vb:a/hb:a/lb de,-vb\n"
/**
 * Build a postprocessing mode from its textual description.
 *
 * The description is a list of filter tokens separated by ',' or '/'; each
 * token is a filter name followed by options separated by ':' or '|'. A name
 * starting with '-' is handled specially (see below), names found in
 * replaceTable are expanded, and every other name is looked up in filters[].
 *
 * @param name    mode description; "help" logs pp_help instead
 * @param quality quality level that a filter gets when it carries the
 *                autoq / a option; a filter is enabled only if its level is
 *                at least the minimum quality stored in filters[]
 * @return NOTE(review): the return statements are not visible in this listing;
 *         the function is declared to return a pp_mode pointer
 */
676 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
678 char temp[GET_MODE_BUFFER_SIZE];
680 static const char filterDelimiters[] = ",/";
681 static const char optionDelimiters[] = ":|";
682 struct PPMode *ppMode;
686 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
690 if (!strcmp(name, "help")) {
// Log the help text one line at a time through temp.
692 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
// Copy size = line length + 1 for the '\n' + 1 for the terminator, capped at
// the buffer size.
693 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
694 av_log(NULL, AV_LOG_INFO, "%s", temp);
// NOTE(review): no check of the av_malloc() result is visible before ppMode is
// written below - confirm that allocation failure is handled.
699 ppMode= av_malloc(sizeof(PPMode));
// Defaults used when the mode string does not override them.
702 ppMode->chromMode= 0;
703 ppMode->maxTmpNoise[0]= 700;
704 ppMode->maxTmpNoise[1]= 1500;
705 ppMode->maxTmpNoise[2]= 3000;
706 ppMode->maxAllowedY= 234;
707 ppMode->minAllowedY= 16;
708 ppMode->baseDcDiff= 256/8;
709 ppMode->flatnessThreshold= 56-16-1;
710 ppMode->maxClippedThreshold= 0.01;
// Work on a private copy of the description. The buffer is zero-filled and the
// copy is limited to GET_MODE_BUFFER_SIZE - 1, which leaves zero bytes at the
// end of the buffer (presumably so that stepping past the last token, see
// below, still lands on a terminator).
713 memset(temp, 0, GET_MODE_BUFFER_SIZE);
714 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
716 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
719 const char *filterName;
// Quality level of the current filter; very high unless autoq is given.
720 int q= 1000000; //PP_QUALITY_MAX;
724 const char *options[OPTIONS_ARRAY_SIZE];
727 int numOfUnknownOptions=0;
728 int enable=1; //does the user want us to enable or disable the filter
// Cut the next filter token out of the buffer, then its name out of the token.
731 filterToken= av_strtok(p, filterDelimiters, &tokstate);
732 if(!filterToken) break;
733 p+= strlen(filterToken) + 1; // p points to next filterToken
734 filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
739 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
// NOTE(review): the body of this branch is not visible in this listing;
// judging by the `enable` flag above it presumably marks the filter as
// disabled and skips the '-'.
741 if(*filterName == '-'){
// Generic options are handled here; everything else is collected in options[]
// for the filter-specific parsing further down.
746 for(;;){ //for all options
747 option= av_strtok(NULL, optionDelimiters, &tokstate);
750 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
751 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
752 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
753 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
754 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
756 options[numOfUnknownOptions] = option;
757 numOfUnknownOptions++;
// Stop one short of the array size so the NULL terminator below always fits.
759 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
761 options[numOfUnknownOptions] = NULL;
763 /* replace stuff from the replace Table */
764 for(i=0; replaceTable[2*i]; i++){
765 if(!strcmp(replaceTable[2*i], filterName)){
766 int newlen= strlen(replaceTable[2*i + 1]);
// The expansion is inserted at p, in front of the not yet parsed rest of the
// buffer, after checking that it still fits.
773 spaceLeft= p - temp + plen;
774 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
778 memmove(p + newlen, p, plen+1);
779 memcpy(p, replaceTable[2*i + 1], newlen);
// Look the name up in the filter table (short or long form).
784 for(i=0; filters[i].shortName; i++){
785 if( !strcmp(filters[i].longName, filterName)
786 || !strcmp(filters[i].shortName, filterName)){
// Clear the filter's bits first; they are set again below if it is enabled.
787 ppMode->lumMode &= ~filters[i].mask;
788 ppMode->chromMode &= ~filters[i].mask;
791 if(!enable) break; // user wants to disable it
793 if(q >= filters[i].minLumQuality && luma)
794 ppMode->lumMode|= filters[i].mask;
// chrom == -1 presumably means "not specified", in which case the filter's
// chromDefault decides.
795 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
796 if(q >= filters[i].minChromQuality)
797 ppMode->chromMode|= filters[i].mask;
// Filter-specific options. Every option consumed here is subtracted from
// numOfUnknownOptions; what remains is counted as an error at the end.
// autolevels: "fullyrange" / "f" widens the allowed luma range to 0..255.
799 if(filters[i].mask == LEVEL_FIX){
801 ppMode->minAllowedY= 16;
802 ppMode->maxAllowedY= 234;
803 for(o=0; options[o]; o++){
804 if( !strcmp(options[o],"fullyrange")
805 ||!strcmp(options[o],"f")){
806 ppMode->minAllowedY= 0;
807 ppMode->maxAllowedY= 255;
808 numOfUnknownOptions--;
// tmpnoise: up to three numeric thresholds stored in maxTmpNoise[].
812 else if(filters[i].mask == TEMP_NOISE_FILTER)
817 for(o=0; options[o]; o++){
819 ppMode->maxTmpNoise[numOfNoises]=
820 strtol(options[o], &tail, 0);
821 if(tail!=options[o]){
823 numOfUnknownOptions--;
824 if(numOfNoises >= 3) break;
// deblocking filters: first number is baseDcDiff, second is flatnessThreshold.
828 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
829 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
832 for(o=0; options[o] && o<2; o++){
834 int val= strtol(options[o], &tail, 0);
835 if(tail==options[o]) break;
837 numOfUnknownOptions--;
838 if(o==0) ppMode->baseDcDiff= val;
839 else ppMode->flatnessThreshold= val;
// forcequant: quantizer defaults to 15, a numeric option overrides it.
842 else if(filters[i].mask == FORCE_QUANT){
844 ppMode->forcedQuant= 15;
846 for(o=0; options[o] && o<1; o++){
848 int val= strtol(options[o], &tail, 0);
849 if(tail==options[o]) break;
851 numOfUnknownOptions--;
852 ppMode->forcedQuant= val;
// Unknown filter names and unconsumed options both count as errors.
857 if(!filterNameOk) ppMode->error++;
858 ppMode->error += numOfUnknownOptions;
861 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
863 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/**
 * Release a mode object.
 * NOTE(review): the body is not visible in this listing; presumably it frees
 * the PPMode that pp_get_mode_by_name_and_quality() allocates with av_malloc().
 */
870 void pp_free_mode(pp_mode *mode){
/**
 * Replace the buffer *p by a new zero-initialised buffer of `size` bytes
 * obtained from av_mallocz().
 * The function returns nothing, so callers learn about a failed allocation
 * only through *p being NULL.
 * NOTE(review): the release of the previous *p is not visible in this listing -
 * confirm that the old buffer is freed before it is overwritten.
 */
874 static void reallocAlign(void **p, int size){
876 *p= av_mallocz(size);
/**
 * (Re)allocate all working buffers of a context for the given geometry.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension tempDst, tempSrc and tempBlurred
 * @param qpStride entries per row of the QP tables; stored in c->qpStride
 *
 * NOTE(review): reallocAlign() returns nothing and no buffer is checked for
 * NULL in the visible lines, so a failed allocation goes unnoticed here.
 */
879 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
// Picture size in 16x16 macroblocks, rounded up.
880 int mbWidth = (width+15)>>4;
881 int mbHeight= (height+15)>>4;
885 c->qpStride= qpStride;
887 reallocAlign((void **)&c->tempDst, stride*24+32);
888 reallocAlign((void **)&c->tempSrc, stride*24);
889 reallocAlign((void **)&c->tempBlocks, 2*16*8);
890 reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
// Initial value of each histogram bin (the loop header is not visible in this
// listing).
892 c->yHistogram[i]= width*height/64*15/256;
895 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
896 reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
897 reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
900 reallocAlign((void **)&c->deintTemp, 2*width+32);
// QP tables: the two per-picture tables hold qpStride entries per macroblock
// row, the forced-quantizer table holds a single row of mbWidth entries.
901 reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
902 reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
903 reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
/**
 * Name callback stored in av_codec_context_class.
 * NOTE(review): the body is not visible in this listing; presumably it returns
 * a constant name string and ignores ptr.
 */
906 static const char * context_to_name(void * ptr) {
/* AVClass attached to every PPContext (see pp_get_context()), so that a
 * context can be passed as the first argument of av_log(); initialised with
 * the class name "Postproc" and the context_to_name callback. */
910 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
912 pp_context *pp_get_context(int width, int height, int cpuCaps){
913 PPContext *c= av_malloc(sizeof(PPContext));
914 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
915 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
917 memset(c, 0, sizeof(PPContext));
918 c->av_class = &av_codec_context_class;
919 if(cpuCaps&PP_FORMAT){
920 c->hChromaSubSample= cpuCaps&0x3;
921 c->vChromaSubSample= (cpuCaps>>4)&0x3;
923 c->hChromaSubSample= 1;
924 c->vChromaSubSample= 1;
926 if (cpuCaps & PP_CPU_CAPS_AUTO) {
927 c->cpuCaps = av_get_cpu_flags();
930 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
931 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
932 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
933 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
936 reallocBuffers(c, width, height, stride, qpStride);
943 void pp_free_context(void *vc){
944 PPContext *c = (PPContext*)vc;
947 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
948 av_free(c->tempBlurred[i]);
949 for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
950 av_free(c->tempBlurredPast[i]);
952 av_free(c->tempBlocks);
953 av_free(c->yHistogram);
956 av_free(c->deintTemp);
957 av_free(c->stdQPTable);
958 av_free(c->nonBQPTable);
959 av_free(c->forcedQPTable);
961 memset(c, 0, sizeof(PPContext));
/**
 * Postprocess one picture.
 *
 * The luma plane (index 0) is always filtered. The chroma planes (1 and 2) are
 * filtered when mode->chromMode is nonzero and are otherwise copied unchanged
 * from src to dst.
 *
 * @param src, srcStride  the three source planes and their line sizes
 * @param dst, dstStride  the three destination planes and their line sizes
 * @param width, height   luma dimensions; the chroma dimensions are derived by
 *                        shifting with the context's chroma subsampling values
 * @param QP_store        per-macroblock quantizers, or NULL to use a constant
 * @param QPStride        entries per row of QP_store (may be negative)
 * @param vm              mode created by pp_get_mode_by_name_and_quality()
 * @param vc              context created by pp_get_context()
 * @param pict_type       picture type; PP_PICT_TYPE_QP2 additionally marks
 *                        QP values that have to be halved
 */
966 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
967 uint8_t * dst[3], const int dstStride[3],
968 int width, int height,
969 const QP_STORE_T *QP_store, int QPStride,
970 pp_mode *vm, void *vc, int pict_type)
// Picture size in 16x16 macroblocks, rounded up.
972 int mbWidth = (width+15)>>4;
973 int mbHeight= (height+15)>>4;
976 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
977 int absQPStride = FFABS(QPStride);
// Grow the context buffers if this picture needs larger strides than the ones
// the context was set up with.
979 // c->stride and c->QPStride are always positive
980 if(c->stride < minStride || c->qpStride < absQPStride)
981 reallocBuffers(c, width, height,
982 FFMAX(minStride, c->stride),
983 FFMAX(c->qpStride, absQPStride));
// No QP table, or a forced quantizer: use a single row of constant values and
// a stride of 0 so that every macroblock row reads that same row.
985 if(!QP_store || (mode->lumMode & FORCE_QUANT)){
987 QP_store= c->forcedQPTable;
988 absQPStride = QPStride = 0;
989 if(mode->lumMode & FORCE_QUANT)
990 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
992 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
// Halve the QP values into stdQPTable: four entries at a time through 32-bit
// words (the mask clears the bit shifted in from the neighbouring byte), then
// the remaining entries one by one.
// NOTE(review): the uint32_t accesses assume suitably aligned tables and rely
// on type punning - confirm this is acceptable for all callers.
995 if(pict_type & PP_PICT_TYPE_QP2){
997 const int count= mbHeight * absQPStride;
998 for(i=0; i<(count>>2); i++){
999 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1001 for(i<<=2; i<count; i++){
1002 c->stdQPTable[i] = QP_store[i]>>1;
1004 QP_store= c->stdQPTable;
1005 QPStride= absQPStride;
// Dump of the QP table. NOTE(review): presumably compiled out by a
// preprocessor guard that is not visible in this listing.
1010 for(y=0; y<mbHeight; y++){
1011 for(x=0; x<mbWidth; x++){
1012 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1014 av_log(c, AV_LOG_INFO, "\n");
1016 av_log(c, AV_LOG_INFO, "\n");
// Remember the QP values (limited to 6 bits) of this picture in nonBQPTable.
// NOTE(review): (pict_type&7)!=3 presumably means "not a B picture" - confirm.
1019 if((pict_type&7)!=3){
// Two copy variants follow: a packed copy of the whole table and a row-by-row
// copy that converts from QPStride to absQPStride. The condition selecting
// between them is not visible in this listing.
1022 const int count= mbHeight * QPStride;
1023 for(i=0; i<(count>>2); i++){
1024 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1026 for(i<<=2; i<count; i++){
1027 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1031 for(i=0; i<mbHeight; i++) {
1032 for(j=0; j<absQPStride; j++) {
1033 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1039 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1040 mode->lumMode, mode->chromMode);
// Luma plane.
1042 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1043 width, height, QP_store, QPStride, 0, mode, c);
// From here on width and height describe the chroma planes.
1045 width = (width )>>c->hChromaSubSample;
1046 height = (height)>>c->vChromaSubSample;
1048 if(mode->chromMode){
1049 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1050 width, height, QP_store, QPStride, 1, mode, c);
1051 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1052 width, height, QP_store, QPStride, 2, mode, c);
// No chroma filtering: copy the planes, in one call per plane when source and
// destination line sizes agree ...
1054 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1055 linecpy(dst[1], src[1], height, srcStride[1]);
1056 linecpy(dst[2], src[2], height, srcStride[2]);
// ... and line by line (width bytes each) otherwise.
1059 for(y=0; y<height; y++){
1060 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1061 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);