2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
92 unsigned postproc_version(void)
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 return LIBPOSTPROC_VERSION_INT;
98 const char *postproc_configuration(void)
100 return FFMPEG_CONFIGURATION;
103 const char *postproc_license(void)
105 #define LICENSE_PREFIX "libpostproc license: "
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Size of the scratch copy of the filter string in pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array in pp_get_mode_by_name_and_quality(). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length of the blocks the C filters in this file iterate over. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
119 #if ARCH_X86 && HAVE_INLINE_ASM
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
133 static const struct PPFilter filters[]=
135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143 {"dr", "dering", 1, 5, 6, DERING},
144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153 {"be", "bitexact", 1, 0, 0, BITEXACT},
154 {NULL, NULL,0,0,0,0} //End Marker
/**
 * Alias table consumed pairwise by pp_get_mode_by_name_and_quality():
 * entry 2*i is an alias name, entry 2*i+1 the filter string that replaces it.
 * The list ends with a single NULL, which the lookup loop tests for.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
#if ARCH_X86 && HAVE_INLINE_ASM
/* Thin wrappers that emit one x86 prefetch instruction for the address p.
 * They have no effect visible to C code. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
198 /* The horizontal functions exist only in C because the MMX
199 * code is faster with vertical filters and transposing. */
202 * Check if the given 8x8 Block is mostly "flat"
204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
208 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
209 const int dcThreshold= dcOffset*2 + 1;
211 for(y=0; y<BLOCK_SIZE; y++){
212 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
216 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
218 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
221 return numEq > c->ppMode.flatnessThreshold;
225 * Check if the middle 8x8 Block in the given 8x16 block is flat
227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
231 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
232 const int dcThreshold= dcOffset*2 + 1;
234 src+= stride*4; // src points to begin of the 8x8 Block
235 for(y=0; y<BLOCK_SIZE-1; y++){
236 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
241 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
242 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
243 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
246 return numEq > c->ppMode.flatnessThreshold;
/**
 * Check that sampled pixel pairs of an 8-row area differ by at most 2*QP.
 *
 * One pair, five columns apart, is sampled per row; the column pattern
 * (0/5, 2/7, 4/1, 6/3) repeats every four rows.
 *
 * @param src    first pixel of the first row
 * @param stride distance between consecutive rows
 * @param QP     quantiser; the allowed difference is [-2*QP, 2*QP]
 * @return 0 as soon as one sampled pair is outside the range, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    int i;
    for(i=0; i<2; i++){
        /* (unsigned)(d + 2*QP) > 4*QP is true exactly when d < -2*QP or d > 2*QP */
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
269 for(x=0; x<BLOCK_SIZE; x+=4){
270 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
271 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
280 if( isHorizDC_C(src, stride, c) ){
281 if( isHorizMinMaxOk_C(src, stride, c->QP) )
290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
292 if( isVertDC_C(src, stride, c) ){
293 if( isVertMinMaxOk_C(src, stride, c->QP) )
302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
305 for(y=0; y<BLOCK_SIZE; y++){
306 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
308 if(FFABS(middleEnergy) < 8*c->QP){
309 const int q=(dst[3] - dst[4])/2;
310 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
311 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
313 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
317 d*= FFSIGN(-middleEnergy);
338 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
339 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
344 for(y=0; y<BLOCK_SIZE; y++){
345 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
346 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
349 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
350 sums[1] = sums[0] - first + dst[3];
351 sums[2] = sums[1] - first + dst[4];
352 sums[3] = sums[2] - first + dst[5];
353 sums[4] = sums[3] - first + dst[6];
354 sums[5] = sums[4] - dst[0] + dst[7];
355 sums[6] = sums[5] - dst[1] + last;
356 sums[7] = sums[6] - dst[2] + last;
357 sums[8] = sums[7] - dst[3] + last;
358 sums[9] = sums[8] - dst[4] + last;
360 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
361 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
362 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
363 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
364 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
365 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
366 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
367 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
374 * Experimental Filter 1 (Horizontal)
375 * will not damage linear gradients
376 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
377 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
378 * MMX2 version does correct clipping C version does not
379 * not identical with the vertical one
381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
384 static uint64_t *lut= NULL;
388 lut = av_malloc(256*8);
391 int v= i < 128 ? 2*i : 2*(i-256);
393 //Simulate 112242211 9-Tap filter
394 uint64_t a= (v/16) & 0xFF;
395 uint64_t b= (v/8) & 0xFF;
396 uint64_t c= (v/4) & 0xFF;
397 uint64_t d= (3*v/8) & 0xFF;
399 //Simulate piecewise linear interpolation
400 uint64_t a= (v/16) & 0xFF;
401 uint64_t b= (v*3/16) & 0xFF;
402 uint64_t c= (v*5/16) & 0xFF;
403 uint64_t d= (7*v/16) & 0xFF;
404 uint64_t A= (0x100 - a)&0xFF;
405 uint64_t B= (0x100 - b)&0xFF;
406 uint64_t C= (0x100 - c)&0xFF;
407 uint64_t D= (0x100 - c)&0xFF;
409 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
410 (D<<24) | (C<<16) | (B<<8) | (A);
411 //lut[i] = (v<<32) | (v<<24);
415 for(y=0; y<BLOCK_SIZE; y++){
416 int a= src[1] - src[2];
417 int b= src[3] - src[4];
418 int c= src[5] - src[6];
420 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
423 int v = d * FFSIGN(-b);
/*
 * do_a_deblock_C: deblocking filter that addresses the pixels along the
 * filtered direction as src[k*step], so one routine can serve both
 * orientations depending on the step passed in.
 * NOTE(review): this excerpt omits several original lines (braces, the loop
 * headers, the declarations of QP, numEq, min, max and sums, and the final
 * write-back of d); the comments below describe only the visible statements.
 */
437 * accurate deblock filter
439 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
440 int stride, const PPContext *c)
/* Tolerance for the flatness test: a neighbour difference counts as equal
 * when it lies in [-dcOffset, dcOffset]. */
444 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
445 const int dcThreshold= dcOffset*2 + 1;
447 src+= step*4; // src points to begin of the 8x8 Block
/* Count the equal pairs among the nine adjacent pairs from src[-1*step] to
 * src[8*step]; the unsigned cast folds the two-sided range test into one
 * comparison. */
451 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
452 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
453 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
454 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
455 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
456 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
457 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
458 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
459 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
/* Flat case: more equal pairs than the configured flatness threshold. */
460 if(numEq > c->ppMode.flatnessThreshold){
/* Track the minimum and maximum pixel, examining the pixels two at a time. */
463 if(src[0] > src[step]){
471 if(src[x*step] > src[(x+1)*step]){
472 if(src[x *step] > max) max= src[ x *step];
473 if(src[(x+1)*step] < min) min= src[(x+1)*step];
475 if(src[(x+1)*step] > max) max= src[(x+1)*step];
476 if(src[ x *step] < min) min= src[ x *step];
/* Low-pass with the same running-sum arithmetic as doHorizLowPass_C, with
 * step-scaled indices. NOTE(review): the condition guarding this part
 * (presumably a test on max-min) is not visible in this excerpt. */
480 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
481 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
484 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
485 sums[1] = sums[0] - first + src[3*step];
486 sums[2] = sums[1] - first + src[4*step];
487 sums[3] = sums[2] - first + src[5*step];
488 sums[4] = sums[3] - first + src[6*step];
489 sums[5] = sums[4] - src[0*step] + src[7*step];
490 sums[6] = sums[5] - src[1*step] + last;
491 sums[7] = sums[6] - src[2*step] + last;
492 sums[8] = sums[7] - src[3*step] + last;
493 sums[9] = sums[8] - src[4*step] + last;
495 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
496 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
497 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
498 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
499 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
500 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
501 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
502 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Non-flat case: same energy terms as doHorizDefFilter_C, with step-scaled
 * indices. */
505 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
507 if(FFABS(middleEnergy) < 8*QP){
508 const int q=(src[3*step] - src[4*step])/2;
509 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
510 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
512 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
516 d*= FFSIGN(-middleEnergy);
/*
 * Template instantiation: postprocess_template.c is included repeatedly, each
 * time after defining a different TEMPLATE_PP_* macro. Presumably each
 * inclusion emits one postProcess_<variant> function (postProcess_C,
 * postProcess_SSE2, ... are referenced by postProcess() in this file) -
 * confirm against the template.
 * NOTE(review): several preprocessor lines of this section (conditionals and
 * their terminators) are missing from this excerpt.
 */
540 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
542 //we always compile C for testing which needs bitexactness
543 #define TEMPLATE_PP_C 1
544 #include "postprocess_template.c"
/* AltiVec variant: its own template is included before the common one. */
547 # define TEMPLATE_PP_ALTIVEC 1
548 # include "postprocess_altivec_template.c"
549 # include "postprocess_template.c"
/* x86 with runtime CPU detection: all four SIMD variants are compiled. */
552 #if ARCH_X86 && HAVE_INLINE_ASM
553 # if CONFIG_RUNTIME_CPUDETECT
554 # define TEMPLATE_PP_MMX 1
555 # include "postprocess_template.c"
556 # define TEMPLATE_PP_MMXEXT 1
557 # include "postprocess_template.c"
558 # define TEMPLATE_PP_3DNOW 1
559 # include "postprocess_template.c"
560 # define TEMPLATE_PP_SSE2 1
561 # include "postprocess_template.c"
/* Otherwise exactly one variant is compiled, chosen in the order
 * SSE2, MMXEXT, 3DNow, MMX. */
563 # if HAVE_SSE2_INLINE
564 # define TEMPLATE_PP_SSE2 1
565 # include "postprocess_template.c"
566 # elif HAVE_MMXEXT_INLINE
567 # define TEMPLATE_PP_MMXEXT 1
568 # include "postprocess_template.c"
569 # elif HAVE_AMD3DNOW_INLINE
570 # define TEMPLATE_PP_3DNOW 1
571 # include "postprocess_template.c"
572 # elif HAVE_MMX_INLINE
573 # define TEMPLATE_PP_MMX 1
574 # include "postprocess_template.c"
/* Signature shared by the postProcess_* implementations that postProcess()
 * selects between. */
579 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
580 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
/*
 * postProcess: copy the mode into the context, pick one postProcess_*
 * implementation and call it with the plane description passed in.
 * The C implementation is the default and is always used when BITEXACT is set
 * in lumMode. Otherwise the choice is made at run time from c->cpuCaps or at
 * compile time from the HAVE_*_INLINE macros, depending on
 * CONFIG_RUNTIME_CPUDETECT.
 * NOTE(review): some preprocessor lines of the selection ladder are missing
 * from this excerpt.
 */
582 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
583 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
585 pp_fn pp = postProcess_C;
586 PPContext *c= (PPContext *)vc;
587 PPMode *ppMode= (PPMode *)vm;
/* The whole mode struct is copied into the context by value on every call. */
588 c->ppMode= *ppMode; //FIXME
590 if (!(ppMode->lumMode & BITEXACT)) {
591 #if CONFIG_RUNTIME_CPUDETECT
592 #if ARCH_X86 && HAVE_INLINE_ASM
593 // ordered per speed fastest first
594 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
595 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
596 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
597 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
599 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
601 #else /* CONFIG_RUNTIME_CPUDETECT */
603 pp = postProcess_SSE2;
604 #elif HAVE_MMXEXT_INLINE
605 pp = postProcess_MMX2;
606 #elif HAVE_AMD3DNOW_INLINE
607 pp = postProcess_3DNow;
608 #elif HAVE_MMX_INLINE
609 pp = postProcess_MMX;
611 pp = postProcess_altivec;
613 #endif /* !CONFIG_RUNTIME_CPUDETECT */
616 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
/*
 * pp_help: user-facing description of the filter string syntax. It is logged
 * line by line by pp_get_mode_by_name_and_quality() when the name argument is
 * the word help. Its declaration depends on LIBPOSTPROC_VERSION_INT: a
 * pointer before major version 52, otherwise an array.
 * NOTE(review): the rows for ha and va give the long names hadeblock and
 * vadeblock, while the filters[] table in this file registers ahdeblock and
 * avdeblock - confirm which spelling is intended.
 */
619 /* -pp Command line Help
621 #if LIBPOSTPROC_VERSION_INT < (52<<16)
622 const char *const pp_help=
624 const char pp_help[] =
626 "Available postprocessing filters:\n"
628 "short long name short long option Description\n"
629 "* * a autoq CPU power dependent enabler\n"
630 " c chrom chrominance filtering enabled\n"
631 " y nochrom chrominance filtering disabled\n"
632 " n noluma luma filtering disabled\n"
633 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
634 " 1. difference factor: default=32, higher -> more deblocking\n"
635 " 2. flatness threshold: default=39, lower -> more deblocking\n"
636 " the h & v deblocking filters share these\n"
637 " so you can't set different thresholds for h / v\n"
638 "vb vdeblock (2 threshold) vertical deblocking filter\n"
639 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
640 "va vadeblock (2 threshold) vertical deblocking filter\n"
641 "h1 x1hdeblock experimental h deblock filter 1\n"
642 "v1 x1vdeblock experimental v deblock filter 1\n"
643 "dr dering deringing filter\n"
644 "al autolevels automatic brightness / contrast\n"
645 " f fullyrange stretch luminance to (0..255)\n"
646 "lb linblenddeint linear blend deinterlacer\n"
647 "li linipoldeint linear interpolating deinterlace\n"
648 "ci cubicipoldeint cubic interpolating deinterlacer\n"
649 "md mediandeint median deinterlacer\n"
650 "fd ffmpegdeint ffmpeg deinterlacer\n"
651 "l5 lowpass5 FIR lowpass deinterlacer\n"
652 "de default hb:a,vb:a,dr:a\n"
653 "fa fast h1:a,v1:a,dr:a\n"
654 "ac ha:a:128:7,va:a,dr:a\n"
655 "tn tmpnoise (3 threshold) temporal noise reducer\n"
656 " 1. <= 2. <= 3. larger -> stronger filtering\n"
657 "fq forceQuant <quantizer> force quantizer\n"
659 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
660 "long form example:\n"
661 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
662 "short form example:\n"
663 "vb:a/hb:a/lb de,-vb\n"
// Parse a postprocessing filter description string into a newly allocated PPMode.
//   name:    filter string (filters separated by , or / and options by :),
//            or the word help to log pp_help instead
//   quality: value assigned to q when a filter carries the autoq / a option;
//            q is compared against the minLumQuality / minChromQuality of
//            each matched filter
// NOTE(review): this excerpt omits some original lines (braces, several
// declarations and the return statements), so the return value and the
// error paths are not visible here.
669 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
671 char temp[GET_MODE_BUFFER_SIZE];
673 static const char filterDelimiters[] = ",/";
674 static const char optionDelimiters[] = ":";
675 struct PPMode *ppMode;
679 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
// Help request: log pp_help one line at a time. The copy size of line
// length + 2 leaves room for the newline and the terminator.
683 if (!strcmp(name, "help")) {
685 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
686 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
687 av_log(NULL, AV_LOG_INFO, "%s", temp);
// NOTE(review): no NULL check of the av_malloc result is visible in this
// excerpt - confirm.
692 ppMode= av_malloc(sizeof(PPMode));
// Defaults, used unless a filter option overrides them further down.
695 ppMode->chromMode= 0;
696 ppMode->maxTmpNoise[0]= 700;
697 ppMode->maxTmpNoise[1]= 1500;
698 ppMode->maxTmpNoise[2]= 3000;
699 ppMode->maxAllowedY= 234;
700 ppMode->minAllowedY= 16;
701 ppMode->baseDcDiff= 256/8;
702 ppMode->flatnessThreshold= 56-16-1;
703 ppMode->maxClippedThreshold= 0.01;
// Work on a zero-filled private copy of name: strtok writes terminators
// into the buffer it scans.
706 memset(temp, 0, GET_MODE_BUFFER_SIZE);
707 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
709 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
// Per-filter state. q starts high so the quality thresholds pass unless
// autoq replaces it with the quality argument.
713 int q= 1000000; //PP_QUALITY_MAX;
717 char *options[OPTIONS_ARRAY_SIZE];
720 int numOfUnknownOptions=0;
721 int enable=1; //does the user want us to enable or disable the filter
// Split off the next filter token, then its name (the part before the
// first option delimiter).
723 filterToken= strtok(p, filterDelimiters);
724 if(filterToken == NULL) break;
725 p+= strlen(filterToken) + 1; // p points to next filterToken
726 filterName= strtok(filterToken, optionDelimiters);
727 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
// A leading minus sign marks the filter as disabled; the body of this
// branch is not visible in this excerpt.
729 if(*filterName == '-'){
// Generic options are consumed here; everything else is collected in
// options[] for the filter-specific parsing below.
734 for(;;){ //for all options
735 option= strtok(NULL, optionDelimiters);
736 if(option == NULL) break;
738 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
739 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
740 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
741 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
742 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
744 options[numOfUnknownOptions] = option;
745 numOfUnknownOptions++;
// Stop one short of the array size so the NULL terminator below fits.
747 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
749 options[numOfUnknownOptions] = NULL;
// Alias expansion: when filterName matches an alias, its replacement string
// is spliced into temp at p so that the following iterations parse it.
751 /* replace stuff from the replace Table */
752 for(i=0; replaceTable[2*i]!=NULL; i++){
753 if(!strcmp(replaceTable[2*i], filterName)){
754 int newlen= strlen(replaceTable[2*i + 1]);
761 spaceLeft= p - temp + plen;
762 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
766 memmove(p + newlen, p, plen+1);
767 memcpy(p, replaceTable[2*i + 1], newlen);
// Look the name up in filters[] by long or short name. The mask is cleared
// first and set again only if the filter is enabled and q is high enough.
772 for(i=0; filters[i].shortName!=NULL; i++){
773 if( !strcmp(filters[i].longName, filterName)
774 || !strcmp(filters[i].shortName, filterName)){
775 ppMode->lumMode &= ~filters[i].mask;
776 ppMode->chromMode &= ~filters[i].mask;
779 if(!enable) break; // user wants to disable it
781 if(q >= filters[i].minLumQuality && luma)
782 ppMode->lumMode|= filters[i].mask;
783 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
784 if(q >= filters[i].minChromQuality)
785 ppMode->chromMode|= filters[i].mask;
// autolevels: the fullyrange / f option widens the allowed luma range
// from 16..234 to 0..255.
787 if(filters[i].mask == LEVEL_FIX){
789 ppMode->minAllowedY= 16;
790 ppMode->maxAllowedY= 234;
791 for(o=0; options[o]!=NULL; o++){
792 if( !strcmp(options[o],"fullyrange")
793 ||!strcmp(options[o],"f")){
794 ppMode->minAllowedY= 0;
795 ppMode->maxAllowedY= 255;
796 numOfUnknownOptions--;
// tmpnoise: numeric options fill maxTmpNoise[], at most three of them.
800 else if(filters[i].mask == TEMP_NOISE_FILTER)
805 for(o=0; options[o]!=NULL; o++){
807 ppMode->maxTmpNoise[numOfNoises]=
808 strtol(options[o], &tail, 0);
809 if(tail!=options[o]){
811 numOfUnknownOptions--;
812 if(numOfNoises >= 3) break;
// Deblocking filters: the first numeric option sets baseDcDiff, the second
// flatnessThreshold. Both values live in the single ppMode, so all four
// deblock filters share them.
816 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
817 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
820 for(o=0; options[o]!=NULL && o<2; o++){
822 int val= strtol(options[o], &tail, 0);
823 if(tail==options[o]) break;
825 numOfUnknownOptions--;
826 if(o==0) ppMode->baseDcDiff= val;
827 else ppMode->flatnessThreshold= val;
// forcequant: the quantizer defaults to 15 unless a numeric option is given.
830 else if(filters[i].mask == FORCE_QUANT){
832 ppMode->forcedQuant= 15;
834 for(o=0; options[o]!=NULL && o<1; o++){
836 int val= strtol(options[o], &tail, 0);
837 if(tail==options[o]) break;
839 numOfUnknownOptions--;
840 ppMode->forcedQuant= val;
// An unknown filter name and every option left unconsumed each count as
// one error.
845 if(!filterNameOk) ppMode->error++;
846 ppMode->error += numOfUnknownOptions;
849 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
851 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
858 void pp_free_mode(pp_mode *mode){
/**
 * Replace *p with a freshly zero-filled buffer of the given size.
 * The previous buffer is freed and its contents are not preserved.
 *
 * @param p         address of the pointer to replace
 * @param alignment unused; kept for the existing call sites
 * @param size      number of bytes to allocate
 *
 * On allocation failure *p is left NULL; no error is reported.
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p= av_mallocz(size);
}
867 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
868 int mbWidth = (width+15)>>4;
869 int mbHeight= (height+15)>>4;
873 c->qpStride= qpStride;
875 reallocAlign((void **)&c->tempDst, 8, stride*24);
876 reallocAlign((void **)&c->tempSrc, 8, stride*24);
877 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
878 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
880 c->yHistogram[i]= width*height/64*15/256;
883 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
884 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
885 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
888 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
889 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
890 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
891 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
894 static const char * context_to_name(void * ptr) {
898 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
900 pp_context *pp_get_context(int width, int height, int cpuCaps){
901 PPContext *c= av_malloc(sizeof(PPContext));
902 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
903 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
905 memset(c, 0, sizeof(PPContext));
906 c->av_class = &av_codec_context_class;
907 if(cpuCaps&PP_FORMAT){
908 c->hChromaSubSample= cpuCaps&0x3;
909 c->vChromaSubSample= (cpuCaps>>4)&0x3;
911 c->hChromaSubSample= 1;
912 c->vChromaSubSample= 1;
914 if (cpuCaps & PP_CPU_CAPS_AUTO) {
915 c->cpuCaps = av_get_cpu_flags();
918 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
919 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
920 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
921 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
924 reallocBuffers(c, width, height, stride, qpStride);
931 void pp_free_context(void *vc){
932 PPContext *c = (PPContext*)vc;
935 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
936 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
938 av_free(c->tempBlocks);
939 av_free(c->yHistogram);
942 av_free(c->deintTemp);
943 av_free(c->stdQPTable);
944 av_free(c->nonBQPTable);
945 av_free(c->forcedQPTable);
947 memset(c, 0, sizeof(PPContext));
952 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
953 uint8_t * dst[3], const int dstStride[3],
954 int width, int height,
955 const QP_STORE_T *QP_store, int QPStride,
956 pp_mode *vm, void *vc, int pict_type)
958 int mbWidth = (width+15)>>4;
959 int mbHeight= (height+15)>>4;
960 PPMode *mode = (PPMode*)vm;
961 PPContext *c = (PPContext*)vc;
962 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
963 int absQPStride = FFABS(QPStride);
965 // c->stride and c->QPStride are always positive
966 if(c->stride < minStride || c->qpStride < absQPStride)
967 reallocBuffers(c, width, height,
968 FFMAX(minStride, c->stride),
969 FFMAX(c->qpStride, absQPStride));
971 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
973 QP_store= c->forcedQPTable;
974 absQPStride = QPStride = 0;
975 if(mode->lumMode & FORCE_QUANT)
976 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
978 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
981 if(pict_type & PP_PICT_TYPE_QP2){
983 const int count= mbHeight * absQPStride;
984 for(i=0; i<(count>>2); i++){
985 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
987 for(i<<=2; i<count; i++){
988 c->stdQPTable[i] = QP_store[i]>>1;
990 QP_store= c->stdQPTable;
991 QPStride= absQPStride;
996 for(y=0; y<mbHeight; y++){
997 for(x=0; x<mbWidth; x++){
998 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1000 av_log(c, AV_LOG_INFO, "\n");
1002 av_log(c, AV_LOG_INFO, "\n");
1005 if((pict_type&7)!=3){
1008 const int count= mbHeight * QPStride;
1009 for(i=0; i<(count>>2); i++){
1010 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1012 for(i<<=2; i<count; i++){
1013 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1017 for(i=0; i<mbHeight; i++) {
1018 for(j=0; j<absQPStride; j++) {
1019 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1025 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1026 mode->lumMode, mode->chromMode);
1028 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1029 width, height, QP_store, QPStride, 0, mode, c);
1031 width = (width )>>c->hChromaSubSample;
1032 height = (height)>>c->vChromaSubSample;
1034 if(mode->chromMode){
1035 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1036 width, height, QP_store, QPStride, 1, mode, c);
1037 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1038 width, height, QP_store, QPStride, 2, mode, c);
1040 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1041 linecpy(dst[1], src[1], height, srcStride[1]);
1042 linecpy(dst[2], src[2], height, srcStride[2]);
1045 for(y=0; y<height; y++){
1046 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1047 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);