2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once;
66 the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
84 //#define HAVE_AMD3DNOW
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
/**
 * Return the library version as the packed LIBPOSTPROC_VERSION_INT value.
 */
92 unsigned postproc_version(void)
/* Sanity check: asserts that the micro version is at least 100. */
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 return LIBPOSTPROC_VERSION_INT;
/**
 * Return the FFMPEG_CONFIGURATION string.
 */
98 const char *postproc_configuration(void)
100 return FFMPEG_CONFIGURATION;
/**
 * Return the license string of the library.
 *
 * The literal stored in the binary is LICENSE_PREFIX immediately followed by
 * FFMPEG_LICENSE; the returned pointer is advanced past the prefix, so the
 * caller only sees the FFMPEG_LICENSE part.
 */
103 const char *postproc_license(void)
105 #define LICENSE_PREFIX "libpostproc license: "
/* sizeof() counts the terminating NUL, hence the "- 1" to land exactly on the
 * first character after the prefix. */
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Size in bytes of the scratch copy of the mode string made by
 * pp_get_mode_by_name_and_quality(). */
113 #define GET_MODE_BUFFER_SIZE 500
/* Number of slots in the per-filter options[] pointer array of the mode parser
 * (the last used slot holds the NULL terminator). */
114 #define OPTIONS_ARRAY_SIZE 10
116 #define TEMP_STRIDE 8
117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants for the assembly code paths. wNN repeats the 16-bit value
 * 0x00NN in each of the four words; bNN repeats the byte 0xNN in all eight
 * bytes. NOTE(review): the leading 8 is presumably the requested alignment of
 * DECLARE_ASM_CONST - confirm against its definition. */
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* NOTE(review): by its name, the threshold of the dering filter; its use is
 * not visible in this part of the file. */
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/* Table of all filters known to the mode-string parser.
 * The parser matches a user supplied name against shortName / longName with
 * strcmp() and reads chromDefault, minLumQuality, minChromQuality and mask
 * from the matching entry; the table ends at the entry whose shortName is NULL.
 * NOTE(review): the initializer order is presumably
 * { shortName, longName, chromDefault, minLumQuality, minChromQuality, mask }
 * - confirm against the struct PPFilter declaration. */
133 static struct PPFilter filters[]=
135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143 {"dr", "dering", 1, 5, 6, DERING},
144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153 {NULL, NULL,0,0,0,0} //End Marker
/* Alias table used by the mode-string parser, stored as consecutive
 * (alias, expansion) pairs: when a filter name equals replaceTable[2*i] the
 * parser splices replaceTable[2*i + 1] into the string being parsed.
 * The parser stops scanning at the first NULL alias. */
156 static const char *replaceTable[]=
158 "default", "hb:a,vb:a,dr:a",
159 "de", "hb:a,vb:a,dr:a",
160 "fast", "h1:a,v1:a,dr:a",
161 "fa", "h1:a,v1:a,dr:a",
162 "ac", "ha:a:128:7,va:a,dr:a",
/* Issue a "prefetchnta" instruction for the address p. */
168 static inline void prefetchnta(void *p)
170 __asm__ volatile( "prefetchnta (%0)\n\t"
/* Issue a "prefetcht0" instruction for the address p. */
175 static inline void prefetcht0(void *p)
177 __asm__ volatile( "prefetcht0 (%0)\n\t"
/* Issue a "prefetcht1" instruction for the address p. */
182 static inline void prefetcht1(void *p)
184 __asm__ volatile( "prefetcht1 (%0)\n\t"
/* Issue a "prefetcht2" instruction for the address p. */
189 static inline void prefetcht2(void *p)
191 __asm__ volatile( "prefetcht2 (%0)\n\t"
197 /* The horizontal functions exist only in C because the MMX
198 * code is faster with vertical filters and transposing. */
201 * Check if the given 8x8 Block is mostly "flat"
/**
 * Flatness test over horizontally adjacent pixels (C version).
 *
 * On each of BLOCK_SIZE iterations, counts the neighbouring pairs
 * src[k], src[k+1] (k = 0..6) whose difference lies within +-dcOffset.
 *
 * @param src    first pixel to examine
 * @param stride NOTE(review): presumably the row pitch used to advance src
 *               between iterations; that statement is not visible here
 * @param c      context supplying nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 * @return non-zero when the number of "equal" pairs exceeds
 *         c->ppMode.flatnessThreshold
 */
203 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
/* The tolerance grows with the quantiser: (nonBQP * baseDcDiff) / 256 + 1. */
207 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
208 const int dcThreshold= dcOffset*2 + 1;
/* (unsigned)(a - b + dcOffset) < 2*dcOffset + 1 is a single-compare form of
 * -dcOffset <= a - b <= dcOffset: negative sums wrap to huge unsigned values. */
210 for(y=0; y<BLOCK_SIZE; y++){
211 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
216 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
220 return numEq > c->ppMode.flatnessThreshold;
224 * Check if the middle 8x8 Block in the given 8x16 block is flat
/**
 * Flatness test over vertically adjacent pixels (C version).
 *
 * After skipping four rows, runs BLOCK_SIZE-1 iterations; each one compares
 * the eight pixels src[0..7] with the pixels one stride below and counts the
 * pairs whose difference lies within +-dcOffset.
 *
 * @param src    pointer four rows above the first row that is examined
 * @param stride distance in bytes between vertically adjacent pixels
 * @param c      context supplying nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 * @return non-zero when the number of "equal" pairs exceeds
 *         c->ppMode.flatnessThreshold
 */
226 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
/* The tolerance grows with the quantiser: (nonBQP * baseDcDiff) / 256 + 1. */
230 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
231 const int dcThreshold= dcOffset*2 + 1;
233 src+= stride*4; // src points to begin of the 8x8 Block
/* Same single-compare range test as in the horizontal variant:
 * (unsigned)(a - b + dcOffset) < 2*dcOffset + 1  <=>  |a - b| <= dcOffset. */
234 for(y=0; y<BLOCK_SIZE-1; y++){
235 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
241 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
242 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
245 return numEq > c->ppMode.flatnessThreshold;
/**
 * Check that sampled pixel pairs within a row differ by at most 2*QP.
 *
 * The pairs tested are (0,5), (2,7), (4,1) and (6,3); the function returns 0
 * as soon as one pair differs by more than 2*QP.
 * NOTE(review): the success return value and any row loop are on lines not
 * visible here.
 */
248 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
/* (unsigned)(a - b + 2*QP) > 4*QP  <=>  |a - b| > 2*QP (single-compare form). */
252 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
254 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
256 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
258 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
/**
 * Check that sampled pixel pairs within a column differ by at most 2*QP.
 *
 * Columns are visited in groups of four; in each group one row pair per column
 * is tested: rows (0,5), (2,7), (4,1) and (6,3). Returns 0 as soon as one pair
 * differs by more than 2*QP.
 * NOTE(review): the success return value is on a line not visible here.
 */
264 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
/* (unsigned)(a - b + 2*QP) > 4*QP  <=>  |a - b| > 2*QP (single-compare form). */
268 for(x=0; x<BLOCK_SIZE; x+=4){
269 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
271 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/**
 * Classify a block for horizontal filtering: first the flatness test
 * (isHorizDC_C), and for flat blocks additionally the min/max test
 * (isHorizMinMaxOk_C) with the context's QP.
 * NOTE(review): the values returned for each outcome are on lines not visible
 * here.
 */
277 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
279 if( isHorizDC_C(src, stride, c) ){
280 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/**
 * Classify a block for vertical filtering: first the flatness test
 * (isVertDC_C), and for flat blocks additionally the min/max test
 * (isVertMinMaxOk_C) with the context's QP.
 * NOTE(review): the values returned for each outcome are on lines not visible
 * here.
 */
289 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
291 if( isVertDC_C(src, stride, c) ){
292 if( isVertMinMaxOk_C(src, stride, c->QP) )
/**
 * Default horizontal deblocking filter (C version).
 *
 * On each of BLOCK_SIZE iterations it measures an "energy" across the edge
 * between dst[3] and dst[4] and, when that is below 8*QP, derives a correction
 * d from the middle energy and the smaller of the two neighbouring energies.
 * NOTE(review): the scaling/clipping of d and the write-back to dst are on
 * lines not visible here.
 */
301 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
304 for(y=0; y<BLOCK_SIZE; y++){
/* Weighted difference centred on the dst[3] | dst[4] edge. */
305 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
307 if(FFABS(middleEnergy) < 8*c->QP){
308 const int q=(dst[3] - dst[4])/2;
/* The same weighted difference, taken two pixels to the left and right. */
309 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
310 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
312 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give d the sign opposite to middleEnergy. */
316 d*= FFSIGN(-middleEnergy);
337 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
338 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/**
 * Horizontal low pass over the eight pixels dst[0..7] of each row (C version).
 *
 * dst[-1] and dst[8] are read as outer neighbours; each is used only when it is
 * within QP of the adjacent edge pixel, otherwise the edge pixel itself is
 * used as padding. Every output pixel is
 * (sums[k] + sums[k+2] + 2*dst[k]) >> 4, where sums[] are sliding window sums.
 */
340 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
343 for(y=0; y<BLOCK_SIZE; y++){
344 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
345 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
/* Running sums: each entry drops the oldest sample and adds the next one.
 * The +4 carried by every entry makes the pair added below contribute +8,
 * i.e. round-to-nearest for the final >>4. */
348 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
349 sums[1] = sums[0] - first + dst[3];
350 sums[2] = sums[1] - first + dst[4];
351 sums[3] = sums[2] - first + dst[5];
352 sums[4] = sums[3] - first + dst[6];
353 sums[5] = sums[4] - dst[0] + dst[7];
354 sums[6] = sums[5] - dst[1] + last;
355 sums[7] = sums[6] - dst[2] + last;
356 sums[8] = sums[7] - dst[3] + last;
357 sums[9] = sums[8] - dst[4] + last;
/* All sums were computed from the unmodified row above, so overwriting dst[]
 * in place here is safe. */
359 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
360 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
361 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
362 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
363 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
364 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
365 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
366 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
373 * Experimental Filter 1 (Horizontal)
374 * will not damage linear gradients
375 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
376 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
377 * MMX2 version does correct clipping, C version does not
378 * not identical with the vertical one
/**
 * Experimental horizontal deblocking filter 1 (C version).
 *
 * Builds a 256-entry lookup table of packed 64-bit correction patterns and
 * then, on each of BLOCK_SIZE iterations, derives a correction strength from
 * the pixel steps src[1]-src[2], src[3]-src[4] and src[5]-src[6].
 *
 * Fix: the fourth "negated" byte D was computed from c instead of d, so the
 * packed entry a,b,c,d,D,C,B,A was not the mirrored negation it is meant to be.
 *
 * NOTE(review): the guard around the table construction, the use of the table
 * and the write-back to src are on lines not visible here.
 *
 * @param src    first pixel of the row to filter
 * @param stride NOTE(review): presumably the row pitch - confirm
 * @param QP     quantiser of the block
 */
380 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
/* static: the table pointer persists across calls. */
383 static uint64_t *lut= NULL;
/* 256 entries of 8 bytes each. */
387 lut = av_malloc(256*8);
/* Map the table index to a signed, doubled value: 0..127 -> 0..254,
 * 128..255 -> -256..-2. Two alternative coefficient sets follow; which one is
 * active is decided by lines not visible here. */
390 int v= i < 128 ? 2*i : 2*(i-256);
392 //Simulate 112242211 9-Tap filter
393 uint64_t a= (v/16) & 0xFF;
394 uint64_t b= (v/8) & 0xFF;
395 uint64_t c= (v/4) & 0xFF;
396 uint64_t d= (3*v/8) & 0xFF;
398 //Simulate piecewise linear interpolation
399 uint64_t a= (v/16) & 0xFF;
400 uint64_t b= (v*3/16) & 0xFF;
401 uint64_t c= (v*5/16) & 0xFF;
402 uint64_t d= (7*v/16) & 0xFF;
/* A..D are the 8-bit two's-complement negations of a..d. */
403 uint64_t A= (0x100 - a)&0xFF;
404 uint64_t B= (0x100 - b)&0xFF;
405 uint64_t C= (0x100 - c)&0xFF;
406 uint64_t D= (0x100 - d)&0xFF;
/* Pack as bytes a b c d | D C B A (most significant byte first). */
408 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
409 (D<<24) | (C<<16) | (B<<8) | (A);
410 //lut[i] = (v<<32) | (v<<24);
414 for(y=0; y<BLOCK_SIZE; y++){
415 int a= src[1] - src[2];
416 int b= src[3] - src[4];
417 int c= src[5] - src[6];
/* Strength: how much the middle step exceeds the mean of the two outer
 * steps, clamped at zero. */
419 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
/* The correction has the sign opposite to the middle step b. */
422 int v = d * FFSIGN(-b);
436 * accurate deblock filter
/**
 * "Accurate" deblocking filter (C version), usable in either direction.
 *
 * @param src    pointer four steps before the first filtered sample
 * @param step   distance between consecutive samples along the filtered
 *               direction
 * @param stride NOTE(review): presumably the distance between the lines that
 *               are filtered one after another; its use is not visible here
 * @param c      context supplying nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 *
 * For each line, the nine neighbouring pairs src[-1*step]..src[8*step] are
 * tested for near-equality. If more than flatnessThreshold pairs pass, the
 * line is treated as flat: min/max are tracked and the low-pass is applied.
 * Otherwise an energy-based correction around the src[3*step] | src[4*step]
 * edge is computed.
 * NOTE(review): several conditions and the final write-back of the correction
 * are on lines not visible here.
 */
438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
/* The tolerance grows with the quantiser: (nonBQP * baseDcDiff) / 256 + 1. */
441 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
442 const int dcThreshold= dcOffset*2 + 1;
444 src+= step*4; // src points to begin of the 8x8 Block
/* (unsigned)(a - b + dcOffset) < 2*dcOffset + 1  <=>  |a - b| <= dcOffset. */
448 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
449 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
450 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
451 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
452 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
453 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
454 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
455 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
456 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
/* Flat line: find the value range, then low-pass. */
457 if(numEq > c->ppMode.flatnessThreshold){
460 if(src[0] > src[step]){
/* Pairwise min/max scan: ordering the two samples of a pair first means each
 * sample needs only one further comparison. */
468 if(src[x*step] > src[(x+1)*step]){
469 if(src[x *step] > max) max= src[ x *step];
470 if(src[(x+1)*step] < min) min= src[(x+1)*step];
472 if(src[(x+1)*step] > max) max= src[(x+1)*step];
473 if(src[ x *step] < min) min= src[ x *step];
/* Use the outer neighbour as padding only when it is within QP of the edge
 * sample; otherwise pad with the edge sample itself. */
477 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
478 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Sliding window sums; the +4 in each gives +8 (round-to-nearest) once two of
 * them are added before the >>4 below. */
481 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
482 sums[1] = sums[0] - first + src[3*step];
483 sums[2] = sums[1] - first + src[4*step];
484 sums[3] = sums[2] - first + src[5*step];
485 sums[4] = sums[3] - first + src[6*step];
486 sums[5] = sums[4] - src[0*step] + src[7*step];
487 sums[6] = sums[5] - src[1*step] + last;
488 sums[7] = sums[6] - src[2*step] + last;
489 sums[8] = sums[7] - src[3*step] + last;
490 sums[9] = sums[8] - src[4*step] + last;
492 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
493 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
494 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
495 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
496 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
497 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
498 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
499 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Non-flat line: energy based correction around the middle edge. */
502 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
504 if(FFABS(middleEnergy) < 8*QP){
505 const int q=(src[3*step] - src[4*step])/2;
506 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
507 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
509 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give d the sign opposite to middleEnergy. */
513 d*= FFSIGN(-middleEnergy);
537 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
/* Decide which variants of the template get compiled. With
 * CONFIG_RUNTIME_CPUDETECT every condition below is true, so all variants of
 * the architecture are built; otherwise the HAVE_* build flags select them.
 * NOTE(review): several #if/#define/#endif lines of this section are not
 * visible here, so the exact nesting cannot be confirmed. */
539 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
544 #define COMPILE_ALTIVEC
545 #endif //HAVE_ALTIVEC
549 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
553 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
557 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
558 #define COMPILE_3DNOW
560 #endif /* ARCH_X86 */
/* Each variant below redefines the HAVE_* flags and RENAME(), then includes
 * postprocess_template.c again, so every function of the template is emitted
 * once per variant with the suffix that RENAME() appends. */
567 #define HAVE_AMD3DNOW 0
569 #define HAVE_ALTIVEC 0
/* Plain C variant: names get the suffix _C. */
572 #define RENAME(a) a ## _C
573 #include "postprocess_template.c"
/* AltiVec variant: names get the suffix _altivec. */
576 #ifdef COMPILE_ALTIVEC
579 #define HAVE_ALTIVEC 1
580 #define RENAME(a) a ## _altivec
581 #include "postprocess_altivec_template.c"
582 #include "postprocess_template.c"
/* MMX variant: names get the suffix _MMX. */
590 #define RENAME(a) a ## _MMX
591 #include "postprocess_template.c"
/* MMX2 variant: names get the suffix _MMX2. */
601 #define RENAME(a) a ## _MMX2
602 #include "postprocess_template.c"
/* 3DNow! variant: names get the suffix _3DNow. */
613 #define HAVE_AMD3DNOW 1
614 #define RENAME(a) a ## _3DNow
615 #include "postprocess_template.c"
618 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/**
 * Post-process one plane by forwarding to the variant of the template that
 * fits the CPU.
 *
 * The mode is copied into the context first; all remaining arguments are
 * passed through unchanged to the selected postProcess_* implementation.
 * With CONFIG_RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps on every
 * call; otherwise it is fixed at compile time.
 * NOTE(review): the #if/#elif lines selecting between the branches below are
 * not visible here.
 *
 * @param src, srcStride  source plane and its line size
 * @param dst, dstStride  destination plane and its line size
 * @param width, height   plane dimensions
 * @param QPs, QPStride   quantiser table and its line size
 * @param isColor         0 for the first plane, 1 / 2 for the other two (as
 *                        passed by pp_postprocess)
 * @param vm              mode, actually a PPMode
 * @param vc              context, actually a PPContext
 */
620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
621 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
623 PPContext *c= (PPContext *)vc;
624 PPMode *ppMode= (PPMode *)vm;
625 c->ppMode= *ppMode; //FIXME
627 // Using ifs here as they are faster than function pointers although the
628 // difference would not be measurable here but it is much better because
629 // someone might exchange the CPU without restarting MPlayer ;)
630 #if CONFIG_RUNTIME_CPUDETECT
632 // ordered per speed fastest first
633 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
634 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
635 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
636 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
637 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
638 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
640 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
643 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
644 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
647 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
649 #else /* CONFIG_RUNTIME_CPUDETECT */
651 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661 #endif /* !CONFIG_RUNTIME_CPUDETECT */
664 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
665 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
667 /* -pp Command line Help
/* Help text for the post-processing mode string.
 * Every short/long name printed here must be spelled exactly as in the
 * filters[] and replaceTable[] tables, because the parser matches names with
 * the case-sensitive strcmp().
 * Fix: the accurate deblock filters and the force-quantizer filter were
 * advertised as "hadeblock", "vadeblock" and "forceQuant", none of which the
 * parser accepts; they now read "ahdeblock", "avdeblock" and "forcequant" as
 * in filters[]. */
669 #if LIBPOSTPROC_VERSION_INT < (52<<16)
670 const char *const pp_help=
672 const char pp_help[] =
674 "Available postprocessing filters:\n"
676 "short long name short long option Description\n"
677 "* * a autoq CPU power dependent enabler\n"
678 " c chrom chrominance filtering enabled\n"
679 " y nochrom chrominance filtering disabled\n"
680 " n noluma luma filtering disabled\n"
681 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
682 " 1. difference factor: default=32, higher -> more deblocking\n"
683 " 2. flatness threshold: default=39, lower -> more deblocking\n"
684 " the h & v deblocking filters share these\n"
685 " so you can't set different thresholds for h / v\n"
686 "vb vdeblock (2 threshold) vertical deblocking filter\n"
687 "ha ahdeblock (2 threshold) horizontal deblocking filter\n"
688 "va avdeblock (2 threshold) vertical deblocking filter\n"
689 "h1 x1hdeblock experimental h deblock filter 1\n"
690 "v1 x1vdeblock experimental v deblock filter 1\n"
691 "dr dering deringing filter\n"
692 "al autolevels automatic brightness / contrast\n"
693 " f fullyrange stretch luminance to (0..255)\n"
694 "lb linblenddeint linear blend deinterlacer\n"
695 "li linipoldeint linear interpolating deinterlace\n"
696 "ci cubicipoldeint cubic interpolating deinterlacer\n"
697 "md mediandeint median deinterlacer\n"
698 "fd ffmpegdeint ffmpeg deinterlacer\n"
699 "l5 lowpass5 FIR lowpass deinterlacer\n"
700 "de default hb:a,vb:a,dr:a\n"
701 "fa fast h1:a,v1:a,dr:a\n"
702 "ac ha:a:128:7,va:a,dr:a\n"
703 "tn tmpnoise (3 threshold) temporal noise reducer\n"
704 " 1. <= 2. <= 3. larger -> stronger filtering\n"
705 "fq forcequant <quantizer> force quantizer\n"
707 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
708 "long form example:\n"
709 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
710 "short form example:\n"
711 "vb:a/hb:a/lb de,-vb\n"
717 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
719 char temp[GET_MODE_BUFFER_SIZE];
721 static const char filterDelimiters[] = ",/";
722 static const char optionDelimiters[] = ":";
723 struct PPMode *ppMode;
726 ppMode= av_malloc(sizeof(PPMode));
729 ppMode->chromMode= 0;
730 ppMode->maxTmpNoise[0]= 700;
731 ppMode->maxTmpNoise[1]= 1500;
732 ppMode->maxTmpNoise[2]= 3000;
733 ppMode->maxAllowedY= 234;
734 ppMode->minAllowedY= 16;
735 ppMode->baseDcDiff= 256/8;
736 ppMode->flatnessThreshold= 56-16-1;
737 ppMode->maxClippedThreshold= 0.01;
740 memset(temp, 0, GET_MODE_BUFFER_SIZE);
741 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
743 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
747 int q= 1000000; //PP_QUALITY_MAX;
751 char *options[OPTIONS_ARRAY_SIZE];
754 int numOfUnknownOptions=0;
755 int enable=1; //does the user want us to enabled or disabled the filter
757 filterToken= strtok(p, filterDelimiters);
758 if(filterToken == NULL) break;
759 p+= strlen(filterToken) + 1; // p points to next filterToken
760 filterName= strtok(filterToken, optionDelimiters);
761 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
763 if(*filterName == '-'){
768 for(;;){ //for all options
769 option= strtok(NULL, optionDelimiters);
770 if(option == NULL) break;
772 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
773 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
774 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
775 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
776 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
778 options[numOfUnknownOptions] = option;
779 numOfUnknownOptions++;
781 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
783 options[numOfUnknownOptions] = NULL;
785 /* replace stuff from the replace Table */
786 for(i=0; replaceTable[2*i]!=NULL; i++){
787 if(!strcmp(replaceTable[2*i], filterName)){
788 int newlen= strlen(replaceTable[2*i + 1]);
795 spaceLeft= p - temp + plen;
796 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
800 memmove(p + newlen, p, plen+1);
801 memcpy(p, replaceTable[2*i + 1], newlen);
806 for(i=0; filters[i].shortName!=NULL; i++){
807 if( !strcmp(filters[i].longName, filterName)
808 || !strcmp(filters[i].shortName, filterName)){
809 ppMode->lumMode &= ~filters[i].mask;
810 ppMode->chromMode &= ~filters[i].mask;
813 if(!enable) break; // user wants to disable it
815 if(q >= filters[i].minLumQuality && luma)
816 ppMode->lumMode|= filters[i].mask;
817 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
818 if(q >= filters[i].minChromQuality)
819 ppMode->chromMode|= filters[i].mask;
821 if(filters[i].mask == LEVEL_FIX){
823 ppMode->minAllowedY= 16;
824 ppMode->maxAllowedY= 234;
825 for(o=0; options[o]!=NULL; o++){
826 if( !strcmp(options[o],"fullyrange")
827 ||!strcmp(options[o],"f")){
828 ppMode->minAllowedY= 0;
829 ppMode->maxAllowedY= 255;
830 numOfUnknownOptions--;
834 else if(filters[i].mask == TEMP_NOISE_FILTER)
839 for(o=0; options[o]!=NULL; o++){
841 ppMode->maxTmpNoise[numOfNoises]=
842 strtol(options[o], &tail, 0);
843 if(tail!=options[o]){
845 numOfUnknownOptions--;
846 if(numOfNoises >= 3) break;
850 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
851 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
854 for(o=0; options[o]!=NULL && o<2; o++){
856 int val= strtol(options[o], &tail, 0);
857 if(tail==options[o]) break;
859 numOfUnknownOptions--;
860 if(o==0) ppMode->baseDcDiff= val;
861 else ppMode->flatnessThreshold= val;
864 else if(filters[i].mask == FORCE_QUANT){
866 ppMode->forcedQuant= 15;
868 for(o=0; options[o]!=NULL && o<1; o++){
870 int val= strtol(options[o], &tail, 0);
871 if(tail==options[o]) break;
873 numOfUnknownOptions--;
874 ppMode->forcedQuant= val;
879 if(!filterNameOk) ppMode->error++;
880 ppMode->error += numOfUnknownOptions;
883 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
885 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* NOTE(review): the body is not visible here; by its name this releases a mode
 * obtained from pp_get_mode_by_name_and_quality() - confirm. */
892 void pp_free_mode(pp_mode *mode){
/**
 * Replace *p with a fresh zero-initialised buffer of size bytes
 * (av_mallocz).
 * NOTE(review): "alignment" is not used on the visible lines, and the release
 * of the previous buffer is presumably on a line not visible here - confirm.
 */
896 static void reallocAlign(void **p, int alignment, int size){
898 *p= av_mallocz(size);
/**
 * (Re)allocate all work buffers of the context for the given geometry.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size of the QP tables, stored in c->qpStride
 */
901 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
/* Picture size in 16x16 macroblocks, rounded up. */
902 int mbWidth = (width+15)>>4;
903 int mbHeight= (height+15)>>4;
907 c->qpStride= qpStride;
909 reallocAlign((void **)&c->tempDst, 8, stride*24);
910 reallocAlign((void **)&c->tempSrc, 8, stride*24);
911 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
912 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
/* Seed every histogram bin with the same value derived from the picture area.
 * NOTE(review): width*height is computed in int and could overflow for very
 * large pictures. */
914 c->yHistogram[i]= width*height/64*15/256;
917 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
918 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
919 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
922 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
/* QP tables: two full tables (qpStride entries per macroblock row) and one
 * single row for the forced-quantiser case. */
923 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
924 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
925 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Name callback stored in the AVClass of the context.
 * NOTE(review): the body is not visible here. */
928 static const char * context_to_name(void * ptr) {
/* AVClass installed in every PPContext by pp_get_context(); the context is
 * passed as the first argument of the av_log() calls in pp_postprocess(). */
932 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/**
 * Allocate and initialise a post-processing context.
 *
 * @param width   picture width, used to size the internal buffers
 * @param height  picture height, used to size the internal buffers
 * @param cpuCaps flag word; when PP_FORMAT is set, bits 0-1 and 4-5 carry the
 *                horizontal / vertical chroma subsampling shifts, otherwise
 *                both shifts default to 1
 * NOTE(review): the return statement and any check of the av_malloc() result
 * are not visible here - confirm the allocation is checked before the memset.
 */
934 pp_context *pp_get_context(int width, int height, int cpuCaps){
935 PPContext *c= av_malloc(sizeof(PPContext));
/* Initial guesses only; pp_postprocess() reallocates when the real strides
 * turn out to be larger. */
936 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
937 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
939 memset(c, 0, sizeof(PPContext));
940 c->av_class = &av_codec_context_class;
942 if(cpuCaps&PP_FORMAT){
943 c->hChromaSubSample= cpuCaps&0x3;
944 c->vChromaSubSample= (cpuCaps>>4)&0x3;
946 c->hChromaSubSample= 1;
947 c->vChromaSubSample= 1;
950 reallocBuffers(c, width, height, stride, qpStride);
/**
 * Release the work buffers of a context and wipe the context structure.
 * NOTE(review): further av_free() calls, including the release of the context
 * itself, are presumably on lines not visible here.
 *
 * @param vc context returned by pp_get_context()
 */
957 void pp_free_context(void *vc){
958 PPContext *c = (PPContext*)vc;
961 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
962 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
964 av_free(c->tempBlocks);
965 av_free(c->yHistogram);
968 av_free(c->deintTemp);
969 av_free(c->stdQPTable);
970 av_free(c->nonBQPTable);
971 av_free(c->forcedQPTable);
/* Clear all (now dangling) pointers and state. */
973 memset(c, 0, sizeof(PPContext));
/**
 * Post-process one picture: plane 0 always, planes 1 and 2 either filtered
 * (when mode->chromMode is non-zero) or copied unchanged.
 *
 * @param src, srcStride  the three source planes and their line sizes
 * @param dst, dstStride  the three destination planes and their line sizes
 * @param width, height   size of plane 0; planes 1/2 are processed at this size
 *                        shifted right by the context's chroma subsampling
 * @param QP_store        per-macroblock quantisers, or NULL
 * @param QPStride        line size of QP_store (may be negative)
 * @param vm              mode, actually a PPMode
 * @param vc              context, actually a PPContext
 * @param pict_type       picture type in the low three bits, optionally
 *                        combined with PP_PICT_TYPE_QP2
 */
978 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
979 uint8_t * dst[3], const int dstStride[3],
980 int width, int height,
981 const QP_STORE_T *QP_store, int QPStride,
982 pp_mode *vm, void *vc, int pict_type)
/* Picture size in 16x16 macroblocks, rounded up. */
984 int mbWidth = (width+15)>>4;
985 int mbHeight= (height+15)>>4;
986 PPMode *mode = (PPMode*)vm;
987 PPContext *c = (PPContext*)vc;
988 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
989 int absQPStride = FFABS(QPStride);
/* Grow the work buffers when the strides are larger than what the context was
 * sized for. */
991 // c->stride and c->QPStride are always positive
992 if(c->stride < minStride || c->qpStride < absQPStride)
993 reallocBuffers(c, width, height,
994 FFMAX(minStride, c->stride),
995 FFMAX(c->qpStride, absQPStride));
/* No QP table, or a forced quantiser: use a single synthetic row with stride 0
 * so every macroblock row reads the same values. */
997 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
999 QP_store= c->forcedQPTable;
1000 absQPStride = QPStride = 0;
1001 if(mode->lumMode & FORCE_QUANT)
1002 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1004 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
/* PP_PICT_TYPE_QP2: halve every QP into stdQPTable. Four entries are handled
 * per 32-bit word; the 0x7F mask removes the bit shifted in from the
 * neighbouring byte. The second loop handles the remaining tail.
 * NOTE(review): this assumes QP_STORE_T is one byte wide - confirm. */
1007 if(pict_type & PP_PICT_TYPE_QP2){
1009 const int count= mbHeight * absQPStride;
1010 for(i=0; i<(count>>2); i++){
1011 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1013 for(i<<=2; i<count; i++){
1014 c->stdQPTable[i] = QP_store[i]>>1;
1016 QP_store= c->stdQPTable;
1017 QPStride= absQPStride;
/* Dump of the QP table; NOTE(review): the condition enabling it is not visible
 * here. */
1022 for(y=0; y<mbHeight; y++){
1023 for(x=0; x<mbWidth; x++){
1024 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1026 av_log(c, AV_LOG_INFO, "\n");
1028 av_log(c, AV_LOG_INFO, "\n");
/* For every picture whose type (low three bits) is not 3, remember the QPs,
 * masked to six bits, in nonBQPTable. Two copy variants follow: a word-wise
 * one over the whole table and a row-by-row one; NOTE(review): the condition
 * choosing between them is not visible here. */
1031 if((pict_type&7)!=3){
1034 const int count= mbHeight * QPStride;
1035 for(i=0; i<(count>>2); i++){
1036 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1038 for(i<<=2; i<count; i++){
1039 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1043 for(i=0; i<mbHeight; i++) {
1044 for(j=0; j<absQPStride; j++) {
1045 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1051 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1052 mode->lumMode, mode->chromMode);
/* Plane 0 (isColor = 0). */
1054 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1055 width, height, QP_store, QPStride, 0, mode, c);
/* Planes 1 and 2 use the subsampled size. */
1057 width = (width )>>c->hChromaSubSample;
1058 height = (height)>>c->vChromaSubSample;
1060 if(mode->chromMode){
1061 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1062 width, height, QP_store, QPStride, 1, mode, c);
1063 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1064 width, height, QP_store, QPStride, 2, mode, c);
/* No chroma filtering: copy the planes, in one go per plane when source and
 * destination share the same stride, otherwise row by row. */
1066 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1067 linecpy(dst[1], src[1], height, srcStride[1]);
1068 linecpy(dst[2], src[2], height, srcStride[2]);
1071 for(y=0; y<height; y++){
1072 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1073 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);