2 Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 doVertDefFilter Ec Ec e e
28 doHorizDefFilter Ec Ec e e
30 Vertical RKAlgo1 E a a
31 Horizontal RKAlgo1 a a
34 LinIpolDeinterlace e E E*
35 CubicIpolDeinterlace a e e*
36 LinBlendDeinterlace e E E*
37 MedianDeinterlace# Ec Ec
40 * I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
41 # more or less self-invented filters, so the exactness isn't too meaningful
42 E = Exact implementation
43 e = almost exact implementation (slightly different rounding,...)
44 a = alternative / approximate impl
45 c = checked against the other implementations (-vo md5)
50 remove global/static vars
51 reduce the time wasted on the mem transfer
52 implement everything in C at least (done at the moment but ...)
53 unroll stuff if instructions depend too much on the prior one
54 we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
55 move YScale thing to the end instead of fixing QP
56 write a faster and higher quality deblocking filter :)
57 make the main loop more flexible (variable number of blocks at once;
58 the if/else stuff per block is slowing things down)
59 compare the quality & speed of all filters
63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
65 commandline option for the deblock / dering thresholds
67 don't use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note: cpudetect uses ARCH_X86)
71 //Changelog: use the CVS log
73 #include "../config.h"
86 //#define DEBUG_BRIGHTNESS
87 //#include "../libvo/fastmemcpy.h"
88 #include "postprocess.h"
89 #include "../cpudetect.h"
90 #include "../mangle.h"
// Generic arithmetic helper macros.
// Every argument may be evaluated more than once, so never pass an expression
// with side effects (e.g. MIN(i++, j)).
92 #define MIN(a,b) ((a) > (b) ? (b) : (a))
93 #define MAX(a,b) ((a) < (b) ? (b) : (a))
94 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
// SIGN yields -1 for zero as well as for negative values; it never yields 0.
95 #define SIGN(a) ((a) > 0 ? 1 : -1)
97 #define GET_MODE_BUFFER_SIZE 500
98 #define OPTIONS_ARRAY_SIZE 10
101 #define CAN_COMPILE_X86_ASM
104 #ifdef CAN_COMPILE_X86_ASM
// File-scope 64-bit constants and scratch variables, all 8-byte aligned.
// (Presumably they are referenced by name from inline asm in the template —
// TODO confirm; no C use other than unusedVariableWarningFixer() is visible here.)
//
// Luminance offset/scale, four 16-bit lanes each (scale 0x0100 per lane).
105 static volatile uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL;
106 static volatile uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL;
// wNN: the 16-bit value 0xNN repeated in all four words.
107 static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
108 static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
109 static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL;
// bmXXXXXXXX: byte masks. Each digit of the name stands for one byte, most
// significant byte first; a 1 means that byte is 0xFF, a 0 means 0x00.
110 static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL;
111 static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL;
112 static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL;
113 static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL;
114 static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL;
115 static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL;
116 static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL;
117 static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL;
118 static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL;
119 static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL;
120 static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL;
121 static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL;
// bNN: the byte value 0xNN repeated in all eight bytes.
122 static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
123 static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
124 static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
125 static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL;
126 static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL;
127 static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
128 static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL;
129 static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL;
130 static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
// Byte-replicated 0x7E and 0x7C.
131 static uint64_t __attribute__((aligned(8))) mmxDCOffset= 0x7E7E7E7E7E7E7E7ELL;
132 static uint64_t __attribute__((aligned(8))) mmxDCThreshold= 0x7C7C7C7C7C7C7C7CLL;
133 static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL;
// Zero-initialised scratch and quantiser variables. They are shared mutable
// file-scope state (see "remove global/static vars" in the TODO list above).
134 static uint64_t __attribute__((aligned(8))) temp0=0;
135 static uint64_t __attribute__((aligned(8))) temp1=0;
136 static uint64_t __attribute__((aligned(8))) temp2=0;
137 static uint64_t __attribute__((aligned(8))) temp3=0;
138 static uint64_t __attribute__((aligned(8))) temp4=0;
139 static uint64_t __attribute__((aligned(8))) temp5=0;
140 static uint64_t __attribute__((aligned(8))) pQPb=0;
141 static uint64_t __attribute__((aligned(8))) pQPb2=0;
142 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code
143 static uint32_t __attribute__((aligned(4))) maxTmpNoise[4];
// NOTE(review): second definition of packedYOffset/packedYScale (not volatile,
// no alignment attribute). Presumably this is the non-asm branch of the
// CAN_COMPILE_X86_ASM conditional; its #else/#endif lines are not visible here.
145 static uint64_t packedYOffset= 0x0000000000000000LL;
146 static uint64_t packedYScale= 0x0100010001000100LL;
// Defined elsewhere; readNPPOpt() sets it to GET_PP_QUALITY_MAX.
149 extern int divx_quality;
150 int newPPFlag=0; //is set if -npp is used
// One parsed mode per quality level (0..GET_PP_QUALITY_MAX), filled by readNPPOpt().
151 struct PPMode gPPMode[GET_PP_QUALITY_MAX+1];
// Set to 1 by the option readers; postprocess()/postprocess2() test them
// together with `verbose` before printing which filters are in use.
152 static int firstTime = 0, firstTime2 = 0;
// Flatness thresholds. isHorizDC() compares its count against hFlatnessThreshold;
// both values are overwritten by the second numeric option of the deblock filters.
156 int hFlatnessThreshold= 56 - 16;
157 int vFlatnessThreshold= 56 - 16;
158 int deringThreshold= 20;
// Upper bound used by the flatness test in isHorizDC().
161 static int dcThreshold;
163 //amount of "black" you are willing to lose to get a brightness-corrected picture
164 double maxClippedThreshold= 0.01;
// Table of the filters selectable by name, terminated by an entry whose
// shortName is NULL. getPPModeByNameAndQuality() matches a requested name
// against both strings of an entry and reads its chromDefault, minLumQuality,
// minChromQuality and mask fields.
// NOTE(review): the initialiser order is presumably
// {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask} —
// confirm against struct PPFilter in postprocess.h.
166 static struct PPFilter filters[]=
168 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
169 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
170 {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
171 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},
172 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
173 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
174 {"dr", "dering", 1, 5, 6, DERING},
175 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
176 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
177 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
178 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
179 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
180 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
181 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
182 {NULL, NULL,0,0,0,0} //End Marker
// Alias table stored as consecutive (name, replacement) string pairs.
// getPPModeByNameAndQuality() reads replaceTable[2*i] as the alias name and
// replaceTable[2*i + 1] as the filter list spliced in for it, stopping at a
// NULL name (the terminating entry is not visible in this excerpt).
// NOTE(review): the -npp help text describes "default" as "hb:a,vb:a,dr:a,al"
// and "fast" as "h1:a,v1:a,dr:a,al", but the replacements here also append
// tmpnoise:a:150:200:400 — one of the two is out of date; confirm which.
185 static char *replaceTable[]=
187 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
188 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
189 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
190 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
194 #ifdef CAN_COMPILE_X86_ASM
// Dummy function whose only job is to mention every file-scope constant in a
// single expression, so the compiler does not report them as unused.
// The expression sums all of them; if the sum is 0 it assigns 0 to b00, which
// already holds 0.
// NOTE(review): presumably never called — confirm.
195 static inline void unusedVariableWarningFixer()
198 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000
199 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110
200 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F
201 + bFF + b20 + b04+ b08 + pQPb2 + b80 + mmxDCOffset + mmxDCThreshold + b3F + temp0 + temp1 + temp2 + temp3 + temp4
202 + temp5 + pQPb== 0) b00=0;
// Executes the x86 `rdtsc` (read time-stamp counter) instruction through inline asm.
// NOTE(review): the asm operands and the return statement are not visible in
// this excerpt; presumably the counter value is returned as a long long.
207 static inline long long rdtsc()
210 asm volatile( "rdtsc\n\t"
213 // printf("%d\n", int(l/1000));
218 #ifdef CAN_COMPILE_X86_ASM
// Thin inline-asm wrappers, one per x86 prefetch instruction
// (prefetchnta, prefetcht0, prefetcht1, prefetcht2). Each issues the
// instruction on an address operand; presumably that operand is p — the
// operand lists are not visible in this excerpt.
219 static inline void prefetchnta(void *p)
221 asm volatile( "prefetchnta (%0)\n\t"
226 static inline void prefetcht0(void *p)
228 asm volatile( "prefetcht0 (%0)\n\t"
233 static inline void prefetcht1(void *p)
235 asm volatile( "prefetcht1 (%0)\n\t"
240 static inline void prefetcht2(void *p)
242 asm volatile( "prefetcht2 (%0)\n\t"
248 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
251 * Check if the given 8x8 Block is mostly "flat"
// Flatness test in the horizontal direction.
// Inside a loop over BLOCK_SIZE rows, the seven neighbouring pixel pairs
// (src[k], src[k+1]), k = 0..6, are examined. A pair is counted in numEq when
// ((src[k] - src[k+1] + dcOffset) & 0xFFFF) is below dcThreshold.
// Returns non-zero when the count exceeds hFlatnessThreshold.
// NOTE(review): the declaration/initialisation of numEq and the per-row
// advance of src (presumably by stride) are not visible in this excerpt;
// dcOffset is defined elsewhere.
253 static inline int isHorizDC(uint8_t src[], int stride)
257 for(y=0; y<BLOCK_SIZE; y++)
259 if(((src[0] - src[1] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
260 if(((src[1] - src[2] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
261 if(((src[2] - src[3] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
262 if(((src[3] - src[4] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
263 if(((src[4] - src[5] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
264 if(((src[5] - src[6] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
265 if(((src[6] - src[7] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
268 return numEq > hFlatnessThreshold;
// Returns 0 when the two end pixels src[0] and src[7] differ by more than 2*QP.
// NOTE(review): only this one check is visible; the accepting return path and
// any use of stride are not shown in this excerpt.
271 static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
273 if(abs(src[0] - src[7]) > 2*QP) return 0;
// Horizontal default deblocking filter (C version), run inside a loop over
// BLOCK_SIZE rows. A row is touched only when |middleEnergy| is below 8*QP;
// the correction d starts as |middleEnergy| minus the smaller of |leftEnergy|
// and |rightEnergy|, and is later given the sign of -middleEnergy.
// NOTE(review): the clamping of d against q, the write-back into dst and the
// row advance are on lines not visible in this excerpt.
278 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
281 for(y=0; y<BLOCK_SIZE; y++)
// NOTE(review): leftEnergy (k=0) and rightEnergy (k=4) below both have the
// shape 5*(p[k+2] - p[k+1]) + 2*(p[k] - p[k+3]). The same shape at k=2 would
// be 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]), but the first term here is
// dst[4] - dst[5]. Looks like a typo — confirm against the vertical filter.
283 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
285 if(ABS(middleEnergy) < 8*QP)
// q: half the step across the pixel pair dst[3]/dst[4].
287 const int q=(dst[3] - dst[4])/2;
288 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
289 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
291 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
// SIGN() never yields 0, so d is multiplied by -1 when middleEnergy is 0.
295 d*= SIGN(-middleEnergy);
316 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
317 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
// Horizontal low-pass filter (C version), run inside a loop over BLOCK_SIZE
// rows. dst points at the first of the 8 pixels to be rewritten; dst[-1] and
// dst[8], the pixels just outside, are read but not written.
// NOTE(review): the declaration of sums[] and the per-row advance of dst
// (presumably by stride) are not visible in this excerpt.
319 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
323 for(y=0; y<BLOCK_SIZE; y++)
// Edge handling: use the outside neighbour only when it differs from the
// edge pixel by less than QP; otherwise repeat the edge pixel.
325 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
326 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
// Sums of neighbouring pixel pairs, padded with first/last at the ends.
// They are all taken before any pixel is overwritten.
329 sums[0] = first + dst[0];
330 sums[1] = dst[0] + dst[1];
331 sums[2] = dst[1] + dst[2];
332 sums[3] = dst[2] + dst[3];
333 sums[4] = dst[3] + dst[4];
334 sums[5] = dst[4] + dst[5];
335 sums[6] = dst[5] + dst[6];
336 sums[7] = dst[6] + dst[7];
337 sums[8] = dst[7] + last;
// Weighted sums divided by 16 with rounding (+8, >>4). Each statement reads
// only sums[] and dst entries at its own or a higher index, which have not
// been overwritten yet, so every output is computed from original pixels.
339 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
340 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
341 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
342 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
343 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
344 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
345 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
346 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
353 * Experimental Filter 1 (Horizontal)
354 * will not damage linear gradients
355 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
356 * can only smooth blocks at the expected locations (it can't smooth them if they moved)
357 * MMX2 version does correct clipping, the C version doesn't
358 * not identical to the vertical one
// Experimental horizontal deblocking filter 1.
// The visible code has two parts: construction of a 256-entry table of packed
// 64-bit correction patterns (lut), and a per-row loop that derives a signed
// correction strength v.
// NOTE(review): the guard around the table construction, the loop over i, the
// use of lut/v to modify src and the row advance are not visible in this excerpt.
360 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
// Static pointer: the table persists across calls.
// NOTE(review): presumably built once, while lut is still NULL (check not visible).
363 static uint64_t *lut= NULL;
// 256 entries of 8 bytes, 8-byte aligned.
// NOTE(review): no NULL check on the memalign() result and no matching free()
// are visible in this excerpt.
367 lut= (uint64_t*)memalign(8, 256*8);
// Maps index i (0..255) to 2*i for i < 128 and to 2*(i-256) otherwise,
// i.e. i is treated as a signed byte and doubled.
370 int v= i < 128 ? 2*i : 2*(i-256);
// NOTE(review): a, b, c, d are defined twice below; presumably the two sets
// sit in alternative preprocessor branches whose directives are not visible.
372 //Simulate 112242211 9-Tap filter
373 uint64_t a= (v/16) & 0xFF;
374 uint64_t b= (v/8) & 0xFF;
375 uint64_t c= (v/4) & 0xFF;
376 uint64_t d= (3*v/8) & 0xFF;
378 //Simulate piecewise linear interpolation
379 uint64_t a= (v/16) & 0xFF;
380 uint64_t b= (v*3/16) & 0xFF;
381 uint64_t c= (v*5/16) & 0xFF;
382 uint64_t d= (7*v/16) & 0xFF;
// A..D: the byte-wise negations (mod 256) of a..d.
383 uint64_t A= (0x100 - a)&0xFF;
384 uint64_t B= (0x100 - b)&0xFF;
385 uint64_t C= (0x100 - c)&0xFF;
// NOTE(review): D is derived from c, not d, so D equals C and d's negation is
// never used. By analogy with A/B/C this looks like a typo for (0x100 - d) — confirm.
386 uint64_t D= (0x100 - c)&0xFF;
// Pack the eight bytes: a,b,c,d in the upper four bytes, D,C,B,A in the lower four.
388 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
389 (D<<24) | (C<<16) | (B<<8) | (A);
390 //lut[i] = (v<<32) | (v<<24);
394 for(y=0; y<BLOCK_SIZE; y++)
// Three pixel-pair differences of the row; b is the one across src[3]/src[4].
396 int a= src[1] - src[2];
397 int b= src[3] - src[4];
398 int c= src[5] - src[6];
// Strength: how much |b| exceeds the average of |a| and |c|, floored at 0.
400 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
// Apply the sign of -b (SIGN() yields -1 for 0, harmless when d is 0).
404 int v = d * SIGN(-b);
419 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
// Selection of the implementation flavours to build, followed by one inclusion
// of postprocess_template.c per flavour.
// With RUNTIME_CPUDETECT defined, every condition below is true, so all
// flavours are selected; otherwise the HAVE_MMX / HAVE_MMX2 / HAVE_3DNOW
// combination picks one.
// NOTE(review): only "#define COMPILE_3DNOW" is visible; the macros defined by
// the other conditionals and the matching #endif lines are not shown here.
421 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
425 #ifdef CAN_COMPILE_X86_ASM
427 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
431 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
435 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
436 #define COMPILE_3DNOW
438 #endif //CAN_COMPILE_X86_ASM
// Before each inclusion RENAME is redefined to paste a flavour suffix
// (_C, _MMX, _MMX2, _3DNow) onto its argument; presumably the template wraps
// its function names in RENAME(), which yields postProcess_C, postProcess_MMX, ...
450 #define RENAME(a) a ## _C
451 #include "postprocess_template.c"
461 #define RENAME(a) a ## _MMX
462 #include "postprocess_template.c"
472 #define RENAME(a) a ## _MMX2
473 #include "postprocess_template.c"
483 #define RENAME(a) a ## _3DNow
484 #include "postprocess_template.c"
487 // minor note: the HAVE_xyz macros are messed up after that line, so don't use them
// Dispatcher: forwards all arguments unchanged to one of the flavour-specific
// implementations postProcess_MMX2 / _3DNow / _MMX / _C.
// With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps;
// otherwise it is fixed at compile time by the HAVE_* macros.
// NOTE(review): several conditional lines (the first `if`, the `else`
// branches and some #if/#else/#endif directives) are not visible in this excerpt.
489 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
490 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode)
492 // using ifs here as they are faster than function pointers, although the
493 // difference wouldn't be measurable here, but it's much better because
494 // someone might exchange the CPU without restarting mplayer ;)
495 #ifdef RUNTIME_CPUDETECT
496 #ifdef CAN_COMPILE_X86_ASM
497 // ordered by speed, fastest first
499 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
500 else if(gCpuCaps.has3DNow)
501 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
502 else if(gCpuCaps.hasMMX)
503 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
505 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
507 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
509 #else //RUNTIME_CPUDETECT
511 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
512 #elif defined (HAVE_3DNOW)
513 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
514 #elif defined (HAVE_MMX)
515 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
517 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
519 #endif //!RUNTIME_CPUDETECT
522 #ifdef HAVE_ODIVX_POSTPROCESS
523 #include "../opendivx/postprocess.h"
527 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
528 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
530 /* -pp Command line Help
531 NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)?
534 "-npp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
535 "long form example:\n"
536 "-npp vdeblock:autoq,hdeblock:autoq,linblenddeint -npp default,-vdeblock\n"
537 "short form example:\n"
538 "-npp vb:a,hb:a,lb -npp de,-vb\n"
540 "-npp tn:64:128:256\n"
542 "short long name short long option Description\n"
543 "* * a autoq cpu power dependant enabler\n"
544 " c chrom chrominance filtring enabled\n"
545 " y nochrom chrominance filtring disabled\n"
546 "hb hdeblock (2 Threshold) horizontal deblocking filter\n"
547 " 1. Threshold: default=1, higher -> more deblocking\n"
548 " 2. Threshold: default=40, lower -> more deblocking\n"
549 " the h & v deblocking filters share these\n"
550 " so u cant set different thresholds for h / v\n"
551 "vb vdeblock (2 Threshold) vertical deblocking filter\n"
554 "h1 x1hdeblock Experimental h deblock filter 1\n"
555 "v1 x1vdeblock Experimental v deblock filter 1\n"
556 "dr dering Deringing filter\n"
557 "al autolevels automatic brightness / contrast\n"
558 " f fullyrange stretch luminance to (0..255)\n"
559 "lb linblenddeint linear blend deinterlacer\n"
560 "li linipoldeint linear interpolating deinterlace\n"
561 "ci cubicipoldeint cubic interpolating deinterlacer\n"
562 "md mediandeint median deinterlacer\n"
563 "de default hb:a,vb:a,dr:a,al\n"
564 "fa fast h1:a,v1:a,dr:a,al\n"
565 "tn tmpnoise (3 Thresholds) Temporal Noise Reducer\n"
566 " 1. <= 2. <= 3. larger -> stronger filtering\n"
567 "fq forceQuant <quantizer> Force quantizer\n"
571 * returns a PPMode struct which will have a non-zero error variable if an error occurred
572 * name is the string after "-pp" on the command line
573 * quality is a number from 0 to GET_PP_QUALITY_MAX
// Parses a filter specification of the form
//   filter[:option[:option...]][,filter...]
// into a struct PPMode for the given quality level.
// name:    the specification string; it is copied, not modified.
// quality: compared against each filter's minimum quality when the filter
//          carries the "autoq"/"a" option.
// Returns the PPMode by value; its error field is incremented for every
// unknown filter name and for every option that no filter consumed.
// NOTE(review): many lines (loops, braces, declarations, the return) are not
// visible in this excerpt; the comments below describe only what is shown.
575 struct PPMode getPPModeByNameAndQuality(char *name, int quality)
577 char temp[GET_MODE_BUFFER_SIZE];
579 char *filterDelimiters= ",";
580 char *optionDelimiters= ":";
// Defaults; the last member holds the tmpnoise thresholds 150/200/400.
581 struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}};
// Work on a private copy because strtok() writes into the string it scans.
// NOTE(review): strncpy() leaves temp without a terminating NUL when
// strlen(name) >= GET_MODE_BUFFER_SIZE, and no explicit terminator is visible
// in this excerpt — confirm.
584 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
586 if(verbose>1) printf("pp: %s\n", name);
// q starts far above any minimum quality, so a filter is enabled regardless
// of `quality` unless its "autoq" option replaces q with `quality`.
590 int q= 1000000; //GET_PP_QUALITY_MAX;
593 char *options[OPTIONS_ARRAY_SIZE];
596 int numOfUnknownOptions=0;
597 int enable=1; //does the user want us to enabled or disabled the filter
// Split off the next comma-separated filter, then its name before the first ':'.
599 filterToken= strtok(p, filterDelimiters);
600 if(filterToken == NULL) break;
601 p+= strlen(filterToken) + 1; // p points to next filterToken
602 filterName= strtok(filterToken, optionDelimiters);
603 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
// A leading '-' is tested for here; presumably it disables the filter
// (the body of this `if` is not visible).
605 if(*filterName == '-')
611 for(;;){ //for all options
612 option= strtok(NULL, optionDelimiters);
613 if(option == NULL) break;
615 if(verbose>1) printf("pp: option: %s\n", option);
616 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
617 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
618 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
// Any other option is stored for the filter-specific parsing further down.
621 options[numOfUnknownOptions] = option;
622 numOfUnknownOptions++;
// Stop early so that one slot remains for the NULL terminator written below.
624 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
626 options[numOfUnknownOptions] = NULL;
628 /* replace stuff from the replace Table */
629 for(i=0; replaceTable[2*i]!=NULL; i++)
631 if(!strcmp(replaceTable[2*i], filterName))
633 int newlen= strlen(replaceTable[2*i + 1]);
637 if(p==NULL) p= temp, *p=0; //last filter
638 else p--, *p=','; //not last filter
// Despite its name, spaceLeft is the offset of p within temp plus the length
// of the remaining text, i.e. the number of bytes currently in use.
// NOTE(review): casting pointers to int truncates them where pointers are
// wider than int; subtracting the pointers directly would avoid that — confirm.
641 spaceLeft= (int)p - (int)temp + plen;
// Guard against the expanded string exceeding temp (handling not visible).
642 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
// Shift the unparsed remainder (with its NUL) right and splice in the replacement.
647 memmove(p + newlen, p, plen+1);
648 memcpy(p, replaceTable[2*i + 1], newlen);
// Look the name up in the filter table (long or short form).
653 for(i=0; filters[i].shortName!=NULL; i++)
655 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
656 if( !strcmp(filters[i].longName, filterName)
657 || !strcmp(filters[i].shortName, filterName))
// Always clear the filter's bit first, so disabling is just "clear and stop".
659 ppMode.lumMode &= ~filters[i].mask;
660 ppMode.chromMode &= ~filters[i].mask;
663 if(!enable) break; // user wants to disable it
// Enable for luma when q reaches the luma minimum. For chroma this also needs
// either an explicit chrom option or (chrom==-1 and the filter's chromDefault).
665 if(q >= filters[i].minLumQuality)
666 ppMode.lumMode|= filters[i].mask;
667 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
668 if(q >= filters[i].minChromQuality)
669 ppMode.chromMode|= filters[i].mask;
// autolevels: allowed luminance range 16..234 unless "fullyrange"/"f" is given.
671 if(filters[i].mask == LEVEL_FIX)
674 ppMode.minAllowedY= 16;
675 ppMode.maxAllowedY= 234;
676 for(o=0; options[o]!=NULL; o++)
678 if( !strcmp(options[o],"fullyrange")
679 ||!strcmp(options[o],"f"))
681 ppMode.minAllowedY= 0;
682 ppMode.maxAllowedY= 255;
// Option consumed, so it no longer counts as unknown.
683 numOfUnknownOptions--;
// tmpnoise: up to three numeric thresholds override the defaults.
687 else if(filters[i].mask == TEMP_NOISE_FILTER)
691 ppMode.maxTmpNoise[0]= 150;
692 ppMode.maxTmpNoise[1]= 200;
693 ppMode.maxTmpNoise[2]= 400;
695 for(o=0; options[o]!=NULL; o++)
// Base 0 lets strtol() accept decimal, octal (leading 0) and hex (leading 0x).
698 ppMode.maxTmpNoise[numOfNoises]=
699 strtol(options[o], &tail, 0);
703 numOfUnknownOptions--;
704 if(numOfNoises >= 3) break;
// Deblocking: at most two numeric options.
708 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
712 // hFlatnessThreshold= 40;
713 // vFlatnessThreshold= 40;
715 for(o=0; options[o]!=NULL && o<2; o++)
718 int val= strtol(options[o], &tail, 0);
// tail == options[o] means no digits were parsed: stop at the first non-number.
719 if(tail==options[o]) break;
721 numOfUnknownOptions--;
// First number -> per-mode maxDcDiff. Second number -> the GLOBAL flatness
// thresholds, which are not stored in ppMode and so affect every mode.
722 if(o==0) ppMode.maxDcDiff= val;
723 else hFlatnessThreshold=
724 vFlatnessThreshold= val;
// forcequant: a single numeric option, default 15.
727 else if(filters[i].mask == FORCE_QUANT)
730 ppMode.forcedQuant= 15;
732 for(o=0; options[o]!=NULL && o<1; o++)
735 int val= strtol(options[o], &tail, 0);
736 if(tail==options[o]) break;
738 numOfUnknownOptions--;
739 ppMode.forcedQuant= val;
// Count an unknown filter name and every option that nobody consumed as errors.
744 if(!filterNameOk) ppMode.error++;
745 ppMode.error += numOfUnknownOptions;
// Mirror the relevant bits into the flag set used by the old OpenDivX code.
748 #ifdef HAVE_ODIVX_POSTPROCESS
749 if(ppMode.lumMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_H;
750 if(ppMode.lumMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_V;
751 if(ppMode.chromMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_H;
752 if(ppMode.chromMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_V;
753 if(ppMode.lumMode & DERING) ppMode.oldMode |= PP_DERING_Y;
754 if(ppMode.chromMode & DERING) ppMode.oldMode |= PP_DERING_C;
757 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode.lumMode, ppMode.chromMode);
762 * Check and load the -npp part of the cmd line
// Option handler for -npp: parses arg once per quality level and stores the
// results in gPPMode[0..GET_PP_QUALITY_MAX].
// Returns -1 as soon as a parsed mode reports an error.
// NOTE(review): the body of the "help" branch and the success return are not
// visible in this excerpt; conf is unused in the visible lines.
764 int readNPPOpt(void *conf, char *arg)
768 if(!strcmp("help", arg))
774 for(quality=0; quality<GET_PP_QUALITY_MAX+1; quality++)
776 gPPMode[quality]= getPPModeByNameAndQuality(arg, quality);
778 if(gPPMode[quality].error) return -1;
782 //divx_quality is passed to postprocess if autoq is off
783 divx_quality= GET_PP_QUALITY_MAX;
// Re-arm the one-time "using ... filters" messages.
784 firstTime = firstTime2 = 1;
// Option handler that parses arg as an integer with strtol() (base 0:
// decimal, octal or hex).
// Visible error returns: -2 (missing parameter), -4 and -3 (out of range).
// NOTE(review): the conditions guarding each return, what is done with val
// and the success return are not visible in this excerpt.
788 int readPPOpt(void *conf, char *arg)
793 return -2; // ERR_MISSING_PARAM
795 val = (int)strtol(arg,NULL,0);
797 return -4; // What about include cfgparser.h and use ERR_* defines */
799 return -3; // ERR_OUT_OF_RANGE
// Re-arm the one-time "using ... filters" messages.
802 firstTime = firstTime2 = 1;
807 void revertPPOpt(void *conf, char* opt)
815 * Obsolete, don't use it; use postprocess2() instead
816 * this will check newPPFlag automatically and use postprocess2 if it is set
817 * mode = quality if newPPFlag is set
// Legacy entry point: post-processes a three-plane picture (src[0..2] into dst[0..2]).
// Two paths are visible: one that looks up gPPMode[mode] and delegates to
// postprocess2(), and one that treats mode as a bit mask and fills a local
// PPMode from it.
// NOTE(review): the final `int mode` parameter line, the conditions selecting
// each path and several braces are not visible in this excerpt.
819 void postprocess(unsigned char * src[], int src_stride,
820 unsigned char * dst[], int dst_stride,
821 int horizontal_size, int vertical_size,
822 QP_STORE_T *QP_store, int QP_stride,
825 struct PPMode ppMode;
// NOTE(review): presumably substituted for a NULL QP_store (use not visible).
826 static QP_STORE_T zeroArray[2048/8];
// New-style path: mode is a quality index into gPPMode.
// NOTE(review): no bounds check on mode is visible.
830 ppMode= gPPMode[mode];
831 // printf("%d \n",QP_store[5]);
832 postprocess2(src, src_stride, dst, dst_stride,
833 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode);
838 if(firstTime && verbose)
840 printf("using pp filters 0x%X\n", mode);
// Old-style path: the luma mode is the mask as given; the chroma mode moves
// bits 4..7 of the low byte down to bits 0..3 and keeps bits 8 and above.
850 ppMode.lumMode= mode;
851 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00);
852 ppMode.chromMode= mode;
853 ppMode.maxTmpNoise[0]= 700;
854 ppMode.maxTmpNoise[1]= 1500;
855 ppMode.maxTmpNoise[2]= 3000;
856 ppMode.maxAllowedY= 234;
857 ppMode.minAllowedY= 16;
860 #ifdef HAVE_ODIVX_POSTPROCESS
861 // Note: I could make this shit outside of this file, but it would mean one
862 // more function call...
864 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode);
// Luma plane first (isColor = 0).
869 postProcess(src[0], src_stride, dst[0], dst_stride,
870 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode);
// Chroma planes are handled at half width (the matching adjustments of the
// other size/stride variables are not visible here).
872 horizontal_size >>= 1;
879 postProcess(src[1], src_stride, dst[1], dst_stride,
880 horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode);
881 postProcess(src[2], src_stride, dst[2], dst_stride,
882 horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode);
// Chroma copied unfiltered: one memcpy per plane when the strides match,
// otherwise row by row.
884 else if(src_stride == dst_stride)
886 memcpy(dst[1], src[1], src_stride*vertical_size);
887 memcpy(dst[2], src[2], src_stride*vertical_size);
892 for(y=0; y<vertical_size; y++)
894 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
895 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
// Fills both chroma planes with the constant 128
// (NOTE(review): presumably debug-only code; its guard is not visible).
900 memset(dst[1], 128, dst_stride*vertical_size);
901 memset(dst[2], 128, dst_stride*vertical_size);
// Post-processes a three-plane picture using an explicit PPMode (`mode`).
// When no quantiser table is supplied, or FORCE_QUANT is set in
// mode->lumMode, a local table is used instead of the caller's.
// NOTE(review): the final `struct PPMode *mode` parameter line, several
// conditions and braces are not visible in this excerpt.
905 void postprocess2(unsigned char * src[], int src_stride,
906 unsigned char * dst[], int dst_stride,
907 int horizontal_size, int vertical_size,
908 QP_STORE_T *QP_store, int QP_stride,
// 2048/8 = 256 entries.
// NOTE(review): presumably one per 8-pixel block column of a picture at most
// 2048 wide, used with a QP_stride of 0 — confirm.
912 QP_STORE_T quantArray[2048/8];
914 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
917 QP_store= quantArray;
// Every entry gets the forced quantiser, or 1 when only the table was missing.
919 if(mode->lumMode & FORCE_QUANT)
920 for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
922 for(i=0; i<2048/8; i++) quantArray[i]= 1;
925 if(firstTime2 && verbose)
927 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
931 #ifdef HAVE_ODIVX_POSTPROCESS
932 // Note: I could make this shit outside of this file, but it would mean one
933 // more function call...
935 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,
// Luma plane first (isColor = 0).
941 postProcess(src[0], src_stride, dst[0], dst_stride,
942 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode);
// Chroma planes are handled at half width (the matching adjustments of the
// other size/stride variables are not visible here).
944 horizontal_size >>= 1;
951 postProcess(src[1], src_stride, dst[1], dst_stride,
952 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode);
953 postProcess(src[2], src_stride, dst[2], dst_stride,
954 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode);
// Chroma copied unfiltered: one memcpy per plane when the strides match,
// otherwise row by row.
956 else if(src_stride == dst_stride)
958 memcpy(dst[1], src[1], src_stride*vertical_size);
959 memcpy(dst[2], src[2], src_stride*vertical_size);
964 for(y=0; y<vertical_size; y++)
966 memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
967 memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
974 * gets the mode flags for a given quality (larger values mean slower but better postprocessing)
975 * with -npp it simply returns quality
// Maps a quality level to a mode value for postprocess():
//  - with newPPFlag set, the quality itself (postprocess() then uses it as an
//    index into gPPMode);
//  - with use_old_pp (HAVE_ODIVX_POSTPROCESS builds), odivx_modes[quality];
//  - otherwise modes[quality], a bit mask of LUM_*/CHROM_* flags.
// NOTE(review): quality is used as an array index without a range check in
// the visible code; callers must keep it within 0..GET_PP_QUALITY_MAX.
// Two alternative initialiser lists for modes[] are visible ("horizontal
// filters first" / "vertical filters first"); presumably one is selected by a
// preprocessor conditional not shown in this excerpt.
978 int getPpModeForQuality(int quality){
979 int modes[1+GET_PP_QUALITY_MAX]= {
982 // horizontal filters first
984 LUM_H_DEBLOCK | LUM_V_DEBLOCK,
985 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK,
986 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK,
987 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING,
988 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING | CHROM_DERING
990 // vertical filters first
992 LUM_V_DEBLOCK | LUM_H_DEBLOCK,
993 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK,
994 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK,
995 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING,
996 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING | CHROM_DERING
1000 #ifdef HAVE_ODIVX_POSTPROCESS
1001 int odivx_modes[1+GET_PP_QUALITY_MAX]= {
1004 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V,
1005 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H,
1006 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V,
1007 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y,
1008 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y|PP_DERING_C
1010 if(use_old_pp) return odivx_modes[quality];
1012 if(newPPFlag) return quality;
1013 else return modes[quality];