]> git.sesse.net Git - ffmpeg/blob - postproc/postprocess.c
divx 5.01 support
[ffmpeg] / postproc / postprocess.c
1 /*
2     Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 */
18
19 /*
20                         C       MMX     MMX2    3DNow
21 isVertDC                Ec      Ec
22 isVertMinMaxOk          Ec      Ec
23 doVertLowPass           E               e       e
24 doVertDefFilter         Ec      Ec      e       e
25 isHorizDC               Ec      Ec
26 isHorizMinMaxOk         a       E
27 doHorizLowPass          E               e       e
28 doHorizDefFilter        Ec      Ec      e       e
29 deRing                  E               e       e*
30 Vertical RKAlgo1        E               a       a
31 Horizontal RKAlgo1                      a       a
32 Vertical X1#            a               E       E
33 Horizontal X1#          a               E       E
34 LinIpolDeinterlace      e               E       E*
35 CubicIpolDeinterlace    a               e       e*
36 LinBlendDeinterlace     e               E       E*
37 MedianDeinterlace#              Ec      Ec
38 TempDeNoiser#           E               e       e
39
* I don't have a 3DNow CPU, so these are untested; nobody has reported that they don't work, so they seem to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding, ...)
44 a = alternative / approximate impl
45 c = checked against the other implementations (-vo md5)
46 */
47
48 /*
49 TODO:
50 remove global/static vars
51 reduce the time wasted on the mem transfer
52 implement everything in C at least (done at the moment but ...)
53 unroll stuff if instructions depend too much on the prior one
54 we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
55 move YScale thing to the end instead of fixing QP
56 write a faster and higher quality deblocking filter :)
57 make the mainloop more flexible (variable number of blocks at once
58         (the if/else stuff per block is slowing things down)
59 compare the quality & speed of all filters
60 split this huge file
61 border remover
62 optimize c versions
63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
64 smart blur
65 commandline option for   the deblock / dering thresholds
66 put fastmemcpy back
67 dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect uses ARCH_X86)
68 ...
69 */
70
71 //Changelog: use the CVS log
72
73 #include "../config.h"
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <errno.h>
79 #ifdef HAVE_MALLOC_H
80 #include <malloc.h>
81 #endif
82 //#undef HAVE_MMX2
83 //#define HAVE_3DNOW
84 //#undef HAVE_MMX
85 //#undef ARCH_X86
86 //#define DEBUG_BRIGHTNESS
87 //#include "../libvo/fastmemcpy.h"
88 #include "postprocess.h"
89 #include "../cpudetect.h"
90 #include "../mangle.h"
91
92 #define MIN(a,b) ((a) > (b) ? (b) : (a))
93 #define MAX(a,b) ((a) < (b) ? (b) : (a))
94 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
95 #define SIGN(a) ((a) > 0 ? 1 : -1)
96
97 #define GET_MODE_BUFFER_SIZE 500
98 #define OPTIONS_ARRAY_SIZE 10
99
100 #ifdef ARCH_X86
101 #define CAN_COMPILE_X86_ASM
102 #endif
103
#ifdef CAN_COMPILE_X86_ASM
/*
 * 8-byte aligned 64-bit constants and scratch slots for the x86 code paths.
 * NOTE(review): presumably referenced from the inline asm of the included
 * template rather than from C -- confirm against postprocess_template.c.
 *
 * Naming: wNN        = four 16-bit words holding 0xNN
 *         bNN        = eight bytes holding 0xNN
 *         bmXXXXXXXX = byte mask, one digit per byte, most significant first
 */

/* packed Y offset / scale (volatile in the asm build) */
static volatile uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000ULL;
static volatile uint64_t __attribute__((aligned(8))) packedYScale=  0x0100010001000100ULL;

/* word constants */
static uint64_t __attribute__((aligned(8))) w05=   0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20=   0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400ULL;

/* byte masks */
static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFULL;
static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000ULL;
static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000ULL;
static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000ULL;
static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFULL;
static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFULL;
static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFULL;
static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00ULL;
static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000ULL;
static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000ULL;
static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFULL;
static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000ULL;

/* replicated byte constants */
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FULL;
static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFULL;
static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
static uint64_t __attribute__((aligned(8))) mmxDCOffset=    0x7E7E7E7E7E7E7E7EULL;
static uint64_t __attribute__((aligned(8))) mmxDCThreshold= 0x7C7C7C7C7C7C7C7CULL;
static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FULL;

/* scratch slots, zero at start-up */
static uint64_t __attribute__((aligned(8))) temp0= 0;
static uint64_t __attribute__((aligned(8))) temp1= 0;
static uint64_t __attribute__((aligned(8))) temp2= 0;
static uint64_t __attribute__((aligned(8))) temp3= 0;
static uint64_t __attribute__((aligned(8))) temp4= 0;
static uint64_t __attribute__((aligned(8))) temp5= 0;
static uint64_t __attribute__((aligned(8))) pQPb=  0;
static uint64_t __attribute__((aligned(8))) pQPb2= 0;

/* used for the horizontal code */
static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2];
static uint32_t __attribute__((aligned(4))) maxTmpNoise[4];
#else
/* non-asm build: only the packed Y offset / scale exist, as plain variables */
static uint64_t packedYOffset= 0x0000000000000000ULL;
static uint64_t packedYScale=  0x0100010001000100ULL;
#endif
148
149 extern int divx_quality;
150 int newPPFlag=0; //is set if -npp is used
151 struct PPMode gPPMode[GET_PP_QUALITY_MAX+1];
152 static int firstTime = 0, firstTime2 = 0;
153
154 extern int verbose;
155
156 int hFlatnessThreshold= 56 - 16;
157 int vFlatnessThreshold= 56 - 16;
158 int deringThreshold= 20;
159
160 static int dcOffset;
161 static int dcThreshold;
162
163 //amount of "black" u r willing to loose to get a brightness corrected picture
164 double maxClippedThreshold= 0.01;
165
166 static struct PPFilter filters[]=
167 {
168         {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
169         {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
170         {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
171         {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},
172         {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
173         {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
174         {"dr", "dering",                1, 5, 6, DERING},
175         {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
176         {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
177         {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
178         {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
179         {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
180         {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
181         {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
182         {NULL, NULL,0,0,0,0} //End Marker
183 };
184
/*
 * Alias table: entries come in pairs (alias name, filter chain it expands to)
 * and the list is terminated by a single NULL.
 */
#define PP_CHAIN_DEFAULT "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400"
#define PP_CHAIN_FAST    "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400"
static char *replaceTable[]=
{
	"default", PP_CHAIN_DEFAULT,
	"de",      PP_CHAIN_DEFAULT,
	"fast",    PP_CHAIN_FAST,
	"fa",      PP_CHAIN_FAST,
	NULL /* end marker */
};
#undef PP_CHAIN_DEFAULT
#undef PP_CHAIN_FAST
193
#ifdef CAN_COMPILE_X86_ASM
/**
 * Touches every packed constant / scratch variable from C so the compiler
 * does not warn about unused statics. b00 is (re)written with 0 only if all
 * of them happen to sum to 0.
 */
static inline void unusedVariableWarningFixer()
{
	const uint64_t sumOfAll=
		  packedYOffset + packedYScale + w05 + w20 + w1400
		+ bm00000001 + bm00010000 + bm00001000 + bm10000000 + bm10000001
		+ bm11000011 + bm00000011 + bm11111110 + bm11000000 + bm00011000
		+ bm00110011 + bm11001100
		+ b00 + b01 + b02 + b0F + bFF + b20 + b04 + b08
		+ pQPb2 + b80 + mmxDCOffset + mmxDCThreshold + b3F
		+ temp0 + temp1 + temp2 + temp3 + temp4 + temp5
		+ pQPb;

	if(sumOfAll == 0)
		b00= 0;
}
#endif
205
#ifdef TIMING
/**
 * Read the CPU time stamp counter with the x86 RDTSC instruction.
 *
 * RDTSC delivers the counter in EDX:EAX. Both halves are captured explicitly
 * because the "=A" constraint only denotes that register pair on 32-bit x86;
 * on x86-64 it would bind a single 64-bit register and return a wrong value.
 *
 * @return the 64-bit counter value
 */
static inline long long rdtsc()
{
	uint32_t lo, hi;
	asm volatile(	"rdtsc\n\t"
		: "=a" (lo), "=d" (hi)
	);
	return (long long)(((uint64_t)hi << 32) | lo);
}
#endif
217
#ifdef CAN_COMPILE_X86_ASM
/*
 * Thin wrappers, one per x86 prefetch instruction. Each emits exactly one
 * instruction whose memory operand is the address held in p.
 */

/** Emit "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
	asm volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
	asm volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
	asm volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
	asm volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
247
// The horizontal functions exist only in C, because the MMX code is faster using the vertical filters plus transposing
249
250 /**
251  * Check if the given 8x8 Block is mostly "flat"
252  */
253 static inline int isHorizDC(uint8_t src[], int stride)
254 {
255         int numEq= 0;
256         int y;
257         for(y=0; y<BLOCK_SIZE; y++)
258         {
259                 if(((src[0] - src[1] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
260                 if(((src[1] - src[2] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
261                 if(((src[2] - src[3] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
262                 if(((src[3] - src[4] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
263                 if(((src[4] - src[5] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
264                 if(((src[5] - src[6] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
265                 if(((src[6] - src[7] + dcOffset) & 0xFFFF) < dcThreshold) numEq++;
266                 src+= stride;
267         }
268         return numEq > hFlatnessThreshold;
269 }
270
/**
 * Approximate min/max check for the horizontal filter: only the two end
 * pixels of the first row (src[0] and src[7]) are compared; stride is unused.
 *
 * @return 1 if |src[0] - src[7]| is at most 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	const int endSpread= abs(src[0] - src[7]);

	return endSpread <= 2*QP;
}
277
278 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
279 {
280         int y;
281         for(y=0; y<BLOCK_SIZE; y++)
282         {
283                 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
284
285                 if(ABS(middleEnergy) < 8*QP)
286                 {
287                         const int q=(dst[3] - dst[4])/2;
288                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
289                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
290
291                         int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
292                         d= MAX(d, 0);
293
294                         d= (5*d + 32) >> 6;
295                         d*= SIGN(-middleEnergy);
296
297                         if(q>0)
298                         {
299                                 d= d<0 ? 0 : d;
300                                 d= d>q ? q : d;
301                         }
302                         else
303                         {
304                                 d= d>0 ? 0 : d;
305                                 d= d<q ? q : d;
306                         }
307
308                         dst[3]-= d;
309                         dst[4]+= d;
310                 }
311                 dst+= stride;
312         }
313 }
314
315 /**
316  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
317  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
318  */
319 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
320 {
321
322         int y;
323         for(y=0; y<BLOCK_SIZE; y++)
324         {
325                 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
326                 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
327
328                 int sums[9];
329                 sums[0] = first + dst[0];
330                 sums[1] = dst[0] + dst[1];
331                 sums[2] = dst[1] + dst[2];
332                 sums[3] = dst[2] + dst[3];
333                 sums[4] = dst[3] + dst[4];
334                 sums[5] = dst[4] + dst[5];
335                 sums[6] = dst[5] + dst[6];
336                 sums[7] = dst[6] + dst[7];
337                 sums[8] = dst[7] + last;
338
339                 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
340                 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
341                 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
342                 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
343                 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
344                 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
345                 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
346                 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
347
348                 dst+= stride;
349         }
350 }
351
352 /**
353  * Experimental Filter 1 (Horizontal)
354  * will not damage linear gradients
355  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
356  * can only smooth blocks at the expected locations (it cant smooth them if they did move)
357  * MMX2 version does correct clipping C version doesnt
358  * not identical with the vertical one
359  */
360 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
361 {
362         int y;
363         static uint64_t *lut= NULL;
364         if(lut==NULL)
365         {
366                 int i;
367                 lut= (uint64_t*)memalign(8, 256*8);
368                 for(i=0; i<256; i++)
369                 {
370                         int v= i < 128 ? 2*i : 2*(i-256);
371 /*
372 //Simulate 112242211 9-Tap filter
373                         uint64_t a= (v/16) & 0xFF;
374                         uint64_t b= (v/8) & 0xFF;
375                         uint64_t c= (v/4) & 0xFF;
376                         uint64_t d= (3*v/8) & 0xFF;
377 */
378 //Simulate piecewise linear interpolation
379                         uint64_t a= (v/16) & 0xFF;
380                         uint64_t b= (v*3/16) & 0xFF;
381                         uint64_t c= (v*5/16) & 0xFF;
382                         uint64_t d= (7*v/16) & 0xFF;
383                         uint64_t A= (0x100 - a)&0xFF;
384                         uint64_t B= (0x100 - b)&0xFF;
385                         uint64_t C= (0x100 - c)&0xFF;
386                         uint64_t D= (0x100 - c)&0xFF;
387
388                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
389                                 (D<<24) | (C<<16) | (B<<8) | (A);
390                         //lut[i] = (v<<32) | (v<<24);
391                 }
392         }
393
394         for(y=0; y<BLOCK_SIZE; y++)
395         {
396                 int a= src[1] - src[2];
397                 int b= src[3] - src[4];
398                 int c= src[5] - src[6];
399
400                 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
401
402                 if(d < QP)
403                 {
404                         int v = d * SIGN(-b);
405
406                         src[1] +=v/8;
407                         src[2] +=v/4;
408                         src[3] +=3*v/8;
409                         src[4] -=3*v/8;
410                         src[5] -=v/4;
411                         src[6] -=v/8;
412
413                 }
414                 src+=stride;
415         }
416 }
417
418
419 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
420 //Plain C versions
421 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
422 #define COMPILE_C
423 #endif
424
425 #ifdef CAN_COMPILE_X86_ASM
426
427 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
428 #define COMPILE_MMX
429 #endif
430
431 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
432 #define COMPILE_MMX2
433 #endif
434
435 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
436 #define COMPILE_3DNOW
437 #endif
438 #endif //CAN_COMPILE_X86_ASM
439
440 #undef HAVE_MMX
441 #undef HAVE_MMX2
442 #undef HAVE_3DNOW
443 #undef ARCH_X86
444
445 #ifdef COMPILE_C
446 #undef HAVE_MMX
447 #undef HAVE_MMX2
448 #undef HAVE_3DNOW
449 #undef ARCH_X86
450 #define RENAME(a) a ## _C
451 #include "postprocess_template.c"
452 #endif
453
454 //MMX versions
455 #ifdef COMPILE_MMX
456 #undef RENAME
457 #define HAVE_MMX
458 #undef HAVE_MMX2
459 #undef HAVE_3DNOW
460 #define ARCH_X86
461 #define RENAME(a) a ## _MMX
462 #include "postprocess_template.c"
463 #endif
464
465 //MMX2 versions
466 #ifdef COMPILE_MMX2
467 #undef RENAME
468 #define HAVE_MMX
469 #define HAVE_MMX2
470 #undef HAVE_3DNOW
471 #define ARCH_X86
472 #define RENAME(a) a ## _MMX2
473 #include "postprocess_template.c"
474 #endif
475
476 //3DNOW versions
477 #ifdef COMPILE_3DNOW
478 #undef RENAME
479 #define HAVE_MMX
480 #undef HAVE_MMX2
481 #define HAVE_3DNOW
482 #define ARCH_X86
483 #define RENAME(a) a ## _3DNow
484 #include "postprocess_template.c"
485 #endif
486
// Note: after the template includes above, the HAVE_xyz macros reflect the last variant compiled rather than config.h, so do not rely on them
488
489 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
490         QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode)
491 {
492         // useing ifs here as they are faster than function pointers allthough the
493         // difference wouldnt be messureable here but its much better because
494         // someone might exchange the cpu whithout restarting mplayer ;)
495 #ifdef RUNTIME_CPUDETECT
496 #ifdef CAN_COMPILE_X86_ASM
497         // ordered per speed fasterst first
498         if(gCpuCaps.hasMMX2)
499                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
500         else if(gCpuCaps.has3DNow)
501                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
502         else if(gCpuCaps.hasMMX)
503                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
504         else
505                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
506 #else
507                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
508 #endif
509 #else //RUNTIME_CPUDETECT
510 #ifdef HAVE_MMX2
511                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
512 #elif defined (HAVE_3DNOW)
513                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
514 #elif defined (HAVE_MMX)
515                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
516 #else
517                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, ppMode);
518 #endif
519 #endif //!RUNTIME_CPUDETECT
520 }
521
522 #ifdef HAVE_ODIVX_POSTPROCESS
523 #include "../opendivx/postprocess.h"
524 int use_old_pp=0;
525 #endif
526
527 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
528 //      QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
529
/* -pp Command line Help
NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)?

Keep this text in sync with the filters[] and replaceTable[] tables: filter
names are matched case sensitively, and the "de" / "fa" lines must show the
chains those aliases really expand to.
*/
char *help=
"-npp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"-npp vdeblock:autoq,hdeblock:autoq,linblenddeint       -npp default,-vdeblock\n"
"short form example:\n"
"-npp vb:a,hb:a,lb                                      -npp de,-vb\n"
"more examples:\n"
"-npp tn:64:128:256\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           cpu power dependant enabler\n"
"                       c       chrom           chrominance filtring enabled\n"
"                       y       nochrom         chrominance filtring disabled\n"
"hb     hdeblock        (2 Threshold)           horizontal deblocking filter\n"
"                        1. Threshold: default=1, higher -> more deblocking\n"
"                        2. Threshold: default=40, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so u cant set different thresholds for h / v\n"
"vb     vdeblock        (2 Threshold)           vertical deblocking filter\n"
"hr     rkhdeblock\n"
"vr     rkvdeblock\n"
"h1     x1hdeblock                              Experimental h deblock filter 1\n"
"v1     x1vdeblock                              Experimental v deblock filter 1\n"
"dr     dering                                  Deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f       fullyrange      stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa     fast                                    h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn     tmpnoise        (3 Thresholds)          Temporal Noise Reducer\n"
"                       1. <= 2. <= 3.          larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             Force quantizer\n"
;
569
570 /**
571  * returns a PPMode struct which will have a non 0 error variable if an error occured
572  * name is the string after "-pp" on the command line
573  * quality is a number from 0 to GET_PP_QUALITY_MAX
574  */
575 struct PPMode getPPModeByNameAndQuality(char *name, int quality)
576 {
577         char temp[GET_MODE_BUFFER_SIZE];
578         char *p= temp;
579         char *filterDelimiters= ",";
580         char *optionDelimiters= ":";
581         struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}};
582         char *filterToken;
583
584         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
585
586         if(verbose>1) printf("pp: %s\n", name);
587
588         for(;;){
589                 char *filterName;
590                 int q= 1000000; //GET_PP_QUALITY_MAX;
591                 int chrom=-1;
592                 char *option;
593                 char *options[OPTIONS_ARRAY_SIZE];
594                 int i;
595                 int filterNameOk=0;
596                 int numOfUnknownOptions=0;
597                 int enable=1; //does the user want us to enabled or disabled the filter
598
599                 filterToken= strtok(p, filterDelimiters);
600                 if(filterToken == NULL) break;
601                 p+= strlen(filterToken) + 1; // p points to next filterToken
602                 filterName= strtok(filterToken, optionDelimiters);
603                 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
604
605                 if(*filterName == '-')
606                 {
607                         enable=0;
608                         filterName++;
609                 }
610
611                 for(;;){ //for all options
612                         option= strtok(NULL, optionDelimiters);
613                         if(option == NULL) break;
614
615                         if(verbose>1) printf("pp: option: %s\n", option);
616                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
617                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
618                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
619                         else
620                         {
621                                 options[numOfUnknownOptions] = option;
622                                 numOfUnknownOptions++;
623                         }
624                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
625                 }
626                 options[numOfUnknownOptions] = NULL;
627
628                 /* replace stuff from the replace Table */
629                 for(i=0; replaceTable[2*i]!=NULL; i++)
630                 {
631                         if(!strcmp(replaceTable[2*i], filterName))
632                         {
633                                 int newlen= strlen(replaceTable[2*i + 1]);
634                                 int plen;
635                                 int spaceLeft;
636
637                                 if(p==NULL) p= temp, *p=0;      //last filter
638                                 else p--, *p=',';               //not last filter
639
640                                 plen= strlen(p);
641                                 spaceLeft= (int)p - (int)temp + plen;
642                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
643                                 {
644                                         ppMode.error++;
645                                         break;
646                                 }
647                                 memmove(p + newlen, p, plen+1);
648                                 memcpy(p, replaceTable[2*i + 1], newlen);
649                                 filterNameOk=1;
650                         }
651                 }
652
653                 for(i=0; filters[i].shortName!=NULL; i++)
654                 {
655 //                      printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
656                         if(   !strcmp(filters[i].longName, filterName)
657                            || !strcmp(filters[i].shortName, filterName))
658                         {
659                                 ppMode.lumMode &= ~filters[i].mask;
660                                 ppMode.chromMode &= ~filters[i].mask;
661
662                                 filterNameOk=1;
663                                 if(!enable) break; // user wants to disable it
664
665                                 if(q >= filters[i].minLumQuality)
666                                         ppMode.lumMode|= filters[i].mask;
667                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
668                                         if(q >= filters[i].minChromQuality)
669                                                 ppMode.chromMode|= filters[i].mask;
670
671                                 if(filters[i].mask == LEVEL_FIX)
672                                 {
673                                         int o;
674                                         ppMode.minAllowedY= 16;
675                                         ppMode.maxAllowedY= 234;
676                                         for(o=0; options[o]!=NULL; o++)
677                                         {
678                                                 if(  !strcmp(options[o],"fullyrange")
679                                                    ||!strcmp(options[o],"f"))
680                                                 {
681                                                         ppMode.minAllowedY= 0;
682                                                         ppMode.maxAllowedY= 255;
683                                                         numOfUnknownOptions--;
684                                                 }
685                                         }
686                                 }
687                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
688                                 {
689                                         int o;
690                                         int numOfNoises=0;
691                                         ppMode.maxTmpNoise[0]= 150;
692                                         ppMode.maxTmpNoise[1]= 200;
693                                         ppMode.maxTmpNoise[2]= 400;
694
695                                         for(o=0; options[o]!=NULL; o++)
696                                         {
697                                                 char *tail;
698                                                 ppMode.maxTmpNoise[numOfNoises]=
699                                                         strtol(options[o], &tail, 0);
700                                                 if(tail!=options[o])
701                                                 {
702                                                         numOfNoises++;
703                                                         numOfUnknownOptions--;
704                                                         if(numOfNoises >= 3) break;
705                                                 }
706                                         }
707                                 }
708                                 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
709                                 {
710                                         int o;
711                                         ppMode.maxDcDiff=1;
712 //                                      hFlatnessThreshold= 40;
713 //                                      vFlatnessThreshold= 40;
714
715                                         for(o=0; options[o]!=NULL && o<2; o++)
716                                         {
717                                                 char *tail;
718                                                 int val= strtol(options[o], &tail, 0);
719                                                 if(tail==options[o]) break;
720
721                                                 numOfUnknownOptions--;
722                                                 if(o==0) ppMode.maxDcDiff= val;
723                                                 else hFlatnessThreshold=
724                                                      vFlatnessThreshold= val;
725                                         }
726                                 }
727                                 else if(filters[i].mask == FORCE_QUANT)
728                                 {
729                                         int o;
730                                         ppMode.forcedQuant= 15;
731
732                                         for(o=0; options[o]!=NULL && o<1; o++)
733                                         {
734                                                 char *tail;
735                                                 int val= strtol(options[o], &tail, 0);
736                                                 if(tail==options[o]) break;
737
738                                                 numOfUnknownOptions--;
739                                                 ppMode.forcedQuant= val;
740                                         }
741                                 }
742                         }
743                 }
744                 if(!filterNameOk) ppMode.error++;
745                 ppMode.error += numOfUnknownOptions;
746         }
747
748 #ifdef HAVE_ODIVX_POSTPROCESS
749         if(ppMode.lumMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_H;
750         if(ppMode.lumMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_Y_V;
751         if(ppMode.chromMode & H_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_H;
752         if(ppMode.chromMode & V_DEBLOCK) ppMode.oldMode |= PP_DEBLOCK_C_V;
753         if(ppMode.lumMode & DERING) ppMode.oldMode |= PP_DERING_Y;
754         if(ppMode.chromMode & DERING) ppMode.oldMode |= PP_DERING_C;
755 #endif
756
757         if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode.lumMode, ppMode.chromMode);
758         return ppMode;
759 }
760
761 /**
762  * Check and load the -npp part of the cmd line
763  */
764 int readNPPOpt(void *conf, char *arg)
765 {
766         int quality;
767         
768         if(!strcmp("help", arg))
769         {
770                 printf("%s", help);
771                 exit(1);
772         }
773         
774         for(quality=0; quality<GET_PP_QUALITY_MAX+1; quality++)
775         {
776                 gPPMode[quality]= getPPModeByNameAndQuality(arg, quality);
777
778                 if(gPPMode[quality].error) return -1;
779         }
780         newPPFlag=1;
781
782 //divx_quality is passed to postprocess if autoq if off
783         divx_quality= GET_PP_QUALITY_MAX;
784         firstTime = firstTime2 = 1;
785         return 1;
786 }
787
788 int readPPOpt(void *conf, char *arg)
789 {
790   int val;
791
792   if(arg == NULL)
793     return -2; // ERR_MISSING_PARAM
794   errno = 0;
795   val = (int)strtol(arg,NULL,0);
796   if(errno != 0)
797     return -4;  // What about include cfgparser.h and use ERR_* defines */
798   if(val < 0)
799     return -3; // ERR_OUT_OF_RANGE
800
801   divx_quality = val;
802   firstTime = firstTime2 = 1;
803
804   return 1;
805 }
806   
807 void revertPPOpt(void *conf, char* opt) 
808 {
809   newPPFlag=0;
810   divx_quality=0;
811 }
812
813
814 /**
815  * Obsolete, dont use it, use postprocess2() instead
816  * this will check newPPFlag automatically and use postprocess2 if it is set
817  * mode = quality if newPPFlag
818  */
819 void  postprocess(unsigned char * src[], int src_stride,
820                  unsigned char * dst[], int dst_stride,
821                  int horizontal_size,   int vertical_size,
822                  QP_STORE_T *QP_store,  int QP_stride,
823                                           int mode)
824 {
825         struct PPMode ppMode;
826         static QP_STORE_T zeroArray[2048/8];
827
828         if(newPPFlag)
829         {
830                 ppMode= gPPMode[mode];
831 //              printf("%d \n",QP_store[5]);
832                 postprocess2(src, src_stride, dst, dst_stride,
833                         horizontal_size, vertical_size, QP_store, QP_stride, &ppMode);
834
835                 return;
836         }
837
838         if(firstTime && verbose)
839         {
840                 printf("using pp filters 0x%X\n", mode);
841                 firstTime=0;
842         }
843
844         if(QP_store==NULL)
845         {
846                 QP_store= zeroArray;
847                 QP_stride= 0;
848         }
849
850         ppMode.lumMode= mode;
851         mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00);
852         ppMode.chromMode= mode;
853         ppMode.maxTmpNoise[0]= 700;
854         ppMode.maxTmpNoise[1]= 1500;
855         ppMode.maxTmpNoise[2]= 3000;
856         ppMode.maxAllowedY= 234;
857         ppMode.minAllowedY= 16;
858         ppMode.maxDcDiff= 1;
859
860 #ifdef HAVE_ODIVX_POSTPROCESS
861 // Note: I could make this shit outside of this file, but it would mean one
862 // more function call...
863         if(use_old_pp){
864             odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode);
865             return;
866         }
867 #endif
868
869         postProcess(src[0], src_stride, dst[0], dst_stride,
870                 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode);
871
872         horizontal_size >>= 1;
873         vertical_size   >>= 1;
874         src_stride      >>= 1;
875         dst_stride      >>= 1;
876
877         if(ppMode.chromMode)
878         {
879                 postProcess(src[1], src_stride, dst[1], dst_stride,
880                         horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode);
881                 postProcess(src[2], src_stride, dst[2], dst_stride,
882                         horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode);
883         }
884         else if(src_stride == dst_stride)
885         {
886                 memcpy(dst[1], src[1], src_stride*vertical_size);
887                 memcpy(dst[2], src[2], src_stride*vertical_size);
888         }
889         else
890         {
891                 int y;
892                 for(y=0; y<vertical_size; y++)
893                 {
894                         memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
895                         memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
896                 }
897         }
898
899 #if 0
900                 memset(dst[1], 128, dst_stride*vertical_size);
901                 memset(dst[2], 128, dst_stride*vertical_size);
902 #endif
903 }
904
905 void  postprocess2(unsigned char * src[], int src_stride,
906                  unsigned char * dst[], int dst_stride,
907                  int horizontal_size,   int vertical_size,
908                  QP_STORE_T *QP_store,  int QP_stride,
909                  struct PPMode *mode)
910 {
911
912         QP_STORE_T quantArray[2048/8];
913         
914         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
915         {
916                 int i;
917                 QP_store= quantArray;
918                 QP_stride= 0;
919                 if(mode->lumMode & FORCE_QUANT)
920                         for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
921                 else
922                         for(i=0; i<2048/8; i++) quantArray[i]= 1;
923         }
924
925         if(firstTime2 && verbose)
926         {
927                 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
928                 firstTime2=0;
929         }
930
931 #ifdef HAVE_ODIVX_POSTPROCESS
932 // Note: I could make this shit outside of this file, but it would mean one
933 // more function call...
934         if(use_old_pp){
935             odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,
936             mode->oldMode);
937             return;
938         }
939 #endif
940
941         postProcess(src[0], src_stride, dst[0], dst_stride,
942                 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode);
943
944         horizontal_size >>= 1;
945         vertical_size   >>= 1;
946         src_stride      >>= 1;
947         dst_stride      >>= 1;
948
949         if(mode->chromMode)
950         {
951                 postProcess(src[1], src_stride, dst[1], dst_stride,
952                         horizontal_size, vertical_size, QP_store, QP_stride, 1, mode);
953                 postProcess(src[2], src_stride, dst[2], dst_stride,
954                         horizontal_size, vertical_size, QP_store, QP_stride, 2, mode);
955         }
956         else if(src_stride == dst_stride)
957         {
958                 memcpy(dst[1], src[1], src_stride*vertical_size);
959                 memcpy(dst[2], src[2], src_stride*vertical_size);
960         }
961         else
962         {
963                 int y;
964                 for(y=0; y<vertical_size; y++)
965                 {
966                         memcpy(&(dst[1][y*dst_stride]), &(src[1][y*src_stride]), horizontal_size);
967                         memcpy(&(dst[2][y*dst_stride]), &(src[2][y*src_stride]), horizontal_size);
968                 }
969         }
970 }
971
972
973 /**
974  * gets the mode flags for a given quality (larger values mean slower but better postprocessing)
975  * with -npp it simply returns quality 
976  * 0 <= quality <= 6
977  */
978 int getPpModeForQuality(int quality){
979         int modes[1+GET_PP_QUALITY_MAX]= {
980                 0,
981 #if 1
982                 // horizontal filters first
983                 LUM_H_DEBLOCK,
984                 LUM_H_DEBLOCK | LUM_V_DEBLOCK,
985                 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK,
986                 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK,
987                 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING,
988                 LUM_H_DEBLOCK | LUM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_V_DEBLOCK | LUM_DERING | CHROM_DERING
989 #else
990                 // vertical filters first
991                 LUM_V_DEBLOCK,
992                 LUM_V_DEBLOCK | LUM_H_DEBLOCK,
993                 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK,
994                 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK,
995                 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING,
996                 LUM_V_DEBLOCK | LUM_H_DEBLOCK | CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | LUM_DERING | CHROM_DERING
997 #endif
998         };
999
1000 #ifdef HAVE_ODIVX_POSTPROCESS
1001         int odivx_modes[1+GET_PP_QUALITY_MAX]= {
1002                 0,
1003                 PP_DEBLOCK_Y_H,
1004                 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V,
1005                 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H,
1006                 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V,
1007                 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y,
1008                 PP_DEBLOCK_Y_H|PP_DEBLOCK_Y_V|PP_DEBLOCK_C_H|PP_DEBLOCK_C_V|PP_DERING_Y|PP_DERING_C
1009         };
1010         if(use_old_pp) return odivx_modes[quality];
1011 #endif
1012         if(newPPFlag)   return quality;
1013         else            return modes[quality];
1014 }
1015
1016