2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once)
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
92 unsigned postproc_version(void)
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95 return LIBPOSTPROC_VERSION_INT;
98 const char *postproc_configuration(void)
100 return FFMPEG_CONFIGURATION;
103 const char *postproc_license(void)
105 #define LICENSE_PREFIX "libpostproc license: "
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
113 #define GET_MODE_BUFFER_SIZE 500
114 #define OPTIONS_ARRAY_SIZE 10
116 #define TEMP_STRIDE 8
117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
119 #if ARCH_X86 && HAVE_INLINE_ASM
120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
133 static struct PPFilter filters[]=
135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
143 {"dr", "dering", 1, 5, 6, DERING},
144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
153 {"be", "bitexact", 1, 0, 0, BITEXACT},
154 {NULL, NULL,0,0,0,0} //End Marker
157 static const char *replaceTable[]=
159 "default", "hb:a,vb:a,dr:a",
160 "de", "hb:a,vb:a,dr:a",
161 "fast", "h1:a,v1:a,dr:a",
162 "fa", "h1:a,v1:a,dr:a",
163 "ac", "ha:a:128:7,va:a,dr:a",
168 #if ARCH_X86 && HAVE_INLINE_ASM
169 static inline void prefetchnta(void *p)
171 __asm__ volatile( "prefetchnta (%0)\n\t"
176 static inline void prefetcht0(void *p)
178 __asm__ volatile( "prefetcht0 (%0)\n\t"
183 static inline void prefetcht1(void *p)
185 __asm__ volatile( "prefetcht1 (%0)\n\t"
190 static inline void prefetcht2(void *p)
192 __asm__ volatile( "prefetcht2 (%0)\n\t"
198 /* The horizontal functions exist only in C because the MMX
199 * code is faster with vertical filters and transposing. */
202 * Check if the given 8x8 Block is mostly "flat"
204 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
208 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
209 const int dcThreshold= dcOffset*2 + 1;
211 for(y=0; y<BLOCK_SIZE; y++){
212 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
216 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
217 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
218 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
221 return numEq > c->ppMode.flatnessThreshold;
225 * Check if the middle 8x8 Block in the given 8x16 block is flat
227 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
231 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
232 const int dcThreshold= dcOffset*2 + 1;
234 src+= stride*4; // src points to begin of the 8x8 Block
235 for(y=0; y<BLOCK_SIZE-1; y++){
236 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
241 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
242 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
243 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
246 return numEq > c->ppMode.flatnessThreshold;
/**
 * Sample one column pair on each of eight consecutive rows and verify that
 * every sampled difference stays within +-2*QP.
 *
 * The column pairs used on successive rows are (0,5), (2,7), (4,1), (6,3);
 * the pattern is applied twice, covering eight rows in total.
 *
 * @param src    first pixel of the block
 * @param stride distance between two rows, in bytes
 * @param QP     quantiser used to derive the allowed range
 * @return 1 if all sampled differences are in range, 0 at the first one that is not
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t colPair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *row = src;
    int pass, k;

    for (pass = 0; pass < 2; pass++) {
        for (k = 0; k < 4; k++) {
            /* the unsigned cast makes negative out-of-range values compare as huge */
            if ((unsigned)(row[colPair[k][0]] - row[colPair[k][1]] + 2*QP) > 4*QP)
                return 0;
            row += stride;
        }
    }
    return 1;
}
265 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269 for(x=0; x<BLOCK_SIZE; x+=4){
270 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
271 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
272 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
273 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
278 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
280 if( isHorizDC_C(src, stride, c) ){
281 if( isHorizMinMaxOk_C(src, stride, c->QP) )
290 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
292 if( isVertDC_C(src, stride, c) ){
293 if( isVertMinMaxOk_C(src, stride, c->QP) )
302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
305 for(y=0; y<BLOCK_SIZE; y++){
306 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
308 if(FFABS(middleEnergy) < 8*c->QP){
309 const int q=(dst[3] - dst[4])/2;
310 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
311 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
313 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
317 d*= FFSIGN(-middleEnergy);
338 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
339 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
344 for(y=0; y<BLOCK_SIZE; y++){
345 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
346 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
349 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
350 sums[1] = sums[0] - first + dst[3];
351 sums[2] = sums[1] - first + dst[4];
352 sums[3] = sums[2] - first + dst[5];
353 sums[4] = sums[3] - first + dst[6];
354 sums[5] = sums[4] - dst[0] + dst[7];
355 sums[6] = sums[5] - dst[1] + last;
356 sums[7] = sums[6] - dst[2] + last;
357 sums[8] = sums[7] - dst[3] + last;
358 sums[9] = sums[8] - dst[4] + last;
360 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
361 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
362 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
363 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
364 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
365 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
366 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
367 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
374 * Experimental Filter 1 (Horizontal)
375 * will not damage linear gradients
376 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
377 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
378 * MMX2 version does correct clipping C version does not
379 * not identical with the vertical one
381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
384 static uint64_t *lut= NULL;
388 lut = av_malloc(256*8);
391 int v= i < 128 ? 2*i : 2*(i-256);
393 //Simulate 112242211 9-Tap filter
394 uint64_t a= (v/16) & 0xFF;
395 uint64_t b= (v/8) & 0xFF;
396 uint64_t c= (v/4) & 0xFF;
397 uint64_t d= (3*v/8) & 0xFF;
399 //Simulate piecewise linear interpolation
400 uint64_t a= (v/16) & 0xFF;
401 uint64_t b= (v*3/16) & 0xFF;
402 uint64_t c= (v*5/16) & 0xFF;
403 uint64_t d= (7*v/16) & 0xFF;
404 uint64_t A= (0x100 - a)&0xFF;
405 uint64_t B= (0x100 - b)&0xFF;
406 uint64_t C= (0x100 - c)&0xFF;
407 uint64_t D= (0x100 - c)&0xFF;
409 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
410 (D<<24) | (C<<16) | (B<<8) | (A);
411 //lut[i] = (v<<32) | (v<<24);
415 for(y=0; y<BLOCK_SIZE; y++){
416 int a= src[1] - src[2];
417 int b= src[3] - src[4];
418 int c= src[5] - src[6];
420 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
423 int v = d * FFSIGN(-b);
437 * accurate deblock filter
439 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
442 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
443 const int dcThreshold= dcOffset*2 + 1;
445 src+= step*4; // src points to begin of the 8x8 Block
449 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
450 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
451 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
452 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
453 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
454 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
455 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
456 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
457 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
458 if(numEq > c->ppMode.flatnessThreshold){
461 if(src[0] > src[step]){
469 if(src[x*step] > src[(x+1)*step]){
470 if(src[x *step] > max) max= src[ x *step];
471 if(src[(x+1)*step] < min) min= src[(x+1)*step];
473 if(src[(x+1)*step] > max) max= src[(x+1)*step];
474 if(src[ x *step] < min) min= src[ x *step];
478 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
479 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
482 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
483 sums[1] = sums[0] - first + src[3*step];
484 sums[2] = sums[1] - first + src[4*step];
485 sums[3] = sums[2] - first + src[5*step];
486 sums[4] = sums[3] - first + src[6*step];
487 sums[5] = sums[4] - src[0*step] + src[7*step];
488 sums[6] = sums[5] - src[1*step] + last;
489 sums[7] = sums[6] - src[2*step] + last;
490 sums[8] = sums[7] - src[3*step] + last;
491 sums[9] = sums[8] - src[4*step] + last;
493 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
494 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
495 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
496 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
497 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
498 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
499 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
500 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
503 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
505 if(FFABS(middleEnergy) < 8*QP){
506 const int q=(src[3*step] - src[4*step])/2;
507 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
508 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
510 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
514 d*= FFSIGN(-middleEnergy);
538 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
540 //we always compile C for testing which needs bitexactness
544 #define COMPILE_ALTIVEC
545 #endif //HAVE_ALTIVEC
547 #if ARCH_X86 && HAVE_INLINE_ASM
549 #if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
553 #if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
557 #if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
558 #define COMPILE_3DNOW
560 #endif /* ARCH_X86 */
562 #undef HAVE_MMX_INLINE
563 #define HAVE_MMX_INLINE 0
564 #undef HAVE_MMXEXT_INLINE
565 #define HAVE_MMXEXT_INLINE 0
566 #undef HAVE_AMD3DNOW_INLINE
567 #define HAVE_AMD3DNOW_INLINE 0
569 #define HAVE_ALTIVEC 0
572 #define RENAME(a) a ## _C
573 #include "postprocess_template.c"
576 #ifdef COMPILE_ALTIVEC
579 #define HAVE_ALTIVEC 1
580 #define RENAME(a) a ## _altivec
581 #include "postprocess_altivec_template.c"
582 #include "postprocess_template.c"
588 #undef HAVE_MMX_INLINE
589 #define HAVE_MMX_INLINE 1
590 #define RENAME(a) a ## _MMX
591 #include "postprocess_template.c"
597 #undef HAVE_MMX_INLINE
598 #undef HAVE_MMXEXT_INLINE
599 #define HAVE_MMX_INLINE 1
600 #define HAVE_MMXEXT_INLINE 1
601 #define RENAME(a) a ## _MMX2
602 #include "postprocess_template.c"
608 #undef HAVE_MMX_INLINE
609 #undef HAVE_MMXEXT_INLINE
610 #undef HAVE_AMD3DNOW_INLINE
611 #define HAVE_MMX_INLINE 1
612 #define HAVE_MMXEXT_INLINE 0
613 #define HAVE_AMD3DNOW_INLINE 1
614 #define RENAME(a) a ## _3DNow
615 #include "postprocess_template.c"
618 // minor note: the HAVE_xyz is messed up after that line so do not use it.
620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
621 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
623 PPContext *c= (PPContext *)vc;
624 PPMode *ppMode= (PPMode *)vm;
625 c->ppMode= *ppMode; //FIXME
627 if(ppMode->lumMode & BITEXACT) {
628 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
632 // Using ifs here as they are faster than function pointers although the
633 // difference would not be measurable here but it is much better because
634 // someone might exchange the CPU without restarting MPlayer ;)
635 #if CONFIG_RUNTIME_CPUDETECT
636 #if ARCH_X86 && HAVE_INLINE_ASM
637 // ordered per speed fastest first
638 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
639 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
640 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
641 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
642 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
643 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
648 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
649 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
652 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
654 #else /* CONFIG_RUNTIME_CPUDETECT */
655 #if HAVE_MMXEXT_INLINE
656 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 #elif HAVE_AMD3DNOW_INLINE
658 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 #elif HAVE_MMX_INLINE
660 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
662 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 #endif /* !CONFIG_RUNTIME_CPUDETECT */
669 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
670 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
672 /* -pp Command line Help
674 #if LIBPOSTPROC_VERSION_INT < (52<<16)
675 const char *const pp_help=
677 const char pp_help[] =
679 "Available postprocessing filters:\n"
681 "short long name short long option Description\n"
682 "* * a autoq CPU power dependent enabler\n"
683 " c chrom chrominance filtering enabled\n"
684 " y nochrom chrominance filtering disabled\n"
685 " n noluma luma filtering disabled\n"
686 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
687 " 1. difference factor: default=32, higher -> more deblocking\n"
688 " 2. flatness threshold: default=39, lower -> more deblocking\n"
689 " the h & v deblocking filters share these\n"
690 " so you can't set different thresholds for h / v\n"
691 "vb vdeblock (2 threshold) vertical deblocking filter\n"
692 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
693 "va vadeblock (2 threshold) vertical deblocking filter\n"
694 "h1 x1hdeblock experimental h deblock filter 1\n"
695 "v1 x1vdeblock experimental v deblock filter 1\n"
696 "dr dering deringing filter\n"
697 "al autolevels automatic brightness / contrast\n"
698 " f fullyrange stretch luminance to (0..255)\n"
699 "lb linblenddeint linear blend deinterlacer\n"
700 "li linipoldeint linear interpolating deinterlace\n"
701 "ci cubicipoldeint cubic interpolating deinterlacer\n"
702 "md mediandeint median deinterlacer\n"
703 "fd ffmpegdeint ffmpeg deinterlacer\n"
704 "l5 lowpass5 FIR lowpass deinterlacer\n"
705 "de default hb:a,vb:a,dr:a\n"
706 "fa fast h1:a,v1:a,dr:a\n"
707 "ac ha:a:128:7,va:a,dr:a\n"
708 "tn tmpnoise (3 threshold) temporal noise reducer\n"
709 " 1. <= 2. <= 3. larger -> stronger filtering\n"
710 "fq forceQuant <quantizer> force quantizer\n"
712 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
713 "long form example:\n"
714 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
715 "short form example:\n"
716 "vb:a/hb:a/lb de,-vb\n"
722 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
724 char temp[GET_MODE_BUFFER_SIZE];
726 static const char filterDelimiters[] = ",/";
727 static const char optionDelimiters[] = ":";
728 struct PPMode *ppMode;
732 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
736 if (!strcmp(name, "help")) {
738 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
739 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
740 av_log(NULL, AV_LOG_INFO, "%s", temp);
745 ppMode= av_malloc(sizeof(PPMode));
748 ppMode->chromMode= 0;
749 ppMode->maxTmpNoise[0]= 700;
750 ppMode->maxTmpNoise[1]= 1500;
751 ppMode->maxTmpNoise[2]= 3000;
752 ppMode->maxAllowedY= 234;
753 ppMode->minAllowedY= 16;
754 ppMode->baseDcDiff= 256/8;
755 ppMode->flatnessThreshold= 56-16-1;
756 ppMode->maxClippedThreshold= 0.01;
759 memset(temp, 0, GET_MODE_BUFFER_SIZE);
760 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
762 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
766 int q= 1000000; //PP_QUALITY_MAX;
770 char *options[OPTIONS_ARRAY_SIZE];
773 int numOfUnknownOptions=0;
774 int enable=1; //does the user want us to enabled or disabled the filter
776 filterToken= strtok(p, filterDelimiters);
777 if(filterToken == NULL) break;
778 p+= strlen(filterToken) + 1; // p points to next filterToken
779 filterName= strtok(filterToken, optionDelimiters);
780 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
782 if(*filterName == '-'){
787 for(;;){ //for all options
788 option= strtok(NULL, optionDelimiters);
789 if(option == NULL) break;
791 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
792 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
793 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
794 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
795 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
797 options[numOfUnknownOptions] = option;
798 numOfUnknownOptions++;
800 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
802 options[numOfUnknownOptions] = NULL;
804 /* replace stuff from the replace Table */
805 for(i=0; replaceTable[2*i]!=NULL; i++){
806 if(!strcmp(replaceTable[2*i], filterName)){
807 int newlen= strlen(replaceTable[2*i + 1]);
814 spaceLeft= p - temp + plen;
815 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
819 memmove(p + newlen, p, plen+1);
820 memcpy(p, replaceTable[2*i + 1], newlen);
825 for(i=0; filters[i].shortName!=NULL; i++){
826 if( !strcmp(filters[i].longName, filterName)
827 || !strcmp(filters[i].shortName, filterName)){
828 ppMode->lumMode &= ~filters[i].mask;
829 ppMode->chromMode &= ~filters[i].mask;
832 if(!enable) break; // user wants to disable it
834 if(q >= filters[i].minLumQuality && luma)
835 ppMode->lumMode|= filters[i].mask;
836 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
837 if(q >= filters[i].minChromQuality)
838 ppMode->chromMode|= filters[i].mask;
840 if(filters[i].mask == LEVEL_FIX){
842 ppMode->minAllowedY= 16;
843 ppMode->maxAllowedY= 234;
844 for(o=0; options[o]!=NULL; o++){
845 if( !strcmp(options[o],"fullyrange")
846 ||!strcmp(options[o],"f")){
847 ppMode->minAllowedY= 0;
848 ppMode->maxAllowedY= 255;
849 numOfUnknownOptions--;
853 else if(filters[i].mask == TEMP_NOISE_FILTER)
858 for(o=0; options[o]!=NULL; o++){
860 ppMode->maxTmpNoise[numOfNoises]=
861 strtol(options[o], &tail, 0);
862 if(tail!=options[o]){
864 numOfUnknownOptions--;
865 if(numOfNoises >= 3) break;
869 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
870 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
873 for(o=0; options[o]!=NULL && o<2; o++){
875 int val= strtol(options[o], &tail, 0);
876 if(tail==options[o]) break;
878 numOfUnknownOptions--;
879 if(o==0) ppMode->baseDcDiff= val;
880 else ppMode->flatnessThreshold= val;
883 else if(filters[i].mask == FORCE_QUANT){
885 ppMode->forcedQuant= 15;
887 for(o=0; options[o]!=NULL && o<1; o++){
889 int val= strtol(options[o], &tail, 0);
890 if(tail==options[o]) break;
892 numOfUnknownOptions--;
893 ppMode->forcedQuant= val;
898 if(!filterNameOk) ppMode->error++;
899 ppMode->error += numOfUnknownOptions;
902 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
904 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
911 void pp_free_mode(pp_mode *mode){
/**
 * Replace the buffer at *p with a freshly zeroed one of the given size.
 * The previous buffer is freed first; its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
920 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
921 int mbWidth = (width+15)>>4;
922 int mbHeight= (height+15)>>4;
926 c->qpStride= qpStride;
928 reallocAlign((void **)&c->tempDst, 8, stride*24);
929 reallocAlign((void **)&c->tempSrc, 8, stride*24);
930 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
931 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
933 c->yHistogram[i]= width*height/64*15/256;
936 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
937 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
938 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
941 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
942 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
943 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
944 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
947 static const char * context_to_name(void * ptr) {
951 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
953 pp_context *pp_get_context(int width, int height, int cpuCaps){
954 PPContext *c= av_malloc(sizeof(PPContext));
955 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
956 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
958 memset(c, 0, sizeof(PPContext));
959 c->av_class = &av_codec_context_class;
961 if(cpuCaps&PP_FORMAT){
962 c->hChromaSubSample= cpuCaps&0x3;
963 c->vChromaSubSample= (cpuCaps>>4)&0x3;
965 c->hChromaSubSample= 1;
966 c->vChromaSubSample= 1;
969 reallocBuffers(c, width, height, stride, qpStride);
976 void pp_free_context(void *vc){
977 PPContext *c = (PPContext*)vc;
980 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
981 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
983 av_free(c->tempBlocks);
984 av_free(c->yHistogram);
987 av_free(c->deintTemp);
988 av_free(c->stdQPTable);
989 av_free(c->nonBQPTable);
990 av_free(c->forcedQPTable);
992 memset(c, 0, sizeof(PPContext));
997 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
998 uint8_t * dst[3], const int dstStride[3],
999 int width, int height,
1000 const QP_STORE_T *QP_store, int QPStride,
1001 pp_mode *vm, void *vc, int pict_type)
1003 int mbWidth = (width+15)>>4;
1004 int mbHeight= (height+15)>>4;
1005 PPMode *mode = (PPMode*)vm;
1006 PPContext *c = (PPContext*)vc;
1007 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1008 int absQPStride = FFABS(QPStride);
1010 // c->stride and c->QPStride are always positive
1011 if(c->stride < minStride || c->qpStride < absQPStride)
1012 reallocBuffers(c, width, height,
1013 FFMAX(minStride, c->stride),
1014 FFMAX(c->qpStride, absQPStride));
1016 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1018 QP_store= c->forcedQPTable;
1019 absQPStride = QPStride = 0;
1020 if(mode->lumMode & FORCE_QUANT)
1021 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1023 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1026 if(pict_type & PP_PICT_TYPE_QP2){
1028 const int count= mbHeight * absQPStride;
1029 for(i=0; i<(count>>2); i++){
1030 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1032 for(i<<=2; i<count; i++){
1033 c->stdQPTable[i] = QP_store[i]>>1;
1035 QP_store= c->stdQPTable;
1036 QPStride= absQPStride;
1041 for(y=0; y<mbHeight; y++){
1042 for(x=0; x<mbWidth; x++){
1043 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1045 av_log(c, AV_LOG_INFO, "\n");
1047 av_log(c, AV_LOG_INFO, "\n");
1050 if((pict_type&7)!=3){
1053 const int count= mbHeight * QPStride;
1054 for(i=0; i<(count>>2); i++){
1055 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1057 for(i<<=2; i<count; i++){
1058 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1062 for(i=0; i<mbHeight; i++) {
1063 for(j=0; j<absQPStride; j++) {
1064 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1070 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1071 mode->lumMode, mode->chromMode);
1073 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1074 width, height, QP_store, QPStride, 0, mode, c);
1076 width = (width )>>c->hChromaSubSample;
1077 height = (height)>>c->vChromaSubSample;
1079 if(mode->chromMode){
1080 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1081 width, height, QP_store, QPStride, 1, mode, c);
1082 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1083 width, height, QP_store, QPStride, 2, mode, c);
1085 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1086 linecpy(dst[1], src[1], height, srcStride[1]);
1087 linecpy(dst[2], src[2], height, srcStride[2]);
1090 for(y=0; y<height; y++){
1091 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1092 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);