git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once)
        (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 //#undef HAVE_MMX2
  83 //#define HAVE_AMD3DNOW
  84 //#undef HAVE_MMX
  85 //#undef ARCH_X86
  86 //#define DEBUG_BRIGHTNESS
  87 #include "postprocess.h"
  88 #include "postprocess_internal.h"
  89
  90 unsigned postproc_version(void)
  91 {
  92     return LIBPOSTPROC_VERSION_INT;
  93 }
  94
  95 const char *postproc_configuration(void)
  96 {
  97     return FFMPEG_CONFIGURATION;
  98 }
  99
 100 const char *postproc_license(void)
 101 {
 102 #define LICENSE_PREFIX "libpostproc license: "
 103     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 104 }
 105
 106 #if HAVE_ALTIVEC_H
 107 #include <altivec.h>
 108 #endif
 109
 110 #define GET_MODE_BUFFER_SIZE 500
 111 #define OPTIONS_ARRAY_SIZE 10
 112 #define BLOCK_SIZE 8
 113 #define TEMP_STRIDE 8
 114 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 115
#if ARCH_X86
/* 64-bit packed constants for the x86 code paths.
 * wNN: the 16-bit value 0x00NN repeated in each of the four words.
 * bNN: the byte value 0xNN repeated in each of the eight bytes.
 * NOTE(review): the first macro argument is presumably the alignment in bytes,
 * and the constants are presumably consumed by the inline-asm templates
 * included further down -- confirm against DECLARE_ASM_CONST and the templates. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* NOTE(review): presumably the threshold of the deringing filter implemented
 * in the templates; it is not referenced in this part of the file -- confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 128
 129
/* Table of the known postprocessing filters, terminated by an all-NULL/zero
 * entry. The mode-string parser looks filters up here by short or long name.
 * NOTE(review): the initializer order is presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask};
 * confirm against struct PPFilter in postprocess_internal.h. */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
};
 152
 153 static const char *replaceTable[]=
 154 {
 155     "default",      "hb:a,vb:a,dr:a",
 156     "de",           "hb:a,vb:a,dr:a",
 157     "fast",         "h1:a,v1:a,dr:a",
 158     "fa",           "h1:a,v1:a,dr:a",
 159     "ac",           "ha:a:128:7,va:a,dr:a",
 160     NULL //End Marker
 161 };
 162
 163
 164 #if ARCH_X86
/**
 * Issue the x86 "prefetchnta" instruction for the address p.
 * The asm statement declares no outputs and no clobbers; p is only passed
 * in a register as the address operand.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/**
 * Issue the x86 "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/**
 * Issue the x86 "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/**
 * Issue the x86 "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
 192 #endif
 193
 194 /* The horizontal functions exist only in C because the MMX
 195  * code is faster with vertical filters and transposing. */
 196
 197 /**
 198  * Check if the given 8x8 Block is mostly "flat"
 199  */
 200 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 201 {
 202     int numEq= 0;
 203     int y;
 204     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 205     const int dcThreshold= dcOffset*2 + 1;
 206
 207     for(y=0; y<BLOCK_SIZE; y++){
 208         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 209         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 210         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 211         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 212         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 213         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 214         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 215         src+= stride;
 216     }
 217     return numEq > c->ppMode.flatnessThreshold;
 218 }
 219
 220 /**
 221  * Check if the middle 8x8 Block in the given 8x16 block is flat
 222  */
 223 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 224 {
 225     int numEq= 0;
 226     int y;
 227     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 228     const int dcThreshold= dcOffset*2 + 1;
 229
 230     src+= stride*4; // src points to begin of the 8x8 Block
 231     for(y=0; y<BLOCK_SIZE-1; y++){
 232         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 233         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 234         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 235         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 236         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 237         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 238         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 239         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 240         src+= stride;
 241     }
 242     return numEq > c->ppMode.flatnessThreshold;
 243 }
 244
 245 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 246 {
 247     int i;
 248 #if 1
 249     for(i=0; i<2; i++){
 250         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 251         src += stride;
 252         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 253         src += stride;
 254         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 255         src += stride;
 256         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 257         src += stride;
 258     }
 259 #else
 260     for(i=0; i<8; i++){
 261         if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 262         src += stride;
 263     }
 264 #endif
 265     return 1;
 266 }
 267
 268 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 269 {
 270 #if 1
 271 #if 1
 272     int x;
 273     src+= stride*4;
 274     for(x=0; x<BLOCK_SIZE; x+=4){
 275         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 276         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 277         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 278         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 279     }
 280 #else
 281     int x;
 282     src+= stride*3;
 283     for(x=0; x<BLOCK_SIZE; x++){
 284         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 285     }
 286 #endif
 287     return 1;
 288 #else
 289     int x;
 290     src+= stride*4;
 291     for(x=0; x<BLOCK_SIZE; x++){
 292         int min=255;
 293         int max=0;
 294         int y;
 295         for(y=0; y<8; y++){
 296             int v= src[x + y*stride];
 297             if(v>max) max=v;
 298             if(v<min) min=v;
 299         }
 300         if(max-min > 2*QP) return 0;
 301     }
 302     return 1;
 303 #endif
 304 }
 305
 306 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 307 {
 308     if( isHorizDC_C(src, stride, c) ){
 309         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 310             return 1;
 311         else
 312             return 0;
 313     }else{
 314         return 2;
 315     }
 316 }
 317
 318 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 319 {
 320     if( isVertDC_C(src, stride, c) ){
 321         if( isVertMinMaxOk_C(src, stride, c->QP) )
 322             return 1;
 323         else
 324             return 0;
 325     }else{
 326         return 2;
 327     }
 328 }
 329
 330 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 331 {
 332     int y;
 333     for(y=0; y<BLOCK_SIZE; y++){
 334         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 335
 336         if(FFABS(middleEnergy) < 8*c->QP){
 337             const int q=(dst[3] - dst[4])/2;
 338             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 339             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 340
 341             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 342             d= FFMAX(d, 0);
 343
 344             d= (5*d + 32) >> 6;
 345             d*= FFSIGN(-middleEnergy);
 346
 347             if(q>0)
 348             {
 349                 d= d<0 ? 0 : d;
 350                 d= d>q ? q : d;
 351             }
 352             else
 353             {
 354                 d= d>0 ? 0 : d;
 355                 d= d<q ? q : d;
 356             }
 357
 358             dst[3]-= d;
 359             dst[4]+= d;
 360         }
 361         dst+= stride;
 362     }
 363 }
 364
 365 /**
 366  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 367  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 368  */
 369 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 370 {
 371     int y;
 372     for(y=0; y<BLOCK_SIZE; y++){
 373         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 374         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 375
 376         int sums[10];
 377         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 378         sums[1] = sums[0] - first  + dst[3];
 379         sums[2] = sums[1] - first  + dst[4];
 380         sums[3] = sums[2] - first  + dst[5];
 381         sums[4] = sums[3] - first  + dst[6];
 382         sums[5] = sums[4] - dst[0] + dst[7];
 383         sums[6] = sums[5] - dst[1] + last;
 384         sums[7] = sums[6] - dst[2] + last;
 385         sums[8] = sums[7] - dst[3] + last;
 386         sums[9] = sums[8] - dst[4] + last;
 387
 388         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 389         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 390         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 391         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 392         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 393         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 394         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 395         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 396
 397         dst+= stride;
 398     }
 399 }
 400
 401 /**
 402  * Experimental Filter 1 (Horizontal)
 403  * will not damage linear gradients
 404  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 405  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 406  * MMX2 version does correct clipping C version does not
 407  * not identical with the vertical one
 408  */
 409 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 410 {
 411     int y;
 412     static uint64_t *lut= NULL;
 413     if(lut==NULL)
 414     {
 415         int i;
 416         lut = av_malloc(256*8);
 417         for(i=0; i<256; i++)
 418         {
 419             int v= i < 128 ? 2*i : 2*(i-256);
 420 /*
 421 //Simulate 112242211 9-Tap filter
 422             uint64_t a= (v/16)  & 0xFF;
 423             uint64_t b= (v/8)   & 0xFF;
 424             uint64_t c= (v/4)   & 0xFF;
 425             uint64_t d= (3*v/8) & 0xFF;
 426 */
 427 //Simulate piecewise linear interpolation
 428             uint64_t a= (v/16)   & 0xFF;
 429             uint64_t b= (v*3/16) & 0xFF;
 430             uint64_t c= (v*5/16) & 0xFF;
 431             uint64_t d= (7*v/16) & 0xFF;
 432             uint64_t A= (0x100 - a)&0xFF;
 433             uint64_t B= (0x100 - b)&0xFF;
 434             uint64_t C= (0x100 - c)&0xFF;
 435             uint64_t D= (0x100 - c)&0xFF;
 436
 437             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 438                        (D<<24) | (C<<16) | (B<<8)  | (A);
 439             //lut[i] = (v<<32) | (v<<24);
 440         }
 441     }
 442
 443     for(y=0; y<BLOCK_SIZE; y++){
 444         int a= src[1] - src[2];
 445         int b= src[3] - src[4];
 446         int c= src[5] - src[6];
 447
 448         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 449
 450         if(d < QP){
 451             int v = d * FFSIGN(-b);
 452
 453             src[1] +=v/8;
 454             src[2] +=v/4;
 455             src[3] +=3*v/8;
 456             src[4] -=3*v/8;
 457             src[5] -=v/4;
 458             src[6] -=v/8;
 459         }
 460         src+=stride;
 461     }
 462 }
 463
 464 /**
 465  * accurate deblock filter
 466  */
 467 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 468     int y;
 469     const int QP= c->QP;
 470     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 471     const int dcThreshold= dcOffset*2 + 1;
 472 //START_TIMER
 473     src+= step*4; // src points to begin of the 8x8 Block
 474     for(y=0; y<8; y++){
 475         int numEq= 0;
 476
 477         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 478         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 479         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 480         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 481         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 482         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 483         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 484         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 485         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 486         if(numEq > c->ppMode.flatnessThreshold){
 487             int min, max, x;
 488
 489             if(src[0] > src[step]){
 490                 max= src[0];
 491                 min= src[step];
 492             }else{
 493                 max= src[step];
 494                 min= src[0];
 495             }
 496             for(x=2; x<8; x+=2){
 497                 if(src[x*step] > src[(x+1)*step]){
 498                         if(src[x    *step] > max) max= src[ x   *step];
 499                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 500                 }else{
 501                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 502                         if(src[ x   *step] < min) min= src[ x   *step];
 503                 }
 504             }
 505             if(max-min < 2*QP){
 506                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 507                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 508
 509                 int sums[10];
 510                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 511                 sums[1] = sums[0] - first       + src[3*step];
 512                 sums[2] = sums[1] - first       + src[4*step];
 513                 sums[3] = sums[2] - first       + src[5*step];
 514                 sums[4] = sums[3] - first       + src[6*step];
 515                 sums[5] = sums[4] - src[0*step] + src[7*step];
 516                 sums[6] = sums[5] - src[1*step] + last;
 517                 sums[7] = sums[6] - src[2*step] + last;
 518                 sums[8] = sums[7] - src[3*step] + last;
 519                 sums[9] = sums[8] - src[4*step] + last;
 520
 521                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 522                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 523                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 524                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 525                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 526                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 527                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 528                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 529             }
 530         }else{
 531             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 532
 533             if(FFABS(middleEnergy) < 8*QP){
 534                 const int q=(src[3*step] - src[4*step])/2;
 535                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 536                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 537
 538                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 539                 d= FFMAX(d, 0);
 540
 541                 d= (5*d + 32) >> 6;
 542                 d*= FFSIGN(-middleEnergy);
 543
 544                 if(q>0){
 545                     d= d<0 ? 0 : d;
 546                     d= d>q ? q : d;
 547                 }else{
 548                     d= d>0 ? 0 : d;
 549                     d= d<q ? q : d;
 550                 }
 551
 552                 src[3*step]-= d;
 553                 src[4*step]+= d;
 554             }
 555         }
 556
 557         src += stride;
 558     }
 559 /*if(step==16){
 560     STOP_TIMER("step16")
 561 }else{
 562     STOP_TIMER("stepX")
 563 }*/
 564 }
 565
 566 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
 567 //Plain C versions
 568 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
 569 #define COMPILE_C
 570 #endif
 571
 572 #if HAVE_ALTIVEC
 573 #define COMPILE_ALTIVEC
 574 #endif //HAVE_ALTIVEC
 575
 576 #if ARCH_X86
 577
 578 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 579 #define COMPILE_MMX
 580 #endif
 581
 582 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
 583 #define COMPILE_MMX2
 584 #endif
 585
 586 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 587 #define COMPILE_3DNOW
 588 #endif
 589 #endif /* ARCH_X86 */
 590
 591 #undef HAVE_MMX
 592 #define HAVE_MMX 0
 593 #undef HAVE_MMX2
 594 #define HAVE_MMX2 0
 595 #undef HAVE_AMD3DNOW
 596 #define HAVE_AMD3DNOW 0
 597 #undef HAVE_ALTIVEC
 598 #define HAVE_ALTIVEC 0
 599
 600 #ifdef COMPILE_C
 601 #define RENAME(a) a ## _C
 602 #include "postprocess_template.c"
 603 #endif
 604
 605 #ifdef COMPILE_ALTIVEC
 606 #undef RENAME
 607 #undef HAVE_ALTIVEC
 608 #define HAVE_ALTIVEC 1
 609 #define RENAME(a) a ## _altivec
 610 #include "postprocess_altivec_template.c"
 611 #include "postprocess_template.c"
 612 #endif
 613
 614 //MMX versions
 615 #ifdef COMPILE_MMX
 616 #undef RENAME
 617 #undef HAVE_MMX
 618 #define HAVE_MMX 1
 619 #define RENAME(a) a ## _MMX
 620 #include "postprocess_template.c"
 621 #endif
 622
 623 //MMX2 versions
 624 #ifdef COMPILE_MMX2
 625 #undef RENAME
 626 #undef HAVE_MMX
 627 #undef HAVE_MMX2
 628 #define HAVE_MMX 1
 629 #define HAVE_MMX2 1
 630 #define RENAME(a) a ## _MMX2
 631 #include "postprocess_template.c"
 632 #endif
 633
 634 //3DNOW versions
 635 #ifdef COMPILE_3DNOW
 636 #undef RENAME
 637 #undef HAVE_MMX
 638 #undef HAVE_MMX2
 639 #undef HAVE_AMD3DNOW
 640 #define HAVE_MMX 1
 641 #define HAVE_MMX2 0
 642 #define HAVE_AMD3DNOW 1
 643 #define RENAME(a) a ## _3DNow
 644 #include "postprocess_template.c"
 645 #endif
 646
// Note: the HAVE_xyz macros were redefined by the template instantiations above and no longer mirror config.h; avoid relying on them past this line.
 648
/**
 * Copy the requested mode into the context and forward all arguments to one
 * of the postProcess_<variant> functions generated from the templates above.
 *
 * With CONFIG_RUNTIME_CPUDETECT the variant is chosen at run time from
 * c->cpuCaps; otherwise it is fixed at compile time by the HAVE_* macros as
 * they stand at this point of the file.
 *
 * @param vm mode, cast to PPMode and copied by value into c->ppMode
 * @param vc context, cast to PPContext
 * The remaining parameters are passed through unchanged.
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU without restarting MPlayer ;)
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#if HAVE_ALTIVEC
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
#endif
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
    // exactly one variant is selected at compile time
#if   HAVE_MMX2
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_AMD3DNOW
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_MMX
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif HAVE_ALTIVEC
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
 692
 693 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 694 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 695
 696 /* -pp Command line Help
 697 */
 698 #if LIBPOSTPROC_VERSION_INT < (52<<16)
 699 const char *const pp_help=
 700 #else
 701 const char pp_help[] =
 702 #endif
 703 "Available postprocessing filters:\n"
 704 "Filters                        Options\n"
 705 "short  long name       short   long option     Description\n"
 706 "*      *               a       autoq           CPU power dependent enabler\n"
 707 "                       c       chrom           chrominance filtering enabled\n"
 708 "                       y       nochrom         chrominance filtering disabled\n"
 709 "                       n       noluma          luma filtering disabled\n"
 710 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 711 "       1. difference factor: default=32, higher -> more deblocking\n"
 712 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 713 "                       the h & v deblocking filters share these\n"
 714 "                       so you can't set different thresholds for h / v\n"
 715 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 716 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 717 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 718 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 719 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 720 "dr     dering                                  deringing filter\n"
 721 "al     autolevels                              automatic brightness / contrast\n"
 722 "                       f        fullyrange     stretch luminance to (0..255)\n"
 723 "lb     linblenddeint                           linear blend deinterlacer\n"
 724 "li     linipoldeint                            linear interpolating deinterlace\n"
 725 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 726 "md     mediandeint                             median deinterlacer\n"
 727 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 728 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 729 "de     default                                 hb:a,vb:a,dr:a\n"
 730 "fa     fast                                    h1:a,v1:a,dr:a\n"
 731 "ac                                             ha:a:128:7,va:a,dr:a\n"
 732 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 733 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 734 "fq     forceQuant      <quantizer>             force quantizer\n"
 735 "Usage:\n"
 736 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 737 "long form example:\n"
 738 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 739 "short form example:\n"
 740 "vb:a/hb:a/lb                                   de,-vb\n"
 741 "more examples:\n"
 742 "tn:64:128:256\n"
 743 "\n"
 744 ;
 745
 746 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 747 {
 748     char temp[GET_MODE_BUFFER_SIZE];
 749     char *p= temp;
 750     static const char filterDelimiters[] = ",/";
 751     static const char optionDelimiters[] = ":";
 752     struct PPMode *ppMode;
 753     char *filterToken;
 754
 755     ppMode= av_malloc(sizeof(PPMode));
 756
 757     ppMode->lumMode= 0;
 758     ppMode->chromMode= 0;
 759     ppMode->maxTmpNoise[0]= 700;
 760     ppMode->maxTmpNoise[1]= 1500;
 761     ppMode->maxTmpNoise[2]= 3000;
 762     ppMode->maxAllowedY= 234;
 763     ppMode->minAllowedY= 16;
 764     ppMode->baseDcDiff= 256/8;
 765     ppMode->flatnessThreshold= 56-16-1;
 766     ppMode->maxClippedThreshold= 0.01;
 767     ppMode->error=0;
 768
 769 #undef strncpy
 770     strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 771
 772     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 773
 774     for(;;){
 775         char *filterName;
 776         int q= 1000000; //PP_QUALITY_MAX;
 777         int chrom=-1;
 778         int luma=-1;
 779         char *option;
 780         char *options[OPTIONS_ARRAY_SIZE];
 781         int i;
 782         int filterNameOk=0;
 783         int numOfUnknownOptions=0;
 784         int enable=1; //does the user want us to enabled or disabled the filter
 785
 786         filterToken= strtok(p, filterDelimiters);
 787         if(filterToken == NULL) break;
 788         p+= strlen(filterToken) + 1; // p points to next filterToken
 789         filterName= strtok(filterToken, optionDelimiters);
 790         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 791
 792         if(*filterName == '-'){
 793             enable=0;
 794             filterName++;
 795         }
 796
 797         for(;;){ //for all options
 798             option= strtok(NULL, optionDelimiters);
 799             if(option == NULL) break;
 800
 801             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 802             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 803             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 804             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 805             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 806             else{
 807                 options[numOfUnknownOptions] = option;
 808                 numOfUnknownOptions++;
 809             }
 810             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 811         }
 812         options[numOfUnknownOptions] = NULL;
 813
 814         /* replace stuff from the replace Table */
 815         for(i=0; replaceTable[2*i]!=NULL; i++){
 816             if(!strcmp(replaceTable[2*i], filterName)){
 817                 int newlen= strlen(replaceTable[2*i + 1]);
 818                 int plen;
 819                 int spaceLeft;
 820
 821                 if(p==NULL) p= temp, *p=0;      //last filter
 822                 else p--, *p=',';               //not last filter
 823
 824                 plen= strlen(p);
 825                 spaceLeft= p - temp + plen;
 826                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
 827                     ppMode->error++;
 828                     break;
 829                 }
 830                 memmove(p + newlen, p, plen+1);
 831                 memcpy(p, replaceTable[2*i + 1], newlen);
 832                 filterNameOk=1;
 833             }
 834         }
 835
 836         for(i=0; filters[i].shortName!=NULL; i++){
 837             if(   !strcmp(filters[i].longName, filterName)
 838                || !strcmp(filters[i].shortName, filterName)){
 839                 ppMode->lumMode &= ~filters[i].mask;
 840                 ppMode->chromMode &= ~filters[i].mask;
 841
 842                 filterNameOk=1;
 843                 if(!enable) break; // user wants to disable it
 844
 845                 if(q >= filters[i].minLumQuality && luma)
 846                     ppMode->lumMode|= filters[i].mask;
 847                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 848                     if(q >= filters[i].minChromQuality)
 849                             ppMode->chromMode|= filters[i].mask;
 850
 851                 if(filters[i].mask == LEVEL_FIX){
 852                     int o;
 853                     ppMode->minAllowedY= 16;
 854                     ppMode->maxAllowedY= 234;
 855                     for(o=0; options[o]!=NULL; o++){
 856                         if(  !strcmp(options[o],"fullyrange")
 857                            ||!strcmp(options[o],"f")){
 858                             ppMode->minAllowedY= 0;
 859                             ppMode->maxAllowedY= 255;
 860                             numOfUnknownOptions--;
 861                         }
 862                     }
 863                 }
 864                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 865                 {
 866                     int o;
 867                     int numOfNoises=0;
 868
 869                     for(o=0; options[o]!=NULL; o++){
 870                         char *tail;
 871                         ppMode->maxTmpNoise[numOfNoises]=
 872                             strtol(options[o], &tail, 0);
 873                         if(tail!=options[o]){
 874                             numOfNoises++;
 875                             numOfUnknownOptions--;
 876                             if(numOfNoises >= 3) break;
 877                         }
 878                     }
 879                 }
 880                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 881                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 882                     int o;
 883
 884                     for(o=0; options[o]!=NULL && o<2; o++){
 885                         char *tail;
 886                         int val= strtol(options[o], &tail, 0);
 887                         if(tail==options[o]) break;
 888
 889                         numOfUnknownOptions--;
 890                         if(o==0) ppMode->baseDcDiff= val;
 891                         else ppMode->flatnessThreshold= val;
 892                     }
 893                 }
 894                 else if(filters[i].mask == FORCE_QUANT){
 895                     int o;
 896                     ppMode->forcedQuant= 15;
 897
 898                     for(o=0; options[o]!=NULL && o<1; o++){
 899                         char *tail;
 900                         int val= strtol(options[o], &tail, 0);
 901                         if(tail==options[o]) break;
 902
 903                         numOfUnknownOptions--;
 904                         ppMode->forcedQuant= val;
 905                     }
 906                 }
 907             }
 908         }
 909         if(!filterNameOk) ppMode->error++;
 910         ppMode->error += numOfUnknownOptions;
 911     }
 912
 913     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 914     if(ppMode->error){
 915         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 916         av_free(ppMode);
 917         return NULL;
 918     }
 919     return ppMode;
 920 }
 921
 922 void pp_free_mode(pp_mode *mode){
 923     av_free(mode);
 924 }
 925
 926 static void reallocAlign(void **p, int alignment, int size){
 927     av_free(*p);
 928     *p= av_mallocz(size);
 929 }
 930
 931 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 932     int mbWidth = (width+15)>>4;
 933     int mbHeight= (height+15)>>4;
 934     int i;
 935
 936     c->stride= stride;
 937     c->qpStride= qpStride;
 938
 939     reallocAlign((void **)&c->tempDst, 8, stride*24);
 940     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 941     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 942     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 943     for(i=0; i<256; i++)
 944             c->yHistogram[i]= width*height/64*15/256;
 945
 946     for(i=0; i<3; i++){
 947         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 948         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 949         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 950     }
 951
 952     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 953     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 954     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 955     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 956 }
 957
 958 static const char * context_to_name(void * ptr) {
 959     return "postproc";
 960 }
 961
 962 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 963
 964 pp_context *pp_get_context(int width, int height, int cpuCaps){
 965     PPContext *c= av_malloc(sizeof(PPContext));
 966     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 967     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 968
 969     memset(c, 0, sizeof(PPContext));
 970     c->av_class = &av_codec_context_class;
 971     c->cpuCaps= cpuCaps;
 972     if(cpuCaps&PP_FORMAT){
 973         c->hChromaSubSample= cpuCaps&0x3;
 974         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 975     }else{
 976         c->hChromaSubSample= 1;
 977         c->vChromaSubSample= 1;
 978     }
 979
 980     reallocBuffers(c, width, height, stride, qpStride);
 981
 982     c->frameNum=-1;
 983
 984     return c;
 985 }
 986
 987 void pp_free_context(void *vc){
 988     PPContext *c = (PPContext*)vc;
 989     int i;
 990
 991     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 992     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 993
 994     av_free(c->tempBlocks);
 995     av_free(c->yHistogram);
 996     av_free(c->tempDst);
 997     av_free(c->tempSrc);
 998     av_free(c->deintTemp);
 999     av_free(c->stdQPTable);
1000     av_free(c->nonBQPTable);
1001     av_free(c->forcedQPTable);
1002
1003     memset(c, 0, sizeof(PPContext));
1004
1005     av_free(c);
1006 }
1007
/**
 * Postprocess one picture.
 *
 * Plane 0 is always run through postProcess(). Planes 1 and 2 are run
 * through postProcess() when mode->chromMode is non-zero and are otherwise
 * copied from src to dst unchanged.
 *
 * @param src        source plane pointers
 * @param srcStride  line strides of the source planes
 * @param dst        destination plane pointers
 * @param dstStride  line strides of the destination planes
 * @param width      width of plane 0 in pixels
 * @param height     height of plane 0 in pixels
 * @param QP_store   per-macroblock quantiser table, or NULL to use a
 *                   forced table built here
 * @param QPStride   stride of QP_store; may be negative
 * @param vm         postprocessing mode (a PPMode behind an opaque pointer)
 * @param vc         postprocessing context (a PPContext behind an opaque pointer)
 * @param pict_type  picture type; the PP_PICT_TYPE_QP2 bit and the low three
 *                   bits are examined here
 */
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
                     uint8_t * dst[3], const int dstStride[3],
                     int width, int height,
                     const QP_STORE_T *QP_store,  int QPStride,
                     pp_mode *vm,  void *vc, int pict_type)
{
    /* picture size in 16x16 macroblocks, rounded up */
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    PPMode *mode = (PPMode*)vm;
    PPContext *c = (PPContext*)vc;
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
    int absQPStride = FFABS(QPStride);

    // c->stride and c->qpStride are always positive
    // Grow the context buffers if the caller's strides exceed what they were sized for.
    if(c->stride < minStride || c->qpStride < absQPStride)
        reallocBuffers(c, width, height,
                       FFMAX(minStride, c->stride),
                       FFMAX(c->qpStride, absQPStride));

    /* No QP table supplied, or a forced quantiser requested: use a single
     * row of mbWidth entries and a stride of 0 so every macroblock row
     * reads that same row. */
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
        int i;
        QP_store= c->forcedQPTable;
        absQPStride = QPStride = 0;
        if(mode->lumMode & FORCE_QUANT)
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
        else
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
    }

    /* PP_PICT_TYPE_QP2: store every QP value halved in c->stdQPTable and
     * use that table from here on.
     * NOTE(review): entries are read from QP_store at non-negative offsets
     * even when the incoming QPStride was negative -- confirm intended. */
    if(pict_type & PP_PICT_TYPE_QP2){
        int i;
        const int count= mbHeight * absQPStride;
        /* four entries per 32-bit word; the 0x7F mask keeps the shift from
         * leaking bits between neighbouring bytes
         * (assumes QP_STORE_T is one byte wide -- TODO confirm) */
        for(i=0; i<(count>>2); i++){
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
        }
        /* remaining entries that did not fill a whole word */
        for(i<<=2; i<count; i++){
            c->stdQPTable[i] = QP_store[i]>>1;
        }
        QP_store= c->stdQPTable;
        QPStride= absQPStride;
    }

    /* disabled debug dump of the QP table */
    if(0){
        int x,y;
        for(y=0; y<mbHeight; y++){
            for(x=0; x<mbWidth; x++){
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
            }
            av_log(c, AV_LOG_INFO, "\n");
        }
        av_log(c, AV_LOG_INFO, "\n");
    }

    /* For every picture type other than 3, keep a copy of the QP values
     * masked to 6 bits in c->nonBQPTable
     * (presumably type 3 denotes a B picture -- confirm). */
    if((pict_type&7)!=3){
        if (QPStride >= 0){
            int i;
            const int count= mbHeight * QPStride;
            /* word-at-a-time copy, then the tail, as above */
            for(i=0; i<(count>>2); i++){
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
            }
            for(i<<=2; i<count; i++){
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
            }
        } else {
            /* negative stride: copy row by row, reading with the signed
             * stride and writing with its absolute value */
            int i,j;
            for(i=0; i<mbHeight; i++) {
                for(j=0; j<absQPStride; j++) {
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
                }
            }
        }
    }

    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
           mode->lumMode, mode->chromMode);

    /* plane 0 at full resolution */
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
                width, height, QP_store, QPStride, 0, mode, c);

    /* planes 1 and 2 use the subsampled dimensions */
    width  = (width )>>c->hChromaSubSample;
    height = (height)>>c->vChromaSubSample;

    if(mode->chromMode){
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
                    width, height, QP_store, QPStride, 1, mode, c);
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
                    width, height, QP_store, QPStride, 2, mode, c);
    }
    /* no chroma filtering: copy planes 1 and 2 through unchanged */
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
        /* matching strides: hand each whole plane to linecpy() */
        linecpy(dst[1], src[1], height, srcStride[1]);
        linecpy(dst[2], src[2], height, srcStride[2]);
    }else{
        /* differing strides: copy width bytes per row */
        int y;
        for(y=0; y<height; y++){
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
        }
    }
}