git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
   65 make the mainloop more flexible (variable number of blocks at once;
   66         the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include "libavutil/avassert.h"
  79 #include <inttypes.h>
  80 #include <stdio.h>
  81 #include <stdlib.h>
  82 #include <string.h>
  83 //#undef HAVE_MMXEXT_INLINE
  84 //#define HAVE_AMD3DNOW_INLINE
  85 //#undef HAVE_MMX_INLINE
  86 //#undef ARCH_X86
  87 //#define DEBUG_BRIGHTNESS
  88 #include "postprocess.h"
  89 #include "postprocess_internal.h"
  90 #include "libavutil/avstring.h"
  91
  92 #include "libavutil/ffversion.h"
  93 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
  94
  95 unsigned postproc_version(void)
  96 {
  97     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
  98     return LIBPOSTPROC_VERSION_INT;
  99 }
 100
 101 const char *postproc_configuration(void)
 102 {
 103     return FFMPEG_CONFIGURATION;
 104 }
 105
 106 const char *postproc_license(void)
 107 {
 108 #define LICENSE_PREFIX "libpostproc license: "
 109     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 110 }
 111
 112 #if HAVE_ALTIVEC_H
 113 #include <altivec.h>
 114 #endif
 115
 116 #define GET_MODE_BUFFER_SIZE 500
 117 #define OPTIONS_ARRAY_SIZE 10
 118 #define BLOCK_SIZE 8
 119 #define TEMP_STRIDE 8
 120 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 121
 122 #if ARCH_X86 && HAVE_INLINE_ASM
 123 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
 124 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
 125 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
 126 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
 127 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
 128 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
 129 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
 130 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
 131 #endif
 132
 133 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 134
 135
 136 static const struct PPFilter filters[]=
 137 {
 138     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 139     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 140 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 141     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 142     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 143     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 144     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 145     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 146     {"dr", "dering",                1, 5, 6, DERING},
 147     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 148     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 149     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 150     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 151     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 152     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 153     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 154     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 155     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 156     {"be", "bitexact",              1, 0, 0, BITEXACT},
 157     {"vi", "visualize",             1, 0, 0, VISUALIZE},
 158     {NULL, NULL,0,0,0,0} //End Marker
 159 };
 160
 161 static const char * const replaceTable[]=
 162 {
 163     "default",      "hb:a,vb:a,dr:a",
 164     "de",           "hb:a,vb:a,dr:a",
 165     "fast",         "h1:a,v1:a,dr:a",
 166     "fa",           "h1:a,v1:a,dr:a",
 167     "ac",           "ha:a:128:7,va:a,dr:a",
 168     NULL //End Marker
 169 };
 170
 171
 172 #if ARCH_X86 && HAVE_INLINE_ASM
 173 static inline void prefetchnta(const void *p)
 174 {
 175     __asm__ volatile(   "prefetchnta (%0)\n\t"
 176         : : "r" (p)
 177     );
 178 }
 179
 180 static inline void prefetcht0(const void *p)
 181 {
 182     __asm__ volatile(   "prefetcht0 (%0)\n\t"
 183         : : "r" (p)
 184     );
 185 }
 186
 187 static inline void prefetcht1(const void *p)
 188 {
 189     __asm__ volatile(   "prefetcht1 (%0)\n\t"
 190         : : "r" (p)
 191     );
 192 }
 193
 194 static inline void prefetcht2(const void *p)
 195 {
 196     __asm__ volatile(   "prefetcht2 (%0)\n\t"
 197         : : "r" (p)
 198     );
 199 }
 200 #endif
 201
 202 /* The horizontal functions exist only in C because the MMX
 203  * code is faster with vertical filters and transposing. */
 204
 205 /**
 206  * Check if the given 8x8 Block is mostly "flat"
 207  */
 208 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
 209 {
 210     int numEq= 0;
 211     int y;
 212     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 213     const int dcThreshold= dcOffset*2 + 1;
 214
 215     for(y=0; y<BLOCK_SIZE; y++){
 216         numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
 217         numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
 218         numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
 219         numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
 220         numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
 221         numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
 222         numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
 223         src+= stride;
 224     }
 225     return numEq > c->ppMode.flatnessThreshold;
 226 }
 227
 228 /**
 229  * Check if the middle 8x8 Block in the given 8x16 block is flat
 230  */
 231 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
 232 {
 233     int numEq= 0;
 234     int y;
 235     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 236     const int dcThreshold= dcOffset*2 + 1;
 237
 238     src+= stride*4; // src points to begin of the 8x8 Block
 239     for(y=0; y<BLOCK_SIZE-1; y++){
 240         numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
 241         numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
 242         numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
 243         numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
 244         numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
 245         numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
 246         numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
 247         numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
 248         src+= stride;
 249     }
 250     return numEq > c->ppMode.flatnessThreshold;
 251 }
 252
 253 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
 254 {
 255     int i;
 256     for(i=0; i<2; i++){
 257         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 258         src += stride;
 259         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 260         src += stride;
 261         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 262         src += stride;
 263         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 264         src += stride;
 265     }
 266     return 1;
 267 }
 268
 269 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
 270 {
 271     int x;
 272     src+= stride*4;
 273     for(x=0; x<BLOCK_SIZE; x+=4){
 274         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 275         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 276         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 277         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 278     }
 279     return 1;
 280 }
 281
 282 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
 283 {
 284     if( isHorizDC_C(src, stride, c) ){
 285         return isHorizMinMaxOk_C(src, stride, c->QP);
 286     }else{
 287         return 2;
 288     }
 289 }
 290
 291 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
 292 {
 293     if( isVertDC_C(src, stride, c) ){
 294         return isVertMinMaxOk_C(src, stride, c->QP);
 295     }else{
 296         return 2;
 297     }
 298 }
 299
 300 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
 301 {
 302     int y;
 303     for(y=0; y<BLOCK_SIZE; y++){
 304         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 305
 306         if(FFABS(middleEnergy) < 8*c->QP){
 307             const int q=(dst[3] - dst[4])/2;
 308             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 309             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 310
 311             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 312             d= FFMAX(d, 0);
 313
 314             d= (5*d + 32) >> 6;
 315             d*= FFSIGN(-middleEnergy);
 316
 317             if(q>0)
 318             {
 319                 d = FFMAX(d, 0);
 320                 d = FFMIN(d, q);
 321             }
 322             else
 323             {
 324                 d = FFMIN(d, 0);
 325                 d = FFMAX(d, q);
 326             }
 327
 328             dst[3]-= d;
 329             dst[4]+= d;
 330         }
 331         dst+= stride;
 332     }
 333 }
 334
 335 /**
 336  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 337  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 338  */
 339 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
 340 {
 341     int y;
 342     for(y=0; y<BLOCK_SIZE; y++){
 343         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 344         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 345
 346         int sums[10];
 347         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 348         sums[1] = sums[0] - first  + dst[3];
 349         sums[2] = sums[1] - first  + dst[4];
 350         sums[3] = sums[2] - first  + dst[5];
 351         sums[4] = sums[3] - first  + dst[6];
 352         sums[5] = sums[4] - dst[0] + dst[7];
 353         sums[6] = sums[5] - dst[1] + last;
 354         sums[7] = sums[6] - dst[2] + last;
 355         sums[8] = sums[7] - dst[3] + last;
 356         sums[9] = sums[8] - dst[4] + last;
 357
 358         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 359         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 360         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 361         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 362         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 363         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 364         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 365         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 366
 367         dst+= stride;
 368     }
 369 }
 370
 371 /**
 372  * Experimental Filter 1 (Horizontal)
 373  * will not damage linear gradients
 374  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 375  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 376  * MMX2 version does correct clipping C version does not
 377  * not identical with the vertical one
 378  */
 379 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 380 {
 381     int y;
 382     static uint64_t lut[256];
 383     if(!lut[255])
 384     {
 385         int i;
 386         for(i=0; i<256; i++)
 387         {
 388             int v= i < 128 ? 2*i : 2*(i-256);
 389 /*
 390 //Simulate 112242211 9-Tap filter
 391             uint64_t a= (v/16)  & 0xFF;
 392             uint64_t b= (v/8)   & 0xFF;
 393             uint64_t c= (v/4)   & 0xFF;
 394             uint64_t d= (3*v/8) & 0xFF;
 395 */
 396 //Simulate piecewise linear interpolation
 397             uint64_t a= (v/16)   & 0xFF;
 398             uint64_t b= (v*3/16) & 0xFF;
 399             uint64_t c= (v*5/16) & 0xFF;
 400             uint64_t d= (7*v/16) & 0xFF;
 401             uint64_t A= (0x100 - a)&0xFF;
 402             uint64_t B= (0x100 - b)&0xFF;
 403             uint64_t C= (0x100 - c)&0xFF;
 404             uint64_t D= (0x100 - c)&0xFF;
 405
 406             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 407                        (D<<24) | (C<<16) | (B<<8)  | (A);
 408             //lut[i] = (v<<32) | (v<<24);
 409         }
 410     }
 411
 412     for(y=0; y<BLOCK_SIZE; y++){
 413         int a= src[1] - src[2];
 414         int b= src[3] - src[4];
 415         int c= src[5] - src[6];
 416
 417         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 418
 419         if(d < QP){
 420             int v = d * FFSIGN(-b);
 421
 422             src[1] +=v/8;
 423             src[2] +=v/4;
 424             src[3] +=3*v/8;
 425             src[4] -=3*v/8;
 426             src[5] -=v/4;
 427             src[6] -=v/8;
 428         }
 429         src+=stride;
 430     }
 431 }
 432
 433 /**
 434  * accurate deblock filter
 435  */
 436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
 437                                             int stride, const PPContext *c, int mode)
 438 {
 439     int y;
 440     const int QP= c->QP;
 441     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 442     const int dcThreshold= dcOffset*2 + 1;
 443 //START_TIMER
 444     src+= step*4; // src points to begin of the 8x8 Block
 445     for(y=0; y<8; y++){
 446         int numEq= 0;
 447
 448         numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
 449         numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
 450         numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
 451         numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
 452         numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
 453         numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
 454         numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
 455         numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
 456         numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
 457         if(numEq > c->ppMode.flatnessThreshold){
 458             int min, max, x;
 459
 460             if(src[0] > src[step]){
 461                 max= src[0];
 462                 min= src[step];
 463             }else{
 464                 max= src[step];
 465                 min= src[0];
 466             }
 467             for(x=2; x<8; x+=2){
 468                 if(src[x*step] > src[(x+1)*step]){
 469                         if(src[x    *step] > max) max= src[ x   *step];
 470                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 471                 }else{
 472                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 473                         if(src[ x   *step] < min) min= src[ x   *step];
 474                 }
 475             }
 476             if(max-min < 2*QP){
 477                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 478                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 479
 480                 int sums[10];
 481                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 482                 sums[1] = sums[0] - first       + src[3*step];
 483                 sums[2] = sums[1] - first       + src[4*step];
 484                 sums[3] = sums[2] - first       + src[5*step];
 485                 sums[4] = sums[3] - first       + src[6*step];
 486                 sums[5] = sums[4] - src[0*step] + src[7*step];
 487                 sums[6] = sums[5] - src[1*step] + last;
 488                 sums[7] = sums[6] - src[2*step] + last;
 489                 sums[8] = sums[7] - src[3*step] + last;
 490                 sums[9] = sums[8] - src[4*step] + last;
 491
 492                 if (mode & VISUALIZE) {
 493                     src[0*step] =
 494                     src[1*step] =
 495                     src[2*step] =
 496                     src[3*step] =
 497                     src[4*step] =
 498                     src[5*step] =
 499                     src[6*step] =
 500                     src[7*step] = 128;
 501                 }
 502                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 503                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 504                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 505                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 506                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 507                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 508                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 509                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 510             }
 511         }else{
 512             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 513
 514             if(FFABS(middleEnergy) < 8*QP){
 515                 const int q=(src[3*step] - src[4*step])/2;
 516                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 517                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 518
 519                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 520                 d= FFMAX(d, 0);
 521
 522                 d= (5*d + 32) >> 6;
 523                 d*= FFSIGN(-middleEnergy);
 524
 525                 if(q>0){
 526                     d = FFMAX(d, 0);
 527                     d = FFMIN(d, q);
 528                 }else{
 529                     d = FFMIN(d, 0);
 530                     d = FFMAX(d, q);
 531                 }
 532
 533                 if ((mode & VISUALIZE) && d) {
 534                     d= (d < 0) ? 32 : -32;
 535                     src[3*step]= av_clip_uint8(src[3*step] - d);
 536                     src[4*step]= av_clip_uint8(src[4*step] + d);
 537                     d = 0;
 538                 }
 539
 540                 src[3*step]-= d;
 541                 src[4*step]+= d;
 542             }
 543         }
 544
 545         src += stride;
 546     }
 547 /*if(step==16){
 548     STOP_TIMER("step16")
 549 }else{
 550     STOP_TIMER("stepX")
 551 }*/
 552 }
 553
  554 //Note: we have C, MMX, MMX2, 3DNow, SSE2 and AltiVec versions; there is no combined 3DNow+MMX2 one
 555 //Plain C versions
 556 //we always compile C for testing which needs bitexactness
 557 #define TEMPLATE_PP_C 1
 558 #include "postprocess_template.c"
 559
 560 #if HAVE_ALTIVEC
 561 #   define TEMPLATE_PP_ALTIVEC 1
 562 #   include "postprocess_altivec_template.c"
 563 #   include "postprocess_template.c"
 564 #endif
 565
 566 #if ARCH_X86 && HAVE_INLINE_ASM
 567 #    if CONFIG_RUNTIME_CPUDETECT
 568 #        define TEMPLATE_PP_MMX 1
 569 #        include "postprocess_template.c"
 570 #        define TEMPLATE_PP_MMXEXT 1
 571 #        include "postprocess_template.c"
 572 #        define TEMPLATE_PP_3DNOW 1
 573 #        include "postprocess_template.c"
 574 #        define TEMPLATE_PP_SSE2 1
 575 #        include "postprocess_template.c"
 576 #    else
 577 #        if HAVE_SSE2_INLINE
 578 #            define TEMPLATE_PP_SSE2 1
 579 #            include "postprocess_template.c"
 580 #        elif HAVE_MMXEXT_INLINE
 581 #            define TEMPLATE_PP_MMXEXT 1
 582 #            include "postprocess_template.c"
 583 #        elif HAVE_AMD3DNOW_INLINE
 584 #            define TEMPLATE_PP_3DNOW 1
 585 #            include "postprocess_template.c"
 586 #        elif HAVE_MMX_INLINE
 587 #            define TEMPLATE_PP_MMX 1
 588 #            include "postprocess_template.c"
 589 #        endif
 590 #    endif
 591 #endif
 592
 593 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 594                       const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
 595
 596 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 597         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
 598 {
 599     pp_fn pp = postProcess_C;
 600     PPContext *c= (PPContext *)vc;
 601     PPMode *ppMode= (PPMode *)vm;
 602     c->ppMode= *ppMode; //FIXME
 603
 604     if (!(ppMode->lumMode & BITEXACT)) {
 605 #if CONFIG_RUNTIME_CPUDETECT
 606 #if ARCH_X86 && HAVE_INLINE_ASM
 607         // ordered per speed fastest first
 608         if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
 609         else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
 610         else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
 611         else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
 612 #elif HAVE_ALTIVEC
 613         if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
 614 #endif
 615 #else /* CONFIG_RUNTIME_CPUDETECT */
 616 #if     HAVE_SSE2_INLINE
 617         pp = postProcess_SSE2;
 618 #elif   HAVE_MMXEXT_INLINE
 619         pp = postProcess_MMX2;
 620 #elif HAVE_AMD3DNOW_INLINE
 621         pp = postProcess_3DNow;
 622 #elif HAVE_MMX_INLINE
 623         pp = postProcess_MMX;
 624 #elif HAVE_ALTIVEC
 625         pp = postProcess_altivec;
 626 #endif
 627 #endif /* !CONFIG_RUNTIME_CPUDETECT */
 628     }
 629
 630     pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 631 }
 632
 633 /* -pp Command line Help
 634 */
 635 const char pp_help[] =
 636 "Available postprocessing filters:\n"
 637 "Filters                        Options\n"
 638 "short  long name       short   long option     Description\n"
 639 "*      *               a       autoq           CPU power dependent enabler\n"
 640 "                       c       chrom           chrominance filtering enabled\n"
 641 "                       y       nochrom         chrominance filtering disabled\n"
 642 "                       n       noluma          luma filtering disabled\n"
 643 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 644 "       1. difference factor: default=32, higher -> more deblocking\n"
 645 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 646 "                       the h & v deblocking filters share these\n"
 647 "                       so you can't set different thresholds for h / v\n"
 648 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 649 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 650 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 651 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 652 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 653 "dr     dering                                  deringing filter\n"
 654 "al     autolevels                              automatic brightness / contrast\n"
 655 "                       f        fullyrange     stretch luminance to (0..255)\n"
 656 "lb     linblenddeint                           linear blend deinterlacer\n"
 657 "li     linipoldeint                            linear interpolating deinterlace\n"
 658 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 659 "md     mediandeint                             median deinterlacer\n"
 660 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 661 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 662 "de     default                                 hb:a,vb:a,dr:a\n"
 663 "fa     fast                                    h1:a,v1:a,dr:a\n"
 664 "ac                                             ha:a:128:7,va:a,dr:a\n"
 665 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 666 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 667 "fq     forceQuant      <quantizer>             force quantizer\n"
 668 "Usage:\n"
 669 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 670 "long form example:\n"
 671 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 672 "short form example:\n"
 673 "vb:a/hb:a/lb                                   de,-vb\n"
 674 "more examples:\n"
 675 "tn:64:128:256\n"
 676 "\n"
 677 ;
 678
 679 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 680 {
 681     char temp[GET_MODE_BUFFER_SIZE];
 682     char *p= temp;
 683     static const char filterDelimiters[] = ",/";
 684     static const char optionDelimiters[] = ":|";
 685     struct PPMode *ppMode;
 686     char *filterToken;
 687
 688     if (!name)  {
 689         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
 690         return NULL;
 691     }
 692
 693     if (!strcmp(name, "help")) {
 694         const char *p;
 695         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
 696             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
 697             av_log(NULL, AV_LOG_INFO, "%s", temp);
 698         }
 699         return NULL;
 700     }
 701
 702     ppMode= av_malloc(sizeof(PPMode));
 703
 704     ppMode->lumMode= 0;
 705     ppMode->chromMode= 0;
 706     ppMode->maxTmpNoise[0]= 700;
 707     ppMode->maxTmpNoise[1]= 1500;
 708     ppMode->maxTmpNoise[2]= 3000;
 709     ppMode->maxAllowedY= 234;
 710     ppMode->minAllowedY= 16;
 711     ppMode->baseDcDiff= 256/8;
 712     ppMode->flatnessThreshold= 56-16-1;
 713     ppMode->maxClippedThreshold= 0.01;
 714     ppMode->error=0;
 715
 716     memset(temp, 0, GET_MODE_BUFFER_SIZE);
 717     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
 718
 719     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 720
 721     for(;;){
 722         const char *filterName;
 723         int q= 1000000; //PP_QUALITY_MAX;
 724         int chrom=-1;
 725         int luma=-1;
 726         const char *option;
 727         const char *options[OPTIONS_ARRAY_SIZE];
 728         int i;
 729         int filterNameOk=0;
 730         int numOfUnknownOptions=0;
 731         int enable=1; //does the user want us to enabled or disabled the filter
 732         char *tokstate;
 733
 734         filterToken= av_strtok(p, filterDelimiters, &tokstate);
 735         if(!filterToken) break;
 736         p+= strlen(filterToken) + 1; // p points to next filterToken
 737         filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
 738         if (!filterName) {
 739             ppMode->error++;
 740             break;
 741         }
 742         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 743
 744         if(*filterName == '-'){
 745             enable=0;
 746             filterName++;
 747         }
 748
 749         for(;;){ //for all options
 750             option= av_strtok(NULL, optionDelimiters, &tokstate);
 751             if(!option) break;
 752
 753             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 754             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 755             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 756             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 757             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 758             else{
 759                 options[numOfUnknownOptions] = option;
 760                 numOfUnknownOptions++;
 761             }
 762             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 763         }
 764         options[numOfUnknownOptions] = NULL;
 765
 766         /* replace stuff from the replace Table */
 767         for(i=0; replaceTable[2*i]; i++){
 768             if(!strcmp(replaceTable[2*i], filterName)){
 769                 int newlen= strlen(replaceTable[2*i + 1]);
 770                 int plen;
 771                 int spaceLeft;
 772
 773                 p--, *p=',';
 774
 775                 plen= strlen(p);
 776                 spaceLeft= p - temp + plen;
 777                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
 778                     ppMode->error++;
 779                     break;
 780                 }
 781                 memmove(p + newlen, p, plen+1);
 782                 memcpy(p, replaceTable[2*i + 1], newlen);
 783                 filterNameOk=1;
 784             }
 785         }
 786
 787         for(i=0; filters[i].shortName; i++){
 788             if(   !strcmp(filters[i].longName, filterName)
 789                || !strcmp(filters[i].shortName, filterName)){
 790                 ppMode->lumMode &= ~filters[i].mask;
 791                 ppMode->chromMode &= ~filters[i].mask;
 792
 793                 filterNameOk=1;
 794                 if(!enable) break; // user wants to disable it
 795
 796                 if(q >= filters[i].minLumQuality && luma)
 797                     ppMode->lumMode|= filters[i].mask;
 798                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 799                     if(q >= filters[i].minChromQuality)
 800                             ppMode->chromMode|= filters[i].mask;
 801
 802                 if(filters[i].mask == LEVEL_FIX){
 803                     int o;
 804                     ppMode->minAllowedY= 16;
 805                     ppMode->maxAllowedY= 234;
 806                     for(o=0; options[o]; o++){
 807                         if(  !strcmp(options[o],"fullyrange")
 808                            ||!strcmp(options[o],"f")){
 809                             ppMode->minAllowedY= 0;
 810                             ppMode->maxAllowedY= 255;
 811                             numOfUnknownOptions--;
 812                         }
 813                     }
 814                 }
 815                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 816                 {
 817                     int o;
 818                     int numOfNoises=0;
 819
 820                     for(o=0; options[o]; o++){
 821                         char *tail;
 822                         ppMode->maxTmpNoise[numOfNoises]=
 823                             strtol(options[o], &tail, 0);
 824                         if(tail!=options[o]){
 825                             numOfNoises++;
 826                             numOfUnknownOptions--;
 827                             if(numOfNoises >= 3) break;
 828                         }
 829                     }
 830                 }
 831                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 832                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 833                     int o;
 834
 835                     for(o=0; options[o] && o<2; o++){
 836                         char *tail;
 837                         int val= strtol(options[o], &tail, 0);
 838                         if(tail==options[o]) break;
 839
 840                         numOfUnknownOptions--;
 841                         if(o==0) ppMode->baseDcDiff= val;
 842                         else ppMode->flatnessThreshold= val;
 843                     }
 844                 }
 845                 else if(filters[i].mask == FORCE_QUANT){
 846                     int o;
 847                     ppMode->forcedQuant= 15;
 848
 849                     for(o=0; options[o] && o<1; o++){
 850                         char *tail;
 851                         int val= strtol(options[o], &tail, 0);
 852                         if(tail==options[o]) break;
 853
 854                         numOfUnknownOptions--;
 855                         ppMode->forcedQuant= val;
 856                     }
 857                 }
 858             }
 859         }
 860         if(!filterNameOk) ppMode->error++;
 861         ppMode->error += numOfUnknownOptions;
 862     }
 863
 864     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 865     if(ppMode->error){
 866         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 867         av_free(ppMode);
 868         return NULL;
 869     }
 870     return ppMode;
 871 }
 872
 873 void pp_free_mode(pp_mode *mode){
 874     av_free(mode);
 875 }
 876
 877 static void reallocAlign(void **p, int size){
 878     av_free(*p);
 879     *p= av_mallocz(size);
 880 }
 881
 882 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 883     int mbWidth = (width+15)>>4;
 884     int mbHeight= (height+15)>>4;
 885     int i;
 886
 887     c->stride= stride;
 888     c->qpStride= qpStride;
 889
 890     reallocAlign((void **)&c->tempDst, stride*24+32);
 891     reallocAlign((void **)&c->tempSrc, stride*24);
 892     reallocAlign((void **)&c->tempBlocks, 2*16*8);
 893     reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
 894     for(i=0; i<256; i++)
 895             c->yHistogram[i]= width*height/64*15/256;
 896
 897     for(i=0; i<3; i++){
 898         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
 899         reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
 900         reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 901     }
 902
 903     reallocAlign((void **)&c->deintTemp, 2*width+32);
 904     reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 905     reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 906     reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
 907 }
 908
 909 static const char * context_to_name(void * ptr) {
 910     return "postproc";
 911 }
 912
 913 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 914
 915 pp_context *pp_get_context(int width, int height, int cpuCaps){
 916     PPContext *c= av_malloc(sizeof(PPContext));
 917     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 918     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 919
 920     memset(c, 0, sizeof(PPContext));
 921     c->av_class = &av_codec_context_class;
 922     if(cpuCaps&PP_FORMAT){
 923         c->hChromaSubSample= cpuCaps&0x3;
 924         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 925     }else{
 926         c->hChromaSubSample= 1;
 927         c->vChromaSubSample= 1;
 928     }
 929     if (cpuCaps & PP_CPU_CAPS_AUTO) {
 930         c->cpuCaps = av_get_cpu_flags();
 931     } else {
 932         c->cpuCaps = 0;
 933         if (cpuCaps & PP_CPU_CAPS_MMX)      c->cpuCaps |= AV_CPU_FLAG_MMX;
 934         if (cpuCaps & PP_CPU_CAPS_MMX2)     c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
 935         if (cpuCaps & PP_CPU_CAPS_3DNOW)    c->cpuCaps |= AV_CPU_FLAG_3DNOW;
 936         if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
 937     }
 938
 939     reallocBuffers(c, width, height, stride, qpStride);
 940
 941     c->frameNum=-1;
 942
 943     return c;
 944 }
 945
 946 void pp_free_context(void *vc){
 947     PPContext *c = (PPContext*)vc;
 948     int i;
 949
 950     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
 951         av_free(c->tempBlurred[i]);
 952     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
 953         av_free(c->tempBlurredPast[i]);
 954
 955     av_free(c->tempBlocks);
 956     av_free(c->yHistogram);
 957     av_free(c->tempDst);
 958     av_free(c->tempSrc);
 959     av_free(c->deintTemp);
 960     av_free(c->stdQPTable);
 961     av_free(c->nonBQPTable);
 962     av_free(c->forcedQPTable);
 963
 964     memset(c, 0, sizeof(PPContext));
 965
 966     av_free(c);
 967 }
 968
 969 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 970                      uint8_t * dst[3], const int dstStride[3],
 971                      int width, int height,
 972                      const QP_STORE_T *QP_store,  int QPStride,
 973                      pp_mode *vm,  void *vc, int pict_type)
 974 {
 975     int mbWidth = (width+15)>>4;
 976     int mbHeight= (height+15)>>4;
 977     PPMode *mode = vm;
 978     PPContext *c = vc;
 979     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
 980     int absQPStride = FFABS(QPStride);
 981
 982     // c->stride and c->QPStride are always positive
 983     if(c->stride < minStride || c->qpStride < absQPStride)
 984         reallocBuffers(c, width, height,
 985                        FFMAX(minStride, c->stride),
 986                        FFMAX(c->qpStride, absQPStride));
 987
 988     if(!QP_store || (mode->lumMode & FORCE_QUANT)){
 989         int i;
 990         QP_store= c->forcedQPTable;
 991         absQPStride = QPStride = 0;
 992         if(mode->lumMode & FORCE_QUANT)
 993             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
 994         else
 995             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
 996     }
 997
 998     if(pict_type & PP_PICT_TYPE_QP2){
 999         int i;
1000         const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1001         for(i=0; i<(count>>2); i++){
1002             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1003         }
1004         for(i<<=2; i<count; i++){
1005             c->stdQPTable[i] = QP_store[i]>>1;
1006         }
1007         QP_store= c->stdQPTable;
1008         QPStride= absQPStride;
1009     }
1010
1011     if(0){
1012         int x,y;
1013         for(y=0; y<mbHeight; y++){
1014             for(x=0; x<mbWidth; x++){
1015                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1016             }
1017             av_log(c, AV_LOG_INFO, "\n");
1018         }
1019         av_log(c, AV_LOG_INFO, "\n");
1020     }
1021
1022     if((pict_type&7)!=3){
1023         if (QPStride >= 0){
1024             int i;
1025             const int count= FFMAX(mbHeight * QPStride, mbWidth);
1026             for(i=0; i<(count>>2); i++){
1027                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1028             }
1029             for(i<<=2; i<count; i++){
1030                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1031             }
1032         } else {
1033             int i,j;
1034             for(i=0; i<mbHeight; i++) {
1035                 for(j=0; j<absQPStride; j++) {
1036                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1037                 }
1038             }
1039         }
1040     }
1041
1042     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1043            mode->lumMode, mode->chromMode);
1044
1045     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1046                 width, height, QP_store, QPStride, 0, mode, c);
1047
1048     if (!(src[1] && src[2] && dst[1] && dst[2]))
1049         return;
1050
1051     width  = (width )>>c->hChromaSubSample;
1052     height = (height)>>c->vChromaSubSample;
1053
1054     if(mode->chromMode){
1055         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1056                     width, height, QP_store, QPStride, 1, mode, c);
1057         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1058                     width, height, QP_store, QPStride, 2, mode, c);
1059     }
1060     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1061         linecpy(dst[1], src[1], height, srcStride[1]);
1062         linecpy(dst[2], src[2], height, srcStride[2]);
1063     }else{
1064         int y;
1065         for(y=0; y<height; y++){
1066             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1067             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1068         }
1069     }
1070 }