git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
  65 make the mainloop more flexible (variable number of blocks at once
  66         (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include "libavutil/avassert.h"
  79 #include <inttypes.h>
  80 #include <stdio.h>
  81 #include <stdlib.h>
  82 #include <string.h>
  83 //#undef HAVE_MMXEXT_INLINE
  84 //#define HAVE_AMD3DNOW_INLINE
  85 //#undef HAVE_MMX_INLINE
  86 //#undef ARCH_X86
  87 //#define DEBUG_BRIGHTNESS
  88 #include "postprocess.h"
  89 #include "postprocess_internal.h"
  90 #include "libavutil/avstring.h"
  91
  92 unsigned postproc_version(void)
  93 {
  94     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
  95     return LIBPOSTPROC_VERSION_INT;
  96 }
  97
  98 const char *postproc_configuration(void)
  99 {
 100     return FFMPEG_CONFIGURATION;
 101 }
 102
 103 const char *postproc_license(void)
 104 {
 105 #define LICENSE_PREFIX "libpostproc license: "
 106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 107 }
 108
 109 #if HAVE_ALTIVEC_H
 110 #include <altivec.h>
 111 #endif
 112
 113 #define GET_MODE_BUFFER_SIZE 500
 114 #define OPTIONS_ARRAY_SIZE 10
 115 #define BLOCK_SIZE 8
 116 #define TEMP_STRIDE 8
 117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 118
 119 #if ARCH_X86 && HAVE_INLINE_ASM
 120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
 121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
 122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
 123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
 124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
 125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
 126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
 127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
 128 #endif
 129
 130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 131
 132
/**
 * Table of the filters that can be selected by name in a mode string.
 *
 * The mode-string parser in this file walks the table until it reaches the
 * entry whose shortName is NULL, compares the requested name against both
 * name strings, and reads the chromDefault, minLumQuality, minChromQuality
 * and mask members of the matching entry.
 *
 * NOTE(review): the member order of struct PPFilter is declared in
 * postprocess_internal.h and is not visible here; presumably each row is
 * { shortName, longName, chromDefault, minLumQuality, minChromQuality, mask }
 * -- confirm against the struct definition.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  Disabled RK1 deblocking entries:
    {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",              1, 0, 0, BITEXACT},
    {"vi", "visualize",             1, 0, 0, VISUALIZE},
    {NULL, NULL,0,0,0,0} //End Marker
};
 157
/**
 * Alias table for the mode-string parser.
 *
 * Entries come in pairs: replaceTable[2*i] is an alias name and
 * replaceTable[2*i + 1] is the filter list that is spliced into the mode
 * string in its place. The list ends at the first NULL alias.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
 167
 168
 169 #if ARCH_X86 && HAVE_INLINE_ASM
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 * The pointer is only handed to the instruction in a register; the C code
 * itself never dereferences it.
 */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
 176
/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 * The pointer is only handed to the instruction in a register; the C code
 * itself never dereferences it.
 */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
 183
/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 * The pointer is only handed to the instruction in a register; the C code
 * itself never dereferences it.
 */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
 190
/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 * The pointer is only handed to the instruction in a register; the C code
 * itself never dereferences it.
 */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
 197 #endif
 198
 199 /* The horizontal functions exist only in C because the MMX
 200  * code is faster with vertical filters and transposing. */
 201
 202 /**
 203  * Check if the given 8x8 Block is mostly "flat"
 204  */
 205 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
 206 {
 207     int numEq= 0;
 208     int y;
 209     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 210     const int dcThreshold= dcOffset*2 + 1;
 211
 212     for(y=0; y<BLOCK_SIZE; y++){
 213         numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
 214         numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
 215         numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
 216         numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
 217         numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
 218         numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
 219         numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
 220         src+= stride;
 221     }
 222     return numEq > c->ppMode.flatnessThreshold;
 223 }
 224
 225 /**
 226  * Check if the middle 8x8 Block in the given 8x16 block is flat
 227  */
 228 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
 229 {
 230     int numEq= 0;
 231     int y;
 232     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 233     const int dcThreshold= dcOffset*2 + 1;
 234
 235     src+= stride*4; // src points to begin of the 8x8 Block
 236     for(y=0; y<BLOCK_SIZE-1; y++){
 237         numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
 238         numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
 239         numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
 240         numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
 241         numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
 242         numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
 243         numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
 244         numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
 245         src+= stride;
 246     }
 247     return numEq > c->ppMode.flatnessThreshold;
 248 }
 249
 250 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
 251 {
 252     int i;
 253     for(i=0; i<2; i++){
 254         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 255         src += stride;
 256         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 257         src += stride;
 258         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 259         src += stride;
 260         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 261         src += stride;
 262     }
 263     return 1;
 264 }
 265
 266 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
 267 {
 268     int x;
 269     src+= stride*4;
 270     for(x=0; x<BLOCK_SIZE; x+=4){
 271         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 272         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 273         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 274         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 275     }
 276     return 1;
 277 }
 278
 279 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
 280 {
 281     if( isHorizDC_C(src, stride, c) ){
 282         return isHorizMinMaxOk_C(src, stride, c->QP);
 283     }else{
 284         return 2;
 285     }
 286 }
 287
 288 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
 289 {
 290     if( isVertDC_C(src, stride, c) ){
 291         return isVertMinMaxOk_C(src, stride, c->QP);
 292     }else{
 293         return 2;
 294     }
 295 }
 296
 297 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
 298 {
 299     int y;
 300     for(y=0; y<BLOCK_SIZE; y++){
 301         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 302
 303         if(FFABS(middleEnergy) < 8*c->QP){
 304             const int q=(dst[3] - dst[4])/2;
 305             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 306             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 307
 308             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 309             d= FFMAX(d, 0);
 310
 311             d= (5*d + 32) >> 6;
 312             d*= FFSIGN(-middleEnergy);
 313
 314             if(q>0)
 315             {
 316                 d = FFMAX(d, 0);
 317                 d = FFMIN(d, q);
 318             }
 319             else
 320             {
 321                 d = FFMIN(d, 0);
 322                 d = FFMAX(d, q);
 323             }
 324
 325             dst[3]-= d;
 326             dst[4]+= d;
 327         }
 328         dst+= stride;
 329     }
 330 }
 331
 332 /**
 333  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 334  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 335  */
 336 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
 337 {
 338     int y;
 339     for(y=0; y<BLOCK_SIZE; y++){
 340         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 341         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 342
 343         int sums[10];
 344         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 345         sums[1] = sums[0] - first  + dst[3];
 346         sums[2] = sums[1] - first  + dst[4];
 347         sums[3] = sums[2] - first  + dst[5];
 348         sums[4] = sums[3] - first  + dst[6];
 349         sums[5] = sums[4] - dst[0] + dst[7];
 350         sums[6] = sums[5] - dst[1] + last;
 351         sums[7] = sums[6] - dst[2] + last;
 352         sums[8] = sums[7] - dst[3] + last;
 353         sums[9] = sums[8] - dst[4] + last;
 354
 355         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 356         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 357         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 358         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 359         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 360         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 361         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 362         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 363
 364         dst+= stride;
 365     }
 366 }
 367
 368 /**
 369  * Experimental Filter 1 (Horizontal)
 370  * will not damage linear gradients
 371  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 372  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 373  * MMX2 version does correct clipping C version does not
 374  * not identical with the vertical one
 375  */
 376 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 377 {
 378     int y;
 379     static uint64_t lut[256];
 380     if(!lut[255])
 381     {
 382         int i;
 383         for(i=0; i<256; i++)
 384         {
 385             int v= i < 128 ? 2*i : 2*(i-256);
 386 /*
 387 //Simulate 112242211 9-Tap filter
 388             uint64_t a= (v/16)  & 0xFF;
 389             uint64_t b= (v/8)   & 0xFF;
 390             uint64_t c= (v/4)   & 0xFF;
 391             uint64_t d= (3*v/8) & 0xFF;
 392 */
 393 //Simulate piecewise linear interpolation
 394             uint64_t a= (v/16)   & 0xFF;
 395             uint64_t b= (v*3/16) & 0xFF;
 396             uint64_t c= (v*5/16) & 0xFF;
 397             uint64_t d= (7*v/16) & 0xFF;
 398             uint64_t A= (0x100 - a)&0xFF;
 399             uint64_t B= (0x100 - b)&0xFF;
 400             uint64_t C= (0x100 - c)&0xFF;
 401             uint64_t D= (0x100 - c)&0xFF;
 402
 403             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 404                        (D<<24) | (C<<16) | (B<<8)  | (A);
 405             //lut[i] = (v<<32) | (v<<24);
 406         }
 407     }
 408
 409     for(y=0; y<BLOCK_SIZE; y++){
 410         int a= src[1] - src[2];
 411         int b= src[3] - src[4];
 412         int c= src[5] - src[6];
 413
 414         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 415
 416         if(d < QP){
 417             int v = d * FFSIGN(-b);
 418
 419             src[1] +=v/8;
 420             src[2] +=v/4;
 421             src[3] +=3*v/8;
 422             src[4] -=3*v/8;
 423             src[5] -=v/4;
 424             src[6] -=v/8;
 425         }
 426         src+=stride;
 427     }
 428 }
 429
/**
 * accurate deblock filter
 *
 * Processes 8 lines. Within a line, consecutive samples are `step` bytes
 * apart; consecutive lines are `stride` bytes apart, so the same code
 * serves both filtering directions depending on the step/stride values
 * passed in.
 *
 * Each line is classified on its own:
 *  - if enough neighbouring sample pairs are nearly equal (more than
 *    c->ppMode.flatnessThreshold) and the sample range is below 2*QP, the
 *    9-tap low pass also used by doHorizLowPass_C() is applied;
 *  - if the line is not flat, the default filter also used by
 *    doHorizDefFilter_C() is applied to samples 3 and 4.
 *
 * @param src    start of the first line; the filtered 8 samples begin
 *               4 steps further on
 * @param step   distance in bytes between two samples of a line
 * @param stride distance in bytes between two lines
 * @param c      context supplying QP, nonBQP and the ppMode thresholds
 * @param mode   filter mode flags; only VISUALIZE is examined here
 */
static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
                                            int stride, const PPContext *c, int mode)
{
    int y;
    const int QP= c->QP;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
//START_TIMER
    src+= step*4; // src points to begin of the 8x8 Block
    for(y=0; y<8; y++){
        int numEq= 0;

        // count the neighbouring pairs (samples -1..8) that differ by at most dcOffset
        numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
        numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
        if(numEq > c->ppMode.flatnessThreshold){
            // flat line: find min and max of samples 0..7, two samples at a time
            int min, max, x;

            if(src[0] > src[step]){
                max= src[0];
                min= src[step];
            }else{
                max= src[step];
                min= src[0];
            }
            for(x=2; x<8; x+=2){
                if(src[x*step] > src[(x+1)*step]){
                        if(src[x    *step] > max) max= src[ x   *step];
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
                }else{
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                        if(src[ x   *step] < min) min= src[ x   *step];
                }
            }
            if(max-min < 2*QP){
                // use the outside neighbours as edge values only if they are
                // within QP of the adjacent sample, else repeat the edge sample
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                // running window sums, all computed before any sample is overwritten
                int sums[10];
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                sums[1] = sums[0] - first       + src[3*step];
                sums[2] = sums[1] - first       + src[4*step];
                sums[3] = sums[2] - first       + src[5*step];
                sums[4] = sums[3] - first       + src[6*step];
                sums[5] = sums[4] - src[0*step] + src[7*step];
                sums[6] = sums[5] - src[1*step] + last;
                sums[7] = sums[6] - src[2*step] + last;
                sums[8] = sums[7] - src[3*step] + last;
                sums[9] = sums[8] - src[4*step] + last;

                // in VISUALIZE mode the centre tap below sees 128 instead of
                // the real sample; the sums above still hold the real samples
                if (mode & VISUALIZE) {
                    src[0*step] =
                    src[1*step] =
                    src[2*step] =
                    src[3*step] =
                    src[4*step] =
                    src[5*step] =
                    src[6*step] =
                    src[7*step] = 128;
                }
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
            }
        }else{
            // non-flat line: default filter on the step between samples 3 and 4
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

            if(FFABS(middleEnergy) < 8*QP){
                const int q=(src[3*step] - src[4*step])/2;
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                // clamp the correction to the range between 0 and q
                if(q>0){
                    d = FFMAX(d, 0);
                    d = FFMIN(d, q);
                }else{
                    d = FFMIN(d, 0);
                    d = FFMAX(d, q);
                }

                // VISUALIZE: replace a nonzero correction by a fixed, clipped
                // +-32 offset and zero d so the normal update below is a no-op
                if ((mode & VISUALIZE) && d) {
                    d= (d < 0) ? 32 : -32;
                    src[3*step]= av_clip_uint8(src[3*step] - d);
                    src[4*step]= av_clip_uint8(src[4*step] + d);
                    d = 0;
                }

                src[3*step]-= d;
                src[4*step]+= d;
            }
        }

        src += stride;
    }
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
 550
// Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one.
// Plain C versions:
// the C version is always compiled, because testing needs bit-exactness.
 554 #define TEMPLATE_PP_C 1
 555 #include "postprocess_template.c"
 556
 557 #if HAVE_ALTIVEC
 558 #   define TEMPLATE_PP_ALTIVEC 1
 559 #   include "postprocess_altivec_template.c"
 560 #   include "postprocess_template.c"
 561 #endif
 562
 563 #if ARCH_X86 && HAVE_INLINE_ASM
 564 #    if CONFIG_RUNTIME_CPUDETECT
 565 #        define TEMPLATE_PP_MMX 1
 566 #        include "postprocess_template.c"
 567 #        define TEMPLATE_PP_MMXEXT 1
 568 #        include "postprocess_template.c"
 569 #        define TEMPLATE_PP_3DNOW 1
 570 #        include "postprocess_template.c"
 571 #        define TEMPLATE_PP_SSE2 1
 572 #        include "postprocess_template.c"
 573 #    else
 574 #        if HAVE_SSE2_INLINE
 575 #            define TEMPLATE_PP_SSE2 1
 576 #            include "postprocess_template.c"
 577 #        elif HAVE_MMXEXT_INLINE
 578 #            define TEMPLATE_PP_MMXEXT 1
 579 #            include "postprocess_template.c"
 580 #        elif HAVE_AMD3DNOW_INLINE
 581 #            define TEMPLATE_PP_3DNOW 1
 582 #            include "postprocess_template.c"
 583 #        elif HAVE_MMX_INLINE
 584 #            define TEMPLATE_PP_MMX 1
 585 #            include "postprocess_template.c"
 586 #        endif
 587 #    endif
 588 #endif
 589
/**
 * Signature shared by the per-CPU postProcess_* implementations generated
 * from postprocess_template.c; postProcess() selects one of them through a
 * pointer of this type.
 */
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
 592
/**
 * Run the postprocessing of one plane with the best available
 * implementation.
 *
 * The mode pointed to by vm is copied into the context, then an
 * implementation is chosen: the plain C version by default and always when
 * BITEXACT is set in the luma mode; otherwise either by the CPU flags in
 * c->cpuCaps (CONFIG_RUNTIME_CPUDETECT) or by the compile-time HAVE_*
 * settings. All remaining arguments are passed through unchanged.
 *
 * @param vm opaque mode, cast to PPMode
 * @param vc opaque context, cast to PPContext
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
    pp_fn pp = postProcess_C;
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    // BITEXACT keeps the C implementation selected above
    if (!(ppMode->lumMode & BITEXACT)) {
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86 && HAVE_INLINE_ASM
        // ordered per speed fastest first
        if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
        else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
#endif
#else /* CONFIG_RUNTIME_CPUDETECT */
        // without runtime detection, the single compiled-in variant is used
#if     HAVE_SSE2_INLINE
        pp = postProcess_SSE2;
#elif   HAVE_MMXEXT_INLINE
        pp = postProcess_MMX2;
#elif HAVE_AMD3DNOW_INLINE
        pp = postProcess_3DNow;
#elif HAVE_MMX_INLINE
        pp = postProcess_MMX;
#elif HAVE_ALTIVEC
        pp = postProcess_altivec;
#endif
#endif /* !CONFIG_RUNTIME_CPUDETECT */
    }

    pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
}
 629
 630 /* -pp Command line Help
 631 */
 632 const char pp_help[] =
 633 "Available postprocessing filters:\n"
 634 "Filters                        Options\n"
 635 "short  long name       short   long option     Description\n"
 636 "*      *               a       autoq           CPU power dependent enabler\n"
 637 "                       c       chrom           chrominance filtering enabled\n"
 638 "                       y       nochrom         chrominance filtering disabled\n"
 639 "                       n       noluma          luma filtering disabled\n"
 640 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 641 "       1. difference factor: default=32, higher -> more deblocking\n"
 642 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 643 "                       the h & v deblocking filters share these\n"
 644 "                       so you can't set different thresholds for h / v\n"
 645 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 646 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 647 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 648 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 649 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 650 "dr     dering                                  deringing filter\n"
 651 "al     autolevels                              automatic brightness / contrast\n"
 652 "                       f        fullyrange     stretch luminance to (0..255)\n"
 653 "lb     linblenddeint                           linear blend deinterlacer\n"
 654 "li     linipoldeint                            linear interpolating deinterlace\n"
 655 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 656 "md     mediandeint                             median deinterlacer\n"
 657 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 658 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 659 "de     default                                 hb:a,vb:a,dr:a\n"
 660 "fa     fast                                    h1:a,v1:a,dr:a\n"
 661 "ac                                             ha:a:128:7,va:a,dr:a\n"
 662 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 663 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 664 "fq     forceQuant      <quantizer>             force quantizer\n"
 665 "Usage:\n"
 666 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 667 "long form example:\n"
 668 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 669 "short form example:\n"
 670 "vb:a/hb:a/lb                                   de,-vb\n"
 671 "more examples:\n"
 672 "tn:64:128:256\n"
 673 "\n"
 674 ;
 675
 676 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 677 {
 678     char temp[GET_MODE_BUFFER_SIZE];
 679     char *p= temp;
 680     static const char filterDelimiters[] = ",/";
 681     static const char optionDelimiters[] = ":|";
 682     struct PPMode *ppMode;
 683     char *filterToken;
 684
 685     if (!name)  {
 686         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
 687         return NULL;
 688     }
 689
 690     if (!strcmp(name, "help")) {
 691         const char *p;
 692         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
 693             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
 694             av_log(NULL, AV_LOG_INFO, "%s", temp);
 695         }
 696         return NULL;
 697     }
 698
 699     ppMode= av_malloc(sizeof(PPMode));
 700
 701     ppMode->lumMode= 0;
 702     ppMode->chromMode= 0;
 703     ppMode->maxTmpNoise[0]= 700;
 704     ppMode->maxTmpNoise[1]= 1500;
 705     ppMode->maxTmpNoise[2]= 3000;
 706     ppMode->maxAllowedY= 234;
 707     ppMode->minAllowedY= 16;
 708     ppMode->baseDcDiff= 256/8;
 709     ppMode->flatnessThreshold= 56-16-1;
 710     ppMode->maxClippedThreshold= 0.01;
 711     ppMode->error=0;
 712
 713     memset(temp, 0, GET_MODE_BUFFER_SIZE);
 714     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
 715
 716     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 717
 718     for(;;){
 719         const char *filterName;
 720         int q= 1000000; //PP_QUALITY_MAX;
 721         int chrom=-1;
 722         int luma=-1;
 723         const char *option;
 724         const char *options[OPTIONS_ARRAY_SIZE];
 725         int i;
 726         int filterNameOk=0;
 727         int numOfUnknownOptions=0;
 728         int enable=1; //does the user want us to enabled or disabled the filter
 729         char *tokstate;
 730
 731         filterToken= av_strtok(p, filterDelimiters, &tokstate);
 732         if(!filterToken) break;
 733         p+= strlen(filterToken) + 1; // p points to next filterToken
 734         filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
 735         if (!filterName) {
 736             ppMode->error++;
 737             break;
 738         }
 739         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 740
 741         if(*filterName == '-'){
 742             enable=0;
 743             filterName++;
 744         }
 745
 746         for(;;){ //for all options
 747             option= av_strtok(NULL, optionDelimiters, &tokstate);
 748             if(!option) break;
 749
 750             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 751             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 752             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 753             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 754             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 755             else{
 756                 options[numOfUnknownOptions] = option;
 757                 numOfUnknownOptions++;
 758             }
 759             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 760         }
 761         options[numOfUnknownOptions] = NULL;
 762
 763         /* replace stuff from the replace Table */
 764         for(i=0; replaceTable[2*i]; i++){
 765             if(!strcmp(replaceTable[2*i], filterName)){
 766                 int newlen= strlen(replaceTable[2*i + 1]);
 767                 int plen;
 768                 int spaceLeft;
 769
 770                 p--, *p=',';
 771
 772                 plen= strlen(p);
 773                 spaceLeft= p - temp + plen;
 774                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
 775                     ppMode->error++;
 776                     break;
 777                 }
 778                 memmove(p + newlen, p, plen+1);
 779                 memcpy(p, replaceTable[2*i + 1], newlen);
 780                 filterNameOk=1;
 781             }
 782         }
 783
 784         for(i=0; filters[i].shortName; i++){
 785             if(   !strcmp(filters[i].longName, filterName)
 786                || !strcmp(filters[i].shortName, filterName)){
 787                 ppMode->lumMode &= ~filters[i].mask;
 788                 ppMode->chromMode &= ~filters[i].mask;
 789
 790                 filterNameOk=1;
 791                 if(!enable) break; // user wants to disable it
 792
 793                 if(q >= filters[i].minLumQuality && luma)
 794                     ppMode->lumMode|= filters[i].mask;
 795                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 796                     if(q >= filters[i].minChromQuality)
 797                             ppMode->chromMode|= filters[i].mask;
 798
 799                 if(filters[i].mask == LEVEL_FIX){
 800                     int o;
 801                     ppMode->minAllowedY= 16;
 802                     ppMode->maxAllowedY= 234;
 803                     for(o=0; options[o]; o++){
 804                         if(  !strcmp(options[o],"fullyrange")
 805                            ||!strcmp(options[o],"f")){
 806                             ppMode->minAllowedY= 0;
 807                             ppMode->maxAllowedY= 255;
 808                             numOfUnknownOptions--;
 809                         }
 810                     }
 811                 }
 812                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 813                 {
 814                     int o;
 815                     int numOfNoises=0;
 816
 817                     for(o=0; options[o]; o++){
 818                         char *tail;
 819                         ppMode->maxTmpNoise[numOfNoises]=
 820                             strtol(options[o], &tail, 0);
 821                         if(tail!=options[o]){
 822                             numOfNoises++;
 823                             numOfUnknownOptions--;
 824                             if(numOfNoises >= 3) break;
 825                         }
 826                     }
 827                 }
 828                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 829                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 830                     int o;
 831
 832                     for(o=0; options[o] && o<2; o++){
 833                         char *tail;
 834                         int val= strtol(options[o], &tail, 0);
 835                         if(tail==options[o]) break;
 836
 837                         numOfUnknownOptions--;
 838                         if(o==0) ppMode->baseDcDiff= val;
 839                         else ppMode->flatnessThreshold= val;
 840                     }
 841                 }
 842                 else if(filters[i].mask == FORCE_QUANT){
 843                     int o;
 844                     ppMode->forcedQuant= 15;
 845
 846                     for(o=0; options[o] && o<1; o++){
 847                         char *tail;
 848                         int val= strtol(options[o], &tail, 0);
 849                         if(tail==options[o]) break;
 850
 851                         numOfUnknownOptions--;
 852                         ppMode->forcedQuant= val;
 853                     }
 854                 }
 855             }
 856         }
 857         if(!filterNameOk) ppMode->error++;
 858         ppMode->error += numOfUnknownOptions;
 859     }
 860
 861     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 862     if(ppMode->error){
 863         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 864         av_free(ppMode);
 865         return NULL;
 866     }
 867     return ppMode;
 868 }
 869
 870 void pp_free_mode(pp_mode *mode){
 871     av_free(mode);
 872 }
 873
 874 static void reallocAlign(void **p, int size){
 875     av_free(*p);
 876     *p= av_mallocz(size);
 877 }
 878
 879 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 880     int mbWidth = (width+15)>>4;
 881     int mbHeight= (height+15)>>4;
 882     int i;
 883
 884     c->stride= stride;
 885     c->qpStride= qpStride;
 886
 887     reallocAlign((void **)&c->tempDst, stride*24+32);
 888     reallocAlign((void **)&c->tempSrc, stride*24);
 889     reallocAlign((void **)&c->tempBlocks, 2*16*8);
 890     reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
 891     for(i=0; i<256; i++)
 892             c->yHistogram[i]= width*height/64*15/256;
 893
 894     for(i=0; i<3; i++){
 895         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
 896         reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
 897         reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 898     }
 899
 900     reallocAlign((void **)&c->deintTemp, 2*width+32);
 901     reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 902     reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 903     reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
 904 }
 905
 906 static const char * context_to_name(void * ptr) {
 907     return "postproc";
 908 }
 909
 910 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 911
 912 pp_context *pp_get_context(int width, int height, int cpuCaps){
 913     PPContext *c= av_malloc(sizeof(PPContext));
 914     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 915     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 916
 917     memset(c, 0, sizeof(PPContext));
 918     c->av_class = &av_codec_context_class;
 919     if(cpuCaps&PP_FORMAT){
 920         c->hChromaSubSample= cpuCaps&0x3;
 921         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 922     }else{
 923         c->hChromaSubSample= 1;
 924         c->vChromaSubSample= 1;
 925     }
 926     if (cpuCaps & PP_CPU_CAPS_AUTO) {
 927         c->cpuCaps = av_get_cpu_flags();
 928     } else {
 929         c->cpuCaps = 0;
 930         if (cpuCaps & PP_CPU_CAPS_MMX)      c->cpuCaps |= AV_CPU_FLAG_MMX;
 931         if (cpuCaps & PP_CPU_CAPS_MMX2)     c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
 932         if (cpuCaps & PP_CPU_CAPS_3DNOW)    c->cpuCaps |= AV_CPU_FLAG_3DNOW;
 933         if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
 934     }
 935
 936     reallocBuffers(c, width, height, stride, qpStride);
 937
 938     c->frameNum=-1;
 939
 940     return c;
 941 }
 942
 943 void pp_free_context(void *vc){
 944     PPContext *c = (PPContext*)vc;
 945     int i;
 946
 947     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
 948         av_free(c->tempBlurred[i]);
 949     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
 950         av_free(c->tempBlurredPast[i]);
 951
 952     av_free(c->tempBlocks);
 953     av_free(c->yHistogram);
 954     av_free(c->tempDst);
 955     av_free(c->tempSrc);
 956     av_free(c->deintTemp);
 957     av_free(c->stdQPTable);
 958     av_free(c->nonBQPTable);
 959     av_free(c->forcedQPTable);
 960
 961     memset(c, 0, sizeof(PPContext));
 962
 963     av_free(c);
 964 }
 965
 966 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 967                      uint8_t * dst[3], const int dstStride[3],
 968                      int width, int height,
 969                      const QP_STORE_T *QP_store,  int QPStride,
 970                      pp_mode *vm,  void *vc, int pict_type)
 971 {
 972     int mbWidth = (width+15)>>4;
 973     int mbHeight= (height+15)>>4;
 974     PPMode *mode = vm;
 975     PPContext *c = vc;
 976     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
 977     int absQPStride = FFABS(QPStride);
 978
 979     // c->stride and c->QPStride are always positive
 980     if(c->stride < minStride || c->qpStride < absQPStride)
 981         reallocBuffers(c, width, height,
 982                        FFMAX(minStride, c->stride),
 983                        FFMAX(c->qpStride, absQPStride));
 984
 985     if(!QP_store || (mode->lumMode & FORCE_QUANT)){
 986         int i;
 987         QP_store= c->forcedQPTable;
 988         absQPStride = QPStride = 0;
 989         if(mode->lumMode & FORCE_QUANT)
 990             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
 991         else
 992             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
 993     }
 994
 995     if(pict_type & PP_PICT_TYPE_QP2){
 996         int i;
 997         const int count= mbHeight * absQPStride;
 998         for(i=0; i<(count>>2); i++){
 999             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1000         }
1001         for(i<<=2; i<count; i++){
1002             c->stdQPTable[i] = QP_store[i]>>1;
1003         }
1004         QP_store= c->stdQPTable;
1005         QPStride= absQPStride;
1006     }
1007
1008     if(0){
1009         int x,y;
1010         for(y=0; y<mbHeight; y++){
1011             for(x=0; x<mbWidth; x++){
1012                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1013             }
1014             av_log(c, AV_LOG_INFO, "\n");
1015         }
1016         av_log(c, AV_LOG_INFO, "\n");
1017     }
1018
1019     if((pict_type&7)!=3){
1020         if (QPStride >= 0){
1021             int i;
1022             const int count= mbHeight * QPStride;
1023             for(i=0; i<(count>>2); i++){
1024                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1025             }
1026             for(i<<=2; i<count; i++){
1027                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1028             }
1029         } else {
1030             int i,j;
1031             for(i=0; i<mbHeight; i++) {
1032                 for(j=0; j<absQPStride; j++) {
1033                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1034                 }
1035             }
1036         }
1037     }
1038
1039     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1040            mode->lumMode, mode->chromMode);
1041
1042     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1043                 width, height, QP_store, QPStride, 0, mode, c);
1044
1045     width  = (width )>>c->hChromaSubSample;
1046     height = (height)>>c->vChromaSubSample;
1047
1048     if(mode->chromMode){
1049         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1050                     width, height, QP_store, QPStride, 1, mode, c);
1051         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1052                     width, height, QP_store, QPStride, 2, mode, c);
1053     }
1054     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1055         linecpy(dst[1], src[1], height, srcStride[1]);
1056         linecpy(dst[2], src[2], height, srcStride[2]);
1057     }else{
1058         int y;
1059         for(y=0; y<height; y++){
1060             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1061             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1062         }
1063     }
1064 }