git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
  65 make the mainloop more flexible (variable number of blocks at once
  66         (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include "libavutil/avassert.h"
  79 #include <inttypes.h>
  80 #include <stdio.h>
  81 #include <stdlib.h>
  82 #include <string.h>
  83 //#undef HAVE_MMX2
  84 //#define HAVE_AMD3DNOW
  85 //#undef HAVE_MMX
  86 //#undef ARCH_X86
  87 //#define DEBUG_BRIGHTNESS
  88 #include "postprocess.h"
  89 #include "postprocess_internal.h"
  90 #include "libavutil/avstring.h"
  91
  92 unsigned postproc_version(void)
  93 {
  94     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
  95     return LIBPOSTPROC_VERSION_INT;
  96 }
  97
  98 const char *postproc_configuration(void)
  99 {
 100     return FFMPEG_CONFIGURATION;
 101 }
 102
 103 const char *postproc_license(void)
 104 {
 105 #define LICENSE_PREFIX "libpostproc license: "
 106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 107 }
 108
 109 #if HAVE_ALTIVEC_H
 110 #include <altivec.h>
 111 #endif
 112
/* Size in bytes of the scratch buffer the mode-string parser copies its input into. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of options the mode-string parser does not handle itself. */
#define OPTIONS_ARRAY_SIZE 10
/* Number of rows / columns the C block filters in this file iterate over. */
#define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably used by the included templates - confirm. */
#define TEMP_STRIDE 8
 117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 118
#if ARCH_X86
/*
 * 64-bit bit patterns, only declared on x86.
 *   wXX: the 16-bit value 0x00XX repeated in all four words.
 *   bXX: the byte value 0xXX repeated in all eight bytes.
 * NOTE(review): presumably operands for the inline-assembly templates - confirm.
 */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared for all architectures.
 * NOTE(review): presumably the threshold consumed by the dering filter implementations - confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 131
 132
 133 static struct PPFilter filters[]=
 134 {
 135     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 136     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 137 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 138     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 139     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 140     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 141     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 142     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 143     {"dr", "dering",                1, 5, 6, DERING},
 144     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 145     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 146     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 147     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 148     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 149     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 150     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 151     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 152     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 153     {NULL, NULL,0,0,0,0} //End Marker
 154 };
 155
 156 static const char *replaceTable[]=
 157 {
 158     "default",      "hb:a,vb:a,dr:a",
 159     "de",           "hb:a,vb:a,dr:a",
 160     "fast",         "h1:a,v1:a,dr:a",
 161     "fa",           "h1:a,v1:a,dr:a",
 162     "ac",           "ha:a:128:7,va:a,dr:a",
 163     NULL //End Marker
 164 };
 165
 166
 167 #if ARCH_X86
 168 static inline void prefetchnta(void *p)
 169 {
 170     __asm__ volatile(   "prefetchnta (%0)\n\t"
 171         : : "r" (p)
 172     );
 173 }
 174
 175 static inline void prefetcht0(void *p)
 176 {
 177     __asm__ volatile(   "prefetcht0 (%0)\n\t"
 178         : : "r" (p)
 179     );
 180 }
 181
 182 static inline void prefetcht1(void *p)
 183 {
 184     __asm__ volatile(   "prefetcht1 (%0)\n\t"
 185         : : "r" (p)
 186     );
 187 }
 188
 189 static inline void prefetcht2(void *p)
 190 {
 191     __asm__ volatile(   "prefetcht2 (%0)\n\t"
 192         : : "r" (p)
 193     );
 194 }
 195 #endif
 196
 197 /* The horizontal functions exist only in C because the MMX
 198  * code is faster with vertical filters and transposing. */
 199
 200 /**
 201  * Check if the given 8x8 Block is mostly "flat"
 202  */
 203 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 204 {
 205     int numEq= 0;
 206     int y;
 207     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 208     const int dcThreshold= dcOffset*2 + 1;
 209
 210     for(y=0; y<BLOCK_SIZE; y++){
 211         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 212         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 213         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 214         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 215         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 216         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 217         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 218         src+= stride;
 219     }
 220     return numEq > c->ppMode.flatnessThreshold;
 221 }
 222
 223 /**
 224  * Check if the middle 8x8 Block in the given 8x16 block is flat
 225  */
 226 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 227 {
 228     int numEq= 0;
 229     int y;
 230     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 231     const int dcThreshold= dcOffset*2 + 1;
 232
 233     src+= stride*4; // src points to begin of the 8x8 Block
 234     for(y=0; y<BLOCK_SIZE-1; y++){
 235         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 236         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 237         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 238         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 239         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 240         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 241         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 242         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 243         src+= stride;
 244     }
 245     return numEq > c->ppMode.flatnessThreshold;
 246 }
 247
 248 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 249 {
 250     int i;
 251     for(i=0; i<2; i++){
 252         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 253         src += stride;
 254         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 255         src += stride;
 256         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 257         src += stride;
 258         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 259         src += stride;
 260     }
 261     return 1;
 262 }
 263
 264 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 265 {
 266     int x;
 267     src+= stride*4;
 268     for(x=0; x<BLOCK_SIZE; x+=4){
 269         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 270         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 271         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 272         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 273     }
 274     return 1;
 275 }
 276
 277 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 278 {
 279     if( isHorizDC_C(src, stride, c) ){
 280         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 281             return 1;
 282         else
 283             return 0;
 284     }else{
 285         return 2;
 286     }
 287 }
 288
 289 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 290 {
 291     if( isVertDC_C(src, stride, c) ){
 292         if( isVertMinMaxOk_C(src, stride, c->QP) )
 293             return 1;
 294         else
 295             return 0;
 296     }else{
 297         return 2;
 298     }
 299 }
 300
 301 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 302 {
 303     int y;
 304     for(y=0; y<BLOCK_SIZE; y++){
 305         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 306
 307         if(FFABS(middleEnergy) < 8*c->QP){
 308             const int q=(dst[3] - dst[4])/2;
 309             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 310             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 311
 312             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 313             d= FFMAX(d, 0);
 314
 315             d= (5*d + 32) >> 6;
 316             d*= FFSIGN(-middleEnergy);
 317
 318             if(q>0)
 319             {
 320                 d= d<0 ? 0 : d;
 321                 d= d>q ? q : d;
 322             }
 323             else
 324             {
 325                 d= d>0 ? 0 : d;
 326                 d= d<q ? q : d;
 327             }
 328
 329             dst[3]-= d;
 330             dst[4]+= d;
 331         }
 332         dst+= stride;
 333     }
 334 }
 335
 336 /**
 337  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 338  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 339  */
 340 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 341 {
 342     int y;
 343     for(y=0; y<BLOCK_SIZE; y++){
 344         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 345         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 346
 347         int sums[10];
 348         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 349         sums[1] = sums[0] - first  + dst[3];
 350         sums[2] = sums[1] - first  + dst[4];
 351         sums[3] = sums[2] - first  + dst[5];
 352         sums[4] = sums[3] - first  + dst[6];
 353         sums[5] = sums[4] - dst[0] + dst[7];
 354         sums[6] = sums[5] - dst[1] + last;
 355         sums[7] = sums[6] - dst[2] + last;
 356         sums[8] = sums[7] - dst[3] + last;
 357         sums[9] = sums[8] - dst[4] + last;
 358
 359         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 360         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 361         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 362         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 363         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 364         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 365         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 366         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 367
 368         dst+= stride;
 369     }
 370 }
 371
 372 /**
 373  * Experimental Filter 1 (Horizontal)
 374  * will not damage linear gradients
 375  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 376  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 377  * MMX2 version does correct clipping C version does not
 378  * not identical with the vertical one
 379  */
 380 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 381 {
 382     int y;
 383     static uint64_t *lut= NULL;
 384     if(lut==NULL)
 385     {
 386         int i;
 387         lut = av_malloc(256*8);
 388         for(i=0; i<256; i++)
 389         {
 390             int v= i < 128 ? 2*i : 2*(i-256);
 391 /*
 392 //Simulate 112242211 9-Tap filter
 393             uint64_t a= (v/16)  & 0xFF;
 394             uint64_t b= (v/8)   & 0xFF;
 395             uint64_t c= (v/4)   & 0xFF;
 396             uint64_t d= (3*v/8) & 0xFF;
 397 */
 398 //Simulate piecewise linear interpolation
 399             uint64_t a= (v/16)   & 0xFF;
 400             uint64_t b= (v*3/16) & 0xFF;
 401             uint64_t c= (v*5/16) & 0xFF;
 402             uint64_t d= (7*v/16) & 0xFF;
 403             uint64_t A= (0x100 - a)&0xFF;
 404             uint64_t B= (0x100 - b)&0xFF;
 405             uint64_t C= (0x100 - c)&0xFF;
 406             uint64_t D= (0x100 - c)&0xFF;
 407
 408             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 409                        (D<<24) | (C<<16) | (B<<8)  | (A);
 410             //lut[i] = (v<<32) | (v<<24);
 411         }
 412     }
 413
 414     for(y=0; y<BLOCK_SIZE; y++){
 415         int a= src[1] - src[2];
 416         int b= src[3] - src[4];
 417         int c= src[5] - src[6];
 418
 419         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 420
 421         if(d < QP){
 422             int v = d * FFSIGN(-b);
 423
 424             src[1] +=v/8;
 425             src[2] +=v/4;
 426             src[3] +=3*v/8;
 427             src[4] -=3*v/8;
 428             src[5] -=v/4;
 429             src[6] -=v/8;
 430         }
 431         src+=stride;
 432     }
 433 }
 434
 435 /**
 436  * accurate deblock filter
 437  */
 438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 439     int y;
 440     const int QP= c->QP;
 441     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 442     const int dcThreshold= dcOffset*2 + 1;
 443 //START_TIMER
 444     src+= step*4; // src points to begin of the 8x8 Block
 445     for(y=0; y<8; y++){
 446         int numEq= 0;
 447
 448         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 449         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 450         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 451         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 452         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 453         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 454         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 455         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 456         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 457         if(numEq > c->ppMode.flatnessThreshold){
 458             int min, max, x;
 459
 460             if(src[0] > src[step]){
 461                 max= src[0];
 462                 min= src[step];
 463             }else{
 464                 max= src[step];
 465                 min= src[0];
 466             }
 467             for(x=2; x<8; x+=2){
 468                 if(src[x*step] > src[(x+1)*step]){
 469                         if(src[x    *step] > max) max= src[ x   *step];
 470                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 471                 }else{
 472                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 473                         if(src[ x   *step] < min) min= src[ x   *step];
 474                 }
 475             }
 476             if(max-min < 2*QP){
 477                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 478                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 479
 480                 int sums[10];
 481                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 482                 sums[1] = sums[0] - first       + src[3*step];
 483                 sums[2] = sums[1] - first       + src[4*step];
 484                 sums[3] = sums[2] - first       + src[5*step];
 485                 sums[4] = sums[3] - first       + src[6*step];
 486                 sums[5] = sums[4] - src[0*step] + src[7*step];
 487                 sums[6] = sums[5] - src[1*step] + last;
 488                 sums[7] = sums[6] - src[2*step] + last;
 489                 sums[8] = sums[7] - src[3*step] + last;
 490                 sums[9] = sums[8] - src[4*step] + last;
 491
 492                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 493                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 494                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 495                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 496                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 497                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 498                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 499                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 500             }
 501         }else{
 502             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 503
 504             if(FFABS(middleEnergy) < 8*QP){
 505                 const int q=(src[3*step] - src[4*step])/2;
 506                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 507                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 508
 509                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 510                 d= FFMAX(d, 0);
 511
 512                 d= (5*d + 32) >> 6;
 513                 d*= FFSIGN(-middleEnergy);
 514
 515                 if(q>0){
 516                     d= d<0 ? 0 : d;
 517                     d= d>q ? q : d;
 518                 }else{
 519                     d= d>0 ? 0 : d;
 520                     d= d<q ? q : d;
 521                 }
 522
 523                 src[3*step]-= d;
 524                 src[4*step]+= d;
 525             }
 526         }
 527
 528         src += stride;
 529     }
 530 /*if(step==16){
 531     STOP_TIMER("step16")
 532 }else{
 533     STOP_TIMER("stepX")
 534 }*/
 535 }
 536
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/*
 * Step 1: decide which variants get compiled. With CONFIG_RUNTIME_CPUDETECT
 * the plain C variant and, on x86, all three x86 variants are built;
 * otherwise only the variant matching the configured HAVE_* flags is built.
 */
//Plain C versions
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

/*
 * Step 2: clear all HAVE_* feature flags. Each section below switches on
 * only the flags of the variant it builds before including the template.
 */
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/*
 * Step 3: include the template once per variant. RENAME() appends the
 * variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow) to an identifier; the
 * template is expected to wrap its function names in it, which yields the
 * postProcess_<variant>() functions called further down.
 */
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// minor note: the HAVE_xyz is messed up after that line so do not use it.
// (from here on the HAVE_* macros hold whatever the last compiled section set,
// not the values from config.h)
 619
 620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 621         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
 622 {
 623     PPContext *c= (PPContext *)vc;
 624     PPMode *ppMode= (PPMode *)vm;
 625     c->ppMode= *ppMode; //FIXME
 626
 627     // Using ifs here as they are faster than function pointers although the
 628     // difference would not be measurable here but it is much better because
 629     // someone might exchange the CPU whithout restarting MPlayer ;)
 630 #if CONFIG_RUNTIME_CPUDETECT
 631 #if ARCH_X86
 632     // ordered per speed fastest first
 633     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
 634         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 635     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
 636         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 637     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
 638         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 639     else
 640         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 641 #else
 642 #if HAVE_ALTIVEC
 643     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
 644             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 645     else
 646 #endif
 647             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 648 #endif
 649 #else /* CONFIG_RUNTIME_CPUDETECT */
 650 #if   HAVE_MMX2
 651             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 652 #elif HAVE_AMD3DNOW
 653             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 654 #elif HAVE_MMX
 655             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 656 #elif HAVE_ALTIVEC
 657             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 658 #else
 659             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 660 #endif
 661 #endif /* !CONFIG_RUNTIME_CPUDETECT */
 662 }
 663
 664 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 665 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 666
 667 /* -pp Command line Help
 668 */
 669 #if LIBPOSTPROC_VERSION_INT < (52<<16)
 670 const char *const pp_help=
 671 #else
 672 const char pp_help[] =
 673 #endif
 674 "Available postprocessing filters:\n"
 675 "Filters                        Options\n"
 676 "short  long name       short   long option     Description\n"
 677 "*      *               a       autoq           CPU power dependent enabler\n"
 678 "                       c       chrom           chrominance filtering enabled\n"
 679 "                       y       nochrom         chrominance filtering disabled\n"
 680 "                       n       noluma          luma filtering disabled\n"
 681 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 682 "       1. difference factor: default=32, higher -> more deblocking\n"
 683 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 684 "                       the h & v deblocking filters share these\n"
 685 "                       so you can't set different thresholds for h / v\n"
 686 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 687 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 688 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 689 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 690 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 691 "dr     dering                                  deringing filter\n"
 692 "al     autolevels                              automatic brightness / contrast\n"
 693 "                       f        fullyrange     stretch luminance to (0..255)\n"
 694 "lb     linblenddeint                           linear blend deinterlacer\n"
 695 "li     linipoldeint                            linear interpolating deinterlace\n"
 696 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 697 "md     mediandeint                             median deinterlacer\n"
 698 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 699 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 700 "de     default                                 hb:a,vb:a,dr:a\n"
 701 "fa     fast                                    h1:a,v1:a,dr:a\n"
 702 "ac                                             ha:a:128:7,va:a,dr:a\n"
 703 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 704 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 705 "fq     forceQuant      <quantizer>             force quantizer\n"
 706 "Usage:\n"
 707 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 708 "long form example:\n"
 709 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 710 "short form example:\n"
 711 "vb:a/hb:a/lb                                   de,-vb\n"
 712 "more examples:\n"
 713 "tn:64:128:256\n"
 714 "\n"
 715 ;
 716
 717 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 718 {
 719     char temp[GET_MODE_BUFFER_SIZE];
 720     char *p= temp;
 721     static const char filterDelimiters[] = ",/";
 722     static const char optionDelimiters[] = ":";
 723     struct PPMode *ppMode;
 724     char *filterToken;
 725
 726     ppMode= av_malloc(sizeof(PPMode));
 727
 728     ppMode->lumMode= 0;
 729     ppMode->chromMode= 0;
 730     ppMode->maxTmpNoise[0]= 700;
 731     ppMode->maxTmpNoise[1]= 1500;
 732     ppMode->maxTmpNoise[2]= 3000;
 733     ppMode->maxAllowedY= 234;
 734     ppMode->minAllowedY= 16;
 735     ppMode->baseDcDiff= 256/8;
 736     ppMode->flatnessThreshold= 56-16-1;
 737     ppMode->maxClippedThreshold= 0.01;
 738     ppMode->error=0;
 739
 740     memset(temp, 0, GET_MODE_BUFFER_SIZE);
 741     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
 742
 743     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 744
 745     for(;;){
 746         char *filterName;
 747         int q= 1000000; //PP_QUALITY_MAX;
 748         int chrom=-1;
 749         int luma=-1;
 750         char *option;
 751         char *options[OPTIONS_ARRAY_SIZE];
 752         int i;
 753         int filterNameOk=0;
 754         int numOfUnknownOptions=0;
 755         int enable=1; //does the user want us to enabled or disabled the filter
 756
 757         filterToken= strtok(p, filterDelimiters);
 758         if(filterToken == NULL) break;
 759         p+= strlen(filterToken) + 1; // p points to next filterToken
 760         filterName= strtok(filterToken, optionDelimiters);
 761         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 762
 763         if(*filterName == '-'){
 764             enable=0;
 765             filterName++;
 766         }
 767
 768         for(;;){ //for all options
 769             option= strtok(NULL, optionDelimiters);
 770             if(option == NULL) break;
 771
 772             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 773             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 774             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 775             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 776             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 777             else{
 778                 options[numOfUnknownOptions] = option;
 779                 numOfUnknownOptions++;
 780             }
 781             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 782         }
 783         options[numOfUnknownOptions] = NULL;
 784
 785         /* replace stuff from the replace Table */
 786         for(i=0; replaceTable[2*i]!=NULL; i++){
 787             if(!strcmp(replaceTable[2*i], filterName)){
 788                 int newlen= strlen(replaceTable[2*i + 1]);
 789                 int plen;
 790                 int spaceLeft;
 791
 792                 p--, *p=',';
 793
 794                 plen= strlen(p);
 795                 spaceLeft= p - temp + plen;
 796                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
 797                     ppMode->error++;
 798                     break;
 799                 }
 800                 memmove(p + newlen, p, plen+1);
 801                 memcpy(p, replaceTable[2*i + 1], newlen);
 802                 filterNameOk=1;
 803             }
 804         }
 805
 806         for(i=0; filters[i].shortName!=NULL; i++){
 807             if(   !strcmp(filters[i].longName, filterName)
 808                || !strcmp(filters[i].shortName, filterName)){
 809                 ppMode->lumMode &= ~filters[i].mask;
 810                 ppMode->chromMode &= ~filters[i].mask;
 811
 812                 filterNameOk=1;
 813                 if(!enable) break; // user wants to disable it
 814
 815                 if(q >= filters[i].minLumQuality && luma)
 816                     ppMode->lumMode|= filters[i].mask;
 817                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 818                     if(q >= filters[i].minChromQuality)
 819                             ppMode->chromMode|= filters[i].mask;
 820
 821                 if(filters[i].mask == LEVEL_FIX){
 822                     int o;
 823                     ppMode->minAllowedY= 16;
 824                     ppMode->maxAllowedY= 234;
 825                     for(o=0; options[o]!=NULL; o++){
 826                         if(  !strcmp(options[o],"fullyrange")
 827                            ||!strcmp(options[o],"f")){
 828                             ppMode->minAllowedY= 0;
 829                             ppMode->maxAllowedY= 255;
 830                             numOfUnknownOptions--;
 831                         }
 832                     }
 833                 }
 834                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 835                 {
 836                     int o;
 837                     int numOfNoises=0;
 838
 839                     for(o=0; options[o]!=NULL; o++){
 840                         char *tail;
 841                         ppMode->maxTmpNoise[numOfNoises]=
 842                             strtol(options[o], &tail, 0);
 843                         if(tail!=options[o]){
 844                             numOfNoises++;
 845                             numOfUnknownOptions--;
 846                             if(numOfNoises >= 3) break;
 847                         }
 848                     }
 849                 }
 850                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 851                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 852                     int o;
 853
 854                     for(o=0; options[o]!=NULL && o<2; o++){
 855                         char *tail;
 856                         int val= strtol(options[o], &tail, 0);
 857                         if(tail==options[o]) break;
 858
 859                         numOfUnknownOptions--;
 860                         if(o==0) ppMode->baseDcDiff= val;
 861                         else ppMode->flatnessThreshold= val;
 862                     }
 863                 }
 864                 else if(filters[i].mask == FORCE_QUANT){
 865                     int o;
 866                     ppMode->forcedQuant= 15;
 867
 868                     for(o=0; options[o]!=NULL && o<1; o++){
 869                         char *tail;
 870                         int val= strtol(options[o], &tail, 0);
 871                         if(tail==options[o]) break;
 872
 873                         numOfUnknownOptions--;
 874                         ppMode->forcedQuant= val;
 875                     }
 876                 }
 877             }
 878         }
 879         if(!filterNameOk) ppMode->error++;
 880         ppMode->error += numOfUnknownOptions;
 881     }
 882
 883     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 884     if(ppMode->error){
 885         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 886         av_free(ppMode);
 887         return NULL;
 888     }
 889     return ppMode;
 890 }
 891
 892 void pp_free_mode(pp_mode *mode){
 893     av_free(mode);
 894 }
 895
 896 static void reallocAlign(void **p, int alignment, int size){
 897     av_free(*p);
 898     *p= av_mallocz(size);
 899 }
 900
 901 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 902     int mbWidth = (width+15)>>4;
 903     int mbHeight= (height+15)>>4;
 904     int i;
 905
 906     c->stride= stride;
 907     c->qpStride= qpStride;
 908
 909     reallocAlign((void **)&c->tempDst, 8, stride*24);
 910     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 911     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 912     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 913     for(i=0; i<256; i++)
 914             c->yHistogram[i]= width*height/64*15/256;
 915
 916     for(i=0; i<3; i++){
 917         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
 918         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 919         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 920     }
 921
 922     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 923     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 924     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 925     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 926 }
 927
 928 static const char * context_to_name(void * ptr) {
 929     return "postproc";
 930 }
 931
/*
 * AVClass instance whose address pp_get_context() stores in the context's
 * av_class field. It is initialised positionally with the string "Postproc",
 * the context_to_name callback and NULL.
 * NOTE(review): presumably these are the class name, the item-name callback
 * and the option table -- confirm against the AVClass declaration.
 */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 933
 934 pp_context *pp_get_context(int width, int height, int cpuCaps){
 935     PPContext *c= av_malloc(sizeof(PPContext));
 936     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 937     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 938
 939     memset(c, 0, sizeof(PPContext));
 940     c->av_class = &av_codec_context_class;
 941     c->cpuCaps= cpuCaps;
 942     if(cpuCaps&PP_FORMAT){
 943         c->hChromaSubSample= cpuCaps&0x3;
 944         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 945     }else{
 946         c->hChromaSubSample= 1;
 947         c->vChromaSubSample= 1;
 948     }
 949
 950     reallocBuffers(c, width, height, stride, qpStride);
 951
 952     c->frameNum=-1;
 953
 954     return c;
 955 }
 956
 957 void pp_free_context(void *vc){
 958     PPContext *c = (PPContext*)vc;
 959     int i;
 960
 961     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 962     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 963
 964     av_free(c->tempBlocks);
 965     av_free(c->yHistogram);
 966     av_free(c->tempDst);
 967     av_free(c->tempSrc);
 968     av_free(c->deintTemp);
 969     av_free(c->stdQPTable);
 970     av_free(c->nonBQPTable);
 971     av_free(c->forcedQPTable);
 972
 973     memset(c, 0, sizeof(PPContext));
 974
 975     av_free(c);
 976 }
 977
 978 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 979                      uint8_t * dst[3], const int dstStride[3],
 980                      int width, int height,
 981                      const QP_STORE_T *QP_store,  int QPStride,
 982                      pp_mode *vm,  void *vc, int pict_type)
 983 {
 984     int mbWidth = (width+15)>>4;
 985     int mbHeight= (height+15)>>4;
 986     PPMode *mode = (PPMode*)vm;
 987     PPContext *c = (PPContext*)vc;
 988     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
 989     int absQPStride = FFABS(QPStride);
 990
 991     // c->stride and c->QPStride are always positive
 992     if(c->stride < minStride || c->qpStride < absQPStride)
 993         reallocBuffers(c, width, height,
 994                        FFMAX(minStride, c->stride),
 995                        FFMAX(c->qpStride, absQPStride));
 996
 997     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
 998         int i;
 999         QP_store= c->forcedQPTable;
1000         absQPStride = QPStride = 0;
1001         if(mode->lumMode & FORCE_QUANT)
1002             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1003         else
1004             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1005     }
1006
1007     if(pict_type & PP_PICT_TYPE_QP2){
1008         int i;
1009         const int count= mbHeight * absQPStride;
1010         for(i=0; i<(count>>2); i++){
1011             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1012         }
1013         for(i<<=2; i<count; i++){
1014             c->stdQPTable[i] = QP_store[i]>>1;
1015         }
1016         QP_store= c->stdQPTable;
1017         QPStride= absQPStride;
1018     }
1019
1020     if(0){
1021         int x,y;
1022         for(y=0; y<mbHeight; y++){
1023             for(x=0; x<mbWidth; x++){
1024                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1025             }
1026             av_log(c, AV_LOG_INFO, "\n");
1027         }
1028         av_log(c, AV_LOG_INFO, "\n");
1029     }
1030
1031     if((pict_type&7)!=3){
1032         if (QPStride >= 0){
1033             int i;
1034             const int count= mbHeight * QPStride;
1035             for(i=0; i<(count>>2); i++){
1036                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1037             }
1038             for(i<<=2; i<count; i++){
1039                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1040             }
1041         } else {
1042             int i,j;
1043             for(i=0; i<mbHeight; i++) {
1044                 for(j=0; j<absQPStride; j++) {
1045                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1046                 }
1047             }
1048         }
1049     }
1050
1051     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1052            mode->lumMode, mode->chromMode);
1053
1054     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1055                 width, height, QP_store, QPStride, 0, mode, c);
1056
1057     width  = (width )>>c->hChromaSubSample;
1058     height = (height)>>c->vChromaSubSample;
1059
1060     if(mode->chromMode){
1061         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1062                     width, height, QP_store, QPStride, 1, mode, c);
1063         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1064                     width, height, QP_store, QPStride, 2, mode, c);
1065     }
1066     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1067         linecpy(dst[1], src[1], height, srcStride[1]);
1068         linecpy(dst[2], src[2], height, srcStride[2]);
1069     }else{
1070         int y;
1071         for(y=0; y<height; y++){
1072             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1073             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1074         }
1075     }
1076 }