]> git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c
misc spelling fixes
[ffmpeg] / libpostproc / postprocess.c
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 /**
24  * @file postprocess.c
25  * postprocessing.
26  */
27
28 /*
29                         C       MMX     MMX2    3DNow   AltiVec
30 isVertDC                Ec      Ec                      Ec
31 isVertMinMaxOk          Ec      Ec                      Ec
32 doVertLowPass           E               e       e       Ec
33 doVertDefFilter         Ec      Ec      e       e       Ec
34 isHorizDC               Ec      Ec                      Ec
35 isHorizMinMaxOk         a       E                       Ec
36 doHorizLowPass          E               e       e       Ec
37 doHorizDefFilter        Ec      Ec      e       e       Ec
38 do_a_deblock            Ec      E       Ec      E
39 deRing                  E               e       e*      Ecp
40 Vertical RKAlgo1        E               a       a
41 Horizontal RKAlgo1                      a       a
42 Vertical X1#            a               E       E
43 Horizontal X1#          a               E       E
44 LinIpolDeinterlace      e               E       E*
45 CubicIpolDeinterlace    a               e       e*
46 LinBlendDeinterlace     e               E       E*
47 MedianDeinterlace#      E       Ec      Ec
48 TempDeNoiser#           E               e       e       Ec
49
50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66         (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73
74 //Changelog: use the Subversion log
75
76 #include "config.h"
77 #include "avutil.h"
78 #include <inttypes.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #ifdef HAVE_MALLOC_H
83 #include <malloc.h>
84 #endif
85 //#undef HAVE_MMX2
86 //#define HAVE_3DNOW
87 //#undef HAVE_MMX
88 //#undef ARCH_X86
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
92
93 #include "mangle.h" //FIXME should be supressed
94
95 #ifdef HAVE_ALTIVEC_H
96 #include <altivec.h>
97 #endif
98
99 #define GET_MODE_BUFFER_SIZE 500
100 #define OPTIONS_ARRAY_SIZE 10
101 #define BLOCK_SIZE 8
102 #define TEMP_STRIDE 8
103 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
104
#if defined(ARCH_X86)
/*
 * 64-bit constant patterns, declared through DECLARE_ALIGNED(8, ...) and only
 * built for x86. The name encodes the repeated value (in hex): wNN repeats the
 * 16-bit word 0x00NN four times, bNN repeats the byte 0xNN eight times.
 * None of them is referenced by the C code visible in this part of the file;
 * presumably the inline assembly of the MMX template variants loads them and
 * attribute_used keeps them from being discarded -- TODO confirm.
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
#endif
115
// Byte lookup table with 3*256 entries; it is not filled in by the code visible here.
static uint8_t clip_table[3*256];
// Points 256 entries into clip_table, so that indices -256..511 stay inside the table.
static uint8_t * const clip_tab= clip_table + 256;

// Threshold for the deringing filter (judging by the name); not referenced by the
// C code visible in this part of the file -- TODO confirm where it is used.
static const int attribute_used deringThreshold= 20;
120
121
/*
 * Table of all known filters, terminated by an entry whose names are NULL.
 * Every entry starts with the short and the long filter name and ends with the
 * filter's mask constant. The three integers in between are defined by
 * struct PPFilter, which is declared outside this file -- presumably a
 * chroma-default flag followed by two quality thresholds; TODO confirm against
 * postprocess_internal.h.
 * The two "rk" entries are disabled on purpose (commented out).
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
144
/*
 * Alias table stored as consecutive (alias, replacement) string pairs and
 * terminated by a single NULL. Each alias stands for the filter list in the
 * string that follows it; presumably the mode parser substitutes the
 * replacement for the alias -- the code doing so is not visible here.
 */
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
154
155
156 #if defined(ARCH_X86)
/**
 * Issue the x86 "prefetchnta" instruction for the address p.
 * The asm statement has no outputs; p is passed in a register.
 */
static inline void prefetchnta(void *p)
{
        asm volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
163
/**
 * Issue the x86 "prefetcht0" instruction for the address p.
 * The asm statement has no outputs; p is passed in a register.
 */
static inline void prefetcht0(void *p)
{
        asm volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
170
/**
 * Issue the x86 "prefetcht1" instruction for the address p.
 * The asm statement has no outputs; p is passed in a register.
 */
static inline void prefetcht1(void *p)
{
        asm volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
177
/**
 * Issue the x86 "prefetcht2" instruction for the address p.
 * The asm statement has no outputs; p is passed in a register.
 */
static inline void prefetcht2(void *p)
{
        asm volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
184 #endif
185
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
187
188 /**
189  * Check if the given 8x8 Block is mostly "flat"
190  */
191 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
192 {
193         int numEq= 0;
194         int y;
195         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
196         const int dcThreshold= dcOffset*2 + 1;
197
198         for(y=0; y<BLOCK_SIZE; y++)
199         {
200                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
207                 src+= stride;
208         }
209         return numEq > c->ppMode.flatnessThreshold;
210 }
211
212 /**
213  * Check if the middle 8x8 Block in the given 8x16 block is flat
214  */
215 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
216         int numEq= 0;
217         int y;
218         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
219         const int dcThreshold= dcOffset*2 + 1;
220
221         src+= stride*4; // src points to begin of the 8x8 Block
222         for(y=0; y<BLOCK_SIZE-1; y++)
223         {
224                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232                 src+= stride;
233         }
234         return numEq > c->ppMode.flatnessThreshold;
235 }
236
/**
 * Approximate check that the horizontal pixel range of an 8x8 block is small.
 * Only one pixel pair is sampled per row; which columns form the pair rotates
 * with the row number (0/5, 2/7, 4/1, 6/3, then the same again).
 * @param src    top-left pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer; a sampled pair may differ by at most 2*QP
 * @return 0 as soon as one sampled pair differs by more than 2*QP, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        // columns of the pixel pair sampled in row (row & 3)
        static const uint8_t pairCols[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        int row;

        for(row=0; row<8; row++)
        {
                const uint8_t *line= src + row*stride;
                const int a= line[ pairCols[row&3][0] ];
                const int b= line[ pairCols[row&3][1] ];

                // the unsigned cast folds "-2*QP <= a-b <= 2*QP" into one compare
                if((unsigned)(a - b + 2*QP) > 4*QP) return 0;
        }
        return 1;
}
259
260 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
261 {
262 #if 1
263 #if 1
264         int x;
265         src+= stride*4;
266         for(x=0; x<BLOCK_SIZE; x+=4)
267         {
268                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
269                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
270                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
271                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
272         }
273 #else
274         int x;
275         src+= stride*3;
276         for(x=0; x<BLOCK_SIZE; x++)
277         {
278                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
279         }
280 #endif
281         return 1;
282 #else
283         int x;
284         src+= stride*4;
285         for(x=0; x<BLOCK_SIZE; x++)
286         {
287                 int min=255;
288                 int max=0;
289                 int y;
290                 for(y=0; y<8; y++){
291                         int v= src[x + y*stride];
292                         if(v>max) max=v;
293                         if(v<min) min=v;
294                 }
295                 if(max-min > 2*QP) return 0;
296         }
297         return 1;
298 #endif
299 }
300
301 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
302         if( isHorizDC_C(src, stride, c) ){
303                 if( isHorizMinMaxOk_C(src, stride, c->QP) )
304                         return 1;
305                 else
306                         return 0;
307         }else{
308                 return 2;
309         }
310 }
311
312 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
313         if( isVertDC_C(src, stride, c) ){
314                 if( isVertMinMaxOk_C(src, stride, c->QP) )
315                         return 1;
316                 else
317                         return 0;
318         }else{
319                 return 2;
320         }
321 }
322
323 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
324 {
325         int y;
326         for(y=0; y<BLOCK_SIZE; y++)
327         {
328                 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
329
330                 if(FFABS(middleEnergy) < 8*c->QP)
331                 {
332                         const int q=(dst[3] - dst[4])/2;
333                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
334                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
335
336                         int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
337                         d= FFMAX(d, 0);
338
339                         d= (5*d + 32) >> 6;
340                         d*= FFSIGN(-middleEnergy);
341
342                         if(q>0)
343                         {
344                                 d= d<0 ? 0 : d;
345                                 d= d>q ? q : d;
346                         }
347                         else
348                         {
349                                 d= d>0 ? 0 : d;
350                                 d= d<q ? q : d;
351                         }
352
353                         dst[3]-= d;
354                         dst[4]+= d;
355                 }
356                 dst+= stride;
357         }
358 }
359
360 /**
361  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
362  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
363  */
364 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
365 {
366         int y;
367         for(y=0; y<BLOCK_SIZE; y++)
368         {
369                 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
370                 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
371
372                 int sums[10];
373                 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
374                 sums[1] = sums[0] - first  + dst[3];
375                 sums[2] = sums[1] - first  + dst[4];
376                 sums[3] = sums[2] - first  + dst[5];
377                 sums[4] = sums[3] - first  + dst[6];
378                 sums[5] = sums[4] - dst[0] + dst[7];
379                 sums[6] = sums[5] - dst[1] + last;
380                 sums[7] = sums[6] - dst[2] + last;
381                 sums[8] = sums[7] - dst[3] + last;
382                 sums[9] = sums[8] - dst[4] + last;
383
384                 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
385                 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
386                 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
387                 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
388                 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
389                 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
390                 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
391                 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
392
393                 dst+= stride;
394         }
395 }
396
397 /**
398  * Experimental Filter 1 (Horizontal)
399  * will not damage linear gradients
400  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
401  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
402  * MMX2 version does correct clipping C version does not
403  * not identical with the vertical one
404  */
405 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
406 {
407         int y;
408         static uint64_t *lut= NULL;
409         if(lut==NULL)
410         {
411                 int i;
412                 lut = av_malloc(256*8);
413                 for(i=0; i<256; i++)
414                 {
415                         int v= i < 128 ? 2*i : 2*(i-256);
416 /*
417 //Simulate 112242211 9-Tap filter
418                         uint64_t a= (v/16) & 0xFF;
419                         uint64_t b= (v/8) & 0xFF;
420                         uint64_t c= (v/4) & 0xFF;
421                         uint64_t d= (3*v/8) & 0xFF;
422 */
423 //Simulate piecewise linear interpolation
424                         uint64_t a= (v/16) & 0xFF;
425                         uint64_t b= (v*3/16) & 0xFF;
426                         uint64_t c= (v*5/16) & 0xFF;
427                         uint64_t d= (7*v/16) & 0xFF;
428                         uint64_t A= (0x100 - a)&0xFF;
429                         uint64_t B= (0x100 - b)&0xFF;
430                         uint64_t C= (0x100 - c)&0xFF;
431                         uint64_t D= (0x100 - c)&0xFF;
432
433                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
434                                 (D<<24) | (C<<16) | (B<<8) | (A);
435                         //lut[i] = (v<<32) | (v<<24);
436                 }
437         }
438
439         for(y=0; y<BLOCK_SIZE; y++)
440         {
441                 int a= src[1] - src[2];
442                 int b= src[3] - src[4];
443                 int c= src[5] - src[6];
444
445                 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
446
447                 if(d < QP)
448                 {
449                         int v = d * FFSIGN(-b);
450
451                         src[1] +=v/8;
452                         src[2] +=v/4;
453                         src[3] +=3*v/8;
454                         src[4] -=3*v/8;
455                         src[5] -=v/4;
456                         src[6] -=v/8;
457
458                 }
459                 src+=stride;
460         }
461 }
462
463 /**
464  * accurate deblock filter
465  */
466 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
467         int y;
468         const int QP= c->QP;
469         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
470         const int dcThreshold= dcOffset*2 + 1;
471 //START_TIMER
472         src+= step*4; // src points to begin of the 8x8 Block
473         for(y=0; y<8; y++){
474                 int numEq= 0;
475
476                 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
477                 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
478                 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
479                 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
480                 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
481                 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
482                 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
483                 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
484                 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
485                 if(numEq > c->ppMode.flatnessThreshold){
486                         int min, max, x;
487
488                         if(src[0] > src[step]){
489                             max= src[0];
490                             min= src[step];
491                         }else{
492                             max= src[step];
493                             min= src[0];
494                         }
495                         for(x=2; x<8; x+=2){
496                                 if(src[x*step] > src[(x+1)*step]){
497                                         if(src[x    *step] > max) max= src[ x   *step];
498                                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
499                                 }else{
500                                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
501                                         if(src[ x   *step] < min) min= src[ x   *step];
502                                 }
503                         }
504                         if(max-min < 2*QP){
505                                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
506                                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
507
508                                 int sums[10];
509                                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
510                                 sums[1] = sums[0] - first       + src[3*step];
511                                 sums[2] = sums[1] - first       + src[4*step];
512                                 sums[3] = sums[2] - first       + src[5*step];
513                                 sums[4] = sums[3] - first       + src[6*step];
514                                 sums[5] = sums[4] - src[0*step] + src[7*step];
515                                 sums[6] = sums[5] - src[1*step] + last;
516                                 sums[7] = sums[6] - src[2*step] + last;
517                                 sums[8] = sums[7] - src[3*step] + last;
518                                 sums[9] = sums[8] - src[4*step] + last;
519
520                                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
521                                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
522                                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
523                                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
524                                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
525                                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
526                                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
527                                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
528                         }
529                 }else{
530                         const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
531
532                         if(FFABS(middleEnergy) < 8*QP)
533                         {
534                                 const int q=(src[3*step] - src[4*step])/2;
535                                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536                                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537
538                                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
539                                 d= FFMAX(d, 0);
540
541                                 d= (5*d + 32) >> 6;
542                                 d*= FFSIGN(-middleEnergy);
543
544                                 if(q>0)
545                                 {
546                                         d= d<0 ? 0 : d;
547                                         d= d>q ? q : d;
548                                 }
549                                 else
550                                 {
551                                         d= d>0 ? 0 : d;
552                                         d= d<q ? q : d;
553                                 }
554
555                                 src[3*step]-= d;
556                                 src[4*step]+= d;
557                         }
558                 }
559
560                 src += stride;
561         }
562 /*if(step==16){
563     STOP_TIMER("step16")
564 }else{
565     STOP_TIMER("stepX")
566 }*/
567 }
568
// Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one.
// The COMPILE_xyz macros defined here select which variants of the template get built below.

// Plain C version: built when MMX is not enabled at compile time, and always
// with runtime CPU detection (postProcess() falls back to it).
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

// AltiVec version: only on PowerPC builds with AltiVec enabled
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

// Without runtime detection exactly one x86 variant is selected
// (MMX2 takes precedence over 3DNow, which takes precedence over plain MMX);
// with runtime detection all three are built.
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

// Drop the configured feature macros; they are redefined per variant right
// before each inclusion of the template.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
600
// Each section below sets the HAVE_xyz macros for one variant, defines
// RENAME() to append that variant's suffix to an identifier and then includes
// postprocess_template.c. The postProcess_C / _altivec / _MMX / _MMX2 / _3DNow
// functions called by postProcess() presumably come from these inclusions via
// RENAME() -- the template itself is not visible here.

//Plain C versions
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//AltiVec versions
#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
648
// Minor note: from this line on the HAVE_xyz macros only reflect the last template variant that was compiled, not the build configuration, so do not rely on them.
650
/**
 * Dispatch to the postProcess_xyz() variant matching the CPU.
 * The mode pointed to by vm is first copied into the context (c->ppMode).
 * With RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps at run time,
 * otherwise it is fixed at compile time by the HAVE_xyz macros.
 * src, srcStride, dst, dstStride, width, height, QPs, QPStride and isColor are
 * passed on unchanged.
 * @param vm mode, treated as a PPMode
 * @param vc context, treated as a PPContext
 */
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
        PPContext *c= (PPContext *)vc;
        PPMode *ppMode= (PPMode *)vm;
        c->ppMode= *ppMode; //FIXME

        // Using ifs here as they are faster than function pointers, although the
        // difference would not be measurable here; but it is much better because
        // someone might exchange the CPU without restarting MPlayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)
        // ordered by speed, fastest first
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
#endif
#endif
                // C fallback; it is the else branch of the AltiVec check when that check is compiled in
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
        // The HAVE_xyz macros tested here are the ones left behind by the template
        // inclusions above, i.e. those of the last variant that was compiled.
#ifdef HAVE_MMX2
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}
696
697 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
698 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
699
/**
 * Help text for the -pp command line option.
 * The short and long filter names listed here must match the entries of the
 * filters[] table, and the "de"/"fa"/"ac" lines the entries of replaceTable[].
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
745
746 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
747 {
748         char temp[GET_MODE_BUFFER_SIZE];
749         char *p= temp;
750         const char *filterDelimiters= ",/";
751         const char *optionDelimiters= ":";
752         struct PPMode *ppMode;
753         char *filterToken;
754
755         ppMode= av_malloc(sizeof(PPMode));
756
757         ppMode->lumMode= 0;
758         ppMode->chromMode= 0;
759         ppMode->maxTmpNoise[0]= 700;
760         ppMode->maxTmpNoise[1]= 1500;
761         ppMode->maxTmpNoise[2]= 3000;
762         ppMode->maxAllowedY= 234;
763         ppMode->minAllowedY= 16;
764         ppMode->baseDcDiff= 256/8;
765         ppMode->flatnessThreshold= 56-16-1;
766         ppMode->maxClippedThreshold= 0.01;
767         ppMode->error=0;
768
769         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770
771         av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772
773         for(;;){
774                 char *filterName;
775                 int q= 1000000; //PP_QUALITY_MAX;
776                 int chrom=-1;
777                 int luma=-1;
778                 char *option;
779                 char *options[OPTIONS_ARRAY_SIZE];
780                 int i;
781                 int filterNameOk=0;
782                 int numOfUnknownOptions=0;
783                 int enable=1; //does the user want us to enabled or disabled the filter
784
785                 filterToken= strtok(p, filterDelimiters);
786                 if(filterToken == NULL) break;
787                 p+= strlen(filterToken) + 1; // p points to next filterToken
788                 filterName= strtok(filterToken, optionDelimiters);
789                 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
790
791                 if(*filterName == '-')
792                 {
793                         enable=0;
794                         filterName++;
795                 }
796
797                 for(;;){ //for all options
798                         option= strtok(NULL, optionDelimiters);
799                         if(option == NULL) break;
800
801                         av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
802                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
803                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
804                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
805                         else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
806                         else
807                         {
808                                 options[numOfUnknownOptions] = option;
809                                 numOfUnknownOptions++;
810                         }
811                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
812                 }
813                 options[numOfUnknownOptions] = NULL;
814
815                 /* replace stuff from the replace Table */
816                 for(i=0; replaceTable[2*i]!=NULL; i++)
817                 {
818                         if(!strcmp(replaceTable[2*i], filterName))
819                         {
820                                 int newlen= strlen(replaceTable[2*i + 1]);
821                                 int plen;
822                                 int spaceLeft;
823
824                                 if(p==NULL) p= temp, *p=0;      //last filter
825                                 else p--, *p=',';               //not last filter
826
827                                 plen= strlen(p);
828                                 spaceLeft= p - temp + plen;
829                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
830                                 {
831                                         ppMode->error++;
832                                         break;
833                                 }
834                                 memmove(p + newlen, p, plen+1);
835                                 memcpy(p, replaceTable[2*i + 1], newlen);
836                                 filterNameOk=1;
837                         }
838                 }
839
840                 for(i=0; filters[i].shortName!=NULL; i++)
841                 {
842                         if(   !strcmp(filters[i].longName, filterName)
843                            || !strcmp(filters[i].shortName, filterName))
844                         {
845                                 ppMode->lumMode &= ~filters[i].mask;
846                                 ppMode->chromMode &= ~filters[i].mask;
847
848                                 filterNameOk=1;
849                                 if(!enable) break; // user wants to disable it
850
851                                 if(q >= filters[i].minLumQuality && luma)
852                                         ppMode->lumMode|= filters[i].mask;
853                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
854                                         if(q >= filters[i].minChromQuality)
855                                                 ppMode->chromMode|= filters[i].mask;
856
857                                 if(filters[i].mask == LEVEL_FIX)
858                                 {
859                                         int o;
860                                         ppMode->minAllowedY= 16;
861                                         ppMode->maxAllowedY= 234;
862                                         for(o=0; options[o]!=NULL; o++)
863                                         {
864                                                 if(  !strcmp(options[o],"fullyrange")
865                                                    ||!strcmp(options[o],"f"))
866                                                 {
867                                                         ppMode->minAllowedY= 0;
868                                                         ppMode->maxAllowedY= 255;
869                                                         numOfUnknownOptions--;
870                                                 }
871                                         }
872                                 }
873                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
874                                 {
875                                         int o;
876                                         int numOfNoises=0;
877
878                                         for(o=0; options[o]!=NULL; o++)
879                                         {
880                                                 char *tail;
881                                                 ppMode->maxTmpNoise[numOfNoises]=
882                                                         strtol(options[o], &tail, 0);
883                                                 if(tail!=options[o])
884                                                 {
885                                                         numOfNoises++;
886                                                         numOfUnknownOptions--;
887                                                         if(numOfNoises >= 3) break;
888                                                 }
889                                         }
890                                 }
891                                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
892                                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
893                                 {
894                                         int o;
895
896                                         for(o=0; options[o]!=NULL && o<2; o++)
897                                         {
898                                                 char *tail;
899                                                 int val= strtol(options[o], &tail, 0);
900                                                 if(tail==options[o]) break;
901
902                                                 numOfUnknownOptions--;
903                                                 if(o==0) ppMode->baseDcDiff= val;
904                                                 else ppMode->flatnessThreshold= val;
905                                         }
906                                 }
907                                 else if(filters[i].mask == FORCE_QUANT)
908                                 {
909                                         int o;
910                                         ppMode->forcedQuant= 15;
911
912                                         for(o=0; options[o]!=NULL && o<1; o++)
913                                         {
914                                                 char *tail;
915                                                 int val= strtol(options[o], &tail, 0);
916                                                 if(tail==options[o]) break;
917
918                                                 numOfUnknownOptions--;
919                                                 ppMode->forcedQuant= val;
920                                         }
921                                 }
922                         }
923                 }
924                 if(!filterNameOk) ppMode->error++;
925                 ppMode->error += numOfUnknownOptions;
926         }
927
928         av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
929         if(ppMode->error)
930         {
931                 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
932                 av_free(ppMode);
933                 return NULL;
934         }
935         return ppMode;
936 }
937
938 void pp_free_mode(pp_mode_t *mode){
939     av_free(mode);
940 }
941
/**
 * Frees the buffer *p and replaces it with a new zero-filled one.
 * The old contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 *
 * NOTE(review): the result of av_mallocz() is stored unchecked, so *p may
 * end up NULL.
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *fresh;

        av_free(*p);
        fresh= av_mallocz(size);
        *p= fresh;
}
946
947 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
948         int mbWidth = (width+15)>>4;
949         int mbHeight= (height+15)>>4;
950         int i;
951
952         c->stride= stride;
953         c->qpStride= qpStride;
954
955         reallocAlign((void **)&c->tempDst, 8, stride*24);
956         reallocAlign((void **)&c->tempSrc, 8, stride*24);
957         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
958         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
959         for(i=0; i<256; i++)
960                 c->yHistogram[i]= width*height/64*15/256;
961
962         for(i=0; i<3; i++)
963         {
964                 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
965                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
966                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
967         }
968
969         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
970         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
971         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
972         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
973 }
974
975 static void global_init(void){
976         int i;
977         memset(clip_table, 0, 256);
978         for(i=256; i<512; i++)
979                 clip_table[i]= i;
980         memset(clip_table+512, 0, 256);
981 }
982
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char contextName[] = "postproc";

    return contextName;
}
986
987 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
988
989 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
990         PPContext *c= av_malloc(sizeof(PPContext));
991         int stride= (width+15)&(~15);    //assumed / will realloc if needed
992         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
993
994         global_init();
995
996         memset(c, 0, sizeof(PPContext));
997         c->av_class = &av_codec_context_class;
998         c->cpuCaps= cpuCaps;
999         if(cpuCaps&PP_FORMAT){
1000                 c->hChromaSubSample= cpuCaps&0x3;
1001                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1002         }else{
1003                 c->hChromaSubSample= 1;
1004                 c->vChromaSubSample= 1;
1005         }
1006
1007         reallocBuffers(c, width, height, stride, qpStride);
1008
1009         c->frameNum=-1;
1010
1011         return c;
1012 }
1013
1014 void pp_free_context(void *vc){
1015         PPContext *c = (PPContext*)vc;
1016         int i;
1017
1018         for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1019         for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1020
1021         av_free(c->tempBlocks);
1022         av_free(c->yHistogram);
1023         av_free(c->tempDst);
1024         av_free(c->tempSrc);
1025         av_free(c->deintTemp);
1026         av_free(c->stdQPTable);
1027         av_free(c->nonBQPTable);
1028         av_free(c->forcedQPTable);
1029
1030         memset(c, 0, sizeof(PPContext));
1031
1032         av_free(c);
1033 }
1034
1035 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1036                  uint8_t * dst[3], int dstStride[3],
1037                  int width, int height,
1038                  QP_STORE_T *QP_store,  int QPStride,
1039                  pp_mode_t *vm,  void *vc, int pict_type)
1040 {
1041         int mbWidth = (width+15)>>4;
1042         int mbHeight= (height+15)>>4;
1043         PPMode *mode = (PPMode*)vm;
1044         PPContext *c = (PPContext*)vc;
1045         int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1046         int absQPStride = FFABS(QPStride);
1047
1048         // c->stride and c->QPStride are always positive
1049         if(c->stride < minStride || c->qpStride < absQPStride)
1050                 reallocBuffers(c, width, height,
1051                                 FFMAX(minStride, c->stride),
1052                                 FFMAX(c->qpStride, absQPStride));
1053
1054         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1055         {
1056                 int i;
1057                 QP_store= c->forcedQPTable;
1058                 absQPStride = QPStride = 0;
1059                 if(mode->lumMode & FORCE_QUANT)
1060                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1061                 else
1062                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1063         }
1064
1065         if(pict_type & PP_PICT_TYPE_QP2){
1066                 int i;
1067                 const int count= mbHeight * absQPStride;
1068                 for(i=0; i<(count>>2); i++){
1069                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1070                 }
1071                 for(i<<=2; i<count; i++){
1072                         c->stdQPTable[i] = QP_store[i]>>1;
1073                 }
1074                 QP_store= c->stdQPTable;
1075                 QPStride= absQPStride;
1076         }
1077
1078 if(0){
1079 int x,y;
1080 for(y=0; y<mbHeight; y++){
1081         for(x=0; x<mbWidth; x++){
1082                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1083         }
1084         av_log(c, AV_LOG_INFO, "\n");
1085 }
1086         av_log(c, AV_LOG_INFO, "\n");
1087 }
1088
1089         if((pict_type&7)!=3)
1090         {
1091                 if (QPStride >= 0) {
1092                         int i;
1093                         const int count= mbHeight * QPStride;
1094                         for(i=0; i<(count>>2); i++){
1095                                 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1096                         }
1097                         for(i<<=2; i<count; i++){
1098                                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1099                         }
1100                 } else {
1101                         int i,j;
1102                         for(i=0; i<mbHeight; i++) {
1103                                     for(j=0; j<absQPStride; j++) {
1104                                         c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1105                                 }
1106                         }
1107                 }
1108         }
1109
1110         av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1111                mode->lumMode, mode->chromMode);
1112
1113         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1114                 width, height, QP_store, QPStride, 0, mode, c);
1115
1116         width  = (width )>>c->hChromaSubSample;
1117         height = (height)>>c->vChromaSubSample;
1118
1119         if(mode->chromMode)
1120         {
1121                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1122                         width, height, QP_store, QPStride, 1, mode, c);
1123                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1124                         width, height, QP_store, QPStride, 2, mode, c);
1125         }
1126         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1127         {
1128                 linecpy(dst[1], src[1], height, srcStride[1]);
1129                 linecpy(dst[2], src[2], height, srcStride[2]);
1130         }
1131         else
1132         {
1133                 int y;
1134                 for(y=0; y<height; y++)
1135                 {
1136                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1137                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1138                 }
1139         }
1140 }
1141