]> git.sesse.net Git - ffmpeg/blob - libpostproc/postprocess.c
Explicitly include fastmemcpy.h from libvo/.
[ffmpeg] / libpostproc / postprocess.c
1 /*
2     Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
4     AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22  * @file postprocess.c
23  * postprocessing.
24  */
25
26 /*
27                         C       MMX     MMX2    3DNow   AltiVec
28 isVertDC                Ec      Ec                      Ec
29 isVertMinMaxOk          Ec      Ec                      Ec
30 doVertLowPass           E               e       e       Ec
31 doVertDefFilter         Ec      Ec      e       e       Ec
32 isHorizDC               Ec      Ec                      Ec
33 isHorizMinMaxOk         a       E                       Ec
34 doHorizLowPass          E               e       e       Ec
35 doHorizDefFilter        Ec      Ec      e       e       Ec
36 do_a_deblock            Ec      E       Ec      E
37 deRing                  E               e       e*      Ecp
38 Vertical RKAlgo1        E               a       a
39 Horizontal RKAlgo1                      a       a
40 Vertical X1#            a               E       E
41 Horizontal X1#          a               E       E
42 LinIpolDeinterlace      e               E       E*
43 CubicIpolDeinterlace    a               e       e*
44 LinBlendDeinterlace     e               E       E*
45 MedianDeinterlace#      E       Ec      Ec
46 TempDeNoiser#           E               e       e       Ec
47
* I don't have a 3DNow! CPU -> it is untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
52 a = alternative / approximate impl
53 c = checked against the other implementations (-vo md5)
54 p = partially optimized, still some work to do
55 */
56
57 /*
58 TODO:
59 reduce the time wasted on the mem transfer
60 unroll stuff if instructions depend too much on the prior one
61 move YScale thing to the end instead of fixing QP
62 write a faster and higher quality deblocking filter :)
63 make the mainloop more flexible (variable number of blocks at once
64         (the if/else stuff per block is slowing things down)
65 compare the quality & speed of all filters
66 split this huge file
67 optimize c versions
68 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
69 ...
70 */
71
72 //Changelog: use the Subversion log
73
74 #include "config.h"
75 #include <inttypes.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #ifdef HAVE_MALLOC_H
80 #include <malloc.h>
81 #endif
82 //#undef HAVE_MMX2
83 //#define HAVE_3DNOW
84 //#undef HAVE_MMX
85 //#undef ARCH_X86
86 //#define DEBUG_BRIGHTNESS
87 #ifdef USE_FASTMEMCPY
88 #include "libvo/fastmemcpy.h"
89 #endif
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
92
93 #include "mangle.h" //FIXME should be supressed
94
95 #ifdef HAVE_ALTIVEC_H
96 #include <altivec.h>
97 #endif
98
99 #ifndef HAVE_MEMALIGN
100 #define memalign(a,b) malloc(b)
101 #endif
102
103 #define MIN(a,b) ((a) > (b) ? (b) : (a))
104 #define MAX(a,b) ((a) < (b) ? (b) : (a))
105 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
106 #define SIGN(a) ((a) > 0 ? 1 : -1)
107
108 #define GET_MODE_BUFFER_SIZE 500
109 #define OPTIONS_ARRAY_SIZE 10
110 #define BLOCK_SIZE 8
111 #define TEMP_STRIDE 8
112 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
113
114 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
115 #    define attribute_used __attribute__((used))
116 #    define always_inline __attribute__((always_inline)) inline
117 #else
118 #    define attribute_used
119 #    define always_inline inline
120 #endif
121
122 #if defined(ARCH_X86) || defined(ARCH_X86_64)
// 8-byte aligned 64-bit constants.
// wNN: the 16-bit value 0xNN repeated in each of the four words.
// bNN: the byte value 0xNN repeated in each of the eight bytes.
// attribute_used keeps them in the object file even if no C code refers to
// them (presumably they are referenced by name from inline asm in the
// included templates -- confirm there).
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
131 #endif
132
// Storage for a 768-entry byte table; it is not filled in this part of the file.
static uint8_t clip_table[3*256];
// Points 256 entries into clip_table, so indices -256..511 stay inside the array.
static uint8_t * const clip_tab= clip_table + 256;

// Compile-time verbosity level; diagnostics guarded by "verbose>N" are disabled at 0.
static const int verbose= 0;

// NOTE(review): presumably the threshold used by the deringing filter in the
// included templates -- it is not referenced in this part of the file.
static const int attribute_used deringThreshold= 20;
139
140
/**
 * Table of the known filters, terminated by an all-NULL/zero entry.
 * Each row is: short name, long name, three integers, filter mask constant.
 * NOTE(review): struct PPFilter is declared elsewhere; the three integers are
 * presumably the chroma default and the minimum luma / chroma quality at which
 * the filter is enabled -- confirm against the struct definition.
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
163
/**
 * Flat list of (alias, expansion) string pairs, terminated by a single NULL.
 * Even indices hold an alias name, the following odd index holds the filter
 * string it stands for ("default"/"de" and "fast"/"fa" are two spellings of
 * the same expansion).
 * NOTE(review): the code that applies these replacements is outside this
 * part of the file.
 */
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
173
174
175 #if defined(ARCH_X86) || defined(ARCH_X86_64)
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 * The asm statement has no outputs; p is passed in a general register.
 */
static inline void prefetchnta(void *p)
{
        asm volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
182
/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 * The asm statement has no outputs; p is passed in a general register.
 */
static inline void prefetcht0(void *p)
{
        asm volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
189
/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 * The asm statement has no outputs; p is passed in a general register.
 */
static inline void prefetcht1(void *p)
{
        asm volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
196
/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 * The asm statement has no outputs; p is passed in a general register.
 */
static inline void prefetcht2(void *p)
{
        asm volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
203 #endif
204
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
206
207 /**
208  * Check if the given 8x8 Block is mostly "flat"
209  */
210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
211 {
212         int numEq= 0;
213         int y;
214         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215         const int dcThreshold= dcOffset*2 + 1;
216
217         for(y=0; y<BLOCK_SIZE; y++)
218         {
219                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226                 src+= stride;
227         }
228         return numEq > c->ppMode.flatnessThreshold;
229 }
230
231 /**
232  * Check if the middle 8x8 Block in the given 8x16 block is flat
233  */
234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235         int numEq= 0;
236         int y;
237         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
238         const int dcThreshold= dcOffset*2 + 1;
239
240         src+= stride*4; // src points to begin of the 8x8 Block
241         for(y=0; y<BLOCK_SIZE-1; y++)
242         {
243                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
251                 src+= stride;
252         }
253         return numEq > c->ppMode.flatnessThreshold;
254 }
255
/**
 * Cheap horizontal range check over 8 rows.
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3); the block is rejected as soon as one sampled
 * difference lies outside [-2*QP, 2*QP].
 * @param src    first pixel of the first row
 * @param stride distance between two consecutive rows
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const int colA[4]= {0, 2, 4, 6};
        static const int colB[4]= {5, 7, 1, 3};
        int row;

        for(row=0; row<8; row++, src+= stride)
        {
                const int k= row & 3;
                // biased unsigned compare: fails when the difference is < -2*QP or > 2*QP
                if((unsigned)(src[colA[k]] - src[colB[k]] + 2*QP) > 4*QP)
                        return 0;
        }
        return 1;
}
278
279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280 {
281 #if 1
282 #if 1
283         int x;
284         src+= stride*4;
285         for(x=0; x<BLOCK_SIZE; x+=4)
286         {
287                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
288                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291         }
292 #else
293         int x;
294         src+= stride*3;
295         for(x=0; x<BLOCK_SIZE; x++)
296         {
297                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298         }
299 #endif
300         return 1;
301 #else
302         int x;
303         src+= stride*4;
304         for(x=0; x<BLOCK_SIZE; x++)
305         {
306                 int min=255;
307                 int max=0;
308                 int y;
309                 for(y=0; y<8; y++){
310                         int v= src[x + y*stride];
311                         if(v>max) max=v;
312                         if(v<min) min=v;
313                 }
314                 if(max-min > 2*QP) return 0;
315         }
316         return 1;
317 #endif
318 }
319
320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
321         if( isHorizDC_C(src, stride, c) ){
322                 if( isHorizMinMaxOk_C(src, stride, c->QP) )
323                         return 1;
324                 else
325                         return 0;
326         }else{
327                 return 2;
328         }
329 }
330
331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
332         if( isVertDC_C(src, stride, c) ){
333                 if( isVertMinMaxOk_C(src, stride, c->QP) )
334                         return 1;
335                 else
336                         return 0;
337         }else{
338                 return 2;
339         }
340 }
341
342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
343 {
344         int y;
345         for(y=0; y<BLOCK_SIZE; y++)
346         {
347                 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
348
349                 if(ABS(middleEnergy) < 8*c->QP)
350                 {
351                         const int q=(dst[3] - dst[4])/2;
352                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
353                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
354
355                         int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
356                         d= MAX(d, 0);
357
358                         d= (5*d + 32) >> 6;
359                         d*= SIGN(-middleEnergy);
360
361                         if(q>0)
362                         {
363                                 d= d<0 ? 0 : d;
364                                 d= d>q ? q : d;
365                         }
366                         else
367                         {
368                                 d= d>0 ? 0 : d;
369                                 d= d<q ? q : d;
370                         }
371
372                         dst[3]-= d;
373                         dst[4]+= d;
374                 }
375                 dst+= stride;
376         }
377 }
378
379 /**
380  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382  */
383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
384 {
385         int y;
386         for(y=0; y<BLOCK_SIZE; y++)
387         {
388                 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389                 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
390
391                 int sums[10];
392                 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393                 sums[1] = sums[0] - first  + dst[3];
394                 sums[2] = sums[1] - first  + dst[4];
395                 sums[3] = sums[2] - first  + dst[5];
396                 sums[4] = sums[3] - first  + dst[6];
397                 sums[5] = sums[4] - dst[0] + dst[7];
398                 sums[6] = sums[5] - dst[1] + last;
399                 sums[7] = sums[6] - dst[2] + last;
400                 sums[8] = sums[7] - dst[3] + last;
401                 sums[9] = sums[8] - dst[4] + last;
402
403                 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404                 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405                 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406                 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407                 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408                 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409                 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410                 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
411
412                 dst+= stride;
413         }
414 }
415
416 /**
417  * Experimental Filter 1 (Horizontal)
418  * will not damage linear gradients
419  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420  * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421  * MMX2 version does correct clipping C version doesnt
422  * not identical with the vertical one
423  */
424 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425 {
426         int y;
427         static uint64_t *lut= NULL;
428         if(lut==NULL)
429         {
430                 int i;
431                 lut= (uint64_t*)memalign(8, 256*8);
432                 for(i=0; i<256; i++)
433                 {
434                         int v= i < 128 ? 2*i : 2*(i-256);
435 /*
436 //Simulate 112242211 9-Tap filter
437                         uint64_t a= (v/16) & 0xFF;
438                         uint64_t b= (v/8) & 0xFF;
439                         uint64_t c= (v/4) & 0xFF;
440                         uint64_t d= (3*v/8) & 0xFF;
441 */
442 //Simulate piecewise linear interpolation
443                         uint64_t a= (v/16) & 0xFF;
444                         uint64_t b= (v*3/16) & 0xFF;
445                         uint64_t c= (v*5/16) & 0xFF;
446                         uint64_t d= (7*v/16) & 0xFF;
447                         uint64_t A= (0x100 - a)&0xFF;
448                         uint64_t B= (0x100 - b)&0xFF;
449                         uint64_t C= (0x100 - c)&0xFF;
450                         uint64_t D= (0x100 - c)&0xFF;
451
452                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453                                 (D<<24) | (C<<16) | (B<<8) | (A);
454                         //lut[i] = (v<<32) | (v<<24);
455                 }
456         }
457
458         for(y=0; y<BLOCK_SIZE; y++)
459         {
460                 int a= src[1] - src[2];
461                 int b= src[3] - src[4];
462                 int c= src[5] - src[6];
463
464                 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465
466                 if(d < QP)
467                 {
468                         int v = d * SIGN(-b);
469
470                         src[1] +=v/8;
471                         src[2] +=v/4;
472                         src[3] +=3*v/8;
473                         src[4] -=3*v/8;
474                         src[5] -=v/4;
475                         src[6] -=v/8;
476
477                 }
478                 src+=stride;
479         }
480 }
481
/**
 * Accurate deblock filter (C version).
 * Processes 8 lines; within a line, consecutive pixels are "step" apart and
 * consecutive lines are "stride" apart, so the same code serves both filter
 * directions depending on how the caller chooses step and stride.
 * For every line one of two filters is chosen:
 *  - if the line is flat (more than ppMode.flatnessThreshold of the 9
 *    neighbouring pixel pairs differ by at most dcOffset) and the spread of
 *    pixels 0..7 is below 2*QP, pixels 0..7 are replaced by a 9-tap low pass
 *    built from sliding sums;
 *  - if the line is not flat, the default filter adjusts only pixels 3 and 4
 *    around the edge, provided the middle energy is below 8*QP.
 * A flat line whose spread is 2*QP or more is left unchanged.
 * @param src    start of the first line; filtering starts 4 pixels (4*step) in
 * @param step   distance between consecutive pixels of one line
 * @param stride distance between consecutive lines
 * @param c      context supplying QP, nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold
 */
static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
        int y;
        const int QP= c->QP;
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
        const int dcThreshold= dcOffset*2 + 1;
//START_TIMER
        src+= step*4; // src points to begin of the 8x8 Block
        for(y=0; y<8; y++){
                int numEq= 0;

                // count neighbouring pairs (pixels -1..8) whose difference is within +-dcOffset
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
                if(numEq > c->ppMode.flatnessThreshold){
                        int min, max, x;

                        // min/max of pixels 0..7, examined pairwise: only the larger of a
                        // pair can raise max and only the smaller can lower min
                        if(src[0] > src[step]){
                            max= src[0];
                            min= src[step];
                        }else{
                            max= src[step];
                            min= src[0];
                        }
                        for(x=2; x<8; x+=2){
                                if(src[x*step] > src[(x+1)*step]){
                                        if(src[x    *step] > max) max= src[ x   *step];
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
                                }else{
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
                                        if(src[ x   *step] < min) min= src[ x   *step];
                                }
                        }
                        if(max-min < 2*QP){
                                // pad with the outer neighbour only if it is close to the edge pixel
                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

                                // sums[k] is a sliding 7-pixel sum (plus rounding term 4) over the
                                // line padded with 4x first on the left and 4x last on the right
                                int sums[10];
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                                sums[1] = sums[0] - first       + src[3*step];
                                sums[2] = sums[1] - first       + src[4*step];
                                sums[3] = sums[2] - first       + src[5*step];
                                sums[4] = sums[3] - first       + src[6*step];
                                sums[5] = sums[4] - src[0*step] + src[7*step];
                                sums[6] = sums[5] - src[1*step] + last;
                                sums[7] = sums[6] - src[2*step] + last;
                                sums[8] = sums[7] - src[3*step] + last;
                                sums[9] = sums[8] - src[4*step] + last;

                                // two overlapping sums plus twice the centre pixel, divided by 16
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
                        }
                }else{
                        // non-flat line: default filter, touches only pixels 3 and 4
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

                        if(ABS(middleEnergy) < 8*QP)
                        {
                                const int q=(src[3*step] - src[4*step])/2;
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
                                d= MAX(d, 0);

                                d= (5*d + 32) >> 6;
                                d*= SIGN(-middleEnergy);

                                // clamp the correction between 0 and q (half the step across the edge)
                                if(q>0)
                                {
                                        d= d<0 ? 0 : d;
                                        d= d>q ? q : d;
                                }
                                else
                                {
                                        d= d>0 ? 0 : d;
                                        d= d<q ? q : d;
                                }

                                src[3*step]-= d;
                                src[4*step]+= d;
                        }
                }

                src += stride;
        }
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
587
588 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
589 //Plain C versions
590 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
591 #define COMPILE_C
592 #endif
593
594 #ifdef ARCH_POWERPC
595 #ifdef HAVE_ALTIVEC
596 #define COMPILE_ALTIVEC
597 #endif //HAVE_ALTIVEC
598 #endif //ARCH_POWERPC
599
600 #if defined(ARCH_X86) || defined(ARCH_X86_64)
601
602 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
603 #define COMPILE_MMX
604 #endif
605
606 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
607 #define COMPILE_MMX2
608 #endif
609
610 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
611 #define COMPILE_3DNOW
612 #endif
613 #endif //ARCH_X86
614
615 #undef HAVE_MMX
616 #undef HAVE_MMX2
617 #undef HAVE_3DNOW
618 #undef HAVE_ALTIVEC
619
620 #ifdef COMPILE_C
621 #undef HAVE_MMX
622 #undef HAVE_MMX2
623 #undef HAVE_3DNOW
624 #define RENAME(a) a ## _C
625 #include "postprocess_template.c"
626 #endif
627
628 #ifdef ARCH_POWERPC
629 #ifdef COMPILE_ALTIVEC
630 #undef RENAME
631 #define HAVE_ALTIVEC
632 #define RENAME(a) a ## _altivec
633 #include "postprocess_altivec_template.c"
634 #include "postprocess_template.c"
635 #endif
636 #endif //ARCH_POWERPC
637
638 //MMX versions
639 #ifdef COMPILE_MMX
640 #undef RENAME
641 #define HAVE_MMX
642 #undef HAVE_MMX2
643 #undef HAVE_3DNOW
644 #define RENAME(a) a ## _MMX
645 #include "postprocess_template.c"
646 #endif
647
648 //MMX2 versions
649 #ifdef COMPILE_MMX2
650 #undef RENAME
651 #define HAVE_MMX
652 #define HAVE_MMX2
653 #undef HAVE_3DNOW
654 #define RENAME(a) a ## _MMX2
655 #include "postprocess_template.c"
656 #endif
657
658 //3DNOW versions
659 #ifdef COMPILE_3DNOW
660 #undef RENAME
661 #define HAVE_MMX
662 #undef HAVE_MMX2
663 #define HAVE_3DNOW
664 #define RENAME(a) a ## _3DNow
665 #include "postprocess_template.c"
666 #endif
667
// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
669
/**
 * Dispatch one plane to the postProcess_<variant>() generated from the
 * included template.
 * The mode *vm is copied into the context before dispatching. With
 * RUNTIME_CPUDETECT the variant is selected from c->cpuCaps at run time;
 * otherwise it is fixed at compile time by the HAVE_* macros.
 * All remaining arguments are forwarded unchanged.
 * @param vm mode, cast to PPMode and copied into c->ppMode
 * @param vc context, cast to PPContext
 */
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
        PPContext *c= (PPContext *)vc;
        PPMode *ppMode= (PPMode *)vm;
        c->ppMode= *ppMode; //FIXME

        // using ifs here as they are faster than function pointers, although the
        // difference wouldn't be measurable here, but it's much better because
        // someone might exchange the CPU without restarting mplayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        // ordered by speed, fastest first
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
#endif
#endif
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}
715
716 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
717 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
718
/**
 * Help text for the -pp command line option.
 * The short and long filter names listed here must match the entries of
 * filters[]: the accurate deblocking filters are registered as
 * "ahdeblock" / "avdeblock" and the quantizer override as "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
764
765 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
766 {
767         char temp[GET_MODE_BUFFER_SIZE];
768         char *p= temp;
769         const char *filterDelimiters= ",/";
770         const char *optionDelimiters= ":";
771         struct PPMode *ppMode;
772         char *filterToken;
773
774         ppMode= memalign(8, sizeof(PPMode));
775
776         ppMode->lumMode= 0;
777         ppMode->chromMode= 0;
778         ppMode->maxTmpNoise[0]= 700;
779         ppMode->maxTmpNoise[1]= 1500;
780         ppMode->maxTmpNoise[2]= 3000;
781         ppMode->maxAllowedY= 234;
782         ppMode->minAllowedY= 16;
783         ppMode->baseDcDiff= 256/8;
784         ppMode->flatnessThreshold= 56-16-1;
785         ppMode->maxClippedThreshold= 0.01;
786         ppMode->error=0;
787
788         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
789
790         if(verbose>1) printf("pp: %s\n", name);
791
792         for(;;){
793                 char *filterName;
794                 int q= 1000000; //PP_QUALITY_MAX;
795                 int chrom=-1;
796                 int luma=-1;
797                 char *option;
798                 char *options[OPTIONS_ARRAY_SIZE];
799                 int i;
800                 int filterNameOk=0;
801                 int numOfUnknownOptions=0;
802                 int enable=1; //does the user want us to enabled or disabled the filter
803
804                 filterToken= strtok(p, filterDelimiters);
805                 if(filterToken == NULL) break;
806                 p+= strlen(filterToken) + 1; // p points to next filterToken
807                 filterName= strtok(filterToken, optionDelimiters);
808                 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
809
810                 if(*filterName == '-')
811                 {
812                         enable=0;
813                         filterName++;
814                 }
815
816                 for(;;){ //for all options
817                         option= strtok(NULL, optionDelimiters);
818                         if(option == NULL) break;
819
820                         if(verbose>1) printf("pp: option: %s\n", option);
821                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
822                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
823                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
824                         else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
825                         else
826                         {
827                                 options[numOfUnknownOptions] = option;
828                                 numOfUnknownOptions++;
829                         }
830                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
831                 }
832                 options[numOfUnknownOptions] = NULL;
833
834                 /* replace stuff from the replace Table */
835                 for(i=0; replaceTable[2*i]!=NULL; i++)
836                 {
837                         if(!strcmp(replaceTable[2*i], filterName))
838                         {
839                                 int newlen= strlen(replaceTable[2*i + 1]);
840                                 int plen;
841                                 int spaceLeft;
842
843                                 if(p==NULL) p= temp, *p=0;      //last filter
844                                 else p--, *p=',';               //not last filter
845
846                                 plen= strlen(p);
847                                 spaceLeft= p - temp + plen;
848                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
849                                 {
850                                         ppMode->error++;
851                                         break;
852                                 }
853                                 memmove(p + newlen, p, plen+1);
854                                 memcpy(p, replaceTable[2*i + 1], newlen);
855                                 filterNameOk=1;
856                         }
857                 }
858
859                 for(i=0; filters[i].shortName!=NULL; i++)
860                 {
861 //                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
862                         if(   !strcmp(filters[i].longName, filterName)
863                            || !strcmp(filters[i].shortName, filterName))
864                         {
865                                 ppMode->lumMode &= ~filters[i].mask;
866                                 ppMode->chromMode &= ~filters[i].mask;
867
868                                 filterNameOk=1;
869                                 if(!enable) break; // user wants to disable it
870
871                                 if(q >= filters[i].minLumQuality && luma)
872                                         ppMode->lumMode|= filters[i].mask;
873                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
874                                         if(q >= filters[i].minChromQuality)
875                                                 ppMode->chromMode|= filters[i].mask;
876
877                                 if(filters[i].mask == LEVEL_FIX)
878                                 {
879                                         int o;
880                                         ppMode->minAllowedY= 16;
881                                         ppMode->maxAllowedY= 234;
882                                         for(o=0; options[o]!=NULL; o++)
883                                         {
884                                                 if(  !strcmp(options[o],"fullyrange")
885                                                    ||!strcmp(options[o],"f"))
886                                                 {
887                                                         ppMode->minAllowedY= 0;
888                                                         ppMode->maxAllowedY= 255;
889                                                         numOfUnknownOptions--;
890                                                 }
891                                         }
892                                 }
893                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
894                                 {
895                                         int o;
896                                         int numOfNoises=0;
897
898                                         for(o=0; options[o]!=NULL; o++)
899                                         {
900                                                 char *tail;
901                                                 ppMode->maxTmpNoise[numOfNoises]=
902                                                         strtol(options[o], &tail, 0);
903                                                 if(tail!=options[o])
904                                                 {
905                                                         numOfNoises++;
906                                                         numOfUnknownOptions--;
907                                                         if(numOfNoises >= 3) break;
908                                                 }
909                                         }
910                                 }
911                                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
912                                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
913                                 {
914                                         int o;
915
916                                         for(o=0; options[o]!=NULL && o<2; o++)
917                                         {
918                                                 char *tail;
919                                                 int val= strtol(options[o], &tail, 0);
920                                                 if(tail==options[o]) break;
921
922                                                 numOfUnknownOptions--;
923                                                 if(o==0) ppMode->baseDcDiff= val;
924                                                 else ppMode->flatnessThreshold= val;
925                                         }
926                                 }
927                                 else if(filters[i].mask == FORCE_QUANT)
928                                 {
929                                         int o;
930                                         ppMode->forcedQuant= 15;
931
932                                         for(o=0; options[o]!=NULL && o<1; o++)
933                                         {
934                                                 char *tail;
935                                                 int val= strtol(options[o], &tail, 0);
936                                                 if(tail==options[o]) break;
937
938                                                 numOfUnknownOptions--;
939                                                 ppMode->forcedQuant= val;
940                                         }
941                                 }
942                         }
943                 }
944                 if(!filterNameOk) ppMode->error++;
945                 ppMode->error += numOfUnknownOptions;
946         }
947
948         if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
949         if(ppMode->error)
950         {
951                 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
952                 free(ppMode);
953                 return NULL;
954         }
955         return ppMode;
956 }
957
958 void pp_free_mode(pp_mode_t *mode){
959     if(mode) free(mode);
960 }
961
/**
 * Replaces the buffer at *p with a new, zero-filled, aligned allocation.
 *
 * The previous buffer (if any) is freed; its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment passed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * On allocation failure, or if size is negative (e.g. after an int overflow
 * in the caller's size computation), *p is left NULL instead of being
 * written through.
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= NULL;
        if(size < 0) return; // would turn into a huge size_t
        *p= memalign(alignment, size);
        if(*p) memset(*p, 0, size);
}
967
968 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
969         int mbWidth = (width+15)>>4;
970         int mbHeight= (height+15)>>4;
971         int i;
972
973         c->stride= stride;
974         c->qpStride= qpStride;
975
976         reallocAlign((void **)&c->tempDst, 8, stride*24);
977         reallocAlign((void **)&c->tempSrc, 8, stride*24);
978         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
979         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
980         for(i=0; i<256; i++)
981                 c->yHistogram[i]= width*height/64*15/256;
982
983         for(i=0; i<3; i++)
984         {
985                 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
986                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
987                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
988         }
989
990         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
991         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
992         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
993         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
994 }
995
996 static void global_init(void){
997         int i;
998         memset(clip_table, 0, 256);
999         for(i=256; i<512; i++)
1000                 clip_table[i]= i;
1001         memset(clip_table+512, 0, 256);
1002 }
1003
1004 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1005         PPContext *c= memalign(32, sizeof(PPContext));
1006         int stride= (width+15)&(~15);    //assumed / will realloc if needed
1007         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
1008
1009         global_init();
1010
1011         memset(c, 0, sizeof(PPContext));
1012         c->cpuCaps= cpuCaps;
1013         if(cpuCaps&PP_FORMAT){
1014                 c->hChromaSubSample= cpuCaps&0x3;
1015                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1016         }else{
1017                 c->hChromaSubSample= 1;
1018                 c->vChromaSubSample= 1;
1019         }
1020
1021         reallocBuffers(c, width, height, stride, qpStride);
1022
1023         c->frameNum=-1;
1024
1025         return c;
1026 }
1027
1028 void pp_free_context(void *vc){
1029         PPContext *c = (PPContext*)vc;
1030         int i;
1031
1032         for(i=0; i<3; i++) free(c->tempBlured[i]);
1033         for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1034
1035         free(c->tempBlocks);
1036         free(c->yHistogram);
1037         free(c->tempDst);
1038         free(c->tempSrc);
1039         free(c->deintTemp);
1040         free(c->stdQPTable);
1041         free(c->nonBQPTable);
1042         free(c->forcedQPTable);
1043
1044         memset(c, 0, sizeof(PPContext));
1045
1046         free(c);
1047 }
1048
1049 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1050                  uint8_t * dst[3], int dstStride[3],
1051                  int width, int height,
1052                  QP_STORE_T *QP_store,  int QPStride,
1053                  pp_mode_t *vm,  void *vc, int pict_type)
1054 {
1055         int mbWidth = (width+15)>>4;
1056         int mbHeight= (height+15)>>4;
1057         PPMode *mode = (PPMode*)vm;
1058         PPContext *c = (PPContext*)vc;
1059         int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1060         int absQPStride = ABS(QPStride);
1061
1062         // c->stride and c->QPStride are always positive
1063         if(c->stride < minStride || c->qpStride < absQPStride)
1064                 reallocBuffers(c, width, height,
1065                                 MAX(minStride, c->stride),
1066                                 MAX(c->qpStride, absQPStride));
1067
1068         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1069         {
1070                 int i;
1071                 QP_store= c->forcedQPTable;
1072                 absQPStride = QPStride = 0;
1073                 if(mode->lumMode & FORCE_QUANT)
1074                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1075                 else
1076                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1077         }
1078 //printf("pict_type:%d\n", pict_type);
1079
1080         if(pict_type & PP_PICT_TYPE_QP2){
1081                 int i;
1082                 const int count= mbHeight * absQPStride;
1083                 for(i=0; i<(count>>2); i++){
1084                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1085                 }
1086                 for(i<<=2; i<count; i++){
1087                         c->stdQPTable[i] = QP_store[i]>>1;
1088                 }
1089                 QP_store= c->stdQPTable;
1090                 QPStride= absQPStride;
1091         }
1092
1093 if(0){
1094 int x,y;
1095 for(y=0; y<mbHeight; y++){
1096         for(x=0; x<mbWidth; x++){
1097                 printf("%2d ", QP_store[x + y*QPStride]);
1098         }
1099         printf("\n");
1100 }
1101         printf("\n");
1102 }
1103
1104         if((pict_type&7)!=3)
1105         {
1106                 if (QPStride >= 0) {
1107                         int i;
1108                         const int count= mbHeight * QPStride;
1109                         for(i=0; i<(count>>2); i++){
1110                                 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1111                         }
1112                         for(i<<=2; i<count; i++){
1113                                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1114                         }
1115                 } else {
1116                         int i,j;
1117                         for(i=0; i<mbHeight; i++) {
1118                                     for(j=0; j<absQPStride; j++) {
1119                                         c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1120                                 }
1121                         }
1122                 }
1123         }
1124
1125         if(verbose>2)
1126         {
1127                 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1128         }
1129
1130         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1131                 width, height, QP_store, QPStride, 0, mode, c);
1132
1133         width  = (width )>>c->hChromaSubSample;
1134         height = (height)>>c->vChromaSubSample;
1135
1136         if(mode->chromMode)
1137         {
1138                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1139                         width, height, QP_store, QPStride, 1, mode, c);
1140                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1141                         width, height, QP_store, QPStride, 2, mode, c);
1142         }
1143         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1144         {
1145                 linecpy(dst[1], src[1], height, srcStride[1]);
1146                 linecpy(dst[2], src[2], height, srcStride[2]);
1147         }
1148         else
1149         {
1150                 int y;
1151                 for(y=0; y<height; y++)
1152                 {
1153                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1154                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1155                 }
1156         }
1157 }
1158