]> git.sesse.net Git - ffmpeg/blob - libavcodec/imgconvert.c
Creative YUV (CYUV) decoder by (Mike Melanson <melanson at pcisys dot net>)
[ffmpeg] / libavcodec / imgconvert.c
1 /*
 * Misc image conversion routines
3  * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 #include "avcodec.h"
20 #include "dsputil.h"
21
22 #ifdef USE_FASTMEMCPY
23 #include "fastmemcpy.h"
24 #endif
25
26 #ifdef HAVE_MMX
27 #include "i386/mmx.h"
28 #endif
29
/*
 * Static description of one pixel format: its name, the number of
 * AVPicture planes it uses, a few classification flags and the chroma
 * subsampling shifts.
 */
typedef struct PixFmtInfo {
    /* format name, as returned by avcodec_get_pix_fmt_name() */
    const char *name;

    /* number of components in the AVPicture array */
    UINT8 nb_components;

    /* classification flags, one bit each */
    UINT8 is_yuv : 1;       /* YUV instead of RGB color space */
    UINT8 is_packed : 1;    /* multiple components share the same word */
    UINT8 is_paletted : 1;  /* paletted format */
    UINT8 is_alpha : 1;     /* alpha can be specified */
    UINT8 is_gray : 1;      /* gray or monochrome format */

    /* chroma subsampling: the factor along each axis is 2 ^ shift */
    UINT8 x_chroma_shift;
    UINT8 y_chroma_shift;
} PixFmtInfo;
41
42 /* this table gives more information about formats */
43 static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
44     /* YUV formats */
45     [PIX_FMT_YUV420P] = {
46         .name = "yuv420p",
47         .nb_components = 3, .is_yuv = 1,
48         .x_chroma_shift = 1, .y_chroma_shift = 1, 
49     },
50     [PIX_FMT_YUV422P] = {
51         .name = "yuv422p",
52         .nb_components = 3, .is_yuv = 1,
53         .x_chroma_shift = 1, .y_chroma_shift = 0, 
54     },
55     [PIX_FMT_YUV444P] = {
56         .name = "yuv444p",
57         .nb_components = 3, .is_yuv = 1,
58         .x_chroma_shift = 0, .y_chroma_shift = 0, 
59     },
60     [PIX_FMT_YUV422] = {
61         .name = "yuv422",
62         .nb_components = 1, .is_yuv = 1, .is_packed = 1,
63         .x_chroma_shift = 1, .y_chroma_shift = 0,
64     },
65     [PIX_FMT_YUV410P] = {
66         .name = "yuv410p",
67         .nb_components = 3, .is_yuv = 1,
68         .x_chroma_shift = 2, .y_chroma_shift = 2,
69     },
70     [PIX_FMT_YUV411P] = {
71         .name = "yuv411p",
72         .nb_components = 3, .is_yuv = 1,
73         .x_chroma_shift = 2, .y_chroma_shift = 0,
74     },
75
76     /* RGB formats */
77     [PIX_FMT_RGB24] = {
78         .name = "rgb24",
79         .nb_components = 1, .is_packed = 1,
80     },
81     [PIX_FMT_BGR24] = {
82         .name = "bgr24",
83         .nb_components = 1, .is_packed = 1,
84     },
85     [PIX_FMT_RGBA32] = {
86         .name = "rgba32",
87         .nb_components = 1, .is_packed = 1, .is_alpha = 1,
88     },
89     [PIX_FMT_RGB565] = {
90         .name = "rgb565",
91         .nb_components = 1, .is_packed = 1,
92     },
93     [PIX_FMT_RGB555] = {
94         .name = "rgb555",
95         .nb_components = 1, .is_packed = 1, .is_alpha = 1,
96     },
97
98     /* gray / mono formats */
99     [PIX_FMT_GRAY8] = {
100         .name = "gray",
101         .nb_components = 1, .is_gray = 1,
102     },
103     [PIX_FMT_MONOWHITE] = {
104         .name = "monow",
105         .nb_components = 1, .is_packed = 1, .is_gray = 1,
106     },
107     [PIX_FMT_MONOBLACK] = {
108         .name = "monob",
109         .nb_components = 1, .is_packed = 1, .is_gray = 1,
110     },
111 };
112
113 void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift)
114 {
115     if (pix_fmt_info[pix_fmt].is_yuv) {
116         *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift;
117         *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift;
118     } else {
119         *h_shift=0;
120         *v_shift=0;
121     }
122 }
123
124 const char *avcodec_get_pix_fmt_name(int pix_fmt)
125 {
126     if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB)
127         return "???";
128     else
129         return pix_fmt_info[pix_fmt].name;
130 }
131
132 /* Picture field are filled with 'ptr' addresses. Also return size */
133 int avpicture_fill(AVPicture *picture, UINT8 *ptr,
134                    int pix_fmt, int width, int height)
135 {
136     int size, w2, h2, size2;
137     PixFmtInfo *pinfo;
138     
139     pinfo = &pix_fmt_info[pix_fmt];
140     size = width * height;
141     switch(pix_fmt) {
142     case PIX_FMT_YUV420P:
143     case PIX_FMT_YUV422P:
144     case PIX_FMT_YUV444P:
145     case PIX_FMT_YUV410P:
146     case PIX_FMT_YUV411P:
147         w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
148         h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
149         size2 = w2 * h2;
150         picture->data[0] = ptr;
151         picture->data[1] = picture->data[0] + size;
152         picture->data[2] = picture->data[1] + size2;
153         picture->linesize[0] = width;
154         picture->linesize[1] = w2;
155         picture->linesize[2] = w2;
156         return size + 2 * size2;
157     case PIX_FMT_RGB24:
158     case PIX_FMT_BGR24:
159         picture->data[0] = ptr;
160         picture->data[1] = NULL;
161         picture->data[2] = NULL;
162         picture->linesize[0] = width * 3;
163         return size * 3;
164     case PIX_FMT_RGBA32:
165         picture->data[0] = ptr;
166         picture->data[1] = NULL;
167         picture->data[2] = NULL;
168         picture->linesize[0] = width * 4;
169         return size * 4;
170     case PIX_FMT_RGB555:
171     case PIX_FMT_RGB565:
172     case PIX_FMT_YUV422:
173         picture->data[0] = ptr;
174         picture->data[1] = NULL;
175         picture->data[2] = NULL;
176         picture->linesize[0] = width * 2;
177         return size * 2;
178     case PIX_FMT_GRAY8:
179         picture->data[0] = ptr;
180         picture->data[1] = NULL;
181         picture->data[2] = NULL;
182         picture->linesize[0] = width;
183         return size;
184     case PIX_FMT_MONOWHITE:
185     case PIX_FMT_MONOBLACK:
186         picture->data[0] = ptr;
187         picture->data[1] = NULL;
188         picture->data[2] = NULL;
189         picture->linesize[0] = (width + 7) >> 3;
190         return picture->linesize[0] * height;
191     default:
192         picture->data[0] = NULL;
193         picture->data[1] = NULL;
194         picture->data[2] = NULL;
195         return -1;
196     }
197 }
198
199 int avpicture_get_size(int pix_fmt, int width, int height)
200 {
201     AVPicture dummy_pict;
202     return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height);
203 }
204
205
206 /* XXX: totally non optimized */
207
208 static void yuv422_to_yuv420p(AVPicture *dst, AVPicture *src,
209                               int width, int height)
210 {
211     UINT8 *lum, *cb, *cr;
212     int x, y;
213     const UINT8 *p;
214  
215     lum = dst->data[0];
216     cb = dst->data[1];
217     cr = dst->data[2];
218     p = src->data[0];
219    
220     for(y=0;y<height;y+=2) {
221         for(x=0;x<width;x+=2) {
222             lum[0] = p[0];
223             cb[0] = p[1];
224             lum[1] = p[2];
225             cr[0] = p[3];
226             p += 4;
227             lum += 2;
228             cb++;
229             cr++;
230         }
231         for(x=0;x<width;x+=2) {
232             lum[0] = p[0];
233             lum[1] = p[2];
234             p += 4;
235             lum += 2;
236         }
237     }
238 }
239
/* number of fractional bits used by the FIX() fixed-point constants */
#define SCALEBITS 8
/* one half in that fixed-point format */
#define ONE_HALF (1 << (SCALEBITS - 1))
/* convert a real constant to fixed point, rounding to nearest */
#define FIX(x) ((int) ((x) * (1L << SCALEBITS) + 0.5))
243
/* XXX: use generic filter ? */
/*
 * 1x2 -> 1x1: halve the height of a plane.
 *
 * Each output sample is the truncated mean of two vertically adjacent
 * source samples. width and height are the dimensions of the output;
 * two source lines are consumed per output line. dst_wrap and src_wrap
 * are the distances in bytes between successive lines.
 */
static void shrink2(UINT8 *dst, int dst_wrap,
                    UINT8 *src, int src_wrap,
                    int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const UINT8 *upper = src;
        const UINT8 *lower = src + src_wrap;
        UINT8 *out = dst;

        for (col = 0; col < width; col++)
            out[col] = (upper[col] + lower[col]) >> 1;

        src += 2 * src_wrap;
        dst += dst_wrap;
    }
}
276
/*
 * 2x2 -> 1x1: halve both dimensions of a plane.
 *
 * Each output sample is the rounded mean of a 2x2 block of source
 * samples: the four values are summed, 2 is added for rounding and the
 * total is divided by 4 (shift by 2). Shifting by 1 here would yield
 * about twice the mean and wrap when stored to a byte.
 *
 * width and height are the dimensions of the output; 2 * width source
 * samples on two source lines are consumed per output line. dst_wrap and
 * src_wrap are the distances in bytes between successive lines.
 */
static void shrink22(UINT8 *dst, int dst_wrap,
                     UINT8 *src, int src_wrap,
                     int width, int height)
{
    int w;
    const UINT8 *s1, *s2;
    UINT8 *d;

    for (; height > 0; height--) {
        s1 = src;
        s2 = s1 + src_wrap;
        d = dst;
        for (w = width; w > 0; w--) {
            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
            s1 += 2;
            s2 += 2;
            d++;
        }
        src += 2 * src_wrap;
        dst += dst_wrap;
    }
}
308
/*
 * 1x1 -> 2x2: double both dimensions of a plane by sample replication.
 *
 * width and height are the dimensions of the output. Every source sample
 * is written to two horizontally adjacent output samples; when width is
 * odd the last output sample gets a single copy of the next source
 * sample. Leftover columns must be doubled like the rest: copying them
 * 1:1 would produce wrong pixels and read past the end of the source
 * line.
 *
 * The source line advances after each output row for which the remaining
 * row count is odd, i.e. after every second output row when height is
 * even.
 *
 * dst_wrap and src_wrap are the distances in bytes between successive
 * lines.
 */
static void grow22(UINT8 *dst, int dst_wrap,
                     UINT8 *src, int src_wrap,
                     int width, int height)
{
    int w;
    const UINT8 *s1;
    UINT8 *d;

    for (; height > 0; height--) {
        s1 = src;
        d = dst;
        /* one source sample -> two output samples */
        for (w = width; w >= 2; w -= 2) {
            d[1] = d[0] = s1[0];
            s1++;
            d += 2;
        }
        /* odd width: the last output column has no partner */
        if (w)
            d[0] = s1[0];
        if (height % 2)
            src += src_wrap;
        dst += dst_wrap;
    }
}
336
/*
 * 1x2 -> 2x1: halve the height and double the width of a plane.
 *
 * For each of 'height' output lines, 'width' vertical pairs of source
 * samples are averaged (truncated mean) and each average is written to
 * two adjacent output samples, so 2 * width bytes are stored per output
 * line and two source lines are consumed. dst_wrap and src_wrap are the
 * distances in bytes between successive lines.
 */
static void conv411(UINT8 *dst, int dst_wrap,
                    UINT8 *src, int src_wrap,
                    int width, int height)
{
    int row, col, mean;

    for (row = 0; row < height; row++) {
        const UINT8 *upper = src;
        const UINT8 *lower = src + src_wrap;
        UINT8 *out = dst;

        for (col = 0; col < width; col++) {
            mean = (upper[col] + lower[col]) >> 1;
            out[2 * col] = mean;
            out[2 * col + 1] = mean;
        }

        src += src_wrap * 2;
        dst += dst_wrap;
    }
}
361
/*
 * Copy a rectangle of 'height' lines of 'width' bytes each from src to
 * dst. dst_wrap and src_wrap are the distances in bytes between
 * successive lines of the destination and the source.
 */
static void img_copy(UINT8 *dst, int dst_wrap,
                     UINT8 *src, int src_wrap,
                     int width, int height)
{
    int row;

    for (row = 0; row < height; row++) {
        memcpy(dst, src, width);
        src += src_wrap;
        dst += dst_wrap;
    }
}
372
/* number of fractional bits kept in the YUV -> RGB coefficients below */
#define SCALE_BITS 10

/*
 * YUV -> RGB coefficients, written with 16 fractional bits and reduced
 * to SCALE_BITS fractional bits:
 *   C_Y  scales (luma - 16)
 *   C_RV scales cr in the red offset
 *   C_BU scales cb in the blue offset
 *   C_GU and C_GV scale cb and cr in the green offset
 */
#define C_Y  (76309 >> (16 - SCALE_BITS))
#define C_RV (117504 >> (16 - SCALE_BITS))
#define C_BU (138453 >> (16 - SCALE_BITS))
#define C_GU (13954 >> (16 - SCALE_BITS))
#define C_GV (34903 >> (16 - SCALE_BITS))

/*
 * Convert one luma sample y1 to r, g, b.
 *
 * The expansion relies on names from the calling scope: the int scratch
 * variable 'y', the per-chroma offsets 'r_add', 'g_add' and 'b_add'
 * (which already include the rounding term) and the lookup table 'cm'
 * that the scaled result is indexed into.
 *
 * y1 is parenthesized so that an argument containing a lower-precedence
 * operator still has 16 subtracted from its whole value.
 */
#define YUV_TO_RGB2(r, g, b, y1)\
{\
    y = ((y1) - 16) * C_Y;\
    r = cm[(y + r_add) >> SCALE_BITS];\
    g = cm[(y + g_add) >> SCALE_BITS];\
    b = cm[(y + b_add) >> SCALE_BITS];\
}
388
389 /* XXX: no chroma interpolating is done */
390 #define RGB_FUNCTIONS(rgb_name)                                         \
391                                                                         \
392 static void yuv420p_to_ ## rgb_name (AVPicture *dst, AVPicture *src,    \
393                                      int width, int height)             \
394 {                                                                       \
395     UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;             \
396     int w, y, cb, cr, r_add, g_add, b_add, width2;                      \
397     UINT8 *cm = cropTbl + MAX_NEG_CROP;                                 \
398     unsigned int r, g, b;                                               \
399                                                                         \
400     d = dst->data[0];                                                   \
401     y1_ptr = src->data[0];                                              \
402     cb_ptr = src->data[1];                                              \
403     cr_ptr = src->data[2];                                              \
404     width2 = (width + 1) >> 1;                                          \
405     for(;height >= 2; height -= 2) {                                    \
406         d1 = d;                                                         \
407         d2 = d + dst->linesize[0];                                      \
408         y2_ptr = y1_ptr + src->linesize[0];                             \
409         for(w = width; w >= 2; w -= 2) {                                \
410             cb = cb_ptr[0] - 128;                                       \
411             cr = cr_ptr[0] - 128;                                       \
412             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
413             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
414             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
415                                                                         \
416             /* output 4 pixels */                                       \
417             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
418             RGB_OUT(d1, r, g, b);                                       \
419                                                                         \
420             YUV_TO_RGB2(r, g, b, y1_ptr[1]);                            \
421             RGB_OUT(d1 + BPP, r, g, b);                                 \
422                                                                         \
423             YUV_TO_RGB2(r, g, b, y2_ptr[0]);                            \
424             RGB_OUT(d2, r, g, b);                                       \
425                                                                         \
426             YUV_TO_RGB2(r, g, b, y2_ptr[1]);                            \
427             RGB_OUT(d2 + BPP, r, g, b);                                 \
428                                                                         \
429             d1 += 2 * BPP;                                              \
430             d2 += 2 * BPP;                                              \
431                                                                         \
432             y1_ptr += 2;                                                \
433             y2_ptr += 2;                                                \
434             cb_ptr++;                                                   \
435             cr_ptr++;                                                   \
436         }                                                               \
437         /* handle odd width */                                          \
438         if (w) {                                                        \
439             cb = cb_ptr[0] - 128;                                       \
440             cr = cr_ptr[0] - 128;                                       \
441             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
442             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
443             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
444                                                                         \
445             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
446             RGB_OUT(d1, r, g, b);                                       \
447                                                                         \
448             YUV_TO_RGB2(r, g, b, y2_ptr[0]);                            \
449             RGB_OUT(d2, r, g, b);                                       \
450             d1 += BPP;                                                  \
451             d2 += BPP;                                                  \
452             y1_ptr++;                                                   \
453             y2_ptr++;                                                   \
454             cb_ptr++;                                                   \
455             cr_ptr++;                                                   \
456         }                                                               \
457         d += 2 * dst->linesize[0];                                      \
458         y1_ptr += 2 * src->linesize[0] - width;                         \
459         cb_ptr += src->linesize[1] - width2;                            \
460         cr_ptr += src->linesize[2] - width2;                            \
461     }                                                                   \
462     /* handle odd height */                                             \
463     if (height) {                                                       \
464         d1 = d;                                                         \
465         for(w = width; w >= 2; w -= 2) {                                \
466             cb = cb_ptr[0] - 128;                                       \
467             cr = cr_ptr[0] - 128;                                       \
468             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
469             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
470             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
471                                                                         \
472             /* output 2 pixels */                                       \
473             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
474             RGB_OUT(d1, r, g, b);                                       \
475                                                                         \
476             YUV_TO_RGB2(r, g, b, y1_ptr[1]);                            \
477             RGB_OUT(d1 + BPP, r, g, b);                                 \
478                                                                         \
479             d1 += 2 * BPP;                                              \
480                                                                         \
481             y1_ptr += 2;                                                \
482             cb_ptr++;                                                   \
483             cr_ptr++;                                                   \
484         }                                                               \
485         /* handle width */                                              \
486         if (w) {                                                        \
487             cb = cb_ptr[0] - 128;                                       \
488             cr = cr_ptr[0] - 128;                                       \
489             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
490             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
491             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
492                                                                         \
493             /* output 2 pixels */                                       \
494             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
495             RGB_OUT(d1, r, g, b);                                       \
496             d1 += BPP;                                                  \
497                                                                         \
498             y1_ptr++;                                                   \
499             cb_ptr++;                                                   \
500             cr_ptr++;                                                   \
501         }                                                               \
502     }                                                                   \
503 }                                                                       \
504                                                                         \
505 /* XXX: no chroma interpolating is done */                              \
506 static void yuv422p_to_ ## rgb_name (AVPicture *dst, AVPicture *src,    \
507                                     int width, int height)              \
508 {                                                                       \
509     UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;                           \
510     int w, y, cb, cr, r_add, g_add, b_add, width2;                      \
511     UINT8 *cm = cropTbl + MAX_NEG_CROP;                                 \
512     unsigned int r, g, b;                                               \
513                                                                         \
514     d = dst->data[0];                                                   \
515     y1_ptr = src->data[0];                                              \
516     cb_ptr = src->data[1];                                              \
517     cr_ptr = src->data[2];                                              \
518     width2 = (width + 1) >> 1;                                          \
519     for(;height > 0; height --) {                                       \
520         d1 = d;                                                         \
521         for(w = width; w >= 2; w -= 2) {                                \
522             cb = cb_ptr[0] - 128;                                       \
523             cr = cr_ptr[0] - 128;                                       \
524             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
525             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
526             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
527                                                                         \
528             /* output 2 pixels */                                       \
529             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
530             RGB_OUT(d1, r, g, b);                                       \
531                                                                         \
532             YUV_TO_RGB2(r, g, b, y1_ptr[1]);                            \
533             RGB_OUT(d1 + BPP, r, g, b);                                 \
534                                                                         \
535             d1 += 2 * BPP;                                              \
536                                                                         \
537             y1_ptr += 2;                                                \
538             cb_ptr++;                                                   \
539             cr_ptr++;                                                   \
540         }                                                               \
541         /* handle width */                                              \
542         if (w) {                                                        \
543             cb = cb_ptr[0] - 128;                                       \
544             cr = cr_ptr[0] - 128;                                       \
545             r_add = C_RV * cr + (1 << (SCALE_BITS - 1));                \
546             g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));  \
547             b_add = C_BU * cb + (1 << (SCALE_BITS - 1));                \
548                                                                         \
549             /* output 2 pixels */                                       \
550             YUV_TO_RGB2(r, g, b, y1_ptr[0]);                            \
551             RGB_OUT(d1, r, g, b);                                       \
552             d1 += BPP;                                                  \
553                                                                         \
554             y1_ptr++;                                                   \
555             cb_ptr++;                                                   \
556             cr_ptr++;                                                   \
557         }                                                               \
558         d += dst->linesize[0];                                          \
559         y1_ptr += src->linesize[0] - width;                             \
560         cb_ptr += src->linesize[1] - width2;                            \
561         cr_ptr += src->linesize[2] - width2;                            \
562     }                                                                   \
563 }                                                                       \
564                                                                         \
565 static void rgb_name ## _to_yuv420p(AVPicture *dst, AVPicture *src,     \
566                                     int width, int height)              \
567 {                                                                       \
568     int wrap, wrap3, x, y;                                              \
569     int r, g, b, r1, g1, b1;                                            \
570     UINT8 *lum, *cb, *cr;                                               \
571     const UINT8 *p;                                                     \
572                                                                         \
573     lum = dst->data[0];                                                 \
574     cb = dst->data[1];                                                  \
575     cr = dst->data[2];                                                  \
576                                                                         \
577     wrap = dst->linesize[0];                                            \
578     wrap3 = src->linesize[0];                                           \
579     p = src->data[0];                                                   \
580     for(y=0;y<height;y+=2) {                                            \
581         for(x=0;x<width;x+=2) {                                         \
582             RGB_IN(r, g, b, p);                                         \
583             r1 = r;                                                     \
584             g1 = g;                                                     \
585             b1 = b;                                                     \
586             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +             \
587                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;        \
588             RGB_IN(r, g, b, p + BPP);                                   \
589             r1 += r;                                                    \
590             g1 += g;                                                    \
591             b1 += b;                                                    \
592             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +             \
593                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;        \
594             p += wrap3;                                                 \
595             lum += wrap;                                                \
596                                                                         \
597             RGB_IN(r, g, b, p);                                         \
598             r1 += r;                                                    \
599             g1 += g;                                                    \
600             b1 += b;                                                    \
601             lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +             \
602                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;        \
603                                                                         \
604             RGB_IN(r, g, b, p + BPP);                                   \
605             r1 += r;                                                    \
606             g1 += g;                                                    \
607             b1 += b;                                                    \
608             lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +             \
609                       FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;        \
610                                                                         \
611             cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +         \
612                       FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >>          \
613                      (SCALEBITS + 2)) + 128;                            \
614             cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -           \
615                      FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >>           \
616                      (SCALEBITS + 2)) + 128;                            \
617                                                                         \
618             cb++;                                                       \
619             cr++;                                                       \
620             p += -wrap3 + 2 * BPP;                                      \
621             lum += -wrap + 2;                                           \
622         }                                                               \
623         p += wrap3 + (wrap3 - width * BPP);                             \
624         lum += wrap + (wrap - width);                                   \
625         cb += dst->linesize[1] - width / 2;                             \
626         cr += dst->linesize[2] - width / 2;                             \
627     }                                                                   \
628 }                                                                       \
629                                                                         \
630 static void rgb_name ## _to_gray(AVPicture *dst, AVPicture *src,        \
631                                  int width, int height)                 \
632 {                                                                       \
633     const unsigned char *p;                                             \
634     unsigned char *q;                                                   \
635     int r, g, b, dst_wrap, src_wrap;                                    \
636     int x, y;                                                           \
637                                                                         \
638     p = src->data[0];                                                   \
639     src_wrap = src->linesize[0] - BPP * width;                          \
640                                                                         \
641     q = dst->data[0];                                                   \
642     dst_wrap = dst->linesize[0] - width;                                \
643                                                                         \
644     for(y=0;y<height;y++) {                                             \
645         for(x=0;x<width;x++) {                                          \
646             RGB_IN(r, g, b, p);                                         \
647             q[0] = (FIX(0.29900) * r + FIX(0.58700) * g +               \
648                     FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;          \
649             q++;                                                        \
650             p += BPP;                                                   \
651         }                                                               \
652         p += src_wrap;                                                  \
653         q += dst_wrap;                                                  \
654     }                                                                   \
655 }                                                                       \
656                                                                         \
657 static void gray_to_ ## rgb_name(AVPicture *dst, AVPicture *src,        \
658                                  int width, int height)                 \
659 {                                                                       \
660     const unsigned char *p;                                             \
661     unsigned char *q;                                                   \
662     int r, dst_wrap, src_wrap;                                          \
663     int x, y;                                                           \
664                                                                         \
665     p = src->data[0];                                                   \
666     src_wrap = src->linesize[0] - width;                                \
667                                                                         \
668     q = dst->data[0];                                                   \
669     dst_wrap = dst->linesize[0] - BPP * width;                          \
670                                                                         \
671     for(y=0;y<height;y++) {                                             \
672         for(x=0;x<width;x++) {                                          \
673             r = p[0];                                                   \
674             RGB_OUT(q, r, r, r);                                        \
675             q += BPP;                                                   \
676             p ++;                                                       \
677         }                                                               \
678         p += src_wrap;                                                  \
679         q += dst_wrap;                                                  \
680     }                                                                   \
681 }
682
/* Replicate bit n of 'a' into bits 0 .. n - 1. Bits n .. 7 of 'a' are
   kept unchanged and everything above bit 7 is cleared. */
static inline unsigned int bitcopy_n(unsigned int a, int n)
{
    const int low_bits = (1 << n) - 1;
    const unsigned int kept = a & (0xff & ~low_bits);
    const unsigned int fill = ((a >> n) & 1) ? ~0u : 0u;

    return kept | (fill & low_bits);
}
690
691 /* rgb555 handling */
692
693 #define RGB_IN(r, g, b, s)\
694 {\
695     unsigned int v = ((UINT16 *)(s))[0];\
696     r = bitcopy_n(v >> (10 - 3), 3);\
697     g = bitcopy_n(v >> (5 - 3), 3);\
698     b = bitcopy_n(v << 3, 3);\
699 }
700
701 #define RGB_OUT(d, r, g, b)\
702 {\
703     ((UINT16 *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000;\
704 }
705
706 #define BPP 2
707
708 RGB_FUNCTIONS(rgb555)
709
710 #undef RGB_IN
711 #undef RGB_OUT
712 #undef BPP
713
714 /* rgb565 handling */
715
716 #define RGB_IN(r, g, b, s)\
717 {\
718     unsigned int v = ((UINT16 *)(s))[0];\
719     r = bitcopy_n(v >> (11 - 3), 3);\
720     g = bitcopy_n(v >> (5 - 2), 2);\
721     b = bitcopy_n(v << 3, 3);\
722 }
723
724 #define RGB_OUT(d, r, g, b)\
725 {\
726     ((UINT16 *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\
727 }
728
729 #define BPP 2
730
731 RGB_FUNCTIONS(rgb565)
732
733 #undef RGB_IN
734 #undef RGB_OUT
735 #undef BPP
736
737 /* bgr24 handling */
738
739 #define RGB_IN(r, g, b, s)\
740 {\
741     b = (s)[0];\
742     g = (s)[1];\
743     r = (s)[2];\
744 }
745
746 #define RGB_OUT(d, r, g, b)\
747 {\
748     (d)[0] = b;\
749     (d)[1] = g;\
750     (d)[2] = r;\
751 }
752
753 #define BPP 3
754
755 RGB_FUNCTIONS(bgr24)
756
757 #undef RGB_IN
758 #undef RGB_OUT
759 #undef BPP
760
761 /* rgb24 handling */
762
763 #define RGB_IN(r, g, b, s)\
764 {\
765     r = (s)[0];\
766     g = (s)[1];\
767     b = (s)[2];\
768 }
769
770 #define RGB_OUT(d, r, g, b)\
771 {\
772     (d)[0] = r;\
773     (d)[1] = g;\
774     (d)[2] = b;\
775 }
776
777 #define BPP 3
778
779 RGB_FUNCTIONS(rgb24)
780
781 #undef RGB_IN
782 #undef RGB_OUT
783 #undef BPP
784
785 /* rgba32 handling */
786
787 #define RGB_IN(r, g, b, s)\
788 {\
789     unsigned int v = ((UINT32 *)(s))[0];\
790     r = (v >> 16) & 0xff;\
791     g = (v >> 8) & 0xff;\
792     b = v & 0xff;\
793 }
794
795 #define RGB_OUT(d, r, g, b)\
796 {\
797     ((UINT32 *)(d))[0] = (0xff << 24) | (r << 16) | (g << 8) | b;\
798 }
799
800 #define BPP 4
801
802 RGB_FUNCTIONS(rgba32)
803
804 #undef RGB_IN
805 #undef RGB_OUT
806 #undef BPP
807
808
809 static void rgb24_to_rgb565(AVPicture *dst, AVPicture *src,
810                             int width, int height)
811 {
812     const unsigned char *p;
813     unsigned char *q;
814     int r, g, b, dst_wrap, src_wrap;
815     int x, y;
816
817     p = src->data[0];
818     src_wrap = src->linesize[0] - 3 * width;
819
820     q = dst->data[0];
821     dst_wrap = dst->linesize[0] - 2 * width;
822
823     for(y=0;y<height;y++) {
824         for(x=0;x<width;x++) {
825             r = p[0];
826             g = p[1];
827             b = p[2];
828
829             ((unsigned short *)q)[0] = 
830                 ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
831             q += 2;
832             p += 3;
833         }
834         p += src_wrap;
835         q += dst_wrap;
836     }
837 }
838
839 /* NOTE: we also add a dummy alpha bit */
840 static void rgb24_to_rgb555(AVPicture *dst, AVPicture *src,
841                             int width, int height)
842 {
843     const unsigned char *p;
844     unsigned char *q;
845     int r, g, b, dst_wrap, src_wrap;
846     int x, y;
847
848     p = src->data[0];
849     src_wrap = src->linesize[0] - 3 * width;
850
851     q = dst->data[0];
852     dst_wrap = dst->linesize[0] - 2 * width;
853
854     for(y=0;y<height;y++) {
855         for(x=0;x<width;x++) {
856             r = p[0];
857             g = p[1];
858             b = p[2];
859
860             ((unsigned short *)q)[0] = 
861                 ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | 0x8000;
862             q += 2;
863             p += 3;
864         }
865         p += src_wrap;
866         q += dst_wrap;
867     }
868 }
869
870 static void mono_to_gray(AVPicture *dst, AVPicture *src,
871                          int width, int height, int xor_mask)
872 {
873     const unsigned char *p;
874     unsigned char *q;
875     int v, dst_wrap, src_wrap;
876     int y, w;
877
878     p = src->data[0];
879     src_wrap = src->linesize[0] - ((width + 7) >> 3);
880
881     q = dst->data[0];
882     dst_wrap = dst->linesize[0] - width;
883     for(y=0;y<height;y++) {
884         w = width; 
885         while (w >= 8) {
886             v = *p++ ^ xor_mask;
887             q[0] = -(v >> 7);
888             q[1] = -((v >> 6) & 1);
889             q[2] = -((v >> 5) & 1);
890             q[3] = -((v >> 4) & 1);
891             q[4] = -((v >> 3) & 1);
892             q[5] = -((v >> 2) & 1);
893             q[6] = -((v >> 1) & 1);
894             q[7] = -((v >> 0) & 1);
895             w -= 8;
896             q += 8;
897         }
898         if (w > 0) {
899             v = *p++ ^ xor_mask;
900             do {
901                 q[0] = -((v >> 7) & 1);
902                 q++;
903                 v <<= 1;
904             } while (--w);
905         }
906         p += src_wrap;
907         q += dst_wrap;
908     }
909 }
910
911 static void monowhite_to_gray(AVPicture *dst, AVPicture *src,
912                                int width, int height)
913 {
914     mono_to_gray(dst, src, width, height, 0xff);
915 }
916
917 static void monoblack_to_gray(AVPicture *dst, AVPicture *src,
918                                int width, int height)
919 {
920     mono_to_gray(dst, src, width, height, 0x00);
921 }
922
923 static void gray_to_mono(AVPicture *dst, AVPicture *src,
924                          int width, int height, int xor_mask)
925 {
926     int n;
927     const UINT8 *s;
928     UINT8 *d;
929     int j, b, v, n1, src_wrap, dst_wrap, y;
930
931     s = src->data[0];
932     src_wrap = src->linesize[0] - width;
933
934     d = dst->data[0];
935     dst_wrap = dst->linesize[0] - ((width + 7) >> 3);
936     printf("%d %d\n", width, height);
937
938     for(y=0;y<height;y++) {
939         n = width;
940         while (n >= 8) {
941             v = 0;
942             for(j=0;j<8;j++) {
943                 b = s[0];
944                 s++;
945                 v = (v << 1) | (b >> 7);
946             }
947             d[0] = v ^ xor_mask;
948             d++;
949             n -= 8;
950         }
951         if (n > 0) {
952             n1 = n;
953             v = 0;
954             while (n > 0) {
955                 b = s[0];
956                 s++;
957                 v = (v << 1) | (b >> 7);
958                 n--;
959             }
960             d[0] = (v << (8 - (n1 & 7))) ^ xor_mask;
961             d++;
962         }
963         s += src_wrap;
964         d += dst_wrap;
965     }
966 }
967
968 static void gray_to_monowhite(AVPicture *dst, AVPicture *src,
969                               int width, int height)
970 {
971     gray_to_mono(dst, src, width, height, 0xff);
972 }
973
974 static void gray_to_monoblack(AVPicture *dst, AVPicture *src,
975                               int width, int height)
976 {
977     gray_to_mono(dst, src, width, height, 0x00);
978 }
979
980 typedef struct ConvertEntry {
981     void (*convert)(AVPicture *dst, AVPicture *src, int width, int height);
982 } ConvertEntry;
983
984 /* add each new convertion function in this table */
985 /* constraints;
986    - all non YUV modes must convert at least to and from PIX_FMT_RGB24
987 */
988 static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
989     [PIX_FMT_YUV420P] = {
990         [PIX_FMT_RGB555] = { 
991             .convert = yuv420p_to_rgb555
992         },
993         [PIX_FMT_RGB565] = { 
994             .convert = yuv420p_to_rgb565
995         },
996         [PIX_FMT_BGR24] = { 
997             .convert = yuv420p_to_bgr24
998         },
999         [PIX_FMT_RGB24] = { 
1000             .convert = yuv420p_to_rgb24
1001         },
1002         [PIX_FMT_RGBA32] = { 
1003             .convert = yuv420p_to_rgba32
1004         },
1005     },
1006     [PIX_FMT_YUV422P] = {
1007         [PIX_FMT_RGB555] = { 
1008             .convert = yuv422p_to_rgb555
1009         },
1010         [PIX_FMT_RGB565] = { 
1011             .convert = yuv422p_to_rgb565
1012         },
1013         [PIX_FMT_BGR24] = { 
1014             .convert = yuv422p_to_bgr24
1015         },
1016         [PIX_FMT_RGB24] = { 
1017             .convert = yuv422p_to_rgb24
1018         },
1019         [PIX_FMT_RGBA32] = { 
1020             .convert = yuv422p_to_rgba32
1021         },
1022     },
1023     [PIX_FMT_YUV422] = { 
1024         [PIX_FMT_YUV420P] = { 
1025             .convert = yuv422_to_yuv420p,
1026         },
1027     },
1028
1029     [PIX_FMT_RGB24] = {
1030         [PIX_FMT_YUV420P] = { 
1031             .convert = rgb24_to_yuv420p
1032         },
1033         [PIX_FMT_RGB565] = { 
1034             .convert = rgb24_to_rgb565
1035         },
1036         [PIX_FMT_RGB555] = { 
1037             .convert = rgb24_to_rgb555
1038         },
1039         [PIX_FMT_GRAY8] = { 
1040             .convert = rgb24_to_gray
1041         },
1042     },
1043     [PIX_FMT_RGBA32] = {
1044         [PIX_FMT_YUV420P] = { 
1045             .convert = rgba32_to_yuv420p
1046         },
1047         [PIX_FMT_GRAY8] = { 
1048             .convert = rgba32_to_gray
1049         },
1050     },
1051     [PIX_FMT_BGR24] = {
1052         [PIX_FMT_YUV420P] = { 
1053             .convert = bgr24_to_yuv420p
1054         },
1055         [PIX_FMT_GRAY8] = { 
1056             .convert = bgr24_to_gray
1057         },
1058     },
1059     [PIX_FMT_RGB555] = {
1060         [PIX_FMT_YUV420P] = { 
1061             .convert = rgb555_to_yuv420p
1062         },
1063         [PIX_FMT_GRAY8] = { 
1064             .convert = rgb555_to_gray
1065         },
1066     },
1067     [PIX_FMT_RGB565] = {
1068         [PIX_FMT_YUV420P] = { 
1069             .convert = rgb565_to_yuv420p
1070         },
1071         [PIX_FMT_GRAY8] = { 
1072             .convert = rgb565_to_gray
1073         },
1074     },
1075     [PIX_FMT_GRAY8] = {
1076         [PIX_FMT_RGB555] = { 
1077             .convert = gray_to_rgb555
1078         },
1079         [PIX_FMT_RGB565] = { 
1080             .convert = gray_to_rgb565
1081         },
1082         [PIX_FMT_RGB24] = { 
1083             .convert = gray_to_rgb24
1084         },
1085         [PIX_FMT_BGR24] = { 
1086             .convert = gray_to_bgr24
1087         },
1088         [PIX_FMT_RGBA32] = { 
1089             .convert = gray_to_rgba32
1090         },
1091         [PIX_FMT_MONOWHITE] = { 
1092             .convert = gray_to_monowhite
1093         },
1094         [PIX_FMT_MONOBLACK] = { 
1095             .convert = gray_to_monoblack
1096         },
1097     },
1098     [PIX_FMT_MONOWHITE] = {
1099         [PIX_FMT_GRAY8] = { 
1100             .convert = monowhite_to_gray
1101         },
1102     },
1103     [PIX_FMT_MONOBLACK] = {
1104         [PIX_FMT_GRAY8] = { 
1105             .convert = monoblack_to_gray
1106         },
1107     },
1108 };
1109
1110 static int avpicture_alloc(AVPicture *picture,
1111                            int pix_fmt, int width, int height)
1112 {
1113     int size;
1114     void *ptr;
1115
1116     size = avpicture_get_size(pix_fmt, width, height);
1117     if (size < 0)
1118         goto fail;
1119     ptr = av_malloc(size);
1120     if (!ptr)
1121         goto fail;
1122     avpicture_fill(picture, ptr, pix_fmt, width, height);
1123     return 0;
1124  fail:
1125     memset(picture, 0, sizeof(AVPicture));
1126     return -1;
1127 }
1128
1129 static void avpicture_free(AVPicture *picture)
1130 {
1131     av_free(picture->data[0]);
1132 }
1133
1134 /* XXX: always use linesize. Return -1 if not supported */
1135 int img_convert(AVPicture *dst, int dst_pix_fmt,
1136                 AVPicture *src, int src_pix_fmt, 
1137                 int src_width, int src_height)
1138 {
1139     int i, ret, dst_width, dst_height, int_pix_fmt;
1140     PixFmtInfo *src_pix, *dst_pix;
1141     ConvertEntry *ce;
1142     AVPicture tmp1, *tmp = &tmp1;
1143
1144     if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB ||
1145         dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB)
1146         return -1;
1147     if (src_width <= 0 || src_height <= 0)
1148         return 0;
1149
1150     dst_width = src_width;
1151     dst_height = src_height;
1152
1153     dst_pix = &pix_fmt_info[dst_pix_fmt];
1154     src_pix = &pix_fmt_info[src_pix_fmt];
1155     if (src_pix_fmt == dst_pix_fmt) {
1156         /* XXX: incorrect */
1157         /* same format: just copy */
1158         for(i = 0; i < dst_pix->nb_components; i++) {
1159             int w, h;
1160             w = dst_width;
1161             h = dst_height;
1162             if (dst_pix->is_yuv && (i == 1 || i == 2)) {
1163                 w >>= dst_pix->x_chroma_shift;
1164                 h >>= dst_pix->y_chroma_shift;
1165             }
1166             img_copy(dst->data[i], dst->linesize[i],
1167                      src->data[i], src->linesize[i],
1168                      w, h);
1169         }
1170         return 0;
1171     }
1172
1173     ce = &convert_table[src_pix_fmt][dst_pix_fmt];
1174     if (ce->convert) {
1175         /* specific convertion routine */
1176         ce->convert(dst, src, dst_width, dst_height);
1177         return 0;
1178     }
1179
1180     /* gray to YUV */
1181     if (dst_pix->is_yuv && src_pix_fmt == PIX_FMT_GRAY8) {
1182         int w, h, y;
1183         uint8_t *d;
1184
1185         img_copy(dst->data[0], dst->linesize[0],
1186                  src->data[0], src->linesize[0],
1187                  dst_width, dst_height);
1188         /* fill U and V with 128 */
1189         w = dst_width;
1190         h = dst_height;
1191         w >>= dst_pix->x_chroma_shift;
1192         h >>= dst_pix->y_chroma_shift;
1193         for(i = 1; i <= 2; i++) {
1194             d = dst->data[i];
1195             for(y = 0; y< h; y++) {
1196                 memset(d, 128, w);
1197                 d += dst->linesize[i];
1198             }
1199         }
1200         return 0;
1201     }
1202
1203     /* YUV to gray */
1204     if (src_pix->is_yuv && dst_pix_fmt == PIX_FMT_GRAY8) {
1205         img_copy(dst->data[0], dst->linesize[0],
1206                  src->data[0], src->linesize[0],
1207                  dst_width, dst_height);
1208         return 0;
1209     }
1210
1211     /* YUV to YUV */
1212     if (dst_pix->is_yuv && src_pix->is_yuv) {
1213         int x_shift, y_shift, w, h;
1214         void (*resize_func)(UINT8 *dst, int dst_wrap, 
1215                             UINT8 *src, int src_wrap,
1216                             int width, int height);
1217
1218         /* compute chroma size of the smallest dimensions */
1219         w = dst_width;
1220         h = dst_height;
1221         if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift)
1222             w >>= dst_pix->x_chroma_shift;
1223         else
1224             w >>= src_pix->x_chroma_shift;
1225         if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift)
1226             h >>= dst_pix->y_chroma_shift;
1227         else
1228             h >>= src_pix->y_chroma_shift;
1229
1230         x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift);
1231         y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift);
1232         if (x_shift == 0 && y_shift == 0) {
1233             resize_func = img_copy; /* should never happen */
1234         } else if (x_shift == 0 && y_shift == 1) {
1235             resize_func = shrink2;
1236         } else if (x_shift == 1 && y_shift == 1) {
1237             resize_func = shrink22;
1238         } else if (x_shift == -1 && y_shift == -1) {
1239             resize_func = grow22;
1240         } else if (x_shift == -1 && y_shift == 1) {
1241             resize_func = conv411;
1242         } else {
1243             /* currently not handled */
1244             return -1;
1245         }
1246
1247         img_copy(dst->data[0], dst->linesize[0],
1248                  src->data[0], src->linesize[0],
1249                  dst_width, dst_height);
1250
1251         for(i = 1;i <= 2; i++)
1252             resize_func(dst->data[i], dst->linesize[i],
1253                         src->data[i], src->linesize[i],
1254                         w, h);
1255        return 0;
1256     }
1257
1258     /* try to use an intermediate format */
1259     if (src_pix_fmt == PIX_FMT_MONOWHITE ||
1260         src_pix_fmt == PIX_FMT_MONOBLACK ||
1261         dst_pix_fmt == PIX_FMT_MONOWHITE ||
1262         dst_pix_fmt == PIX_FMT_MONOBLACK) {
1263         int_pix_fmt = PIX_FMT_GRAY8;
1264     } else {
1265         int_pix_fmt = PIX_FMT_RGB24;
1266     }
1267     if (avpicture_alloc(tmp, int_pix_fmt, dst_width, dst_height) < 0)
1268         return -1;
1269     ret = -1;
1270     if (img_convert(tmp, int_pix_fmt,
1271                     src, src_pix_fmt, src_width, src_height) < 0)
1272         goto fail1;
1273     if (img_convert(dst, dst_pix_fmt,
1274                     tmp, int_pix_fmt, dst_width, dst_height) < 0)
1275         goto fail1;
1276     ret = 0;
1277  fail1:
1278     avpicture_free(tmp);
1279     return ret;
1280 }
1281
1282
#ifdef HAVE_MMX
/* MMX version of one step of the in-place deinterlacing filter.
   Operates on one 32 bit load/store per line pointer, i.e. the callers
   in this file advance every pointer by 4 bytes per use.
   Expects (as set up by deinterlace_line_inplace) mm7 = 0 and mm6 = the
   rounding constant 4 in each 16 bit word.
   Computes ((lum_m3 + lum_m1) * 4 + lum_m2 * 2 + 4 - (lum_m4 + lum)) >> 3
   with an unsigned saturating subtraction, and stores the result to
   lum_m2. The original lum_m2 bytes are saved to lum_m4 before they are
   overwritten.
   NOTE(review): the statement order interleaves loads, the early store
   and the arithmetic - do not reorder without checking register use. */
#define DEINT_INPLACE_LINE_LUM \
                    movd_m2r(lum_m4[0],mm0);\
                    movd_m2r(lum_m3[0],mm1);\
                    movd_m2r(lum_m2[0],mm2);\
                    movd_m2r(lum_m1[0],mm3);\
                    movd_m2r(lum[0],mm4);\
                    punpcklbw_r2r(mm7,mm0);\
                    movd_r2m(mm2,lum_m4[0]);\
                    punpcklbw_r2r(mm7,mm1);\
                    punpcklbw_r2r(mm7,mm2);\
                    punpcklbw_r2r(mm7,mm3);\
                    punpcklbw_r2r(mm7,mm4);\
                    paddw_r2r(mm3,mm1);\
                    psllw_i2r(1,mm2);\
                    paddw_r2r(mm4,mm0);\
                    psllw_i2r(2,mm1);\
                    paddw_r2r(mm6,mm2);\
                    paddw_r2r(mm2,mm1);\
                    psubusw_r2r(mm0,mm1);\
                    psrlw_i2r(3,mm1);\
                    packuswb_r2r(mm7,mm1);\
                    movd_r2m(mm1,lum_m2[0]);

/* MMX version of one step of the deinterlacing filter writing to a
   separate destination. Same arithmetic and register expectations
   (mm7 = 0, mm6 = rounding constant) as DEINT_INPLACE_LINE_LUM, but no
   source line is modified and the result is stored to dst. */
#define DEINT_LINE_LUM \
                    movd_m2r(lum_m4[0],mm0);\
                    movd_m2r(lum_m3[0],mm1);\
                    movd_m2r(lum_m2[0],mm2);\
                    movd_m2r(lum_m1[0],mm3);\
                    movd_m2r(lum[0],mm4);\
                    punpcklbw_r2r(mm7,mm0);\
                    punpcklbw_r2r(mm7,mm1);\
                    punpcklbw_r2r(mm7,mm2);\
                    punpcklbw_r2r(mm7,mm3);\
                    punpcklbw_r2r(mm7,mm4);\
                    paddw_r2r(mm3,mm1);\
                    psllw_i2r(1,mm2);\
                    paddw_r2r(mm4,mm0);\
                    psllw_i2r(2,mm1);\
                    paddw_r2r(mm6,mm2);\
                    paddw_r2r(mm2,mm1);\
                    psubusw_r2r(mm0,mm1);\
                    psrlw_i2r(3,mm1);\
                    packuswb_r2r(mm7,mm1);\
                    movd_r2m(mm1,dst[0]);
#endif
1329
1330 /* filter parameters: [-1 4 2 4 -1] // 8 */
1331 static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
1332                                 int size)
1333 {
1334 #ifndef HAVE_MMX
1335     UINT8 *cm = cropTbl + MAX_NEG_CROP;
1336     int sum;
1337
1338     for(;size > 0;size--) {
1339         sum = -lum_m4[0];
1340         sum += lum_m3[0] << 2;
1341         sum += lum_m2[0] << 1;
1342         sum += lum_m1[0] << 2;
1343         sum += -lum[0];
1344         dst[0] = cm[(sum + 4) >> 3];
1345         lum_m4++;
1346         lum_m3++;
1347         lum_m2++;
1348         lum_m1++;
1349         lum++;
1350         dst++;
1351     }
1352 #else
1353
1354     {
1355         mmx_t rounder;
1356         rounder.uw[0]=4;
1357         rounder.uw[1]=4;
1358         rounder.uw[2]=4;
1359         rounder.uw[3]=4;
1360         pxor_r2r(mm7,mm7);
1361         movq_m2r(rounder,mm6);
1362     }
1363     for (;size > 3; size-=4) {
1364         DEINT_LINE_LUM
1365         lum_m4+=4;
1366         lum_m3+=4;
1367         lum_m2+=4;
1368         lum_m1+=4;
1369         lum+=4;
1370         dst+=4;
1371     }
1372 #endif
1373 }
1374 static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
1375                              int size)
1376 {
1377 #ifndef HAVE_MMX
1378     UINT8 *cm = cropTbl + MAX_NEG_CROP;
1379     int sum;
1380
1381     for(;size > 0;size--) {
1382         sum = -lum_m4[0];
1383         sum += lum_m3[0] << 2;
1384         sum += lum_m2[0] << 1;
1385         lum_m4[0]=lum_m2[0];
1386         sum += lum_m1[0] << 2;
1387         sum += -lum[0];
1388         lum_m2[0] = cm[(sum + 4) >> 3];
1389         lum_m4++;
1390         lum_m3++;
1391         lum_m2++;
1392         lum_m1++;
1393         lum++;
1394     }
1395 #else
1396
1397     {
1398         mmx_t rounder;
1399         rounder.uw[0]=4;
1400         rounder.uw[1]=4;
1401         rounder.uw[2]=4;
1402         rounder.uw[3]=4;
1403         pxor_r2r(mm7,mm7);
1404         movq_m2r(rounder,mm6);
1405     }
1406     for (;size > 3; size-=4) {
1407         DEINT_INPLACE_LINE_LUM
1408         lum_m4+=4;
1409         lum_m3+=4;
1410         lum_m2+=4;
1411         lum_m1+=4;
1412         lum+=4;
1413     }
1414 #endif
1415 }
1416
1417 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
1418    top field is copied as is, but the bottom field is deinterlaced
1419    against the top field. */
1420 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
1421                                     UINT8 *src1, int src_wrap,
1422                                     int width, int height)
1423 {
1424     UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
1425     int y;
1426
1427     src_m2 = src1;
1428     src_m1 = src1;
1429     src_0=&src_m1[src_wrap];
1430     src_p1=&src_0[src_wrap];
1431     src_p2=&src_p1[src_wrap];
1432     for(y=0;y<(height-2);y+=2) {
1433         memcpy(dst,src_m1,width);
1434         dst += dst_wrap;
1435         deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
1436         src_m2 = src_0;
1437         src_m1 = src_p1;
1438         src_0 = src_p2;
1439         src_p1 += 2*src_wrap;
1440         src_p2 += 2*src_wrap;
1441         dst += dst_wrap;
1442     }
1443     memcpy(dst,src_m1,width);
1444     dst += dst_wrap;
1445     /* do last line */
1446     deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
1447 }
1448
1449 static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
1450                                      int width, int height)
1451 {
1452     UINT8 *src_m1, *src_0, *src_p1, *src_p2;
1453     int y;
1454     UINT8 *buf;
1455     buf = (UINT8*)av_malloc(width);
1456
1457     src_m1 = src1;
1458     memcpy(buf,src_m1,width);
1459     src_0=&src_m1[src_wrap];
1460     src_p1=&src_0[src_wrap];
1461     src_p2=&src_p1[src_wrap];
1462     for(y=0;y<(height-2);y+=2) {
1463         deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
1464         src_m1 = src_p1;
1465         src_0 = src_p2;
1466         src_p1 += 2*src_wrap;
1467         src_p2 += 2*src_wrap;
1468     }
1469     /* do last line */
1470     deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
1471     av_free(buf);
1472 }
1473
1474
1475 /* deinterlace - if not supported return -1 */
1476 int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
1477                           int pix_fmt, int width, int height)
1478 {
1479     int i;
1480
1481     if (pix_fmt != PIX_FMT_YUV420P &&
1482         pix_fmt != PIX_FMT_YUV422P &&
1483         pix_fmt != PIX_FMT_YUV444P)
1484         return -1;
1485     if ((width & 3) != 0 || (height & 3) != 0)
1486         return -1;
1487
1488     for(i=0;i<3;i++) {
1489         if (i == 1) {
1490             switch(pix_fmt) {
1491             case PIX_FMT_YUV420P:
1492                 width >>= 1;
1493                 height >>= 1;
1494                 break;
1495             case PIX_FMT_YUV422P:
1496                 width >>= 1;
1497                 break;
1498             default:
1499                 break;
1500             }
1501         }
1502         if (src == dst) {
1503             deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
1504                                  width, height);
1505         } else {
1506             deinterlace_bottom_field(dst->data[i],dst->linesize[i],
1507                                         src->data[i], src->linesize[i],
1508                                         width, height);
1509         }
1510     }
1511 #ifdef HAVE_MMX
1512     emms();
1513 #endif
1514     return 0;
1515 }
1516
1517 #undef FIX