]> git.sesse.net Git - vlc/blob - modules/video_chroma/i420_rgb16.c
chromas conversion: optimize MMX/SSE acceleration even more by leveraging out of...
[vlc] / modules / video_chroma / i420_rgb16.c
1 /*****************************************************************************
2  * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <string.h>                                            /* strerror() */
28 #include <stdlib.h>                                      /* malloc(), free() */
29
30 #include <vlc/vlc.h>
31 #include <vlc_vout.h>
32
33 #include "i420_rgb.h"
34 #if defined (MODULE_NAME_IS_i420_rgb)
35 #   include "i420_rgb_c.h"
36 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
37 #   if defined(HAVE_MMX_INTRINSICS)
38 #       include <mmintrin.h>
39 #   endif
40 #   include "i420_rgb_mmx.h"
41 #endif
42
43 static void SetOffset( int, int, int, int, vlc_bool_t *,
44                        unsigned int *, int * );
45
46 #if defined (MODULE_NAME_IS_i420_rgb)
47 /*****************************************************************************
48  * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
49  *****************************************************************************
    * Reads the Y, U and V planes of p_src and writes 16-bit pixels into the
    * first plane of p_dest, processing each line in groups of 8 pixels.
    * The per-pixel work is done by the CONVERT_*_DITHER, SCALE_WIDTH and
    * SCALE_HEIGHT macros, which are not defined in this file; they presumably
    * read and advance the locals declared below by name, so those names must
    * not be changed (TODO confirm against the macro definitions).
    *
    *  - p_vout: provides the render/output sizes, output.i_rrshift and the
    *            chroma.p_sys tables (p_rgb16, p_buffer, p_offset)
    *  - p_src:  source picture (planar Y, U, V)
    *  - p_dest: destination picture
    *
50  * Horizontal alignment needed:
51  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
52  *  - output: 1 pixel (2 bytes), margins allowed
53  * Vertical alignment needed:
54  *  - input: 2 lines (2 Y lines, 1 U/V line)
55  *  - output: 1 line
56  *****************************************************************************/
57 void E_(I420_RGB16_dither)( vout_thread_t *p_vout, picture_t *p_src,
58                                                       picture_t *p_dest )
59 {
60     /* We got this one from the old arguments */
61     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
62     uint8_t  *p_y   = p_src->Y_PIXELS;
63     uint8_t  *p_u   = p_src->U_PIXELS;
64     uint8_t  *p_v   = p_src->V_PIXELS;
65
66     vlc_bool_t   b_hscale;                        /* horizontal scaling type */
67     unsigned int i_vscale;                          /* vertical scaling type */
68     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
69     unsigned int i_real_y;                                          /* y % 4 */
70
71     int         i_right_margin;
72     int         i_rewind;
73     int         i_scale_count;                       /* scale modulo counter */
74     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
75     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
76     int         i_uval, i_vval;                           /* U and V samples */
77     int         i_red, i_green, i_blue;          /* U and V modified samples */
78     uint16_t *  p_yuv = p_vout->chroma.p_sys->p_rgb16;
79     uint16_t *  p_ybase;                     /* Y dependent conversion table */
80
81     /* Conversion buffer pointer */
82     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
83     uint16_t *  p_buffer;
84
85     /* Offset array pointer */
86     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
87     int *       p_offset;
88
       /* Padding bytes at the end of each source line (pitch minus visible
        * pitch), for the luma plane and for the chroma planes */
89     const int i_source_margin = p_src->p[0].i_pitch
90                                  - p_src->p[0].i_visible_pitch;
91     const int i_source_margin_c = p_src->p[1].i_pitch
92                                  - p_src->p[1].i_visible_pitch;
93
94     /* The dithering matrices */
95     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
96     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
97     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
98     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
99
        /* Scale every matrix entry by (SHIFT - 4 + i_rrshift) bits; SHIFT is
         * defined outside this file */
100     for(i_x = 0; i_x < 4; i_x++)
101     {
102         dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
103         dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
104         dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
105         dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
106     }
107
        /* Padding bytes at the end of each destination line */
108     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
109
        /* When the width is not a multiple of 8, i_rewind is how many pixels
         * to step back so that one last group of 8 ends exactly at the end of
         * the line */
110     if( p_vout->render.i_width & 7 )
111     {
112         i_rewind = 8 - ( p_vout->render.i_width & 7 );
113     }
114     else
115     {
116         i_rewind = 0;
117     }
118
119     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
120      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
121      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
122     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
123                p_vout->output.i_width, p_vout->output.i_height,
124                &b_hscale, &i_vscale, p_offset_start );
125
126     /*
127      * Perform conversion
128      */
129     i_scale_count = ( i_vscale == 1 ) ?
130                     p_vout->output.i_height : p_vout->render.i_height;
131     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
132     {
            /* Line index modulo 4; not used directly here, presumably
             * consumed by the dithering macros (TODO confirm) */
133         i_real_y = i_y & 0x3;
134         p_pic_start = p_pic;
            /* Convert into the intermediate buffer when scaling horizontally,
             * straight into the destination picture otherwise */
135         p_buffer = b_hscale ? p_buffer_start : p_pic;
136
            /* One iteration per full group of 8 pixels; the dither row is
             * switched before every pixel */
137         for ( i_x = p_vout->render.i_width / 8; i_x--; )
138         {
139             int *p_dither = dither10;
140             CONVERT_YUV_PIXEL_DITHER(2);
141             p_dither = dither11;
142             CONVERT_Y_PIXEL_DITHER(2);
143             p_dither = dither12;
144             CONVERT_YUV_PIXEL_DITHER(2);
145             p_dither = dither13;
146             CONVERT_Y_PIXEL_DITHER(2);
147             p_dither = dither10;
148             CONVERT_YUV_PIXEL_DITHER(2);
149             p_dither = dither11;
150             CONVERT_Y_PIXEL_DITHER(2);
151             p_dither = dither12;
152             CONVERT_YUV_PIXEL_DITHER(2);
153             p_dither = dither13;
154             CONVERT_Y_PIXEL_DITHER(2);
155         }
156
157         /* Here we do some unaligned reads and duplicate conversions, but
158          * at least we have all the pixels */
            /* NOTE(review): when the width is below 8 the loop above runs zero
             * times, so the rewind below moves p_y/p_u/p_v/p_buffer before the
             * start of the line - confirm callers never pass such widths */
159         if( i_rewind )
160         {
161             int *p_dither = dither10;
162             p_y -= i_rewind;
163             p_u -= i_rewind >> 1;
164             p_v -= i_rewind >> 1;
165             p_buffer -= i_rewind;
166             CONVERT_YUV_PIXEL_DITHER(2);
167             p_dither = dither11;
168             CONVERT_Y_PIXEL_DITHER(2);
169             p_dither = dither12;
170             CONVERT_YUV_PIXEL_DITHER(2);
171             p_dither = dither13;
172             CONVERT_Y_PIXEL_DITHER(2);
173             p_dither = dither10;
174             CONVERT_YUV_PIXEL_DITHER(2);
175             p_dither = dither11;
176             CONVERT_Y_PIXEL_DITHER(2);
177             p_dither = dither12;
178             CONVERT_YUV_PIXEL_DITHER(2);
179             p_dither = dither13;
180             CONVERT_Y_PIXEL_DITHER(2);
181         }
182         SCALE_WIDTH;
183         SCALE_HEIGHT( 420, 2 );
184
            /* Skip the padding at the end of the source luma line; the chroma
             * padding is skipped after odd lines only */
185         p_y += i_source_margin;
186         if( i_y % 2 )
187         {
188             p_u += i_source_margin_c;
189             p_v += i_source_margin_c;
190         }
191     }
192 }
193 #endif
194
195 /*****************************************************************************
196  * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
197  *****************************************************************************
     * Reads the Y, U and V planes of p_src and writes 16-bit pixels into the
     * first plane of p_dest, processing each line in groups of 8 pixels.
     * Two builds share this body:
     *  - MODULE_NAME_IS_i420_rgb: uses the CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL
     *    macros together with the p_rgb16 table
     *  - MODULE_NAME_IS_i420_rgb_mmx: uses MMX intrinsics when
     *    HAVE_MMX_INTRINSICS is defined, inline assembly otherwise; the
     *    15 bpp (5/5/5) unpack is chosen when output.i_rmask is 0x7c00,
     *    the 16 bpp (5/6/5) unpack in every other case
     * The CONVERT_*, INTRINSICS_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros
     * are not defined in this file; they presumably use the locals declared
     * below by name, so those names must not be changed (TODO confirm).
     *
     *  - p_vout: provides the render/output sizes, output.i_rmask and the
     *            chroma.p_sys tables (p_rgb16, p_buffer, p_offset)
     *  - p_src:  source picture (planar Y, U, V)
     *  - p_dest: destination picture
     *
198  * Horizontal alignment needed:
199  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
200  *  - output: 1 pixel (2 bytes), margins allowed
201  * Vertical alignment needed:
202  *  - input: 2 lines (2 Y lines, 1 U/V line)
203  *  - output: 1 line
204  *****************************************************************************/
205 void E_(I420_RGB16)( vout_thread_t *p_vout, picture_t *p_src,
206                                             picture_t *p_dest )
207 {
208     /* We got this one from the old arguments */
209     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
210     uint8_t  *p_y   = p_src->Y_PIXELS;
211     uint8_t  *p_u   = p_src->U_PIXELS;
212     uint8_t  *p_v   = p_src->V_PIXELS;
213
214     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
215     unsigned int i_vscale;                          /* vertical scaling type */
216     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
217
218     int         i_right_margin;
219     int         i_rewind;
220     int         i_scale_count;                       /* scale modulo counter */
221     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
222     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
223 #if defined (MODULE_NAME_IS_i420_rgb)
224     int         i_uval, i_vval;                           /* U and V samples */
225     int         i_red, i_green, i_blue;          /* U and V modified samples */
226     uint16_t *  p_yuv = p_vout->chroma.p_sys->p_rgb16;
227     uint16_t *  p_ybase;                     /* Y dependent conversion table */
228 #endif
229
230     /* Conversion buffer pointer */
231     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
232     uint16_t *  p_buffer;
233
234     /* Offset array pointer */
235     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
236     int *       p_offset;
237
        /* Padding bytes at the end of each source line (pitch minus visible
         * pitch), for the luma plane and for the chroma planes */
238     const int i_source_margin = p_src->p[0].i_pitch
239                                  - p_src->p[0].i_visible_pitch;
240     const int i_source_margin_c = p_src->p[1].i_pitch
241                                  - p_src->p[1].i_visible_pitch;
242
        /* Padding bytes at the end of each destination line */
243     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
244
        /* When the width is not a multiple of 8, i_rewind is how many pixels
         * to step back so that one last group of 8 ends exactly at the end of
         * the line */
245     if( p_vout->render.i_width & 7 )
246     {
247         i_rewind = 8 - ( p_vout->render.i_width & 7 );
248     }
249     else
250     {
251         i_rewind = 0;
252     }
253
254     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
255      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
256      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
257     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
258                p_vout->output.i_width, p_vout->output.i_height,
259                &b_hscale, &i_vscale, p_offset_start );
260
261     /*
262      * Perform conversion
263      */
264     i_scale_count = ( i_vscale == 1 ) ?
265                     p_vout->output.i_height : p_vout->render.i_height;
266     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
267     {
268         p_pic_start = p_pic;
            /* Convert into the intermediate buffer when scaling horizontally,
             * straight into the destination picture otherwise */
269         p_buffer = b_hscale ? p_buffer_start : p_pic;
270
271 #if defined (MODULE_NAME_IS_i420_rgb)
            /* One iteration per full group of 8 pixels */
272         for ( i_x = p_vout->render.i_width / 8; i_x--; )
273         {
274             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
275             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
276             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
277             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
278         }
279 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
            /* The pixel format test is done once per line, outside the
             * per-group loops */
280         if( p_vout->output.i_rmask == 0x7c00 )
281         {
282             /* 15bpp 5/5/5 */
283             for ( i_x = p_vout->render.i_width / 8; i_x--; )
284             {
285 #   if defined (HAVE_MMX_INTRINSICS)
286                 __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
287                 uint64_t tmp64;
288                 INTRINSICS_INIT_16
289                 INTRINSICS_YUV_MUL
290                 INTRINSICS_YUV_ADD
291                 INTRINSICS_UNPACK_15
292 #   else
                    /* NOTE(review): these asm statements declare input
                     * operands only - no outputs, no "memory" clobber and no
                     * MMX register clobbers; confirm the compiler cannot
                     * reorder or cache memory accesses around them */
293                 __asm__( MMX_INIT_16
294                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
295
296                 __asm__( ".p2align 3"
297                          MMX_YUV_MUL
298                          MMX_YUV_ADD
299                          MMX_UNPACK_15
300                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
301 #   endif
302
                    /* Advance by one group: 8 luma samples, 4 samples in each
                     * chroma plane, 8 output pixels */
303                 p_y += 8;
304                 p_u += 4;
305                 p_v += 4;
306                 p_buffer += 8;
307             }
308         }
309         else
310         {
311             /* 16bpp 5/6/5 */
312             for ( i_x = p_vout->render.i_width / 8; i_x--; )
313             {
314 #   if defined (HAVE_MMX_INTRINSICS)
315                 __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
316                 uint64_t tmp64;
317                 INTRINSICS_INIT_16
318                 INTRINSICS_YUV_MUL
319                 INTRINSICS_YUV_ADD
320                 INTRINSICS_UNPACK_16
321 #   else
322                 __asm__( MMX_INIT_16
323                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
324
325                 __asm__( ".p2align 3"
326                          MMX_YUV_MUL
327                          MMX_YUV_ADD
328                          MMX_UNPACK_16
329                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
330 #   endif
331
332                 p_y += 8;
333                 p_u += 4;
334                 p_v += 4;
335                 p_buffer += 8;
336             }
337         }
338 #endif
339
340         /* Here we do some unaligned reads and duplicate conversions, but
341          * at least we have all the pixels */
            /* NOTE(review): when the width is below 8 the loops above run zero
             * times, so the rewind below moves p_y/p_u/p_v/p_buffer before the
             * start of the line - confirm callers never pass such widths */
342         if( i_rewind )
343         {
344 #if defined (MODULE_NAME_IS_i420_rgb_mmx)
345 #   if defined (HAVE_MMX_INTRINSICS)
346             __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
347             uint64_t tmp64;
348 #   endif
349 #endif
350             p_y -= i_rewind;
351             p_u -= i_rewind >> 1;
352             p_v -= i_rewind >> 1;
353             p_buffer -= i_rewind;
354 #if defined (MODULE_NAME_IS_i420_rgb)
355             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
356             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
357             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
358             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
359 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
360
                /* The 16-bit init step is shared by both pixel formats; only
                 * the unpack step differs */
361 #   if defined (HAVE_MMX_INTRINSICS)
362             INTRINSICS_INIT_16
363 #   else
364             __asm__( MMX_INIT_16
365                      : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
366 #   endif
367
368             if( p_vout->output.i_rmask == 0x7c00 )
369             {
370                 /* 15bpp 5/5/5 */
371 #   if defined (HAVE_MMX_INTRINSICS)
372                 INTRINSICS_YUV_MUL
373                 INTRINSICS_YUV_ADD
374                 INTRINSICS_UNPACK_15
375 #   else
376                 __asm__( ".p2align 3"
377                          MMX_YUV_MUL
378                          MMX_YUV_ADD
379                          MMX_UNPACK_15
380                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
381 #   endif
382             }
383             else
384             {
385 #   if defined (HAVE_MMX_INTRINSICS)
386                 INTRINSICS_YUV_MUL
387                 INTRINSICS_YUV_ADD
388                 INTRINSICS_UNPACK_16
389 #   else
390                 /* 16bpp 5/6/5 */
391                 __asm__( ".p2align 3"
392                          MMX_YUV_MUL
393                          MMX_YUV_ADD
394                          MMX_UNPACK_16
395                          : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
396 #   endif
397             }
398
399             p_y += 8;
400             p_u += 4;
401             p_v += 4;
402             p_buffer += 8;
403 #endif
404         }
405         SCALE_WIDTH;
406         SCALE_HEIGHT( 420, 2 );
407
            /* Skip the padding at the end of the source luma line; the chroma
             * padding is skipped after odd lines only */
408         p_y += i_source_margin;
409         if( i_y % 2 )
410         {
411             p_u += i_source_margin_c;
412             p_v += i_source_margin_c;
413         }
414     }
415 }
416
417 /*****************************************************************************
418  * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
419  *****************************************************************************
     * Reads the Y, U and V planes of p_src and writes 32-bit pixels into the
     * first plane of p_dest, processing each line in groups of 8 pixels.
     * Two builds share this body:
     *  - MODULE_NAME_IS_i420_rgb: uses the CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL
     *    macros together with the p_rgb32 table
     *  - MODULE_NAME_IS_i420_rgb_mmx: uses MMX intrinsics when
     *    HAVE_MMX_INTRINSICS is defined, inline assembly otherwise
     * The CONVERT_*, INTRINSICS_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros
     * are not defined in this file; they presumably use the locals declared
     * below by name, so those names must not be changed (TODO confirm).
     *
     *  - p_vout: provides the render/output sizes and the chroma.p_sys
     *            tables (p_rgb32, p_buffer, p_offset)
     *  - p_src:  source picture (planar Y, U, V)
     *  - p_dest: destination picture
     *
420  * Horizontal alignment needed:
421  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
422  *  - output: 1 pixel (4 bytes), margins allowed
423  * Vertical alignment needed:
424  *  - input: 2 lines (2 Y lines, 1 U/V line)
425  *  - output: 1 line
426  *****************************************************************************/
427 void E_(I420_RGB32)( vout_thread_t *p_vout, picture_t *p_src,
428                                             picture_t *p_dest )
429 {
430     /* We got this one from the old arguments */
431     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
432     uint8_t  *p_y   = p_src->Y_PIXELS;
433     uint8_t  *p_u   = p_src->U_PIXELS;
434     uint8_t  *p_v   = p_src->V_PIXELS;
435
436     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
437     unsigned int i_vscale;                          /* vertical scaling type */
438     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
439
440     int         i_right_margin;
441     int         i_rewind;
442     int         i_scale_count;                       /* scale modulo counter */
443     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
444     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
445 #if defined (MODULE_NAME_IS_i420_rgb)
446     int         i_uval, i_vval;                           /* U and V samples */
447     int         i_red, i_green, i_blue;          /* U and V modified samples */
448     uint32_t *  p_yuv = p_vout->chroma.p_sys->p_rgb32;
449     uint32_t *  p_ybase;                     /* Y dependent conversion table */
450 #endif
451
452     /* Conversion buffer pointer */
453     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
454     uint32_t *  p_buffer;
455
456     /* Offset array pointer */
457     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
458     int *       p_offset;
459
        /* Padding bytes at the end of each source line (pitch minus visible
         * pitch), for the luma plane and for the chroma planes */
460     const int i_source_margin = p_src->p[0].i_pitch
461                                  - p_src->p[0].i_visible_pitch;
462     const int i_source_margin_c = p_src->p[1].i_pitch
463                                  - p_src->p[1].i_visible_pitch;
464
        /* Padding bytes at the end of each destination line */
465     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
466
        /* When the width is not a multiple of 8, i_rewind is how many pixels
         * to step back so that one last group of 8 ends exactly at the end of
         * the line */
467     if( p_vout->render.i_width & 7 )
468     {
469         i_rewind = 8 - ( p_vout->render.i_width & 7 );
470     }
471     else
472     {
473         i_rewind = 0;
474     }
475
476     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
477      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
478      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
479     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
480                p_vout->output.i_width, p_vout->output.i_height,
481                &b_hscale, &i_vscale, p_offset_start );
482
483     /*
484      * Perform conversion
485      */
486     i_scale_count = ( i_vscale == 1 ) ?
487                     p_vout->output.i_height : p_vout->render.i_height;
488     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
489     {
490         p_pic_start = p_pic;
            /* Convert into the intermediate buffer when scaling horizontally,
             * straight into the destination picture otherwise */
491         p_buffer = b_hscale ? p_buffer_start : p_pic;
492
            /* One iteration per full group of 8 pixels */
493         for ( i_x = p_vout->render.i_width / 8; i_x--; )
494         {
495 #if defined (MODULE_NAME_IS_i420_rgb)
496             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
497             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
498             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
499             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
500 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
501 #   if defined (HAVE_MMX_INTRINSICS)
502             __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
503             uint64_t tmp64;
504             INTRINSICS_INIT_32
505             INTRINSICS_YUV_MUL
506             INTRINSICS_YUV_ADD
507             INTRINSICS_UNPACK_32
508 #   else
                /* NOTE(review): these asm statements declare input operands
                 * only - no outputs, no "memory" clobber and no MMX register
                 * clobbers; confirm the compiler cannot reorder or cache
                 * memory accesses around them */
509             __asm__( MMX_INIT_32
510                      : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
511
512             __asm__( ".p2align 3"
513                      MMX_YUV_MUL
514                      MMX_YUV_ADD
515                      MMX_UNPACK_32
516                      : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
517 #   endif
518
                /* Advance by one group: 8 luma samples, 4 samples in each
                 * chroma plane, 8 output pixels */
519             p_y += 8;
520             p_u += 4;
521             p_v += 4;
522             p_buffer += 8;
523 #endif
524         }
525
526         /* Here we do some unaligned reads and duplicate conversions, but
527          * at least we have all the pixels */
            /* NOTE(review): when the width is below 8 the loop above runs zero
             * times, so the rewind below moves p_y/p_u/p_v/p_buffer before the
             * start of the line - confirm callers never pass such widths */
528         if( i_rewind )
529         {
530 #if defined (MODULE_NAME_IS_i420_rgb_mmx)
531 #   if defined (HAVE_MMX_INTRINSICS)
532             __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
533             uint64_t tmp64;
534 #   endif
535 #endif
536             p_y -= i_rewind;
537             p_u -= i_rewind >> 1;
538             p_v -= i_rewind >> 1;
539             p_buffer -= i_rewind;
540 #if defined (MODULE_NAME_IS_i420_rgb)
541             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
542             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
543             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
544             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
545 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
546 #   if defined (HAVE_MMX_INTRINSICS)
547             INTRINSICS_INIT_32
548             INTRINSICS_YUV_MUL
549             INTRINSICS_YUV_ADD
550             INTRINSICS_UNPACK_32
551 #   else
552             __asm__( MMX_INIT_32
553                      : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
554
555             __asm__( ".p2align 3"
556                      MMX_YUV_MUL
557                      MMX_YUV_ADD
558                      MMX_UNPACK_32
559                      : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
560 #   endif
561
562             p_y += 8;
563             p_u += 4;
564             p_v += 4;
565             p_buffer += 8;
566 #endif
567         }
568         SCALE_WIDTH;
569         SCALE_HEIGHT( 420, 4 );
570
            /* Skip the padding at the end of the source luma line; the chroma
             * padding is skipped after odd lines only */
571         p_y += i_source_margin;
572         if( i_y % 2 )
573         {
574             p_u += i_source_margin_c;
575             p_v += i_source_margin_c;
576         }
577     }
578 }
579
580 /* Following functions are local */
581
582 /*****************************************************************************
583  * SetOffset: build offset array for conversion functions
584  *****************************************************************************
585  * This function will build an offset array used in later conversion functions.
586  * It will also set horizontal and vertical scaling indicators.
587  *****************************************************************************/
588 static void SetOffset( int i_width, int i_height, int i_pic_width,
589                        int i_pic_height, vlc_bool_t *pb_hscale,
590                        unsigned int *pi_vscale, int *p_offset )
591 {
592     int i_x;                                    /* x position in destination */
593     int i_scale_count;                                     /* modulo counter */
594
595     /*
596      * Prepare horizontal offset array
597      */
598     if( i_pic_width - i_width == 0 )
599     {
600         /* No horizontal scaling: YUV conversion is done directly to picture */
601         *pb_hscale = 0;
602     }
603     else if( i_pic_width - i_width > 0 )
604     {
605         /* Prepare scaling array for horizontal extension */
606         *pb_hscale = 1;
607         i_scale_count = i_pic_width;
608         for( i_x = i_width; i_x--; )
609         {
610             while( (i_scale_count -= i_width) > 0 )
611             {
612                 *p_offset++ = 0;
613             }
614             *p_offset++ = 1;
615             i_scale_count += i_pic_width;
616         }
617     }
618     else /* if( i_pic_width - i_width < 0 ) */
619     {
620         /* Prepare scaling array for horizontal reduction */
621         *pb_hscale = 1;
622         i_scale_count = i_width;
623         for( i_x = i_pic_width; i_x--; )
624         {
625             *p_offset = 1;
626             while( (i_scale_count -= i_pic_width) > 0 )
627             {
628                 *p_offset += 1;
629             }
630             p_offset++;
631             i_scale_count += i_width;
632         }
633     }
634
635     /*
636      * Set vertical scaling indicator
637      */
638     if( i_pic_height - i_height == 0 )
639     {
640         *pi_vscale = 0;
641     }
642     else if( i_pic_height - i_height > 0 )
643     {
644         *pi_vscale = 1;
645     }
646     else /* if( i_pic_height - i_height < 0 ) */
647     {
648         *pi_vscale = -1;
649     }
650 }
651