git.sesse.net Git - vlc/blob - modules/video_chroma/i420_rgb16.c

   1 /*****************************************************************************
   2  * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2000 the VideoLAN team
   5  * $Id$
   6  *
   7  * Authors: Samuel Hocevar <sam@zoy.org>
   8  *          Damien Fouilleul <damienf@videolan.org>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  23  *****************************************************************************/
  24
  25 /*****************************************************************************
  26  * Preamble
  27  *****************************************************************************/
  28
  29 #ifdef HAVE_CONFIG_H
  30 # include "config.h"
  31 #endif
  32
  33 #include <vlc/vlc.h>
  34 #include <vlc_vout.h>
  35
  36 #include "i420_rgb.h"
  37 #if defined (MODULE_NAME_IS_i420_rgb)
  38 #   include "i420_rgb_c.h"
  39 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
  40 #   include "i420_rgb_mmx.h"
  41 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
  42 #   include "i420_rgb_mmx.h"
  43 #endif
  44
  45 static void SetOffset( int, int, int, int, vlc_bool_t *,
  46                        unsigned int *, int * );
     /* Forward declaration only; the definition is not part of this excerpt.
      * Every call site below passes, in order: render width, render height,
      * output width, output height, &b_hscale, &i_vscale and p_offset_start. */
  47
  48 #if defined (MODULE_NAME_IS_i420_rgb)
  49 /*****************************************************************************
  50  * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
  51  *****************************************************************************
      * Walks the p_vout->render.i_height lines of p_src and converts each one in
      * groups of 8 pixels. Pixels are written straight into p_dest, or into the
      * chroma module's intermediate buffer when SetOffset() sets b_hscale.
      * SCALE_WIDTH and SCALE_HEIGHT are then invoked once per line; they are
      * macros defined outside this excerpt (presumably doing the actual
      * horizontal / vertical scaling -- confirm in their definitions).
      *
      *  - p_vout: supplies render/output sizes, output.i_rrshift and the
      *            chroma.p_sys members p_rgb16, p_buffer and p_offset
      *  - p_src:  source picture, read through Y_PIXELS / U_PIXELS / V_PIXELS
      *  - p_dest: destination picture, written through p_dest->p->p_pixels
      *
      * p_dither is pointed at one of four dither arrays before every pixel
      * macro; presumably the CONVERT_*_DITHER macros read it together with
      * i_real_y -- TODO confirm against the macro definitions.
      *
  52  * Horizontal alignment needed:
  53  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
  54  *  - output: 1 pixel (2 bytes), margins allowed
  55  * Vertical alignment needed:
  56  *  - input: 2 lines (2 Y lines, 1 U/V line)
  57  *  - output: 1 line
  58  *****************************************************************************/
  59 void E_(I420_RGB16_dither)( vout_thread_t *p_vout, picture_t *p_src,
  60                                                       picture_t *p_dest )
  61 {
  62     /* Output pixel pointer and the three source plane pointers */
  63     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
  64     uint8_t  *p_y   = p_src->Y_PIXELS;
  65     uint8_t  *p_u   = p_src->U_PIXELS;
  66     uint8_t  *p_v   = p_src->V_PIXELS;
  67
  68     vlc_bool_t   b_hscale;                        /* horizontal scaling type */
  69     unsigned int i_vscale;                          /* vertical scaling type */
  70     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
  71     unsigned int i_real_y;                                          /* y % 4 */
  72     /* NOTE: locals never named below are presumably used inside the macros */
  73     int         i_right_margin;
  74     int         i_rewind;
  75     int         i_scale_count;                       /* scale modulo counter */
  76     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
  77     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
  78     int         i_uval, i_vval;                           /* U and V samples */
  79     int         i_red, i_green, i_blue;          /* U and V modified samples */
  80     uint16_t *  p_yuv = p_vout->chroma.p_sys->p_rgb16;
  81     uint16_t *  p_ybase;                     /* Y dependent conversion table */
  82
  83     /* Conversion buffer pointer */
  84     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
  85     uint16_t *  p_buffer;
  86
  87     /* Offset array pointer */
  88     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
  89     int *       p_offset;
  90     /* Bytes between the visible end of a source line and the next line */
  91     const int i_source_margin = p_src->p[0].i_pitch
  92                                  - p_src->p[0].i_visible_pitch;
  93     const int i_source_margin_c = p_src->p[1].i_pitch
  94                                  - p_src->p[1].i_visible_pitch;
  95
  96     /* The dithering matrices: together they hold each value 0x0..0xf once */
  97     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
  98     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
  99     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
 100     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
 101     /* Scale every entry by (SHIFT - 4 + output.i_rrshift) bits */
 102     for(i_x = 0; i_x < 4; i_x++)
 103     {
 104         dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
 105         dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
 106         dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
 107         dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
 108     }
 109     /* Bytes of padding after the visible part of each destination line */
 110     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 111     /* Pixels to step back so a last 8-pixel group ends at the line end */
 112     if( p_vout->render.i_width & 7 )
 113     {
 114         i_rewind = 8 - ( p_vout->render.i_width & 7 );
 115     }
 116     else
 117     {
 118         i_rewind = 0;
 119     }
 120
 121     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 122      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 123      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 124     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 125                p_vout->output.i_width, p_vout->output.i_height,
 126                &b_hscale, &i_vscale, p_offset_start );
 127
 128     /*
 129      * Perform conversion
 130      */
 131     i_scale_count = ( i_vscale == 1 ) ?
 132                     p_vout->output.i_height : p_vout->render.i_height;
 133     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 134     {
 135         i_real_y = i_y & 0x3;
 136         p_pic_start = p_pic;
 137         p_buffer = b_hscale ? p_buffer_start : p_pic;
 138         /* Full groups of 8 pixels; p_dither is switched before each pixel */
 139         for ( i_x = p_vout->render.i_width / 8; i_x--; )
 140         {
 141             int *p_dither = dither10;
 142             CONVERT_YUV_PIXEL_DITHER(2);
 143             p_dither = dither11;
 144             CONVERT_Y_PIXEL_DITHER(2);
 145             p_dither = dither12;
 146             CONVERT_YUV_PIXEL_DITHER(2);
 147             p_dither = dither13;
 148             CONVERT_Y_PIXEL_DITHER(2);
 149             p_dither = dither10;
 150             CONVERT_YUV_PIXEL_DITHER(2);
 151             p_dither = dither11;
 152             CONVERT_Y_PIXEL_DITHER(2);
 153             p_dither = dither12;
 154             CONVERT_YUV_PIXEL_DITHER(2);
 155             p_dither = dither13;
 156             CONVERT_Y_PIXEL_DITHER(2);
 157         }
 158
 159         /* Here we do some unaligned reads and duplicate conversions, but
 160          * at least we have all the pixels */
 161         if( i_rewind )
 162         {
 163             int *p_dither = dither10;
 164             p_y -= i_rewind;
 165             p_u -= i_rewind >> 1;
 166             p_v -= i_rewind >> 1;
 167             p_buffer -= i_rewind;
 168             CONVERT_YUV_PIXEL_DITHER(2);
 169             p_dither = dither11;
 170             CONVERT_Y_PIXEL_DITHER(2);
 171             p_dither = dither12;
 172             CONVERT_YUV_PIXEL_DITHER(2);
 173             p_dither = dither13;
 174             CONVERT_Y_PIXEL_DITHER(2);
 175             p_dither = dither10;
 176             CONVERT_YUV_PIXEL_DITHER(2);
 177             p_dither = dither11;
 178             CONVERT_Y_PIXEL_DITHER(2);
 179             p_dither = dither12;
 180             CONVERT_YUV_PIXEL_DITHER(2);
 181             p_dither = dither13;
 182             CONVERT_Y_PIXEL_DITHER(2);
 183         }
 184         SCALE_WIDTH;
 185         SCALE_HEIGHT( 420, 2 );
 186         /* Skip source padding; chroma padding only after odd lines */
 187         p_y += i_source_margin;
 188         if( i_y % 2 )
 189         {
 190             p_u += i_source_margin_c;
 191             p_v += i_source_margin_c;
 192         }
 193     }
 194 }
 195 #endif
 196
 197 /*****************************************************************************
 198  * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
 199  *****************************************************************************
 200  * Horizontal alignment needed:
 201  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
 202  *  - output: 1 pixel (2 bytes), margins allowed
 203  * Vertical alignment needed:
 204  *  - input: 2 lines (2 Y lines, 1 U/V line)
 205  *  - output: 1 line
 206  *****************************************************************************/
 207
 208 #if defined (MODULE_NAME_IS_i420_rgb)
 209
 210 void E_(I420_RGB16)( vout_thread_t *p_vout, picture_t *p_src,
 211                                             picture_t *p_dest )
 212 {
 213     /* Plain C conversion of the I420 picture p_src into the 16 bpp picture
          * p_dest, without dithering. Each of the p_vout->render.i_height source
          * lines is converted in groups of 8 pixels, either directly into p_dest
          * or into the intermediate buffer when SetOffset() sets b_hscale; then
          * SCALE_WIDTH and SCALE_HEIGHT are invoked (macros defined outside this
          * excerpt -- presumably the actual scaling, confirm there).
          *
          * p_pic is the output pixel pointer; p_y, p_u and p_v are the source
          * plane pointers. */
 214     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 215     uint8_t  *p_y   = p_src->Y_PIXELS;
 216     uint8_t  *p_u   = p_src->U_PIXELS;
 217     uint8_t  *p_v   = p_src->V_PIXELS;
 218
 219     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
 220     unsigned int i_vscale;                          /* vertical scaling type */
 221     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 222     /* NOTE: locals never named below are presumably used inside the macros */
 223     int         i_right_margin;
 224     int         i_rewind;
 225     int         i_scale_count;                       /* scale modulo counter */
 226     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
 227     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 228     int         i_uval, i_vval;                           /* U and V samples */
 229     int         i_red, i_green, i_blue;          /* U and V modified samples */
 230     uint16_t *  p_yuv = p_vout->chroma.p_sys->p_rgb16;
 231     uint16_t *  p_ybase;                     /* Y dependent conversion table */
 232
 233     /* Conversion buffer pointer */
 234     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
 235     uint16_t *  p_buffer;
 236
 237     /* Offset array pointer */
 238     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
 239     int *       p_offset;
 240     /* Bytes between the visible end of a source line and the next line */
 241     const int i_source_margin = p_src->p[0].i_pitch
 242                                  - p_src->p[0].i_visible_pitch;
 243     const int i_source_margin_c = p_src->p[1].i_pitch
 244                                  - p_src->p[1].i_visible_pitch;
 245     /* Bytes of padding after the visible part of each destination line */
 246     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 247     /* Pixels to step back so a last 8-pixel group ends at the line end */
 248     if( p_vout->render.i_width & 7 )
 249     {
 250         i_rewind = 8 - ( p_vout->render.i_width & 7 );
 251     }
 252     else
 253     {
 254         i_rewind = 0;
 255     }
 256
 257     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 258      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 259      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 260     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 261                p_vout->output.i_width, p_vout->output.i_height,
 262                &b_hscale, &i_vscale, p_offset_start );
 263
 264     /*
 265      * Perform conversion
 266      */
 267     i_scale_count = ( i_vscale == 1 ) ?
 268                     p_vout->output.i_height : p_vout->render.i_height;
 269     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 270     {
 271         p_pic_start = p_pic;
 272         p_buffer = b_hscale ? p_buffer_start : p_pic;
 273         /* Full groups of 8 pixels: four YUV + Y macro pairs per pass */
 274         for ( i_x = p_vout->render.i_width / 8; i_x--; )
 275         {
 276             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 277             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 278             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 279             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 280         }
 281
 282         /* Here we do some unaligned reads and duplicate conversions, but
 283          * at least we have all the pixels */
 284         if( i_rewind )
 285         {
 286             p_y -= i_rewind;
 287             p_u -= i_rewind >> 1;
 288             p_v -= i_rewind >> 1;
 289             p_buffer -= i_rewind;
 290
 291             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 292             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 293             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 294             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 295         }
 296         SCALE_WIDTH;
 297         SCALE_HEIGHT( 420, 2 );
 298         /* Skip source padding; chroma padding only after odd lines */
 299         p_y += i_source_margin;
 300         if( i_y % 2 )
 301         {
 302             p_u += i_source_margin_c;
 303             p_v += i_source_margin_c;
 304         }
 305     }
 306 }
 307
 308 #else // ! defined (MODULE_NAME_IS_i420_rgb)
 309
 310 void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
 311                                             picture_t *p_dest )
 312 {
 313     /* I420 to 16-bit pixel conversion for the MMX and SSE2 builds of this
          * module, storing pixels through the *_UNPACK_15 macros. Each source
          * line of p_src is converted in groups of 16 pixels (SSE2) or 8 pixels
          * (MMX), either directly into p_dest or into the intermediate buffer
          * when SetOffset() sets b_hscale; SCALE_WIDTH and SCALE_HEIGHT are then
          * invoked (macros defined outside this excerpt -- presumably the actual
          * scaling, confirm there).
          *
          * p_pic is the output pixel pointer; p_y, p_u and p_v are the source
          * plane pointers. */
 314     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 315     uint8_t  *p_y   = p_src->Y_PIXELS;
 316     uint8_t  *p_u   = p_src->U_PIXELS;
 317     uint8_t  *p_v   = p_src->V_PIXELS;
 318
 319     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
 320     unsigned int i_vscale;                          /* vertical scaling type */
 321     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 322     /* NOTE: locals never named below are presumably used inside the macros */
 323     int         i_right_margin;
 324     int         i_rewind;
 325     int         i_scale_count;                       /* scale modulo counter */
 326     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
 327     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 328
 329     /* Conversion buffer pointer */
 330     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
 331     uint16_t *  p_buffer;
 332
 333     /* Offset array pointer */
 334     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
 335     int *       p_offset;
 336     /* Bytes between the visible end of a source line and the next line */
 337     const int i_source_margin = p_src->p[0].i_pitch
 338                                  - p_src->p[0].i_visible_pitch;
 339     const int i_source_margin_c = p_src->p[1].i_pitch
 340                                  - p_src->p[1].i_visible_pitch;
 341     /* Bytes of padding after the visible part of each destination line */
 342     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 343
 344     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 345      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 346      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 347     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 348                p_vout->output.i_width, p_vout->output.i_height,
 349                &b_hscale, &i_vscale, p_offset_start );
 350
 351
 352     /*
 353      * Perform conversion
 354      */
 355     i_scale_count = ( i_vscale == 1 ) ?
 356                     p_vout->output.i_height : p_vout->render.i_height;
 357
 358 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 359     /* SSE2 converts 16 pixels per group, so the rewind is relative to 16 */
 360     if( p_vout->render.i_width & 15 )
 361     {
 362         i_rewind = 16 - ( p_vout->render.i_width & 15 );
 363     }
 364     else
 365     {
 366         i_rewind = 0;
 367     }
 368
 369     /*
 370     ** SSE2 128 bits fetch/store instructions are faster
 371     ** if memory access is 16 bytes aligned
 372     */
 373     /* Aligned path only if both pitches and both start addresses are 16n */
 374     p_buffer = b_hscale ? p_buffer_start : p_pic;
 375     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 376                     p_dest->p->i_pitch|
 377                     ((intptr_t)p_y)|
 378                     ((intptr_t)p_buffer))) )
 379     {
 380         /* use faster SSE2 aligned fetch and store */
 381         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 382         {
 383             p_pic_start = p_pic;
 384             /* p_buffer was set before the loop; it is reset at the bottom */
 385             for ( i_x = p_vout->render.i_width/16; i_x--; )
 386             {
 387                 SSE2_CALL (
 388                     SSE2_INIT_16_ALIGNED
 389                     SSE2_YUV_MUL
 390                     SSE2_YUV_ADD
 391                     SSE2_UNPACK_15_ALIGNED
 392                 );
 393                 p_y += 16;
 394                 p_u += 8;
 395                 p_v += 8;
 396                 p_buffer += 16;
 397             }
 398             /* Here we do some unaligned reads and duplicate conversions, but
 399              * at least we have all the pixels */
 400             if( i_rewind )
 401             {
 402                 p_y -= i_rewind;
 403                 p_u -= i_rewind >> 1;
 404                 p_v -= i_rewind >> 1;
 405                 p_buffer -= i_rewind;
 406                 /* Stepped-back pointers are no longer 16-byte aligned */
 407                 SSE2_CALL (
 408                     SSE2_INIT_16_UNALIGNED
 409                     SSE2_YUV_MUL
 410                     SSE2_YUV_ADD
 411                     SSE2_UNPACK_15_UNALIGNED
 412                 );
 413                 p_y += 16;
 414                 p_u += 8;
 415                 p_v += 8; /* NOTE(review): p_buffer not advanced, unlike MMX */
 416             }
 417             SCALE_WIDTH;
 418             SCALE_HEIGHT( 420, 2 );
 419             /* Skip source padding; chroma padding only after odd lines */
 420             p_y += i_source_margin;
 421             if( i_y % 2 )
 422             {
 423                 p_u += i_source_margin_c;
 424                 p_v += i_source_margin_c;
 425             }
 426             p_buffer = b_hscale ? p_buffer_start : p_pic;
 427         }
 428     }
 429     else
 430     {
 431         /* use slower SSE2 unaligned fetch and store */
 432         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 433         {
 434             p_pic_start = p_pic;
 435             p_buffer = b_hscale ? p_buffer_start : p_pic;
 436             /* Full groups of 16 pixels */
 437             for ( i_x = p_vout->render.i_width/16; i_x--; )
 438             {
 439                 SSE2_CALL (
 440                     SSE2_INIT_16_UNALIGNED
 441                     SSE2_YUV_MUL
 442                     SSE2_YUV_ADD
 443                     SSE2_UNPACK_15_UNALIGNED
 444                 );
 445                 p_y += 16;
 446                 p_u += 8;
 447                 p_v += 8;
 448                 p_buffer += 16;
 449             }
 450             /* Here we do some unaligned reads and duplicate conversions, but
 451              * at least we have all the pixels */
 452             if( i_rewind )
 453             {
 454                 p_y -= i_rewind;
 455                 p_u -= i_rewind >> 1;
 456                 p_v -= i_rewind >> 1;
 457                 p_buffer -= i_rewind;
 458
 459                 SSE2_CALL (
 460                     SSE2_INIT_16_UNALIGNED
 461                     SSE2_YUV_MUL
 462                     SSE2_YUV_ADD
 463                     SSE2_UNPACK_15_UNALIGNED
 464                 );
 465                 p_y += 16;
 466                 p_u += 8;
 467                 p_v += 8; /* NOTE(review): p_buffer not advanced, unlike MMX */
 468             }
 469             SCALE_WIDTH;
 470             SCALE_HEIGHT( 420, 2 );
 471             /* Skip source padding; chroma padding only after odd lines */
 472             p_y += i_source_margin;
 473             if( i_y % 2 )
 474             {
 475                 p_u += i_source_margin_c;
 476                 p_v += i_source_margin_c;
 477             }
 478             p_buffer = b_hscale ? p_buffer_start : p_pic;
 479         }
 480     }
 481
 482     /* make sure all SSE2 stores are visible thereafter */
 483     SSE2_END;
 484
 485 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
 486     /* MMX converts 8 pixels per group, so the rewind is relative to 8 */
 487     if( p_vout->render.i_width & 7 )
 488     {
 489         i_rewind = 8 - ( p_vout->render.i_width & 7 );
 490     }
 491     else
 492     {
 493         i_rewind = 0;
 494     }
 495
 496     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 497     {
 498         p_pic_start = p_pic;
 499         p_buffer = b_hscale ? p_buffer_start : p_pic;
 500         /* Full groups of 8 pixels */
 501         for ( i_x = p_vout->render.i_width / 8; i_x--; )
 502         {
 503             MMX_CALL (
 504                 MMX_INIT_16
 505                 MMX_YUV_MUL
 506                 MMX_YUV_ADD
 507                 MMX_UNPACK_15
 508             );
 509             p_y += 8;
 510             p_u += 4;
 511             p_v += 4;
 512             p_buffer += 8;
 513         }
 514
 515         /* Here we do some unaligned reads and duplicate conversions, but
 516          * at least we have all the pixels */
 517         if( i_rewind )
 518         {
 519             p_y -= i_rewind;
 520             p_u -= i_rewind >> 1;
 521             p_v -= i_rewind >> 1;
 522             p_buffer -= i_rewind;
 523
 524             MMX_CALL (
 525                 MMX_INIT_16
 526                 MMX_YUV_MUL
 527                 MMX_YUV_ADD
 528                 MMX_UNPACK_15
 529             );
 530             p_y += 8;
 531             p_u += 4;
 532             p_v += 4;
 533             p_buffer += 8;
 534         }
 535         SCALE_WIDTH;
 536         SCALE_HEIGHT( 420, 2 );
 537         /* Skip source padding; chroma padding only after odd lines */
 538         p_y += i_source_margin;
 539         if( i_y % 2 )
 540         {
 541             p_u += i_source_margin_c;
 542             p_v += i_source_margin_c;
 543         }
 544     }
 545     /* re-enable FPU registers */
 546     MMX_END;
 547
 548 #endif
 549 }
 550
 551 void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
 552                                             picture_t *p_dest )
 553 {
 554     /* I420 to 16-bit pixel conversion for the MMX and SSE2 builds of this
          * module, storing pixels through the *_UNPACK_16 macros. Each source
          * line of p_src is converted in groups of 16 pixels (SSE2) or 8 pixels
          * (MMX), either directly into p_dest or into the intermediate buffer
          * when SetOffset() sets b_hscale; SCALE_WIDTH and SCALE_HEIGHT are then
          * invoked (macros defined outside this excerpt -- presumably the actual
          * scaling, confirm there).
          *
          * p_pic is the output pixel pointer; p_y, p_u and p_v are the source
          * plane pointers. */
 555     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 556     uint8_t  *p_y   = p_src->Y_PIXELS;
 557     uint8_t  *p_u   = p_src->U_PIXELS;
 558     uint8_t  *p_v   = p_src->V_PIXELS;
 559
 560     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
 561     unsigned int i_vscale;                          /* vertical scaling type */
 562     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 563     /* NOTE: locals never named below are presumably used inside the macros */
 564     int         i_right_margin;
 565     int         i_rewind;
 566     int         i_scale_count;                       /* scale modulo counter */
 567     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
 568     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 569
 570     /* Conversion buffer pointer */
 571     uint16_t *  p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
 572     uint16_t *  p_buffer;
 573
 574     /* Offset array pointer */
 575     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
 576     int *       p_offset;
 577     /* Bytes between the visible end of a source line and the next line */
 578     const int i_source_margin = p_src->p[0].i_pitch
 579                                  - p_src->p[0].i_visible_pitch;
 580     const int i_source_margin_c = p_src->p[1].i_pitch
 581                                  - p_src->p[1].i_visible_pitch;
 582     /* Bytes of padding after the visible part of each destination line */
 583     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 584
 585     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 586      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 587      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 588     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 589                p_vout->output.i_width, p_vout->output.i_height,
 590                &b_hscale, &i_vscale, p_offset_start );
 591
 592
 593     /*
 594      * Perform conversion
 595      */
 596     i_scale_count = ( i_vscale == 1 ) ?
 597                     p_vout->output.i_height : p_vout->render.i_height;
 598
 599 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 600     /* SSE2 converts 16 pixels per group, so the rewind is relative to 16 */
 601     if( p_vout->render.i_width & 15 )
 602     {
 603         i_rewind = 16 - ( p_vout->render.i_width & 15 );
 604     }
 605     else
 606     {
 607         i_rewind = 0;
 608     }
 609
 610     /*
 611     ** SSE2 128 bits fetch/store instructions are faster
 612     ** if memory access is 16 bytes aligned
 613     */
 614     /* Aligned path only if both pitches and both start addresses are 16n */
 615     p_buffer = b_hscale ? p_buffer_start : p_pic;
 616     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 617                     p_dest->p->i_pitch|
 618                     ((intptr_t)p_y)|
 619                     ((intptr_t)p_buffer))) )
 620     {
 621         /* use faster SSE2 aligned fetch and store */
 622         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 623         {
 624             p_pic_start = p_pic;
 625             /* p_buffer was set before the loop; it is reset at the bottom */
 626             for ( i_x = p_vout->render.i_width/16; i_x--; )
 627             {
 628                 SSE2_CALL (
 629                     SSE2_INIT_16_ALIGNED
 630                     SSE2_YUV_MUL
 631                     SSE2_YUV_ADD
 632                     SSE2_UNPACK_16_ALIGNED
 633                 );
 634                 p_y += 16;
 635                 p_u += 8;
 636                 p_v += 8;
 637                 p_buffer += 16;
 638             }
 639             /* Here we do some unaligned reads and duplicate conversions, but
 640              * at least we have all the pixels */
 641             if( i_rewind )
 642             {
 643                 p_y -= i_rewind;
 644                 p_u -= i_rewind >> 1;
 645                 p_v -= i_rewind >> 1;
 646                 p_buffer -= i_rewind;
 647                 /* Stepped-back pointers are no longer 16-byte aligned */
 648                 SSE2_CALL (
 649                     SSE2_INIT_16_UNALIGNED
 650                     SSE2_YUV_MUL
 651                     SSE2_YUV_ADD
 652                     SSE2_UNPACK_16_UNALIGNED
 653                 );
 654                 p_y += 16;
 655                 p_u += 8;
 656                 p_v += 8; /* NOTE(review): p_buffer not advanced, unlike MMX */
 657             }
 658             SCALE_WIDTH;
 659             SCALE_HEIGHT( 420, 2 );
 660             /* Skip source padding; chroma padding only after odd lines */
 661             p_y += i_source_margin;
 662             if( i_y % 2 )
 663             {
 664                 p_u += i_source_margin_c;
 665                 p_v += i_source_margin_c;
 666             }
 667             p_buffer = b_hscale ? p_buffer_start : p_pic;
 668         }
 669     }
 670     else
 671     {
 672         /* use slower SSE2 unaligned fetch and store */
 673         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 674         {
 675             p_pic_start = p_pic;
 676             p_buffer = b_hscale ? p_buffer_start : p_pic;
 677             /* Full groups of 16 pixels */
 678             for ( i_x = p_vout->render.i_width/16; i_x--; )
 679             {
 680                 SSE2_CALL(
 681                     SSE2_INIT_16_UNALIGNED
 682                     SSE2_YUV_MUL
 683                     SSE2_YUV_ADD
 684                     SSE2_UNPACK_16_UNALIGNED
 685                 );
 686                 p_y += 16;
 687                 p_u += 8;
 688                 p_v += 8;
 689                 p_buffer += 16;
 690             }
 691             /* Here we do some unaligned reads and duplicate conversions, but
 692              * at least we have all the pixels */
 693             if( i_rewind )
 694             {
 695                 p_y -= i_rewind;
 696                 p_u -= i_rewind >> 1;
 697                 p_v -= i_rewind >> 1;
 698                 p_buffer -= i_rewind;
 699
 700                 SSE2_CALL(
 701                     SSE2_INIT_16_UNALIGNED
 702                     SSE2_YUV_MUL
 703                     SSE2_YUV_ADD
 704                     SSE2_UNPACK_16_UNALIGNED
 705                 );
 706                 p_y += 16;
 707                 p_u += 8;
 708                 p_v += 8; /* NOTE(review): p_buffer not advanced, unlike MMX */
 709             }
 710             SCALE_WIDTH;
 711             SCALE_HEIGHT( 420, 2 );
 712             /* Skip source padding; chroma padding only after odd lines */
 713             p_y += i_source_margin;
 714             if( i_y % 2 )
 715             {
 716                 p_u += i_source_margin_c;
 717                 p_v += i_source_margin_c;
 718             }
 719             p_buffer = b_hscale ? p_buffer_start : p_pic;
 720         }
 721     }
 722
 723     /* make sure all SSE2 stores are visible thereafter */
 724     SSE2_END;
 725
 726 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
 727     /* MMX converts 8 pixels per group, so the rewind is relative to 8 */
 728     if( p_vout->render.i_width & 7 )
 729     {
 730         i_rewind = 8 - ( p_vout->render.i_width & 7 );
 731     }
 732     else
 733     {
 734         i_rewind = 0;
 735     }
 736
 737     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 738     {
 739         p_pic_start = p_pic;
 740         p_buffer = b_hscale ? p_buffer_start : p_pic;
 741         /* Full groups of 8 pixels */
 742         for ( i_x = p_vout->render.i_width / 8; i_x--; )
 743         {
 744             MMX_CALL (
 745                 MMX_INIT_16
 746                 MMX_YUV_MUL
 747                 MMX_YUV_ADD
 748                 MMX_UNPACK_16
 749             );
 750             p_y += 8;
 751             p_u += 4;
 752             p_v += 4;
 753             p_buffer += 8;
 754         }
 755
 756         /* Here we do some unaligned reads and duplicate conversions, but
 757          * at least we have all the pixels */
 758         if( i_rewind )
 759         {
 760             p_y -= i_rewind;
 761             p_u -= i_rewind >> 1;
 762             p_v -= i_rewind >> 1;
 763             p_buffer -= i_rewind;
 764
 765             MMX_CALL (
 766                 MMX_INIT_16
 767                 MMX_YUV_MUL
 768                 MMX_YUV_ADD
 769                 MMX_UNPACK_16
 770             );
 771             p_y += 8;
 772             p_u += 4;
 773             p_v += 4;
 774             p_buffer += 8;
 775         }
 776         SCALE_WIDTH;
 777         SCALE_HEIGHT( 420, 2 );
 778         /* Skip source padding; chroma padding only after odd lines */
 779         p_y += i_source_margin;
 780         if( i_y % 2 )
 781         {
 782             p_u += i_source_margin_c;
 783             p_v += i_source_margin_c;
 784         }
 785     }
 786     /* re-enable FPU registers */
 787     MMX_END;
 788
 789 #endif
 790 }
 791
 792 #endif
 793
 794 /*****************************************************************************
 795  * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
 796  *****************************************************************************
 797  * Horizontal alignment needed:
 798  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
 799  *  - output: 1 pixel (4 bytes), margins allowed
 800  * Vertical alignment needed:
 801  *  - input: 2 lines (2 Y lines, 1 U/V line)
 802  *  - output: 1 line
 803  *****************************************************************************/
 804
 805 #if defined (MODULE_NAME_IS_i420_rgb)
 806
 807 void E_(I420_RGB32)( vout_thread_t *p_vout, picture_t *p_src,
 808                                             picture_t *p_dest )
 809 {
 810     /* We got this one from the old arguments */
 811     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
 812     uint8_t  *p_y   = p_src->Y_PIXELS;
 813     uint8_t  *p_u   = p_src->U_PIXELS;
 814     uint8_t  *p_v   = p_src->V_PIXELS;
 815
 816     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
 817     unsigned int i_vscale;                          /* vertical scaling type */
 818     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 819
 820     int         i_right_margin;
 821     int         i_rewind;
 822     int         i_scale_count;                       /* scale modulo counter */
 823     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
 824     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
 825     int         i_uval, i_vval;                           /* U and V samples */
 826     int         i_red, i_green, i_blue;          /* U and V modified samples */
 827     uint32_t *  p_yuv = p_vout->chroma.p_sys->p_rgb32;
 828     uint32_t *  p_ybase;                     /* Y dependant conversion table */
 829
 830     /* Conversion buffer pointer */
 831     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
 832     uint32_t *  p_buffer;
 833
 834     /* Offset array pointer */
 835     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
 836     int *       p_offset;
 837
 838     const int i_source_margin = p_src->p[0].i_pitch
 839                                  - p_src->p[0].i_visible_pitch;
 840     const int i_source_margin_c = p_src->p[1].i_pitch
 841                                  - p_src->p[1].i_visible_pitch;
 842
 843     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 844
 845     if( p_vout->render.i_width & 7 )
 846     {
 847         i_rewind = 8 - ( p_vout->render.i_width & 7 );
 848     }
 849     else
 850     {
 851         i_rewind = 0;
 852     }
 853
 854     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 855      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 856      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 857     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 858                p_vout->output.i_width, p_vout->output.i_height,
 859                &b_hscale, &i_vscale, p_offset_start );
 860
 861     /*
 862      * Perform conversion
 863      */
 864     i_scale_count = ( i_vscale == 1 ) ?
 865                     p_vout->output.i_height : p_vout->render.i_height;
 866     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 867     {
 868         p_pic_start = p_pic;
 869         p_buffer = b_hscale ? p_buffer_start : p_pic;
 870
 871         for ( i_x = p_vout->render.i_width / 8; i_x--; )
 872         {
 873             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 874             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 875             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 876             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 877         }
 878
 879         /* Here we do some unaligned reads and duplicate conversions, but
 880          * at least we have all the pixels */
 881         if( i_rewind )
 882         {
 883             p_y -= i_rewind;
 884             p_u -= i_rewind >> 1;
 885             p_v -= i_rewind >> 1;
 886             p_buffer -= i_rewind;
 887             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 888             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 889             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 890             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 891         }
 892         SCALE_WIDTH;
 893         SCALE_HEIGHT( 420, 4 );
 894
 895         p_y += i_source_margin;
 896         if( i_y % 2 )
 897         {
 898             p_u += i_source_margin_c;
 899             p_v += i_source_margin_c;
 900         }
 901     }
 902 }
 903
 904 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
 905
 906 void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
 907                                             picture_t *p_dest )
 908 {
 909     /* We got this one from the old arguments */
 910     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
 911     uint8_t  *p_y   = p_src->Y_PIXELS;
 912     uint8_t  *p_u   = p_src->U_PIXELS;
 913     uint8_t  *p_v   = p_src->V_PIXELS;
 914
 915     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
 916     unsigned int i_vscale;                          /* vertical scaling type */
 917     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 918
 919     int         i_right_margin;
 920     int         i_rewind;
 921     int         i_scale_count;                       /* scale modulo counter */
 922     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
 923     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
 924     /* Conversion buffer pointer */
 925     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
 926     uint32_t *  p_buffer;
 927
 928     /* Offset array pointer */
 929     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
 930     int *       p_offset;
 931
 932     const int i_source_margin = p_src->p[0].i_pitch
 933                                  - p_src->p[0].i_visible_pitch;
 934     const int i_source_margin_c = p_src->p[1].i_pitch
 935                                  - p_src->p[1].i_visible_pitch;
 936
 937     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 938
 939     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 940      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 941      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 942     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
 943                p_vout->output.i_width, p_vout->output.i_height,
 944                &b_hscale, &i_vscale, p_offset_start );
 945
 946     /*
 947      * Perform conversion
 948      */
 949     i_scale_count = ( i_vscale == 1 ) ?
 950                     p_vout->output.i_height : p_vout->render.i_height;
 951
 952 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 953
 954     if( p_vout->render.i_width & 15 )
 955     {
 956         i_rewind = 16 - ( p_vout->render.i_width & 15 );
 957     }
 958     else
 959     {
 960         i_rewind = 0;
 961     }
 962
 963     /*
 964     ** SSE2 128 bits fetch/store instructions are faster
 965     ** if memory access is 16 bytes aligned
 966     */
 967
 968     p_buffer = b_hscale ? p_buffer_start : p_pic;
 969     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 970                     p_dest->p->i_pitch|
 971                     ((intptr_t)p_y)|
 972                     ((intptr_t)p_buffer))) )
 973     {
 974         /* use faster SSE2 aligned fetch and store */
 975         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
 976         {
 977             p_pic_start = p_pic;
 978
 979             for ( i_x = p_vout->render.i_width / 16; i_x--; )
 980             {
 981                 SSE2_CALL (
 982                     SSE2_INIT_32_ALIGNED
 983                     SSE2_YUV_MUL
 984                     SSE2_YUV_ADD
 985                     SSE2_UNPACK_32_ARGB_ALIGNED
 986                 );
 987                 p_y += 16;
 988                 p_u += 8;
 989                 p_v += 8;
 990                 p_buffer += 16;
 991             }
 992
 993             /* Here we do some unaligned reads and duplicate conversions, but
 994              * at least we have all the pixels */
 995             if( i_rewind )
 996             {
 997                 p_y -= i_rewind;
 998                 p_u -= i_rewind >> 1;
 999                 p_v -= i_rewind >> 1;
1000                 p_buffer -= i_rewind;
1001                 SSE2_CALL (
1002                     SSE2_INIT_32_UNALIGNED
1003                     SSE2_YUV_MUL
1004                     SSE2_YUV_ADD
1005                     SSE2_UNPACK_32_ARGB_UNALIGNED
1006                 );
1007                 p_y += 16;
1008                 p_u += 4;
1009                 p_v += 4;
1010             }
1011             SCALE_WIDTH;
1012             SCALE_HEIGHT( 420, 4 );
1013
1014             p_y += i_source_margin;
1015             if( i_y % 2 )
1016             {
1017                 p_u += i_source_margin_c;
1018                 p_v += i_source_margin_c;
1019             }
1020             p_buffer = b_hscale ? p_buffer_start : p_pic;
1021         }
1022     }
1023     else
1024     {
1025         /* use slower SSE2 unaligned fetch and store */
1026         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1027         {
1028             p_pic_start = p_pic;
1029             p_buffer = b_hscale ? p_buffer_start : p_pic;
1030
1031             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1032             {
1033                 SSE2_CALL (
1034                     SSE2_INIT_32_UNALIGNED
1035                     SSE2_YUV_MUL
1036                     SSE2_YUV_ADD
1037                     SSE2_UNPACK_32_ARGB_UNALIGNED
1038                 );
1039                 p_y += 16;
1040                 p_u += 8;
1041                 p_v += 8;
1042                 p_buffer += 16;
1043             }
1044
1045             /* Here we do some unaligned reads and duplicate conversions, but
1046              * at least we have all the pixels */
1047             if( i_rewind )
1048             {
1049                 p_y -= i_rewind;
1050                 p_u -= i_rewind >> 1;
1051                 p_v -= i_rewind >> 1;
1052                 p_buffer -= i_rewind;
1053                 SSE2_CALL (
1054                     SSE2_INIT_32_UNALIGNED
1055                     SSE2_YUV_MUL
1056                     SSE2_YUV_ADD
1057                     SSE2_UNPACK_32_ARGB_UNALIGNED
1058                 );
1059                 p_y += 16;
1060                 p_u += 8;
1061                 p_v += 8;
1062             }
1063             SCALE_WIDTH;
1064             SCALE_HEIGHT( 420, 4 );
1065
1066             p_y += i_source_margin;
1067             if( i_y % 2 )
1068             {
1069                 p_u += i_source_margin_c;
1070                 p_v += i_source_margin_c;
1071             }
1072             p_buffer = b_hscale ? p_buffer_start : p_pic;
1073         }
1074     }
1075
1076     /* make sure all SSE2 stores are visible thereafter */
1077     SSE2_END;
1078
1079 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1080
1081     if( p_vout->render.i_width & 7 )
1082     {
1083         i_rewind = 8 - ( p_vout->render.i_width & 7 );
1084     }
1085     else
1086     {
1087         i_rewind = 0;
1088     }
1089
1090     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1091     {
1092         p_pic_start = p_pic;
1093         p_buffer = b_hscale ? p_buffer_start : p_pic;
1094
1095         for ( i_x = p_vout->render.i_width / 8; i_x--; )
1096         {
1097             MMX_CALL (
1098                 MMX_INIT_32
1099                 MMX_YUV_MUL
1100                 MMX_YUV_ADD
1101                 MMX_UNPACK_32_ARGB
1102             );
1103             p_y += 8;
1104             p_u += 4;
1105             p_v += 4;
1106             p_buffer += 8;
1107         }
1108
1109         /* Here we do some unaligned reads and duplicate conversions, but
1110          * at least we have all the pixels */
1111         if( i_rewind )
1112         {
1113             p_y -= i_rewind;
1114             p_u -= i_rewind >> 1;
1115             p_v -= i_rewind >> 1;
1116             p_buffer -= i_rewind;
1117             MMX_CALL (
1118                 MMX_INIT_32
1119                 MMX_YUV_MUL
1120                 MMX_YUV_ADD
1121                 MMX_UNPACK_32_ARGB
1122             );
1123             p_y += 8;
1124             p_u += 4;
1125             p_v += 4;
1126             p_buffer += 8;
1127         }
1128         SCALE_WIDTH;
1129         SCALE_HEIGHT( 420, 4 );
1130
1131         p_y += i_source_margin;
1132         if( i_y % 2 )
1133         {
1134             p_u += i_source_margin_c;
1135             p_v += i_source_margin_c;
1136         }
1137     }
1138
1139     /* re-enable FPU registers */
1140     MMX_END;
1141
1142 #endif
1143 }
1144
1145 void E_(I420_R8G8B8A8)( vout_thread_t *p_vout, picture_t *p_src,
1146                                             picture_t *p_dest )
1147 {
1148     /* We got this one from the old arguments */
1149     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1150     uint8_t  *p_y   = p_src->Y_PIXELS;
1151     uint8_t  *p_u   = p_src->U_PIXELS;
1152     uint8_t  *p_v   = p_src->V_PIXELS;
1153
1154     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
1155     unsigned int i_vscale;                          /* vertical scaling type */
1156     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1157
1158     int         i_right_margin;
1159     int         i_rewind;
1160     int         i_scale_count;                       /* scale modulo counter */
1161     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
1162     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1163     /* Conversion buffer pointer */
1164     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
1165     uint32_t *  p_buffer;
1166
1167     /* Offset array pointer */
1168     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
1169     int *       p_offset;
1170
1171     const int i_source_margin = p_src->p[0].i_pitch
1172                                  - p_src->p[0].i_visible_pitch;
1173     const int i_source_margin_c = p_src->p[1].i_pitch
1174                                  - p_src->p[1].i_visible_pitch;
1175
1176     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1177
1178     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1179      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1180      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1181     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
1182                p_vout->output.i_width, p_vout->output.i_height,
1183                &b_hscale, &i_vscale, p_offset_start );
1184
1185     /*
1186      * Perform conversion
1187      */
1188     i_scale_count = ( i_vscale == 1 ) ?
1189                     p_vout->output.i_height : p_vout->render.i_height;
1190
1191 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1192
1193     if( p_vout->render.i_width & 15 )
1194     {
1195         i_rewind = 16 - ( p_vout->render.i_width & 15 );
1196     }
1197     else
1198     {
1199         i_rewind = 0;
1200     }
1201
1202     /*
1203     ** SSE2 128 bits fetch/store instructions are faster
1204     ** if memory access is 16 bytes aligned
1205     */
1206
1207     p_buffer = b_hscale ? p_buffer_start : p_pic;
1208     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1209                     p_dest->p->i_pitch|
1210                     ((intptr_t)p_y)|
1211                     ((intptr_t)p_buffer))) )
1212     {
1213         /* use faster SSE2 aligned fetch and store */
1214         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1215         {
1216             p_pic_start = p_pic;
1217
1218             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1219             {
1220                 SSE2_CALL (
1221                     SSE2_INIT_32_ALIGNED
1222                     SSE2_YUV_MUL
1223                     SSE2_YUV_ADD
1224                     SSE2_UNPACK_32_RGBA_ALIGNED
1225                 );
1226                 p_y += 16;
1227                 p_u += 8;
1228                 p_v += 8;
1229                 p_buffer += 16;
1230             }
1231
1232             /* Here we do some unaligned reads and duplicate conversions, but
1233              * at least we have all the pixels */
1234             if( i_rewind )
1235             {
1236                 p_y -= i_rewind;
1237                 p_u -= i_rewind >> 1;
1238                 p_v -= i_rewind >> 1;
1239                 p_buffer -= i_rewind;
1240                 SSE2_CALL (
1241                     SSE2_INIT_32_UNALIGNED
1242                     SSE2_YUV_MUL
1243                     SSE2_YUV_ADD
1244                     SSE2_UNPACK_32_RGBA_UNALIGNED
1245                 );
1246                 p_y += 16;
1247                 p_u += 4;
1248                 p_v += 4;
1249             }
1250             SCALE_WIDTH;
1251             SCALE_HEIGHT( 420, 4 );
1252
1253             p_y += i_source_margin;
1254             if( i_y % 2 )
1255             {
1256                 p_u += i_source_margin_c;
1257                 p_v += i_source_margin_c;
1258             }
1259             p_buffer = b_hscale ? p_buffer_start : p_pic;
1260         }
1261     }
1262     else
1263     {
1264         /* use slower SSE2 unaligned fetch and store */
1265         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1266         {
1267             p_pic_start = p_pic;
1268             p_buffer = b_hscale ? p_buffer_start : p_pic;
1269
1270             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1271             {
1272                 SSE2_CALL (
1273                     SSE2_INIT_32_UNALIGNED
1274                     SSE2_YUV_MUL
1275                     SSE2_YUV_ADD
1276                     SSE2_UNPACK_32_RGBA_UNALIGNED
1277                 );
1278                 p_y += 16;
1279                 p_u += 8;
1280                 p_v += 8;
1281                 p_buffer += 16;
1282             }
1283
1284             /* Here we do some unaligned reads and duplicate conversions, but
1285              * at least we have all the pixels */
1286             if( i_rewind )
1287             {
1288                 p_y -= i_rewind;
1289                 p_u -= i_rewind >> 1;
1290                 p_v -= i_rewind >> 1;
1291                 p_buffer -= i_rewind;
1292                 SSE2_CALL (
1293                     SSE2_INIT_32_UNALIGNED
1294                     SSE2_YUV_MUL
1295                     SSE2_YUV_ADD
1296                     SSE2_UNPACK_32_RGBA_UNALIGNED
1297                 );
1298                 p_y += 16;
1299                 p_u += 8;
1300                 p_v += 8;
1301             }
1302             SCALE_WIDTH;
1303             SCALE_HEIGHT( 420, 4 );
1304
1305             p_y += i_source_margin;
1306             if( i_y % 2 )
1307             {
1308                 p_u += i_source_margin_c;
1309                 p_v += i_source_margin_c;
1310             }
1311             p_buffer = b_hscale ? p_buffer_start : p_pic;
1312         }
1313     }
1314
1315     /* make sure all SSE2 stores are visible thereafter */
1316     SSE2_END;
1317
1318 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1319
1320     if( p_vout->render.i_width & 7 )
1321     {
1322         i_rewind = 8 - ( p_vout->render.i_width & 7 );
1323     }
1324     else
1325     {
1326         i_rewind = 0;
1327     }
1328
1329     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1330     {
1331         p_pic_start = p_pic;
1332         p_buffer = b_hscale ? p_buffer_start : p_pic;
1333
1334         for ( i_x = p_vout->render.i_width / 8; i_x--; )
1335         {
1336             MMX_CALL (
1337                 MMX_INIT_32
1338                 MMX_YUV_MUL
1339                 MMX_YUV_ADD
1340                 MMX_UNPACK_32_RGBA
1341             );
1342             p_y += 8;
1343             p_u += 4;
1344             p_v += 4;
1345             p_buffer += 8;
1346         }
1347
1348         /* Here we do some unaligned reads and duplicate conversions, but
1349          * at least we have all the pixels */
1350         if( i_rewind )
1351         {
1352             p_y -= i_rewind;
1353             p_u -= i_rewind >> 1;
1354             p_v -= i_rewind >> 1;
1355             p_buffer -= i_rewind;
1356             MMX_CALL (
1357                 MMX_INIT_32
1358                 MMX_YUV_MUL
1359                 MMX_YUV_ADD
1360                 MMX_UNPACK_32_RGBA
1361             );
1362             p_y += 8;
1363             p_u += 4;
1364             p_v += 4;
1365             p_buffer += 8;
1366         }
1367         SCALE_WIDTH;
1368         SCALE_HEIGHT( 420, 4 );
1369
1370         p_y += i_source_margin;
1371         if( i_y % 2 )
1372         {
1373             p_u += i_source_margin_c;
1374             p_v += i_source_margin_c;
1375         }
1376     }
1377
1378     /* re-enable FPU registers */
1379     MMX_END;
1380
1381 #endif
1382 }
1383
1384 void E_(I420_B8G8R8A8)( vout_thread_t *p_vout, picture_t *p_src,
1385                                             picture_t *p_dest )
1386 {
1387     /* We got this one from the old arguments */
1388     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1389     uint8_t  *p_y   = p_src->Y_PIXELS;
1390     uint8_t  *p_u   = p_src->U_PIXELS;
1391     uint8_t  *p_v   = p_src->V_PIXELS;
1392
1393     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
1394     unsigned int i_vscale;                          /* vertical scaling type */
1395     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1396
1397     int         i_right_margin;
1398     int         i_rewind;
1399     int         i_scale_count;                       /* scale modulo counter */
1400     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
1401     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1402     /* Conversion buffer pointer */
1403     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
1404     uint32_t *  p_buffer;
1405
1406     /* Offset array pointer */
1407     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
1408     int *       p_offset;
1409
1410     const int i_source_margin = p_src->p[0].i_pitch
1411                                  - p_src->p[0].i_visible_pitch;
1412     const int i_source_margin_c = p_src->p[1].i_pitch
1413                                  - p_src->p[1].i_visible_pitch;
1414
1415     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1416
1417     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1418      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1419      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1420     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
1421                p_vout->output.i_width, p_vout->output.i_height,
1422                &b_hscale, &i_vscale, p_offset_start );
1423
1424     /*
1425      * Perform conversion
1426      */
1427     i_scale_count = ( i_vscale == 1 ) ?
1428                     p_vout->output.i_height : p_vout->render.i_height;
1429
1430 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1431
1432     if( p_vout->render.i_width & 15 )
1433     {
1434         i_rewind = 16 - ( p_vout->render.i_width & 15 );
1435     }
1436     else
1437     {
1438         i_rewind = 0;
1439     }
1440
1441     /*
1442     ** SSE2 128 bits fetch/store instructions are faster
1443     ** if memory access is 16 bytes aligned
1444     */
1445
1446     p_buffer = b_hscale ? p_buffer_start : p_pic;
1447     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1448                     p_dest->p->i_pitch|
1449                     ((intptr_t)p_y)|
1450                     ((intptr_t)p_buffer))) )
1451     {
1452         /* use faster SSE2 aligned fetch and store */
1453         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1454         {
1455             p_pic_start = p_pic;
1456
1457             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1458             {
1459                 SSE2_CALL (
1460                     SSE2_INIT_32_ALIGNED
1461                     SSE2_YUV_MUL
1462                     SSE2_YUV_ADD
1463                     SSE2_UNPACK_32_BGRA_ALIGNED
1464                 );
1465                 p_y += 16;
1466                 p_u += 8;
1467                 p_v += 8;
1468                 p_buffer += 16;
1469             }
1470
1471             /* Here we do some unaligned reads and duplicate conversions, but
1472              * at least we have all the pixels */
1473             if( i_rewind )
1474             {
1475                 p_y -= i_rewind;
1476                 p_u -= i_rewind >> 1;
1477                 p_v -= i_rewind >> 1;
1478                 p_buffer -= i_rewind;
1479                 SSE2_CALL (
1480                     SSE2_INIT_32_UNALIGNED
1481                     SSE2_YUV_MUL
1482                     SSE2_YUV_ADD
1483                     SSE2_UNPACK_32_BGRA_UNALIGNED
1484                 );
1485                 p_y += 16;
1486                 p_u += 4;
1487                 p_v += 4;
1488             }
1489             SCALE_WIDTH;
1490             SCALE_HEIGHT( 420, 4 );
1491
1492             p_y += i_source_margin;
1493             if( i_y % 2 )
1494             {
1495                 p_u += i_source_margin_c;
1496                 p_v += i_source_margin_c;
1497             }
1498             p_buffer = b_hscale ? p_buffer_start : p_pic;
1499         }
1500     }
1501     else
1502     {
1503         /* use slower SSE2 unaligned fetch and store */
1504         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1505         {
1506             p_pic_start = p_pic;
1507             p_buffer = b_hscale ? p_buffer_start : p_pic;
1508
1509             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1510             {
1511                 SSE2_CALL (
1512                     SSE2_INIT_32_UNALIGNED
1513                     SSE2_YUV_MUL
1514                     SSE2_YUV_ADD
1515                     SSE2_UNPACK_32_BGRA_UNALIGNED
1516                 );
1517                 p_y += 16;
1518                 p_u += 8;
1519                 p_v += 8;
1520                 p_buffer += 16;
1521             }
1522
1523             /* Here we do some unaligned reads and duplicate conversions, but
1524              * at least we have all the pixels */
1525             if( i_rewind )
1526             {
1527                 p_y -= i_rewind;
1528                 p_u -= i_rewind >> 1;
1529                 p_v -= i_rewind >> 1;
1530                 p_buffer -= i_rewind;
1531                 SSE2_CALL (
1532                     SSE2_INIT_32_UNALIGNED
1533                     SSE2_YUV_MUL
1534                     SSE2_YUV_ADD
1535                     SSE2_UNPACK_32_BGRA_UNALIGNED
1536                 );
1537                 p_y += 16;
1538                 p_u += 8;
1539                 p_v += 8;
1540             }
1541             SCALE_WIDTH;
1542             SCALE_HEIGHT( 420, 4 );
1543
1544             p_y += i_source_margin;
1545             if( i_y % 2 )
1546             {
1547                 p_u += i_source_margin_c;
1548                 p_v += i_source_margin_c;
1549             }
1550             p_buffer = b_hscale ? p_buffer_start : p_pic;
1551         }
1552     }
1553
1554 #else
1555
1556     if( p_vout->render.i_width & 7 )
1557     {
1558         i_rewind = 8 - ( p_vout->render.i_width & 7 );
1559     }
1560     else
1561     {
1562         i_rewind = 0;
1563     }
1564
1565     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1566     {
1567         p_pic_start = p_pic;
1568         p_buffer = b_hscale ? p_buffer_start : p_pic;
1569
1570         for ( i_x = p_vout->render.i_width / 8; i_x--; )
1571         {
1572             MMX_CALL (
1573                 MMX_INIT_32
1574                 MMX_YUV_MUL
1575                 MMX_YUV_ADD
1576                 MMX_UNPACK_32_BGRA
1577             );
1578             p_y += 8;
1579             p_u += 4;
1580             p_v += 4;
1581             p_buffer += 8;
1582         }
1583
1584         /* Here we do some unaligned reads and duplicate conversions, but
1585          * at least we have all the pixels */
1586         if( i_rewind )
1587         {
1588             p_y -= i_rewind;
1589             p_u -= i_rewind >> 1;
1590             p_v -= i_rewind >> 1;
1591             p_buffer -= i_rewind;
1592             MMX_CALL (
1593                 MMX_INIT_32
1594                 MMX_YUV_MUL
1595                 MMX_YUV_ADD
1596                 MMX_UNPACK_32_BGRA
1597             );
1598             p_y += 8;
1599             p_u += 4;
1600             p_v += 4;
1601             p_buffer += 8;
1602         }
1603         SCALE_WIDTH;
1604         SCALE_HEIGHT( 420, 4 );
1605
1606         p_y += i_source_margin;
1607         if( i_y % 2 )
1608         {
1609             p_u += i_source_margin_c;
1610             p_v += i_source_margin_c;
1611         }
1612     }
1613
1614     /* re-enable FPU registers */
1615     MMX_END;
1616
1617 #endif
1618 }
1619
1620 void E_(I420_A8B8G8R8)( vout_thread_t *p_vout, picture_t *p_src,
1621                                             picture_t *p_dest )
1622 {
1623     /* We got this one from the old arguments */
1624     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1625     uint8_t  *p_y   = p_src->Y_PIXELS;
1626     uint8_t  *p_u   = p_src->U_PIXELS;
1627     uint8_t  *p_v   = p_src->V_PIXELS;
1628
1629     vlc_bool_t  b_hscale;                         /* horizontal scaling type */
1630     unsigned int i_vscale;                          /* vertical scaling type */
1631     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1632
1633     int         i_right_margin;
1634     int         i_rewind;
1635     int         i_scale_count;                       /* scale modulo counter */
1636     int         i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
1637     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1638     /* Conversion buffer pointer */
1639     uint32_t *  p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
1640     uint32_t *  p_buffer;
1641
1642     /* Offset array pointer */
1643     int *       p_offset_start = p_vout->chroma.p_sys->p_offset;
1644     int *       p_offset;
1645
1646     const int i_source_margin = p_src->p[0].i_pitch
1647                                  - p_src->p[0].i_visible_pitch;
1648     const int i_source_margin_c = p_src->p[1].i_pitch
1649                                  - p_src->p[1].i_visible_pitch;
1650
1651     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1652
1653     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1654      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1655      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1656     SetOffset( p_vout->render.i_width, p_vout->render.i_height,
1657                p_vout->output.i_width, p_vout->output.i_height,
1658                &b_hscale, &i_vscale, p_offset_start );
1659
1660     /*
1661      * Perform conversion
1662      */
1663     i_scale_count = ( i_vscale == 1 ) ?
1664                     p_vout->output.i_height : p_vout->render.i_height;
1665
1666 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1667
1668     if( p_vout->render.i_width & 15 )
1669     {
1670         i_rewind = 16 - ( p_vout->render.i_width & 15 );
1671     }
1672     else
1673     {
1674         i_rewind = 0;
1675     }
1676
1677     /*
1678     ** SSE2 128 bits fetch/store instructions are faster
1679     ** if memory access is 16 bytes aligned
1680     */
1681
1682     p_buffer = b_hscale ? p_buffer_start : p_pic;
1683     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1684                     p_dest->p->i_pitch|
1685                     ((intptr_t)p_y)|
1686                     ((intptr_t)p_buffer))) )
1687     {
1688         /* use faster SSE2 aligned fetch and store */
1689         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1690         {
1691             p_pic_start = p_pic;
1692
1693             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1694             {
1695                 SSE2_CALL (
1696                     SSE2_INIT_32_ALIGNED
1697                     SSE2_YUV_MUL
1698                     SSE2_YUV_ADD
1699                     SSE2_UNPACK_32_ABGR_ALIGNED
1700                 );
1701                 p_y += 16;
1702                 p_u += 8;
1703                 p_v += 8;
1704                 p_buffer += 16;
1705             }
1706
1707             /* Here we do some unaligned reads and duplicate conversions, but
1708              * at least we have all the pixels */
1709             if( i_rewind )
1710             {
1711                 p_y -= i_rewind;
1712                 p_u -= i_rewind >> 1;
1713                 p_v -= i_rewind >> 1;
1714                 p_buffer -= i_rewind;
1715                 SSE2_CALL (
1716                     SSE2_INIT_32_UNALIGNED
1717                     SSE2_YUV_MUL
1718                     SSE2_YUV_ADD
1719                     SSE2_UNPACK_32_ABGR_UNALIGNED
1720                 );
1721                 p_y += 16;
1722                 p_u += 4;
1723                 p_v += 4;
1724             }
1725             SCALE_WIDTH;
1726             SCALE_HEIGHT( 420, 4 );
1727
1728             p_y += i_source_margin;
1729             if( i_y % 2 )
1730             {
1731                 p_u += i_source_margin_c;
1732                 p_v += i_source_margin_c;
1733             }
1734             p_buffer = b_hscale ? p_buffer_start : p_pic;
1735         }
1736     }
1737     else
1738     {
1739         /* use slower SSE2 unaligned fetch and store */
1740         for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1741         {
1742             p_pic_start = p_pic;
1743             p_buffer = b_hscale ? p_buffer_start : p_pic;
1744
1745             for ( i_x = p_vout->render.i_width / 16; i_x--; )
1746             {
1747                 SSE2_CALL (
1748                     SSE2_INIT_32_UNALIGNED
1749                     SSE2_YUV_MUL
1750                     SSE2_YUV_ADD
1751                     SSE2_UNPACK_32_ABGR_UNALIGNED
1752                 );
1753                 p_y += 16;
1754                 p_u += 8;
1755                 p_v += 8;
1756                 p_buffer += 16;
1757             }
1758
1759             /* Here we do some unaligned reads and duplicate conversions, but
1760              * at least we have all the pixels */
1761             if( i_rewind )
1762             {
1763                 p_y -= i_rewind;
1764                 p_u -= i_rewind >> 1;
1765                 p_v -= i_rewind >> 1;
1766                 p_buffer -= i_rewind;
1767                 SSE2_CALL (
1768                     SSE2_INIT_32_UNALIGNED
1769                     SSE2_YUV_MUL
1770                     SSE2_YUV_ADD
1771                     SSE2_UNPACK_32_ABGR_UNALIGNED
1772                 );
1773                 p_y += 16;
1774                 p_u += 8;
1775                 p_v += 8;
1776             }
1777             SCALE_WIDTH;
1778             SCALE_HEIGHT( 420, 4 );
1779
1780             p_y += i_source_margin;
1781             if( i_y % 2 )
1782             {
1783                 p_u += i_source_margin_c;
1784                 p_v += i_source_margin_c;
1785             }
1786             p_buffer = b_hscale ? p_buffer_start : p_pic;
1787         }
1788     }
1789
1790 #else
1791
1792     if( p_vout->render.i_width & 7 )
1793     {
1794         i_rewind = 8 - ( p_vout->render.i_width & 7 );
1795     }
1796     else
1797     {
1798         i_rewind = 0;
1799     }
1800
1801     for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1802     {
1803         p_pic_start = p_pic;
1804         p_buffer = b_hscale ? p_buffer_start : p_pic;
1805
1806         for ( i_x = p_vout->render.i_width / 8; i_x--; )
1807         {
1808             MMX_CALL (
1809                 MMX_INIT_32
1810                 MMX_YUV_MUL
1811                 MMX_YUV_ADD
1812                 MMX_UNPACK_32_ABGR
1813             );
1814             p_y += 8;
1815             p_u += 4;
1816             p_v += 4;
1817             p_buffer += 8;
1818         }
1819
1820         /* Here we do some unaligned reads and duplicate conversions, but
1821          * at least we have all the pixels */
1822         if( i_rewind )
1823         {
1824             p_y -= i_rewind;
1825             p_u -= i_rewind >> 1;
1826             p_v -= i_rewind >> 1;
1827             p_buffer -= i_rewind;
1828             MMX_CALL (
1829                 MMX_INIT_32
1830                 MMX_YUV_MUL
1831                 MMX_YUV_ADD
1832                 MMX_UNPACK_32_ABGR
1833             );
1834             p_y += 8;
1835             p_u += 4;
1836             p_v += 4;
1837             p_buffer += 8;
1838         }
1839         SCALE_WIDTH;
1840         SCALE_HEIGHT( 420, 4 );
1841
1842         p_y += i_source_margin;
1843         if( i_y % 2 )
1844         {
1845             p_u += i_source_margin_c;
1846             p_v += i_source_margin_c;
1847         }
1848     }
1849
1850     /* re-enable FPU registers */
1851     MMX_END;
1852
1853 #endif
1854 }
1855
1856 #endif
1857
1858 /* Following functions are local */
1859
/*****************************************************************************
 * SetOffset: fill the horizontal offset table and report scaling modes
 *****************************************************************************
 * Compares the source size (i_width x i_height) with the output picture size
 * (i_pic_width x i_pic_height) and reports how the two relate:
 *
 *  - *pb_hscale is 0 when both widths match (p_offset is left untouched),
 *    and 1 otherwise.
 *  - When the picture is wider than the source, p_offset receives, for each
 *    source pixel, a run of zero or more 0 entries followed by a single 1.
 *  - When the picture is narrower than the source, p_offset receives one
 *    entry (always >= 1) per picture pixel.
 *  - *pi_vscale is 0 when both heights match, 1 when the picture is taller
 *    than the source and -1 (converted to unsigned) when it is shorter.
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, vlc_bool_t *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    const int i_width_delta  = i_pic_width - i_width;
    const int i_height_delta = i_pic_height - i_height;

    /*
     * Horizontal part: the flag is raised as soon as the widths differ
     */
    *pb_hscale = ( i_width_delta != 0 );

    if( i_width_delta > 0 )
    {
        /* Extension: walk the source pixels, spreading them over the
         * wider picture with an integer error accumulator */
        int i_error = i_pic_width;
        int i_remaining = i_width;

        while( i_remaining-- )
        {
            for( i_error -= i_width; i_error > 0; i_error -= i_width )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_error += i_pic_width;
        }
    }
    else if( i_width_delta < 0 )
    {
        /* Reduction: walk the picture pixels, counting how far to step
         * for each of them */
        int i_error = i_width;
        int i_remaining = i_pic_width;

        while( i_remaining-- )
        {
            int i_step = 1;

            for( i_error -= i_pic_width; i_error > 0; i_error -= i_pic_width )
            {
                i_step++;
            }
            *p_offset++ = i_step;
            i_error += i_width;
        }
    }

    /*
     * Vertical part: only the direction of the scaling is reported
     */
    *pi_vscale = ( i_height_delta > 0 ) ? 1
               : ( i_height_delta < 0 ) ? -1
               : 0;
}

1929