]> git.sesse.net Git - vlc/commitdiff
Integration de display.c à vout.
author Vincent Seguin <seguin@videolan.org>
Sun, 16 Jan 2000 16:43:10 +0000 (16:43 +0000)
committer Vincent Seguin <seguin@videolan.org>
Sun, 16 Jan 2000 16:43:10 +0000 (16:43 +0000)
Mise au point du driver X11 (plus de XShm error).
Incrustation de texte en X11, calcul des FPS, etc...
Intégration de la conversion MMX.
Mode 'niveaux de gris' pour les machines lentes non MMX (les pauvres !)
Attention: les drivers GGI et FB ne sont pas à jour...

15 files changed:
.cvsignore
Makefile
include/config.h
include/main.h
include/video.h
include/video_output.h
include/video_sys.h
include/vlc.h
src/interface/intf_msg.c
src/interface/main.c
src/misc/mtime.c
src/video_output/video_ggi.c
src/video_output/video_output.c
src/video_output/video_x11.c
src/video_output/yuv_mmx.S [new file with mode: 0644]

index 96942a1cb9057c2e67b2015dfb63e3c144e72c49..9b6a76c7d7ee3e4553fcd796e0d93c19b923da10 100644 (file)
@@ -5,3 +5,4 @@ gmon.out
 vlc
 vlc.init
 vlc-debug.log
+vlc-debug.ct
index feb73a387c899c832ba0c5919669d71f7422c5b4..a53066a41fd2bd3c25f0beba5a0ec6ea3d9d40b3 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -215,7 +215,7 @@ endif
 
 ifeq ($(ARCH),MMX)
 ASM_OBJ =                      video_decoder_ref/idctmmx.o \
-                                               video_decoder_ref/yuv12-rgb16.o
+                                               video_output/yuv-mmx.o
 endif
 
 C_OBJ = $(interface_obj) \
index df37b29f9caa2f86f897593f3f71a9837b8c58f7..450253acb5ec0f83be185bc9b66d30b2464cd612 100644 (file)
@@ -60,7 +60,7 @@
 //#define MPEG2_COMPLIANT
 
 /* Define for profiling support */
-//#define STATS
+#define STATS
 
 /* Define for unthreaded version of the program - ?? not yet implemented */
 //#define NO_THREAD
  * (~1 Mbyte) before using huge values */
 #define VOUT_MAX_PICTURES               10
 
+/* Environment variable for grayscale output mode, and default value */
+#define VOUT_GRAYSCALE_VAR              "vlc_grayscale"
+#define VOUT_GRAYSCALE_DEFAULT          0
+
+/* Number of pictures required to computes the FPS rate */
+#define VOUT_FPS_SAMPLES                5
+
 /*
  * Time settings
  */
 /* Time during which the thread will sleep if it has nothing to 
  * display (in micro-seconds) */
 /* ?? this constant will probably evolve to a calculated value */
-#define VOUT_IDLE_SLEEP                 50000
+#define VOUT_IDLE_SLEEP                 20000
 
 /* Maximum lap of time allowed between the beginning of rendering and
  * display. If, compared to the current date, the next image is too
  * at least VOUT_IDLE_SLEEP plus the time required to render a few
  * images, to avoid trashing of decoded images */
 /* ?? this constant will probably evolve to a calculated value */
-#define VOUT_DISPLAY_DELAY              150000
+#define VOUT_DISPLAY_DELAY              100000
 
 /*
  * Framebuffer settings
 /* Allow use of X11 XShm (shared memory) extension if possible */
 #define VOUT_XSHM                       1
 
+/* Font maximum and minimum characters - characters outside this range are not
+ * printed - maximum range is 0-256 */
+#define VOUT_MIN_CHAR 1
+#define VOUT_MAX_CHAR 128
+
 /*******************************************************************************
  * Video parser configuration
  *******************************************************************************/
 
 /* Maximal size of the message queue - in case of overflow, all messages in the
  * queue are printed by the calling thread */
-#define INTF_MSG_QSIZE                  32
+#define INTF_MSG_QSIZE                  64
 
 /* Define to enable messages queues - disabling messages queue can be usefull
  * when debugging, since it allows messages which would not otherwise be printed,
  * due to a crash, to be printed anyway */
-//#define INTF_MSG_QUEUE
+#define INTF_MSG_QUEUE
 
 /* Format of the header for debug messages. The arguments following this header
  * are the file (char *), the function (char *) and the line (int) in which the
index d8ea417171739fa0534bed037d62dac4c8182c62..2a328e6d9f99b641950b0e013e2abbd6d20f08bb 100644 (file)
@@ -45,4 +45,5 @@ extern main_t *p_main;
  *******************************************************************************/
 int    main_GetIntVariable( char *psz_name, int i_default );
 char * main_GetPszVariable( char *psz_name, char *psz_default );
-
+void   main_PutIntVariable( char *psz_name, int i_value );
+void   main_PutPszVariable( char *psz_name, char *psz_value );
index 8715ce646a64c5bd19c3035a9b4fdc9325078aff..e1cdc88116815694de2b486ca6dbfac6f48dde33 100644 (file)
@@ -14,7 +14,7 @@
 /*******************************************************************************
  * yuv_data_t: type for storing one Y, U or V sample.
  *******************************************************************************/
-typedef s16 yuv_data_t;
+typedef u8 yuv_data_t;
 
 /*******************************************************************************
  * picture_t: video picture                                            
index 53ffd3d284e30b1a58157e00e38d4c57a33eb339..4f974bb3f27e7779e1a7cfc3d8300b96646e10cc 100644 (file)
@@ -25,8 +25,11 @@ typedef struct vout_thread_s
     int *               pi_status;                    /* temporary status flag */
 
     /* Common display properties */
+    boolean_t           b_info;              /* print additionnal informations */    
+    boolean_t           b_grayscale;             /* color or grayscale display */    
     int                 i_width;                /* current output method width */
     int                 i_height;              /* current output method height */
+    int                 i_bytes_per_line;/* bytes per line (including virtual) */    
     int                 i_screen_depth;                      /* bits per pixel */
     int                 i_bytes_per_pixel;                /* real screen depth */
     float               f_x_ratio;                 /* horizontal display ratio */
@@ -34,9 +37,18 @@ typedef struct vout_thread_s
 
 #ifdef STATS    
     /* Statistics */
-    count_t         c_loops;                               /* number of loops */
-    count_t         c_idle_loops;                     /* number of idle loops */
-    count_t         c_pictures;           /* number of pictures added to heap */
+    count_t             c_loops;                            /* number of loops */
+    count_t             c_idle_loops;                  /* number of idle loops */
+    count_t             c_pictures;        /* number of pictures added to heap */
+
+    /* FPS */
+    mtime_t             fps_sample[ VOUT_FPS_SAMPLES ];       /* samples dates */
+    int                 i_fps_index;                       /* index in samples */
+#endif
+
+#ifdef DEBUG_VIDEO
+    /* Video debugging informations */
+    mtime_t             picture_render_time;    /* last picture rendering time */
 #endif
 
     /* Output method */
@@ -49,12 +61,12 @@ typedef struct vout_thread_s
     /* YUV translation tables, for 15,16 and 24/32 bpp displays. 16 bits and 32
      * bits pointers points on the same data.
      * CAUTION: these tables are translated: their origin is -384 */
-    u16 *           pi_trans16_red;
-    u16 *           pi_trans16_green;
-    u16 *           pi_trans16_blue;
-    u32 *           pi_trans32_red;
-    u32 *           pi_trans32_green;
-    u32 *           pi_trans32_blue;          
+    u16 *               pi_trans16_red;
+    u16 *               pi_trans16_green;
+    u16 *               pi_trans16_blue;
+    u32 *               pi_trans32_red;
+    u32 *               pi_trans32_green;
+    u32 *               pi_trans32_blue;          
 } vout_thread_t;
 
 /*******************************************************************************
index 02dd1eb0eaae55d11ff0c9414aa9924811426783..090390c7d0a523359b75f98a3659fbd47e459123 100644 (file)
@@ -16,6 +16,10 @@ void         vout_SysEnd        ( p_vout_thread_t p_vout );
 void         vout_SysDestroy    ( p_vout_thread_t p_vout );
 int          vout_SysManage     ( p_vout_thread_t p_vout );
 void         vout_SysDisplay    ( p_vout_thread_t p_vout );
-byte_t *     vout_SysGetPicture ( p_vout_thread_t p_vout, int *pi_eol_offset );
+byte_t *     vout_SysGetPicture ( p_vout_thread_t p_vout );
+void         vout_SysPrint      ( p_vout_thread_t p_vout, int i_x, int i_y, 
+                                  int i_halign, int i_valign,  
+                                  unsigned char *psz_text );
+
 
 
index c39a5d3ee546e5317bc4984efc7ac9f98fe90a0c..21246b7951406987b0ea42424db266bbb5488805 100644 (file)
 
 /* Video */
 #include "video.h"
-#include "video_sys.h"
 #include "video_output.h"
 #include "video_decoder.h"
 
 /* Interface */
 #include "intf_cmd.h"
 #include "intf_ctrl.h"
-#include "intf_sys.h"
-#include "intf_console.h"
 #include "interface.h"
 
 #include "main.h"
index 3b0e11e3827dee60c724495f6a36b8e5a83fda4e..8f68b8606d0303a223e5ae509b18cba8deccb267 100644 (file)
@@ -119,8 +119,8 @@ p_intf_msg_t intf_MsgCreate( void )
     {
 #ifdef INTF_MSG_QUEUE
     /* Message queue initialization */
-    vlc_mutex_init( &p_intf_msg->lock );                     /* intialize lock */
-    p_intf_msg->i_count = 0;                                 /* queue is empty */
+    vlc_mutex_init( &p_msg->lock );                          /* intialize lock */
+    p_msg->i_count = 0;                                      /* queue is empty */
 #endif
 
 #ifdef DEBUG_LOG
@@ -293,9 +293,9 @@ void _intf_DbgMsgImm( char *psz_file, char *psz_function, int i_line,
 #ifdef INTF_MSG_QUEUE
 void intf_FlushMsg( void )
 {
-    vlc_mutex_lock( &p_program_data->intf_msg.lock );              /* get lock */
-    FlushLockedMsg( &p_program_data->intf_msg );             /* flush messages */
-    vlc_mutex_unlock( &p_program_data->intf_msg.lock );      /* give lock back */
+    vlc_mutex_lock( &p_main->p_msg->lock );                        /* get lock */
+    FlushLockedMsg( p_main->p_msg );                         /* flush messages */
+    vlc_mutex_unlock( &p_main->p_msg->lock );                /* give lock back */
 }
 #endif
 
@@ -325,7 +325,7 @@ static void QueueMsg( intf_msg_t *p_msg, int i_type, char *psz_format, va_list a
     vasprintf( &psz_str, psz_format, ap );
     if( psz_str == NULL )
     {
-        fprintf(stderr, "Warning: can't store following message (%s): ", 
+        fprintf(stderr, "warning: can't store following message (%s): ", 
                 strerror(errno) );
         vfprintf(stderr, psz_format, ap );
         exit( errno );
@@ -336,7 +336,7 @@ static void QueueMsg( intf_msg_t *p_msg, int i_type, char *psz_format, va_list a
     if( p_msg->i_count == INTF_MSG_QSIZE )            /* flush queue if needed */
     {  
 #ifdef DEBUG                 /* in debug mode, queue overflow causes a warning */
-        fprintf(stderr, "Warning: message queue overflow\n" );
+        fprintf(stderr, "warning: message queue overflow\n" );
 #endif
         FlushLockedMsg( p_msg );
     }
@@ -381,7 +381,7 @@ static void QueueDbgMsg(intf_msg_t *p_msg, char *psz_file, char *psz_function,
     vasprintf( &psz_str, psz_format, ap );
     if( psz_str == NULL )
     {
-        fprintf(stderr, "Warning: can't store following message (%s): ", 
+        fprintf(stderr, "warning: can't store following message (%s): ", 
                 strerror(errno) );
         fprintf(stderr, INTF_MSG_DBG_FORMAT, psz_file, psz_function, i_line );
        vfprintf(stderr, psz_format, ap );
@@ -393,7 +393,7 @@ static void QueueDbgMsg(intf_msg_t *p_msg, char *psz_file, char *psz_function,
     if( p_msg->i_count == INTF_MSG_QSIZE )            /* flush queue if needed */
     {  
 #ifdef DEBUG                 /* in debug mode, queue overflow causes a warning */
-        fprintf(stderr, "Warning: message queue overflow\n" );
+        fprintf(stderr, "warning: message queue overflow\n" );
 #endif
         FlushLockedMsg( p_msg );
     }
@@ -481,7 +481,7 @@ static void PrintMsg( intf_msg_item_t *p_msg )
     /* Check if formatting function suceeded */
     if( psz_msg == NULL )
     {
-        fprintf( stderr, "intf error: *** can not format message (%s): %s ***\n", 
+        fprintf( stderr, "error: can not format message (%s): %s\n", 
                  strerror( errno ), p_msg->psz_msg );        
         return;        
     }
@@ -519,7 +519,7 @@ static void PrintMsg( intf_msg_item_t *p_msg )
 
 #else
 
-static void PrintMsg( interface_msg_message_t *p_msg )
+static void PrintMsg( intf_msg_item_t *p_msg )
 {
     /*
      * Print messages on screen 
@@ -534,8 +534,7 @@ static void PrintMsg( interface_msg_message_t *p_msg )
         fprintf( stderr, p_msg->psz_msg );
         break;
     case INTF_MSG_INTF:                                  /* interface messages */
-        intf_PrintXConsole( &p_main->intf_thread.xconsole, 
-                            p_msg->psz_msg );
+        intf_ConsolePrint( p_main->p_intf->p_console, p_msg->psz_msg );
         break;
     } 
 }
index 0483c4ba1259b4056bf89b015c573c5a291b555f..b5033b26da13793cd2649eacb8d0b068f0170c3b 100644 (file)
 #define OPT_NOAUDIO             150
 #define OPT_STEREO              151
 #define OPT_MONO                152
-#define OPT_RATE                153
 
 #define OPT_NOVIDEO             160
-#define OPT_XDGA                161
-#define OPT_XSHM                162
-#define OPT_XNOSHM              163
-#define OPT_XNODGA              164
+#define OPT_COLOR               161
 
 #define OPT_NOVLANS             170
-#define OPT_VLAN_SERVER         171
  
 /* Long options */
 static const struct option longopts[] =
@@ -64,6 +59,8 @@ static const struct option longopts[] =
 
     /* Video options */
     {   "novideo",          0,          0,      OPT_NOVIDEO },           
+    {   "grayscale",        0,          0,      'g' },    
+    {   "color",            0,          0,      OPT_COLOR },                
 
     /* VLAN management options */
     {   "novlans",          0,          0,      OPT_NOVLANS },
@@ -72,7 +69,7 @@ static const struct option longopts[] =
 };
 
 /* Short options */
-static const char *psz_shortopts = "h";
+static const char *psz_shortopts = "hg";
 
 /*******************************************************************************
  * Global variable program_data - this is the one and only, see main.h
@@ -187,18 +184,19 @@ int main( int i_argc, char *ppsz_argv[], char *ppsz_env[] )
  *******************************************************************************/
 int main_GetIntVariable( char *psz_name, int i_default )
 {
-    char *psz_env;
+    char *      psz_env;                                  /* environment value */
+    char *      psz_end;                               /* end of parsing index */
+    long int    i_value;                                              /* value */
 
     psz_env = getenv( psz_name );
     if( psz_env )
-    {
-       psz_env = strchr( psz_env, '=' );
-       if( psz_env )
-       {
-           return( atoi( psz_env + 1) );
-       }
+    {        
+        i_value = strtol( psz_env, &psz_end, 0 );
+        if( (*psz_env != '\0') && (*psz_end == '\0') )
+        {
+            return( i_value );
+        }        
     }  
-    
     return( i_default );
 }
 
@@ -214,16 +212,52 @@ char * main_GetPszVariable( char *psz_name, char *psz_default )
     psz_env = getenv( psz_name );
     if( psz_env )
     {
-       psz_env = strchr( psz_env, '=' );
-       if( psz_env )
-       {
-           return( psz_env + 1 );
-       }
+        return( psz_env );
     }
-
     return( psz_default );    
 }
 
+/*******************************************************************************
+ * main_PutPszVariable: set the string value of an environment variable
+ *******************************************************************************
+ * This function is used to set some default parameters in modules. The use of
+ * this function will cause some memory leak: since some systems use the pointer
+ * passed to putenv to store the environment string, it can't be freed.
+ *******************************************************************************/
+void main_PutPszVariable( char *psz_name, char *psz_value )
+{
+    char *psz_env;
+
+    psz_env = malloc( strlen(psz_name) + strlen(psz_value) + 2 );
+    if( psz_env == NULL )
+    {
+        intf_ErrMsg("error: %s\n", strerror(ENOMEM));        
+    }
+    else
+    {
+        sprintf( psz_env, "%s=%s", psz_name, psz_value );
+        if( putenv( psz_env ) )
+        {
+            intf_ErrMsg("error: %s\n", strerror(errno));
+        }        
+    }
+}
+
+/*******************************************************************************
+ * main_PutIntVariable: set the integer value of an environment variable
+ *******************************************************************************
+ * This function is used to set some default parameters in modules. The use of
+ * this function will cause some memory leak: since some systems use the pointer
+ * passed to putenv to store the environment string, it can't be freed.
+ *******************************************************************************/
+void main_PutIntVariable( char *psz_name, int i_value )
+{
+    char psz_value[ 256 ];                                 /* buffer for value */    
+
+    sprintf(psz_value, "%d", i_value );        
+    main_PutPszVariable( psz_name, psz_value );    
+}
+
 /* following functions are local */
 
 /*******************************************************************************
@@ -299,19 +333,23 @@ static int GetConfiguration( int i_argc, char *ppsz_argv[], char *ppsz_env[] )
         case OPT_NOAUDIO:                                        /* --noaudio */
            p_main->b_audio = 0;
             break;
-        case OPT_STEREO:                                          /* --stereo */           
-           // ?? should be replaced by a putenv
-           //p_main->p_aout->dsp.b_stereo = 1;
+        case OPT_STEREO:                                          /* --stereo */
+            main_PutIntVariable( AOUT_STEREO_VAR, 1 );
             break;
         case OPT_MONO:                                              /* --mono */
-           // ?? should be replaced by a putenv
-           //p_main->p_aout->dsp.b_stereo = 0;
+            main_PutIntVariable( AOUT_STEREO_VAR, 0 );
             break;
 
         /* Video options */
         case OPT_NOVIDEO:                                         /* --novideo */
             p_main->b_video = 0;
             break;       
+        case 'g':                                           /* -g, --grayscale */
+            main_PutIntVariable( VOUT_GRAYSCALE_VAR, 1 );
+            break;            
+        case OPT_COLOR:                                             /* --color */
+            main_PutIntVariable( VOUT_GRAYSCALE_VAR, 0 );
+            break;            
 
         /* VLAN management options */
         case OPT_NOVLANS:                                         /* --novlans */
@@ -352,10 +390,12 @@ static void Usage( void )
     /* Options */
     intf_Msg("Options:" \
              "  -h, --help                      print usage\n" \
+             "  -g, --grayscale                 grayscale video\n" \
              "  --noaudio                       disable audio\n" \
              "  --stereo                        enable stereo\n" \
              "  --mono                          disable stereo\n"
              "  --novideo                       disable video\n" \
+             "  --color                         color video\n" \
              "  --novlans                              disable vlans\n" \
              );
 
@@ -374,6 +414,7 @@ static void Usage( void )
     /* Video parameters */
     intf_Msg("Video parameters:\n" \
              "  " VOUT_FB_DEV_VAR "=<filename>           framebuffer device path\n" \
+             "  " VOUT_GRAYSCALE_VAR "={1|0}             grayscale or color output\n" \
             ); 
 
     /* Vlan parameters */
index 9b27f39cc024de857b7041b2f4bae69d808ef0ca..7eb61012323dcf2564af0ffe8e85ff39a317e81a 100644 (file)
@@ -12,6 +12,7 @@
  * Preamble
  ******************************************************************************/
 #include <stdio.h>
+#include <unistd.h>
 #include <sys/time.h>
 
 #include "common.h"
@@ -82,24 +83,25 @@ void mwait( mtime_t date )
     {
         return;
     }
+#ifndef usleep
     tv_delay.tv_sec = delay / 1000000;
     tv_delay.tv_usec = delay % 1000000;
 
     /* see msleep() about select() errors */
     select( 0, NULL, NULL, NULL, &tv_delay );
+#else
+    usleep( delay );    
+#endif
 }
 
 /******************************************************************************
  * msleep: more precise sleep() (inline function)                        (ok ?)
  ******************************************************************************
- * This function uses select() in a classical way to implement a sleep() call
- * with a microsecond precision.
- * For synchronization purposes, mwait() should be prefered.
- ******************************************************************************
- * ?? decalre as inline
+ * Portable usleep() function.
  ******************************************************************************/
 void msleep( mtime_t delay )
 {
+#ifndef usleep
     struct timeval tv_delay;
 
     tv_delay.tv_sec = delay / 1000000;
@@ -109,4 +111,7 @@ void msleep( mtime_t delay )
      * (i.e. when a signal is sent to the thread, or when memory is full), and
      * can be ingnored. */
     select( 0, NULL, NULL, NULL, &tv_delay );
+#else
+    usleep( delay );
+#endif
 }
index 0424722c6775be224a35d1af3a2d96110ff9cc05..2122ec660fe7f135b35bc384fdbf48b4bb6713a4 100644 (file)
@@ -137,13 +137,10 @@ void vout_SysDisplay( vout_thread_t *p_vout )
 /*******************************************************************************
  * vout_SysGetPicture: get current display buffer informations
  *******************************************************************************
- * This function returns the address of the current display buffer, and the
- * number of samples per line. For 15, 16 and 32 bits displays, this value is 
- * the number of pixels in a line.
+ * This function returns the address of the current display buffer.
  *******************************************************************************/
-byte_t * vout_SysGetPicture( vout_thread_t *p_vout, int *pi_eol_offset )
+byte_t * vout_SysGetPicture( vout_thread_t *p_vout )
 {
-    *pi_eol_offset = p_vout->i_width;
 //????
 //    return( p_vout->p_sys->p_ximage[ p_vout->p_sys->i_buffer_index ].data );        
 }
index 3eb09a5d574ec8959b542199bf1ba6568b2a21d5..1594666676ca0098e5c30d638aafb9f50bde5dbf 100644 (file)
@@ -1,6 +1,6 @@
 /*******************************************************************************
  * video_output.c : video output thread
- * (c)1999 VideoLAN
+ * (c)2000 VideoLAN
  *******************************************************************************
  * This module describes the programming interface for video output threads.
  * It includes functions allowing to open a new thread, send pictures to a
@@ -27,6 +27,7 @@
 #include "video_output.h"
 #include "video_sys.h"
 #include "intf_msg.h"
+#include "main.h"
 
 /*******************************************************************************
  * Macros
  * (0 or 255) */
 #define CLIP_BYTE( i_val ) ( (i_val < 0) ? 0 : ((i_val > 255) ? 255 : i_val) )
 
+/* YUV_GRAYSCALE: parametric macro for YUV grayscale transformation.
+ * Due to the high performance need of this loop, all possible conditions 
+ * evaluations are made outside the transformation loop. However, the code does 
+ * not change much for two different loops. This macro allows to change slightly
+ * the content of the loop without having to copy and paste code. It is used in 
+ * RenderYUVPicture function. */
+#define YUV_GRAYSCALE( TRANS_RED, TRANS_GREEN, TRANS_BLUE, P_PIC )      \
+/* Main loop */                                                         \
+for (i_pic_y=0; i_pic_y < p_pic->i_height ; i_pic_y++)                  \
+{                                                                       \
+    for (i_pic_x=0; i_pic_x< p_pic->i_width; i_pic_x++)                 \
+    {                                                                   \
+        i_y = *p_y++;                                                   \
+        *P_PIC++ = TRANS_RED[i_y] | TRANS_GREEN[i_y] | TRANS_BLUE[i_y]; \
+    }                                                                   \
+    /* Skip until beginning of next line */                             \
+    P_PIC += i_eol_offset;                                              \
+}                                                                       
+
 /* YUV_TRANSFORM: parametric macro for YUV transformation.
  * Due to the high performance need of this loop, all possible conditions 
  * evaluations are made outside the transformation loop. However, the code does 
@@ -70,11 +90,11 @@ for (i_pic_y=0; i_pic_y < p_pic->i_height ; i_pic_y++)                  \
     }                                                                   \
     if( (CHROMA == 420) && !(i_pic_y & 0x1) )                           \
     {                                                                   \
-        p_u -= p_pic->i_width;                                          \
-        p_v -= p_pic->i_width;                                          \
+        p_u -= i_chroma_width;                                          \
+        p_v -= i_chroma_width;                                          \
     }                                                                   \
     /* Skip until beginning of next line */                             \
-    P_PIC += i_pic_eol_offset - p_pic->i_width;                         \
+    P_PIC += i_eol_offset;                                              \
 }
 
 /*******************************************************************************
@@ -82,7 +102,7 @@ for (i_pic_y=0; i_pic_y < p_pic->i_height ; i_pic_y++)                  \
  *******************************************************************************/
 
 /* RGB/YUV inversion matrix (ISO/IEC 13818-2 section 6.3.6, table 6.9) */
-int matrix_coefficients_table[8][4] =
+const int MATRIX_COEFFICIENTS_TABLE[8][4] =
 {
   {117504, 138453, 13954, 34903},       /* no sequence_display_extension */
   {117504, 138453, 13954, 34903},       /* ITU-R Rec. 709 (1990) */
@@ -94,15 +114,40 @@ int matrix_coefficients_table[8][4] =
   {117579, 136230, 16907, 35559}        /* SMPTE 240M (1987) */
 };
 
+/*******************************************************************************
+ * External prototypes
+ *******************************************************************************/
+#ifdef HAVE_MMX
+/* YUV transformations for MMX - in yuv-mmx.S 
+ *      p_y, p_u, p_v:          Y U and V planes
+ *      i_width, i_height:      frames dimensions (pixels)
+ *      i_ypitch, i_vpitch:     Y and V lines sizes (bytes)
+ *      i_aspect:               vertical aspect factor
+ *      pi_pic:                 RGB frame
+ *      i_dci_offset:           ?? x offset for left image border
+ *      i_offset_to_line_0:     ?? x offset for left image border
+ *      i_pitch:                RGB line size (bytes)
+ *      i_colortype:            0 for 565, 1 for 555 */
+void vout_YUV420_16_MMX( u8* p_y, u8* p_u, u8 *p_v, 
+                         unsigned int i_width, unsigned int i_height,
+                         unsigned int i_ypitch, unsigned int i_vpitch,
+                         unsigned int i_aspect, u8 *pi_pic, 
+                         u32 i_dci_offset, u32 i_offset_to_line_0,
+                         int CCOPitch, int i_colortype );
+#endif
+
 /*******************************************************************************
  * Local prototypes
  *******************************************************************************/
-static int      InitThread      ( vout_thread_t *p_vout );
-static void     RunThread       ( vout_thread_t *p_vout );
-static void     ErrorThread     ( vout_thread_t *p_vout );
-static void     EndThread       ( vout_thread_t *p_vout );
-static void     RenderPicture   ( vout_thread_t *p_vout, picture_t *p_pic );
-static void     RenderYUVPicture( vout_thread_t *p_vout, picture_t *p_pic );
+static int      InitThread              ( vout_thread_t *p_vout );
+static void     RunThread               ( vout_thread_t *p_vout );
+static void     ErrorThread             ( vout_thread_t *p_vout );
+static void     EndThread               ( vout_thread_t *p_vout );
+static void     RenderPicture           ( vout_thread_t *p_vout, picture_t *p_pic );
+static void     RenderYUVGrayPicture    ( vout_thread_t *p_vout, picture_t *p_pic );
+static void     RenderYUV16Picture      ( vout_thread_t *p_vout, picture_t *p_pic );
+static void     RenderYUV32Picture      ( vout_thread_t *p_vout, picture_t *p_pic );
+static void     RenderInfo              ( vout_thread_t *p_vout );
 
 /*******************************************************************************
  * vout_CreateThread: creates a new video output thread
@@ -132,15 +177,20 @@ vout_thread_t * vout_CreateThread               (
 
     /* Initialize some fields used by the system-dependant method - these fields will
      * probably be modified by the method */
+    p_vout->b_info              = 0;    
+    p_vout->b_grayscale         = main_GetIntVariable( VOUT_GRAYSCALE_VAR, 
+                                                       VOUT_GRAYSCALE_DEFAULT );
     p_vout->i_width             = i_width;
     p_vout->i_height            = i_height;
+    p_vout->i_bytes_per_line    = i_width * 2;    
     p_vout->i_screen_depth      = 15;
     p_vout->i_bytes_per_pixel   = 2;
     p_vout->f_x_ratio           = 1;
     p_vout->f_y_ratio           = 1;
-    intf_DbgMsg("wished configuration: %dx%dx%d (%d bytes per pixel), ratio %f:%f\n",
+    intf_DbgMsg("wished configuration: %dx%d,%d (%d bytes/pixel, %d bytes/line), ratio %.2f:%.2f, gray=%d\n",
                 p_vout->i_width, p_vout->i_height, p_vout->i_screen_depth,
-                p_vout->i_bytes_per_pixel, p_vout->f_x_ratio, p_vout->f_y_ratio );    
+                p_vout->i_bytes_per_pixel, p_vout->i_bytes_per_line,
+                p_vout->f_x_ratio, p_vout->f_y_ratio, p_vout->b_grayscale );
    
     /* Create and initialize system-dependant method - this function issues its
      * own error messages */
@@ -153,9 +203,10 @@ vout_thread_t * vout_CreateThread               (
       free( p_vout );
       return( NULL );
     }
-    intf_DbgMsg("actual configuration: %dx%dx%d (%d bytes per pixel), ratio %f:%f\n",
+    intf_DbgMsg("actual configuration: %dx%d,%d (%d bytes/pixel, %d bytes/line), ratio %.2f:%.2f, gray=%d\n",
                 p_vout->i_width, p_vout->i_height, p_vout->i_screen_depth,
-                p_vout->i_bytes_per_pixel, p_vout->f_x_ratio, p_vout->f_y_ratio );    
+                p_vout->i_bytes_per_pixel, p_vout->i_bytes_per_line,
+                p_vout->f_x_ratio, p_vout->f_y_ratio, p_vout->b_grayscale );
   
     /* Terminate the initialization */
     p_vout->b_die               = 0;
@@ -166,7 +217,7 @@ vout_thread_t * vout_CreateThread               (
 #ifdef STATS
     p_vout->c_loops             = 0;
     p_vout->c_idle_loops        = 0;
-    p_vout->c_pictures          = 0;    
+    p_vout->c_pictures          = 0;
 #endif      
 
     /* Create thread and set locks */
@@ -237,7 +288,20 @@ void vout_DestroyThread( vout_thread_t *p_vout, int *pi_status )
  *******************************************************************************/
 void  vout_DisplayPicture( vout_thread_t *p_vout, picture_t *p_pic )
 {
-    vlc_mutex_lock( &p_vout->lock );
+#ifdef DEBUG_VIDEO
+    char        psz_date[MSTRTIME_MAX_SIZE];         /* buffer for date string */
+#endif
+  
+   vlc_mutex_lock( &p_vout->lock );
+
+#ifdef DEBUG_VIDEO
+    /* A picture handed over for display must still be in the reserved state */
+    if( p_pic->i_status != RESERVED_PICTURE )
+    {
+        /* p_pic is a pointer, so it is printed with %p rather than %d */
+        intf_DbgMsg("error: picture %p has invalid status %d\n", p_pic, p_pic->i_status );
+    }
+#endif
 
     /* Remove reservation flag */
     p_pic->i_status = READY_PICTURE;
@@ -247,6 +311,12 @@ void  vout_DisplayPicture( vout_thread_t *p_vout, picture_t *p_pic )
     p_vout->c_pictures++;
 #endif
 
+#ifdef DEBUG_VIDEO
+    /* Send picture informations */
+    intf_DbgMsg("picture %p: type=%d, %dx%d, date=%s\n", p_pic, p_pic->i_type, 
+                p_pic->i_width,p_pic->i_height, mstrtime( psz_date, p_pic->date ) );    
+#endif
+
     vlc_mutex_unlock( &p_vout->lock );
 }
 
@@ -286,6 +356,10 @@ picture_t *vout_CreatePicture( vout_thread_t *p_vout, int i_type,
                  * memory allocation needs to be done */
                p_vout->p_picture[i_picture].i_width  = i_width;
                p_vout->p_picture[i_picture].i_status = RESERVED_PICTURE;
+#ifdef DEBUG_VIDEO
+                intf_DbgMsg("picture %p (in destroyed picture slot)\n", 
+                            &p_vout->p_picture[i_picture] );                
+#endif
                vlc_mutex_unlock( &p_vout->lock );
                return( &p_vout->p_picture[i_picture] );
            }
@@ -331,7 +405,7 @@ picture_t *vout_CreatePicture( vout_thread_t *p_vout, int i_type,
             break;                
 #ifdef DEBUG
         default:
-            intf_DbgMsg("unknown picture type %d\n", i_type );
+            intf_DbgMsg("error: unknown picture type %d\n", i_type );
             p_free_picture->p_data   =  NULL;            
             break;            
 #endif    
@@ -340,13 +414,13 @@ picture_t *vout_CreatePicture( vout_thread_t *p_vout, int i_type,
         if( p_free_picture->p_data != NULL )
         {        
             /* Copy picture informations */
-            p_free_picture->i_type           = i_type;
-            p_free_picture->i_status         = RESERVED_PICTURE;
-            p_free_picture->i_width          = i_width;
-            p_free_picture->i_height         = i_height;
-            p_free_picture->i_bytes_per_line = i_bytes_per_line;
-            p_free_picture->i_refcount       = 0;            
-            p_free_picture->i_matrix_coefficients = 1; // ?? default value            
+            p_free_picture->i_type                      = i_type;
+            p_free_picture->i_status                    = RESERVED_PICTURE;
+            p_free_picture->i_width                     = i_width;
+            p_free_picture->i_height                    = i_height;
+            p_free_picture->i_bytes_per_line            = i_bytes_per_line;
+            p_free_picture->i_refcount                  = 0;            
+            p_free_picture->i_matrix_coefficients       = 1; 
         }
         else
         {
@@ -357,18 +431,21 @@ picture_t *vout_CreatePicture( vout_thread_t *p_vout, int i_type,
             intf_ErrMsg("warning: %s\n", strerror( ENOMEM ) );            
         }
         
+#ifdef DEBUG_VIDEO
+        intf_DbgMsg("picture %p (in free picture slot)\n", p_free_picture );        
+#endif
         vlc_mutex_unlock( &p_vout->lock );
         return( p_free_picture );
     }
     
     // No free or destroyed picture could be found
-    intf_DbgMsg( "heap is full\n" );
+    intf_DbgMsg( "warning: heap is full\n" );
     vlc_mutex_unlock( &p_vout->lock );
     return( NULL );
 }
 
 /*******************************************************************************
- * vout_RemovePicture: remove a permanent or reserved picture from the heap
+ * vout_DestroyPicture: remove a permanent or reserved picture from the heap
  *******************************************************************************
  * This function frees a previously reserved picture or a permanent
  * picture. It is meant to be used when the construction of a picture aborted.
@@ -377,7 +454,22 @@ picture_t *vout_CreatePicture( vout_thread_t *p_vout, int i_type,
 void vout_DestroyPicture( vout_thread_t *p_vout, picture_t *p_pic )
 {
     vlc_mutex_lock( &p_vout->lock );
+
+#ifdef DEBUG_VIDEO
+    /* Report any picture that is not in the reserved state (pointer uses %p).
+     * NOTE(review): header also mentions permanent pictures - confirm intent */
+    if( p_pic->i_status != RESERVED_PICTURE )
+    {
+        intf_DbgMsg("error: picture %p has invalid status %d\n", p_pic, p_pic->i_status );
+    }
+#endif
+
     p_pic->i_status = DESTROYED_PICTURE;
+
+#ifdef DEBUG_VIDEO
+    intf_DbgMsg("picture %p\n", p_pic);    
+#endif
+
     vlc_mutex_unlock( &p_vout->lock );
 }
 
@@ -391,6 +483,11 @@ void vout_LinkPicture( vout_thread_t *p_vout, picture_t *p_pic )
 {
     vlc_mutex_lock( &p_vout->lock );
     p_pic->i_refcount++;
+
+#ifdef DEBUG_VIDEO
+    intf_DbgMsg("picture %p\n", p_pic);    
+#endif
+
     vlc_mutex_unlock( &p_vout->lock );
 }
 
@@ -407,6 +504,11 @@ void vout_UnlinkPicture( vout_thread_t *p_vout, picture_t *p_pic )
     {
        p_pic->i_status = DESTROYED_PICTURE;
     }
+
+#ifdef DEBUG_VIDEO
+    intf_DbgMsg("picture %p\n", p_pic);    
+#endif
+
     vlc_mutex_unlock( &p_vout->lock );    
 }
 
@@ -435,6 +537,12 @@ static int InitThread( vout_thread_t *p_vout )
         p_vout->p_picture[i_index].i_status= FREE_PICTURE;
     }
 
+#ifdef STATS
+    /* Initialize FPS index - the samples themselves need no initialization,
+     * since they are not read until enough pictures have been counted */
+    p_vout->i_fps_index = 0;    
+#endif
+
     /* Initialize output method - this function issues its own error messages */
     if( vout_SysInit( p_vout ) )
     {
@@ -457,7 +565,7 @@ static int InitThread( vout_thread_t *p_vout )
         break;
 #ifdef DEBUG
     default:
-        intf_DbgMsg("invalid bytes_per_pixel %d\n", p_vout->i_bytes_per_pixel );
+        intf_DbgMsg("error: invalid bytes_per_pixel %d\n", p_vout->i_bytes_per_pixel );
         i_pixel_size = sizeof( u32 );        
         break;              
 #endif
@@ -512,7 +620,7 @@ static int InitThread( vout_thread_t *p_vout )
         for( i_index = -384; i_index < 640; i_index++) 
         {
             p_vout->pi_trans16_red[i_index]     = (CLIP_BYTE( i_index ) & 0xf8)<<8;
-            p_vout->pi_trans16_green[i_index]   = (CLIP_BYTE( i_index ) & 0xf8)<<3;
+            p_vout->pi_trans16_green[i_index]   = (CLIP_BYTE( i_index ) & 0xfc)<<3;
             p_vout->pi_trans16_blue[i_index]    =  CLIP_BYTE( i_index ) >> 3;
         }
         break;        
@@ -527,7 +635,7 @@ static int InitThread( vout_thread_t *p_vout )
         break;        
 #ifdef DEBUG
     default:
-        intf_DbgMsg("invalid screen depth %d\n", p_vout->i_screen_depth );
+        intf_DbgMsg("error: invalid screen depth %d\n", p_vout->i_screen_depth );
         break;      
 #endif
     }
@@ -535,7 +643,7 @@ static int InitThread( vout_thread_t *p_vout )
     /* Mark thread as running and return */
     p_vout->b_active =          1;    
     *p_vout->pi_status =        THREAD_READY;    
-    intf_DbgMsg("thread ready");    
+    intf_DbgMsg("thread ready\n");    
     return(0);    
 }
 
@@ -551,7 +659,8 @@ static void RunThread( vout_thread_t *p_vout)
     int             i_picture;                                /* picture index */
     int             i_err;                                       /* error code */
     mtime_t         current_date;                              /* current date */
-    picture_t *     p_pic = NULL;                           /* picture pointer */    
+    picture_t *     p_pic;                                  /* picture pointer */    
+    mtime_t         pic_date = 0;                              /* picture date */    
 
     /* 
      * Initialize thread and free configuration 
@@ -575,18 +684,18 @@ static void RunThread( vout_thread_t *p_vout)
         * it can't be modified by the other threads (except if it is unliked,
         * but its data remains)
         */
+        p_pic = NULL;         
         vlc_mutex_lock( &p_vout->lock );
-
        for( i_picture = 0; i_picture < VOUT_MAX_PICTURES; i_picture++ )
        {
            if( (p_vout->p_picture[i_picture].i_status == READY_PICTURE) &&
                ( (p_pic == NULL) || 
-                 (p_vout->p_picture[i_picture].date < p_pic->date) ) )
+                 (p_vout->p_picture[i_picture].date < pic_date) ) )
            {
                p_pic = &p_vout->p_picture[i_picture];
+                pic_date = p_pic->date;                
            }
        }
-
        vlc_mutex_unlock( &p_vout->lock );
 
         /* 
@@ -594,8 +703,16 @@ static void RunThread( vout_thread_t *p_vout)
         */
         if( p_pic )
         {
+#ifdef STATS
+            /* Computes FPS rate */
+            p_vout->fps_sample[ p_vout->i_fps_index++ ] = pic_date;
+            if( p_vout->i_fps_index == VOUT_FPS_SAMPLES )
+            {
+                p_vout->i_fps_index = 0;                
+            }                            
+#endif
            current_date = mdate();
-           if( p_pic->date < current_date )
+           if( pic_date < current_date )
            {
                /* Picture is late: it will be destroyed and the thread will go
                 * immediately to next picture */
@@ -608,11 +725,14 @@ static void RunThread( vout_thread_t *p_vout)
                {
                    p_pic->i_status = DESTROYED_PICTURE;
                }
+
+#ifdef DEBUG_VIDEO
+               intf_DbgMsg( "warning: late picture %p skipped\n", p_pic );
+#endif
                vlc_mutex_unlock( &p_vout->lock );
-               intf_ErrMsg( "warning: late picture skipped\n" );
                p_pic = NULL;
            }
-           else if( p_pic->date > current_date + VOUT_DISPLAY_DELAY )
+           else if( pic_date > current_date + VOUT_DISPLAY_DELAY )
            {
                /* A picture is ready to be rendered, but its rendering date is
                 * far from the current one so the thread will perform an empty loop
@@ -622,8 +742,24 @@ static void RunThread( vout_thread_t *p_vout)
            else
            {
                /* Picture has not yet been displayed, and has a valid display
-                * date : render it */
+                * date : render it, then forget it */
                RenderPicture( p_vout, p_pic );
+                vlc_mutex_lock( &p_vout->lock );
+                if( p_pic->i_refcount )
+               {
+                   p_pic->i_status = DISPLAYED_PICTURE;
+               }
+               else
+               {
+                   p_pic->i_status = DESTROYED_PICTURE;
+               }
+                vlc_mutex_unlock( &p_vout->lock );
+
+                /* Print additional informations */
+                if( p_vout->b_info )
+                {                    
+                    RenderInfo( p_vout );
+                }                
            }
         }
     
@@ -643,24 +779,12 @@ static void RunThread( vout_thread_t *p_vout)
            if( p_pic )
            {
                /* A picture is ready to be displayed : sleep until its display date */
-               mwait( p_pic->date );
+               mwait( pic_date );
 
                if( !i_err )
                {
                    vout_SysDisplay( p_vout );
                }
-
-               /* Picture has been displayed : destroy it */
-               vlc_mutex_lock( &p_vout->lock );
-               if( p_pic->i_refcount )
-               {
-                   p_pic->i_status = DISPLAYED_PICTURE;
-               }
-               else
-               {
-                   p_pic->i_status = DESTROYED_PICTURE;
-               }
-               vlc_mutex_unlock( &p_vout->lock );
            }
            else
            {
@@ -754,156 +878,302 @@ static void EndThread( vout_thread_t *p_vout )
  * and copy it to the current rendering buffer. No lock is required, since the
  * rendered picture has been determined as existant, and will only be destroyed
  * by the vout thread later.
+ * ???? 24 and 32 bpp should probably be separated
  *******************************************************************************/
 static void RenderPicture( vout_thread_t *p_vout, picture_t *p_pic )
 {
+#ifdef DEBUG_VIDEO
+    /* Send picture informations */
+    intf_DbgMsg("picture %p\n", p_pic );
+
+    /* Store rendering start date */
+    p_vout->picture_render_time = mdate();    
+#endif
+
     switch( p_pic->i_type )
     {
     case YUV_420_PICTURE:                   /* YUV picture: YUV transformation */        
     case YUV_422_PICTURE:
     case YUV_444_PICTURE:
-        RenderYUVPicture( p_vout, p_pic );        
+        if( p_vout->b_grayscale )                                 /* grayscale */
+        {
+            RenderYUVGrayPicture( p_vout, p_pic );            
+        }
+        else if( p_vout->i_bytes_per_pixel == 2 )        /* color 15 or 16 bpp */
+        {
+            RenderYUV16Picture( p_vout, p_pic );        
+        }
+        else                                             /* color 24 or 32 bpp */
+        {
+            RenderYUV32Picture( p_vout, p_pic );            
+        }
         break;        
 #ifdef DEBUG
     default:        
-        intf_DbgMsg("unknown picture type %d\n", p_pic->i_type );
+        intf_DbgMsg("error: unknown picture type %d\n", p_pic->i_type );
         break;        
 #endif
     }
+
+#ifdef DEBUG_VIDEO
+    /* Computes rendering time */
+    p_vout->picture_render_time = mdate() - p_vout->picture_render_time;    
+#endif
 }
 
 /*******************************************************************************
- * RenderYUVPicture: render a YUV picture
+ * RenderYUVGrayPicture: render a 15, 16, 24 or 32 bpp YUV picture in grayscale
  *******************************************************************************
  * Performs the YUV convertion. The picture sent to this function should only
  * have YUV_420, YUV_422 or YUV_444 types.
  *******************************************************************************/
-static void RenderYUVPicture( vout_thread_t *p_vout, picture_t *p_pic )
+static void RenderYUVGrayPicture( vout_thread_t *p_vout, picture_t *p_pic )
 {
-    int         i_crv;    
-    int         i_cbu;
-    int         i_cgu;
-    int         i_cgv;    
-    int         i_pic_x;                            /* x coordinate in picture */
-    int         i_pic_y;                            /* y coordinate in picture */
+    int         i_pic_x, i_pic_y;                /* x,y coordinates in picture */
+    int         i_width, i_height;                             /* picture size */
+    int         i_eol_offset;          /* pixels from end of line to next line */   
     yuv_data_t *p_y;                                     /* Y data base adress */
-    yuv_data_t *p_u;                                     /* U data base adress */
-    yuv_data_t *p_v;                                     /* V data base adress */
     yuv_data_t  i_y;                                               /* Y sample */
-    yuv_data_t  i_u;                                               /* U sample */
-    yuv_data_t  i_v;                                               /* V sample */
-    u16        *pi_pic16;               /* base adress for destination picture */
-    u32        *pi_pic32;               /* base adress for destination picture */
-    int         i_pic_eol_offset;                        /* end of line offset */    
-    
-    /* Choose transformation matrix coefficients */
-    i_crv = matrix_coefficients_table[p_pic->i_matrix_coefficients][0];
-    i_cbu = matrix_coefficients_table[p_pic->i_matrix_coefficients][1];
-    i_cgu = matrix_coefficients_table[p_pic->i_matrix_coefficients][2];
-    i_cgv = matrix_coefficients_table[p_pic->i_matrix_coefficients][3];
-
-    /* Set the base pointers */
-    p_y = p_pic->p_y;
-    p_u = p_pic->p_u;
-    p_v = p_pic->p_v;
-    
-    /* Get base adress for destination image */
-    pi_pic32 = (u32 *)pi_pic16 = 
-        (u16 *)vout_SysGetPicture( p_vout, &i_pic_eol_offset );
+    u16 *       pi_pic16;                 /* destination picture, 15 or 16 bpp */
+    u32 *       pi_pic32;                 /* destination picture, 24 or 32 bpp */
+    u16 *       pi_trans16_red;                    /* red transformation table */
+    u16 *       pi_trans16_green;                /* green transformation table */
+    u16 *       pi_trans16_blue;                  /* blue transformation table */
+    u32 *       pi_trans32_red;                    /* red transformation table */
+    u32 *       pi_trans32_green;                /* green transformation table */
+    u32 *       pi_trans32_blue;                  /* blue transformation table */
+    /* Set the base pointers and transformation parameters */
+    p_y =               p_pic->p_y;
+    i_width =           p_pic->i_width;
+    i_height =          p_pic->i_height;
+    i_eol_offset =      p_vout->i_bytes_per_line / p_vout->i_bytes_per_pixel - i_width;
 
-    //?? copy used values (translation, height, width) to local variables ?
-
-    /* Do YUV transformation - the loops are repeated for optimization */
+    /* Get base adress for destination image and translation tables, then
+     * transform image */
     switch( p_vout->i_screen_depth )
     {
     case 15:
     case 16:
-        switch( p_pic->i_type )
-        {
-          case YUV_420_PICTURE:             /* 15 or 16 bpp 420 transformation */
-//#ifdef HAVE_MMX
-            // ?? MMX
-//#else
-            YUV_TRANSFORM( 420,
-                           p_vout->pi_trans16_red, 
-                           p_vout->pi_trans16_green, 
-                           p_vout->pi_trans16_blue,
-                           pi_pic16 );            
-//#endif
-            break;
-          case YUV_422_PICTURE:             /* 15 or 16 bpp 422 transformation */
-            YUV_TRANSFORM( 422,
-                           p_vout->pi_trans16_red, 
-                           p_vout->pi_trans16_green, 
-                           p_vout->pi_trans16_blue,
-                           pi_pic16 );            
-           break;
-          case YUV_444_PICTURE:             /* 15 or 16 bpp 444 transformation */
-            YUV_TRANSFORM( 444,
-                           p_vout->pi_trans16_red, 
-                           p_vout->pi_trans16_green, 
-                           p_vout->pi_trans16_blue,
-                           pi_pic16 );            
-            break;                
-        }
-        break;        
-    case 24: // ?? probably wrong !
-       switch( p_pic->i_type )
-        {
-          case YUV_420_PICTURE:                   /* 24 bpp 420 transformation */
-            YUV_TRANSFORM( 420,  
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );
-            break;
-          case YUV_422_PICTURE:                   /* 24 bpp 422 transformation */
-            YUV_TRANSFORM( 422,
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );
-            break;
-          case YUV_444_PICTURE:                   /* 24 bpp 444 transformation */
-            YUV_TRANSFORM( 444,
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );
-            break;                
-        }
+        pi_trans16_red =      p_vout->pi_trans16_red;
+        pi_trans16_green =    p_vout->pi_trans16_green;
+        pi_trans16_blue =     p_vout->pi_trans16_blue;        
+        pi_pic16 = (u16 *) vout_SysGetPicture( p_vout );
+
+        YUV_GRAYSCALE( pi_trans16_red, pi_trans16_green, pi_trans16_blue,
+                       pi_pic16 );
         break;        
+    case 24:        
     case 32:
-        switch( p_pic->i_type )
-        {
-          case YUV_420_PICTURE:                   /* 32 bpp 420 transformation */
-            YUV_TRANSFORM( 420,
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );            
-            break;
-          case YUV_422_PICTURE:                   /* 32 bpp 422 transformation */
-            YUV_TRANSFORM( 422,
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );
-            break;
-          case YUV_444_PICTURE:                   /* 32 bpp 444 transformation */
-            YUV_TRANSFORM( 444,
-                           p_vout->pi_trans32_red, 
-                           p_vout->pi_trans32_green, 
-                           p_vout->pi_trans32_blue,
-                           pi_pic32 );
-           break;                
-        }
+        pi_trans32_red =      p_vout->pi_trans32_red;
+        pi_trans32_green =    p_vout->pi_trans32_green;
+        pi_trans32_blue =     p_vout->pi_trans32_blue;    
+        pi_pic32 = (u32 *) vout_SysGetPicture( p_vout );
+
+        YUV_GRAYSCALE( pi_trans32_red, pi_trans32_green, pi_trans32_blue,
+                       pi_pic32 );
+        break;        
+#ifdef DEBUG
+    default:
+        intf_DbgMsg("error: invalid screen depth %d\n", p_vout->i_screen_depth );
+        break;    
+#endif      
+    }
+}
+
+
+/*******************************************************************************
+ * RenderYUV16Picture: render a 15 or 16 bpp YUV picture
+ *******************************************************************************
+ * Performs the YUV conversion. The picture sent to this function should only
+ * have YUV_420, YUV_422 or YUV_444 types.
+ *******************************************************************************/
+static void RenderYUV16Picture( vout_thread_t *p_vout, picture_t *p_pic )
+{
+    int         i_crv, i_cbu, i_cgu, i_cgv;     /* transformation coefficients */
+    int         i_pic_x, i_pic_y;                /* x,y coordinates in picture */
+    int         i_y, i_u, i_v;                           /* Y, U and V samples */
+    int         i_width, i_height;                             /* picture size */
+    int         i_chroma_width;                                /* chroma width */    
+    int         i_eol_offset;          /* pixels from end of line to next line */
+    yuv_data_t *p_y;                                     /* Y data base adress */
+    yuv_data_t *p_u;                                     /* U data base adress */
+    yuv_data_t *p_v;                                     /* V data base adress */
+    u16 *       pi_pic;                 /* base adress for destination picture */
+    u16 *       pi_trans_red;                      /* red transformation table */
+    u16 *       pi_trans_green;                  /* green transformation table */
+    u16 *       pi_trans_blue;                    /* blue transformation table */
+    /* Choose transformation matrix coefficients */
+    i_crv = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][0];
+    i_cbu = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][1];
+    i_cgu = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][2];
+    i_cgv = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][3];
+
+    /* Choose the conversions tables */
+    pi_trans_red =      p_vout->pi_trans16_red;
+    pi_trans_green =    p_vout->pi_trans16_green;
+    pi_trans_blue =     p_vout->pi_trans16_blue;    
+
+    /* Set the base pointers and transformation parameters */
+    p_y =               p_pic->p_y;
+    p_u =               p_pic->p_u;
+    p_v =               p_pic->p_v;
+    i_width =           p_pic->i_width;
+    i_height =          p_pic->i_height;
+    i_chroma_width =    i_width / 2;
+    i_eol_offset =      p_vout->i_bytes_per_line / 2 - i_width;    
+        
+    /* Get base adress for destination image */
+    pi_pic = (u16 *)vout_SysGetPicture( p_vout );
+
+    /* Do YUV transformation - the loops are repeated for optimization */
+    switch( p_pic->i_type )
+    {
+    case YUV_420_PICTURE:                   /* 15 or 16 bpp 420 transformation */
+#ifdef HAVE_MMX
+        vout_YUV420_16_MMX( p_y, p_u, p_v, 
+                            i_width, i_height, 
+                            i_width, i_chroma_width,
+                            0, (u8 *) pi_pic, 
+                            0, 0, p_vout->i_bytes_per_line, 
+                            p_vout->i_screen_depth == 15 );
+#else
+        YUV_TRANSFORM( 420,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
+#endif
+        break;
+    case YUV_422_PICTURE:                   /* 15 or 16 bpp 422 transformation */
+        YUV_TRANSFORM( 422,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
         break;
+    case YUV_444_PICTURE:                   /* 15 or 16 bpp 444 transformation */
+        YUV_TRANSFORM( 444,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
+        break;                 
+    }
+}
+
+/*******************************************************************************
+ * RenderYUV32Picture: render a 24 or 32 bpp YUV picture
+ *******************************************************************************
+ * Performs the YUV conversion. The picture sent to this function should only
+ * have YUV_420, YUV_422 or YUV_444 types.
+ *******************************************************************************/
+static void RenderYUV32Picture( vout_thread_t *p_vout, picture_t *p_pic )
+{
+    int         i_crv, i_cbu, i_cgu, i_cgv;     /* transformation coefficients */
+    int         i_pic_x, i_pic_y;                /* x,y coordinates in picture */
+    int         i_y, i_u, i_v;                           /* Y, U and V samples */
+    int         i_width, i_height;                             /* picture size */
+    int         i_chroma_width;                                /* chroma width */    
+    int         i_eol_offset;          /* pixels from end of line to next line */
+    yuv_data_t *p_y;                                     /* Y data base adress */
+    yuv_data_t *p_u;                                     /* U data base adress */
+    yuv_data_t *p_v;                                     /* V data base adress */
+    u32 *       pi_pic;                 /* base adress for destination picture */
+    u32 *       pi_trans_red;                      /* red transformation table */
+    u32 *       pi_trans_green;                  /* green transformation table */
+    u32 *       pi_trans_blue;                    /* blue transformation table */
+    /* Choose transformation matrix coefficients */
+    i_crv = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][0];
+    i_cbu = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][1];
+    i_cgu = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][2];
+    i_cgv = MATRIX_COEFFICIENTS_TABLE[p_pic->i_matrix_coefficients][3];
+
+    /* Choose the conversions tables */
+    pi_trans_red =      p_vout->pi_trans32_red;
+    pi_trans_green =    p_vout->pi_trans32_green;
+    pi_trans_blue =     p_vout->pi_trans32_blue;    
+
+    /* Set the base pointers and transformation parameters */
+    p_y =               p_pic->p_y;
+    p_u =               p_pic->p_u;
+    p_v =               p_pic->p_v;
+    i_width =           p_pic->i_width;
+    i_height =          p_pic->i_height;
+    i_chroma_width =    i_width / 2;
+    i_eol_offset =      p_vout->i_bytes_per_line / p_vout->i_bytes_per_pixel - i_width;
+        
+    /* Get base adress for destination image */
+    pi_pic = (u32 *)vout_SysGetPicture( p_vout );
+
+    /* Do YUV transformation - the loops are repeated for optimization */
+    switch( p_pic->i_type )
+    {
+    case YUV_420_PICTURE:                   /* 24 or 32 bpp 420 transformation */
+        YUV_TRANSFORM( 420,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
+        break;
+    case YUV_422_PICTURE:                   /* 24 or 32 bpp 422 transformation */
+        YUV_TRANSFORM( 422,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
+        break;
+    case YUV_444_PICTURE:                   /* 24 or 32 bpp 444 transformation */
+        YUV_TRANSFORM( 444,
+                       pi_trans_red, 
+                       pi_trans_green, 
+                       pi_trans_blue,
+                       pi_pic );            
+        break;                 
+    }
+}
+
+/*******************************************************************************
+ * RenderInfo: print additional information on a picture
+ *******************************************************************************
+ * This function adds information such as FPS and buffer size to a picture
+ *******************************************************************************/
+static void RenderInfo( vout_thread_t *p_vout )
+{
+    char        psz_buffer[256];                              /* string buffer */
+
+#ifdef STATS
+    /* Print FPS rate */
+    if( p_vout->c_pictures > VOUT_FPS_SAMPLES )
+    {        
+        sprintf( psz_buffer, "%.2f fps", (double) VOUT_FPS_SAMPLES * 1000000 /
+                 ( p_vout->fps_sample[ (p_vout->i_fps_index + (VOUT_FPS_SAMPLES - 1)) % 
+                                     VOUT_FPS_SAMPLES ] -
+                   p_vout->fps_sample[ p_vout->i_fps_index ] ) );        
+        vout_SysPrint( p_vout, p_vout->i_width, 0, 1, -1, psz_buffer );
+    }
+
+    /* Print statistics */
+    sprintf( psz_buffer, "%ld pictures, %.1f %% idle loops", p_vout->c_pictures,
+             (double) p_vout->c_idle_loops * 100 / p_vout->c_loops );    
+    vout_SysPrint( p_vout, 0, 0, -1, -1, psz_buffer );    
+#endif
+    
 #ifdef DEBUG
-    default:        
-        intf_DbgMsg("invalid screen depth %d\n", p_vout->i_screen_depth );
-        break;        
+    /* Print heap size  */
+    sprintf( psz_buffer, "video heap size: %d (%.1f %%)", p_vout->i_pictures,
+             (double) p_vout->i_pictures * 100 / VOUT_MAX_PICTURES );
+    vout_SysPrint( p_vout, 0, p_vout->i_height, -1, 1, psz_buffer );    
+#endif
+
+#ifdef DEBUG_VIDEO
+    /* Print rendering statistics */
+    sprintf( psz_buffer, "picture rendering time: %lu us", 
+             (unsigned long) p_vout->picture_render_time );    
+    vout_SysPrint( p_vout, p_vout->i_width, p_vout->i_height, 1, 1, psz_buffer );    
 #endif
-    }                
 }
 
+
index 12342ff5df8030447b11e0b9dcaa03e2d591882d..d32a2bdf6ab798388d34d90fb9e84c551df30883 100644 (file)
@@ -47,6 +47,12 @@ typedef struct vout_sys_s
     Window              window;                     /* window instance handler */
     GC                  gc;                /* graphic context instance handler */    
 
+    /* Font information */
+    int                 i_char_bytes_per_line;      /* character width (bytes) */
+    int                 i_char_height;             /* character height (lines) */
+    int                 i_char_interspacing; /* space between centers (pixels) */
+    byte_t *            pi_font;                       /* pointer to font data */
+
     /* Display buffers and shared memory information */
     int                 i_buffer_index;                        /* buffer index */
     XImage *            p_ximage[2];                         /* XImage pointer */   
@@ -58,7 +64,7 @@ typedef struct vout_sys_s
  *******************************************************************************/
 static int  X11OpenDisplay      ( vout_thread_t *p_vout, char *psz_display, Window root_window );
 static void X11CloseDisplay     ( vout_thread_t *p_vout );
-
+static int  X11GetFont          ( vout_thread_t *p_vout );
 static int  X11CreateWindow     ( vout_thread_t *p_vout );
 static void X11DestroyWindow    ( vout_thread_t *p_vout );
 static int  X11CreateImage      ( vout_thread_t *p_vout, XImage **pp_ximage );
@@ -68,6 +74,7 @@ static int  X11CreateShmImage   ( vout_thread_t *p_vout, XImage **pp_ximage,
 static void X11DestroyShmImage  ( vout_thread_t *p_vout, XImage *p_ximage, 
                                   XShmSegmentInfo *p_shm_info );
 
+
 /*******************************************************************************
  * vout_SysCreate: allocate X11 video thread output method
  *******************************************************************************
@@ -201,30 +208,10 @@ void vout_SysDestroy( vout_thread_t *p_vout )
  *******************************************************************************/
 int vout_SysManage( vout_thread_t *p_vout )
 {
-    boolean_t b_resized;
     //??
+    /* No X11 event is processed here yet: the function always returns 0 */
+    return 0;
 
-    /* ?? this function should not receive any usefull X11 messages, since they
-     * have tobe treated by the main interface window - check it. */
-    return 0; //??
-
-
-    /* If window has been resized, re-create images */
-/* ??    if( b_resized )
-    {
-        intf_DbgMsg("%p -> resizing window\n", p_vout);
-        X11DestroyImages( p_vout );
-        if( X11CreateImages( p_vout ) )
-        { */
-            /* A fatal error occured: images could not be re-created. Note
-             * that in this case, the images pointers will be NULL, so the
-             * image destructor will know it does not need to destroy them. */
-/*            return( -1 );
-        }
-        return( 1 );        
-    }*/
-
-    return( 0 );
+    // ?? TODO (unreachable note): if the window has been resized, end/init
+    // again and return >0
 }
 
 /*******************************************************************************
@@ -246,10 +233,6 @@ void vout_SysDisplay( vout_thread_t *p_vout )
 
         /* Send the order to the X server */
         XFlush(p_vout->p_sys->p_display);
-        
-        /* ?? wait until effective display ? */
-/*        do XNextEvent(Display_Ptr, &xev);
-        while(xev.type!=CompletionType);*/
     }
     else                                  /* regular X11 capabilities are used */
     {
@@ -270,16 +253,84 @@ void vout_SysDisplay( vout_thread_t *p_vout )
 /*******************************************************************************
  * vout_SysGetPicture: get current display buffer informations
  *******************************************************************************
- * This function returns the address of the current display buffer, and the
- * number of samples per line. For 15, 16 and 32 bits displays, this value is 
- * the number of pixels in a line.
+ * This function returns the address of the current display buffer.
  *******************************************************************************/
-byte_t * vout_SysGetPicture( vout_thread_t *p_vout, int *pi_eol_offset )
+byte_t * vout_SysGetPicture( vout_thread_t *p_vout )
 {
-    *pi_eol_offset = p_vout->i_width;
+    /* Return the pixel data of the XImage selected by i_buffer_index */
     return( p_vout->p_sys->p_ximage[ p_vout->p_sys->i_buffer_index ]->data );        
 }
 
+/*******************************************************************************
+ * vout_SysPrint: print simple text on a picture
+ *******************************************************************************
+ * This function prints a simple text on the current display buffer. It is
+ * designed to print debugging or general information, not to render subtitles.
+ * Since there is no way to print text on an XImage directly, the pixels are
+ * copied from the font bitmap stored in pi_font.
+ *
+ * i_x, i_y     reference point in the picture (pixels)
+ * i_halign     0: text centered on i_x, 1: text right aligned on i_x,
+ *              any other value: i_x is the left edge of the text
+ * i_valign     0: text centered on i_y, 1: text bottom aligned on i_y,
+ *              any other value: i_y is the top edge of the text
+ * psz_text     null-terminated text; characters outside of the range
+ *              [VOUT_MIN_CHAR, VOUT_MAX_CHAR[ are left blank
+ *
+ * Characters which do not fit entirely in the picture are not drawn, so the
+ * function never writes outside of the display buffer.
+ *******************************************************************************/
+void vout_SysPrint( vout_thread_t *p_vout, int i_x, int i_y, int i_halign, 
+                    int i_valign, unsigned char *psz_text )
+{
+    int                 i_line;                    /* line in character matrix */
+    int                 i_height;                          /* character height */
+    int                 i_char_bytes_per_line;         /* total bytes per line */
+    int                 i_interspacing;     /* space between centers (pixels) */
+    int                 i_text_width;                   /* text width (pixels) */
+    int                 i_offset;        /* character offset in a line (bytes) */
+    byte_t *            pi_buffer;                   /* current display buffer */
+    byte_t *            pi_pic;                                /* picture data */
+    byte_t *            pi_char;                             /* character data */
+
+    /* Copy used variables to local */
+    i_height =                  p_vout->p_sys->i_char_height;
+    i_char_bytes_per_line =     p_vout->p_sys->i_char_bytes_per_line;
+    i_interspacing =            p_vout->p_sys->i_char_interspacing;
+    i_text_width =              i_interspacing * (int) strlen( (char *) psz_text );
+    pi_buffer = (byte_t *) p_vout->p_sys->p_ximage[ p_vout->p_sys->i_buffer_index ]->data;
+
+    /* Update upper left coordinates according to alignment */
+    switch( i_halign )
+    {
+    case 0:                                                        /* centered */
+        i_x -= i_text_width / 2;
+        break;
+    case 1:                                                   /* right aligned */
+        i_x -= i_text_width;
+        break;
+    }
+    switch( i_valign )
+    {
+    case 0:                                                        /* centered */
+        i_y -= i_height / 2;
+        break;
+    case 1:                                                  /* bottom aligned */
+        i_y -= i_height;
+        break;
+    }
+
+    /* Vertical clipping: all characters share the same lines, so nothing can
+     * be printed if the text line is not entirely inside the picture */
+    if( (i_y < 0) || (i_y + i_height > p_vout->i_height) )
+    {
+        return;
+    }
+
+    /* Print text, jumping to the next character position after each one */
+    for( ; *psz_text != '\0'; psz_text++, i_x += i_interspacing )
+    {
+        /* Characters which are not in the font are left blank */
+        if( (*psz_text < VOUT_MIN_CHAR) || (*psz_text >= VOUT_MAX_CHAR) )
+        {
+            continue;
+        }
+
+        /* Horizontal clipping: skip characters crossing the picture borders */
+        i_offset = i_x * p_vout->i_bytes_per_pixel;
+        if( (i_x < 0)
+            || (i_offset + i_char_bytes_per_line > p_vout->i_bytes_per_line) )
+        {
+            continue;
+        }
+
+        /* Select character in font and its destination in picture */
+        pi_char =   p_vout->p_sys->pi_font + (*psz_text - VOUT_MIN_CHAR) *
+            i_height * i_char_bytes_per_line;
+        pi_pic =    pi_buffer + i_y * p_vout->i_bytes_per_line + i_offset;
+
+        /* Copy character, line by line */
+        for( i_line = 0; i_line < i_height; i_line++ )
+        {
+            memcpy( pi_pic, pi_char, i_char_bytes_per_line );
+            pi_char +=  i_char_bytes_per_line;
+            pi_pic +=   p_vout->i_bytes_per_line;
+        }
+    }
+}
 
 /* following functions are local */
 
@@ -326,15 +377,25 @@ static int X11OpenDisplay( vout_thread_t *p_vout, char *psz_display, Window root
         XCloseDisplay( p_vout->p_sys->p_display );        
         return( 1  );
         break;
-    }
+    }    
 
-    /* Create a window */
-    if( X11CreateWindow( p_vout ) )                           /* create window */
+    /* Create a window and set line length */
+    if( X11CreateWindow( p_vout ) )
     {
         intf_ErrMsg("error: can't open a window\n");        
         XCloseDisplay( p_vout->p_sys->p_display );        
         return( 1 );
     }
+    p_vout->i_bytes_per_line = p_vout->i_width * p_vout->i_bytes_per_pixel;    
+
+    /* Get font information */
+    if( X11GetFont( p_vout ) )
+    {
+        intf_ErrMsg("error: can't read default font\n");
+        X11DestroyWindow( p_vout );
+        XCloseDisplay( p_vout->p_sys->p_display );
+        return( 1 );        
+    }
 
     return( 0 );    
 }
@@ -347,10 +408,101 @@ static int X11OpenDisplay( vout_thread_t *p_vout, char *psz_display, Window root
  *******************************************************************************/
 static void X11CloseDisplay( vout_thread_t *p_vout )
 {
+    // Free the font bitmap (pi_font is allocated by X11GetFont)
+    free( p_vout->p_sys->pi_font );    
+
+    // Destroy window, then close the connection to the display
     X11DestroyWindow( p_vout );
     XCloseDisplay( p_vout->p_sys->p_display );    
 }
 
+/*******************************************************************************
+ * X11GetFont: get default font bitmap information
+ *******************************************************************************
+ * This function converts the 'fixed' X11 font into a bitmap for later use by
+ * the vout_SysPrint function: each character of the range
+ * [VOUT_MIN_CHAR, VOUT_MAX_CHAR[ is drawn in white on black in a pixmap, which
+ * is then read back and stored in pi_font, one character after the other,
+ * without any padding between lines.
+ * It returns 0 on success. On error, it returns 1 and pi_font is set to NULL.
+ *******************************************************************************/
+static int X11GetFont( vout_thread_t *p_vout )
+{
+    Display *           p_display;                       /* display connection */
+    XFontStruct *       p_font_info;             /* font information structure */
+    Pixmap              pixmap;              /* pixmap used to draw characters */
+    GC                  gc;                                 /* graphic context */
+    XGCValues           gc_values;               /* graphic context properties */
+    XImage *            p_ximage;                     /* ximage for characters */
+    int                 i_char;                             /* character index */
+    char                c_char;                   /* character passed to Xlib */
+    int                 i_chars;               /* number of characters in font */
+    int                 i_char_width;              /* character width (pixels) */
+    int                 i_char_bytes;                  /* total character size */
+    int                 i_line;                        /* line index in pixmap */
+    int                 i_lines;                 /* number of lines in pixmap */
+    int                 i_result;                              /* return value */
+
+    p_display =                 p_vout->p_sys->p_display;
+    p_vout->p_sys->pi_font =    NULL;
+    i_result =                  1;
+
+    /* Load font */
+    p_font_info = XLoadQueryFont( p_display, "fixed" );
+    if( p_font_info == NULL )
+    {
+        intf_ErrMsg("error: can't load 'fixed' font\n");
+        return( 1 );
+    }
+
+    /* Get character size */
+    i_char_width =                              p_font_info->max_bounds.lbearing +
+        p_font_info->max_bounds.rbearing;
+    p_vout->p_sys->i_char_bytes_per_line =      i_char_width * p_vout->i_bytes_per_pixel;
+    p_vout->p_sys->i_char_height =              p_font_info->max_bounds.ascent +
+        p_font_info->max_bounds.descent;
+    i_char_bytes =                              p_vout->p_sys->i_char_bytes_per_line *
+        p_vout->p_sys->i_char_height;
+    p_vout->p_sys->i_char_interspacing =        p_font_info->max_bounds.width;
+    i_chars =                                   VOUT_MAX_CHAR - VOUT_MIN_CHAR;
+    i_lines =                                   p_vout->p_sys->i_char_height * i_chars;
+
+    /* A pixmap can't be created with a null size: check it now rather than
+     * receiving an X error later */
+    if( (i_char_width <= 0) || (p_vout->p_sys->i_char_height <= 0) || (i_chars <= 0) )
+    {
+        intf_ErrMsg("error: invalid 'fixed' font size\n");
+        XFreeFont( p_display, p_font_info );
+        return( 1 );
+    }
+
+    /* Allocate font descriptor */
+    p_vout->p_sys->pi_font = malloc( i_char_bytes * i_chars );
+    if( p_vout->p_sys->pi_font == NULL )
+    {
+        intf_ErrMsg("error: %s\n", strerror( ENOMEM ) );
+        XFreeFont( p_display, p_font_info );
+        return( 1 );
+    }
+
+    /* Create drawable and graphic context */
+    gc_values.foreground =      XBlackPixel( p_display, p_vout->p_sys->i_screen );
+    gc_values.background =      XBlackPixel( p_display, p_vout->p_sys->i_screen );
+    gc_values.font =            p_font_info->fid;
+    pixmap = XCreatePixmap( p_display, p_vout->p_sys->window,
+                            i_char_width, i_lines, p_vout->i_screen_depth );
+    gc = XCreateGC( p_display, pixmap,
+                    GCForeground | GCBackground | GCFont, &gc_values );
+
+    /* Clear pixmap and invert graphic context */
+    XFillRectangle( p_display, pixmap, gc, 0, 0, i_char_width, i_lines );
+    XSetForeground( p_display, gc,
+                    XWhitePixel( p_display, p_vout->p_sys->i_screen ) );
+    XSetBackground( p_display, gc,
+                    XBlackPixel( p_display, p_vout->p_sys->i_screen ) );
+
+    /* Draw characters in pixmap, one below the other. The index is an int, so
+     * that the loop ends even if VOUT_MAX_CHAR does not fit in a char */
+    for( i_char = VOUT_MIN_CHAR; i_char < VOUT_MAX_CHAR; i_char++ )
+    {
+        c_char = (char) i_char;
+        XDrawString( p_display, pixmap, gc, 0,
+                     p_font_info->max_bounds.ascent +
+                     (i_char - VOUT_MIN_CHAR) * p_vout->p_sys->i_char_height,
+                     &c_char, 1 );
+    }
+
+    /* Copy characters bitmaps to font descriptor. Lines of an XImage may be
+     * padded, therefore each line is copied separately using the line size
+     * given by the XImage.
+     * NOTE(review): this assumes that the pixel size of the ZPixmap equals
+     * i_bytes_per_pixel - confirm for all supported screen depths. */
+    p_ximage = XGetImage( p_display, pixmap, 0, 0, i_char_width, i_lines,
+                          AllPlanes, ZPixmap );
+    if( p_ximage == NULL )
+    {
+        intf_ErrMsg("error: can't read font bitmap\n");
+    }
+    else if( p_ximage->bytes_per_line < p_vout->p_sys->i_char_bytes_per_line )
+    {
+        intf_ErrMsg("error: unexpected font bitmap format\n");
+    }
+    else
+    {
+        for( i_line = 0; i_line < i_lines; i_line++ )
+        {
+            memcpy( p_vout->p_sys->pi_font
+                    + i_line * p_vout->p_sys->i_char_bytes_per_line,
+                    p_ximage->data + i_line * p_ximage->bytes_per_line,
+                    p_vout->p_sys->i_char_bytes_per_line );
+        }
+        i_result = 0;
+    }
+
+    /* Free resources, unload font and return */
+    if( p_ximage != NULL )
+    {
+        XDestroyImage( p_ximage );
+    }
+    XFreeGC( p_display, gc );
+    XFreePixmap( p_display, pixmap );
+    XFreeFont( p_display, p_font_info );
+    if( i_result )
+    {
+        /* The caller does not free the font descriptor on error */
+        free( p_vout->p_sys->pi_font );
+        p_vout->p_sys->pi_font = NULL;
+    }
+    return( i_result );
+}
+
+
 /*******************************************************************************
  * X11CreateWindow: create X11 vout window
  *******************************************************************************
@@ -436,9 +588,7 @@ static int X11CreateImage( vout_thread_t *p_vout, XImage **pp_ximage )
     int         i_quantum;                       /* XImage quantum (see below) */
   
     /* Allocate memory for image */
-    pb_data = (byte_t *) malloc( p_vout->i_bytes_per_pixel
-                                 * p_vout->i_width 
-                                 * p_vout->i_height );
+    pb_data = (byte_t *) malloc( p_vout->i_bytes_per_line * p_vout->i_height );
     if( !pb_data )                                                    /* error */
     {
         intf_ErrMsg("error: %s\n", strerror(ENOMEM));
@@ -447,13 +597,13 @@ static int X11CreateImage( vout_thread_t *p_vout, XImage **pp_ximage )
 
     /* Optimize the quantum of a scanline regarding its size - the quantum is
        a diviser of the number of bits between the start of two scanlines. */
-    if( !(( p_vout->i_width * p_vout->i_bytes_per_pixel ) % 32) )
+    if( !(( p_vout->i_bytes_per_line ) % 32) )
     {
         i_quantum = 32;
     }
     else    
     {
-        if( !(( p_vout->i_width * p_vout->i_bytes_per_pixel ) % 16) )
+        if( !(( p_vout->i_bytes_per_line ) % 16) )
         {
             i_quantum = 16;
         }
@@ -485,12 +635,6 @@ static int X11CreateImage( vout_thread_t *p_vout, XImage **pp_ximage )
  * The order of the operations respects the recommendations of the mit-shm 
  * document by J.Corbet and K.Packard. Most of the parameters were copied from 
  * there.
- * ?? error on failure:
- * X Error of failed request:  BadAccess (attempt to access private resource denied)
- *  Major opcode of failed request:  129 (MIT-SHM)
- *  Minor opcode of failed request:  1 (X_ShmAttach)
- *  Serial number of failed request:  17
- *  Current serial number in output stream:  18         
  *******************************************************************************/
 static int X11CreateShmImage( vout_thread_t *p_vout, XImage **pp_ximage, 
                               XShmSegmentInfo *p_shm_info)
@@ -544,6 +688,10 @@ static int X11CreateShmImage( vout_thread_t *p_vout, XImage **pp_ximage,
         XDestroyImage( *pp_ximage );
         return( 1 );
     }
+
+    /* Send image to X server. This instruction is required, since having 
+     * built a Shm XImage and not using it causes an error on XCloseDisplay */
+    XFlush( p_vout->p_sys->p_display );    
     return( 0 );
 }
 
diff --git a/src/video_output/yuv_mmx.S b/src/video_output/yuv_mmx.S
new file mode 100644 (file)
index 0000000..992ddf8
--- /dev/null
@@ -0,0 +1,533 @@
+/*
+ *-------------------------------------------------------------------------
+ *cxm12161 -- This function performs YUV12-to-RGB16 color conversion for H26x.
+ *            It handles any format in which there are three fields, the low
+ *            order field being B and fully contained in the low order byte, the
+ *            second field being G and being somewhere in bits 4 through 11,
+ *            and the high order field being R and fully contained in the high
+ *            order byte.
+ *
+ *            The YUV12 input is planar, 8 bits per pel.  The Y plane may have
+ *            a pitch of up to 768.  It may have a width less than or equal
+ *            to the pitch.  It must be DWORD aligned, and preferably QWORD
+ *            aligned.  Pitch and Width must be a multiple of four.  For best
+ *            performance, Pitch should not be 4 more than a multiple of 32.
+ *            Height may be any amount, but must be a multiple of two.  The U
+ *            and V planes may have a different pitch than the Y plane, subject
+ *            to the same limitations.
+ */
+
+//.include iammx.inc
+//.include locals.inc
+
+.data
+    .align 16
+
+/* Jump table indexed by the CCType argument (yuv_2_rgb jumps through it with
+ * "jmp *RGB_formats(,%eax,4)"); the order of the entries matches the CCType
+ * values 0 to 3. */
+RGB_formats: 
+    .long  RGB565
+    .long  RGB555
+    .long  RGB664
+    .long  RGB655
+
+/* Conversion constants, each one repeated to fill a 64 bits MMX operand.
+ * NOTE(review): the multipliers look like the usual YUV to RGB coefficients
+ * with a 6 bits fractional part (74/64, 102/64, 129/64, 52/64, 25/64) -
+ * confirm. VtG, UtG and VtRUtB are not referenced in the part of the routine
+ * which was reviewed. */
+/* 128 in each word: subtracted from the u and v samples */
+Minusg:             .long 0x00800080, 0x00800080
+/* 16 in each byte: subtracted (with saturation) from the y samples */
+Yadd:               .long 0x10101010, 0x10101010
+/* multiplier applied to v-128 to get the chroma part of R */
+VtR:                .long 0x00660066, 0x00660066
+VtG:                .long 0x00340034, 0x00340034
+UtG:                .long 0x00190019, 0x00190019
+/* multiplier applied to u-128 to get the chroma part of B */
+UtB:                .long 0x00810081, 0x00810081
+/* multiplier applied to y-16 */
+Ymul:               .long 0x004a004a, 0x004a004a
+/* word pairs used by pmaddwd on interleaved (u,v) pairs to get the chroma
+ * part of G: 0x19 is applied to u, 0x34 to v */
+UVtG:               .long 0x00340019, 0x00340019
+VtRUtB:             .long 0x01990205, 0x01990205
+/* Saturation masks: a paddusb followed by a psubusb with one of these values
+ * limits each byte to 0x0f, 0x1f or 0x3f (4, 5 or 6 bits) */
+fourbitu:           .quad 0xf0f0f0f0f0f0f0f0
+fivebitu:           .quad 0xe0e0e0e0e0e0e0e0
+sixbitu:            .quad 0xc0c0c0c0c0c0c0c0
+
+.text
+
+/* Stack layout of yuv_2_rgb, as offsets from %esp once the prologue is done:
+ * the function pushes 4 registers (RegisterStorageSize bytes), then reserves
+ * LocalFrameSize bytes for its locals. The first argument is therefore found
+ * after the locals, the saved registers and the 4 bytes of the return
+ * address. */
+#define LocalFrameSize  156
+#define RegisterStorageSize  16
+
+/* Arguments: one 32 bits slot each, in the order of the C prototype */
+#define YPlane                    LocalFrameSize + RegisterStorageSize +  4
+#define UPlane                    LocalFrameSize + RegisterStorageSize +  8
+#define VPlane                    LocalFrameSize + RegisterStorageSize + 12
+#define FrameWidth                LocalFrameSize + RegisterStorageSize + 16 
+#define FrameHeight               LocalFrameSize + RegisterStorageSize + 20
+#define YPitch                    LocalFrameSize + RegisterStorageSize + 24
+#define ChromaPitch               LocalFrameSize + RegisterStorageSize + 28
+#define AspectAdjustmentCount     LocalFrameSize + RegisterStorageSize + 32
+#define ColorConvertedFrame       LocalFrameSize + RegisterStorageSize + 36
+#define DCIOffset                 LocalFrameSize + RegisterStorageSize + 40
+#define CCOffsetToLine0           LocalFrameSize + RegisterStorageSize + 44
+#define CCOPitch                  LocalFrameSize + RegisterStorageSize + 48
+#define CCType                    LocalFrameSize + RegisterStorageSize + 52
+#define EndOfArgList              LocalFrameSize + RegisterStorageSize + 56
+
+/* Locals (on local stack frame)
+ * Offsets 0 to 40 are 32 bits values. temp_mmx is a 48 bytes area (offsets 44
+ * to 91) accessed as six 64 bits slots, temp_mmx to temp_mmx+40. The shift
+ * counts and upper limits are 64 bits values, so that they can be used
+ * directly as memory operands of MMX instructions; the last one ends at
+ * offset 156, which is LocalFrameSize. */
+#define CCOCursor        0
+#define CCOSkipDistance  4
+#define ChromaLineLen    8
+#define YCursor          12
+#define DistanceFromVToU 16
+#define EndOfChromaLine  20
+#define AspectCount      24
+#define AspectBaseCount  28
+#define tmpYCursorEven   32
+#define tmpYCursorOdd    36
+#define tmpCCOPitch      40
+#define temp_mmx         44
+#define RLeftShift       92
+#define GLeftShift       100
+#define RRightShift      108
+#define GRightShift      116
+#define BRightShift      124
+#define RUpperLimit      132
+#define GUpperLimit      140
+#define BUpperLimit      148
+
+
+/*
+ * extern void C MMX_YUV12ToRGB16 (
+ *                                     U8* YPlane,
+ *                                     U8* UPlane,
+ *                                     U8* VPlane,
+ *                                     UN  FrameWidth,
+ *                                     UN  FrameHeight,
+ *                                     UN  YPitch,
+ *                                     UN  VPitch,
+ *                                     UN  AspectAdjustmentCount,
+ *                                     U8* ColorConvertedFrame,
+ *                                     U32 DCIOffset,
+ *                                     U32 CCOffsetToLine0,
+ *                                     IN  CCOPitch,
+ *                                     IN  CCType)
+ *
+ *  The local variables are on the stack,
+ *  The tables are in the one and only data segment.
+ *
+ *  CCOffsetToLine0 is relative to ColorConvertedFrame.
+ *  CCType  used by RGB color convertors to determine the exact conversion type.
+ *    RGB565 = 0 
+ *    RGB555 = 1
+ *    RGB664 = 2
+ *    RGB655 = 3
+ */
+
+.globl yuv_2_rgb
+yuv_2_rgb: 
+  pushl      %esi
+  pushl      %edi
+
+  pushl      %ebp
+  pushl      %ebx
+
+  subl       $LocalFrameSize,%esp
+  movl       CCType(%esp),%eax
+  cmpl       $4,%eax
+  jae        finish
+
+  jmp        *RGB_formats(,%eax,4)
+
+RGB555: 
+  xorl       %eax,%eax
+  movl       $2,%ebx                 /* 10-8 for byte shift */
+  movl       %ebx,RLeftShift(%esp)
+  movl       %eax,RLeftShift+4(%esp)
+  movl       $5,%ebx
+  movl       %ebx,GLeftShift(%esp)
+  movl       %eax,GLeftShift+4(%esp)
+  movl       $9,%ebx
+  movl       %ebx,RRightShift(%esp)
+  movl       %eax,RRightShift+4(%esp)
+  movl       %ebx,GRightShift(%esp)
+  movl       %eax,GRightShift+4(%esp)
+  movl       %ebx,BRightShift(%esp)
+  movl       %eax,BRightShift+4(%esp)
+  movq       fivebitu,%mm0
+  movq       %mm0,RUpperLimit(%esp)
+  movq       %mm0,GUpperLimit(%esp)
+  movq       %mm0,BUpperLimit(%esp)
+  jmp        RGBEND
+
+RGB664: 
+  xorl       %eax,%eax
+  movl       $2,%ebx                 /* 8-6 */
+  movl       %ebx,RLeftShift(%esp)
+  movl       %eax,RLeftShift+4(%esp)
+  movl       $4,%ebx
+  movl       %ebx,GLeftShift(%esp)
+  movl       %eax,GLeftShift+4(%esp)
+  movl       $8,%ebx
+  movl       %ebx,RRightShift(%esp)
+  movl       %eax,RRightShift+4(%esp)
+  movl       %ebx,GRightShift(%esp)
+  movl       %eax,GRightShift+4(%esp)
+  movl       $10,%ebx
+  movl       %ebx,BRightShift(%esp)
+  movl       %eax,BRightShift+4(%esp)
+  movq       sixbitu,%mm0
+  movq       %mm0,RUpperLimit(%esp)
+  movq       %mm0,GUpperLimit(%esp)
+  movq       fourbitu,%mm0
+  movq       %mm0,BUpperLimit(%esp)
+  jmp        RGBEND
+
+RGB655: 
+  xorl       %eax,%eax
+  movl       $2,%ebx                 /* 8-6 */
+  movl       %ebx,RLeftShift(%esp)
+  movl       %eax,RLeftShift+4(%esp)
+  movl       $5,%ebx
+  movl       %ebx,GLeftShift(%esp)
+  movl       %eax,GLeftShift+4(%esp)
+  movl       $8,%ebx
+  movl       %ebx,RRightShift(%esp)
+  movl       %eax,RRightShift+4(%esp)
+  movl       $9,%ebx
+  movl       %ebx,GRightShift(%esp)
+  movl       %eax,GRightShift+4(%esp)
+  movl       %ebx,BRightShift(%esp)
+  movl       %eax,BRightShift+4(%esp)
+  movq       sixbitu,%mm0
+  movq       %mm0,RUpperLimit(%esp)
+  movq       fivebitu,%mm0
+  movq       %mm0,GUpperLimit(%esp)
+  movq       %mm0,BUpperLimit(%esp)
+  jmp        RGBEND
+
+RGB565: 
+  xorl       %eax,%eax
+  movl       $3,%ebx                 /* 8-5 */
+  movl       %ebx,RLeftShift(%esp)
+  movl       %eax,RLeftShift+4(%esp)
+  movl       $5,%ebx
+  movl       %ebx,GLeftShift(%esp)
+  movl       %eax,GLeftShift+4(%esp)
+  movl       $9,%ebx
+  movl       %ebx,RRightShift(%esp)
+  movl       %eax,RRightShift+4(%esp)
+  movl       %ebx,BRightShift(%esp)
+  movl       %eax,BRightShift+4(%esp)
+  movl       $8,%ebx
+  movl       %ebx,GRightShift(%esp)
+  movl       %eax,GRightShift+4(%esp)
+  movq       fivebitu,%mm0
+  movq       %mm0,RUpperLimit(%esp)
+  movq       %mm0,BUpperLimit(%esp)
+  movq       sixbitu,%mm0
+  movq       %mm0,GUpperLimit(%esp)
+//  jmp        RGBEND
+
+RGBEND: 
+  movl       VPlane(%esp),%ebx
+  movl       UPlane(%esp),%ecx
+  subl       %ebx,%ecx
+  movl       %ecx,DistanceFromVToU(%esp)
+
+  movl       ColorConvertedFrame(%esp),%eax
+  addl       DCIOffset(%esp),%eax
+  addl       CCOffsetToLine0(%esp),%eax
+  movl       %eax,CCOCursor(%esp)
+
+
+  movl       YPitch(%esp),%ecx
+  movl       FrameWidth(%esp),%ebx
+  movl       CCOPitch(%esp),%eax
+  subl       %ebx,%eax                   /* CCOPitch-FrameWidth */
+  subl       %ebx,%eax                   /* CCOPitch-2*FrameWidth */
+  sarl       %ebx                        /* FrameWidth/2 */
+  movl       YPlane(%esp),%esi           /* Fetch cursor over luma plane. */
+  movl       %ebx,ChromaLineLen(%esp)    /* FrameWidth/2 */
+  movl       %eax,CCOSkipDistance(%esp)  /* CCOPitch-2*FrameWidth */
+  movl       %esi,YCursor(%esp)
+  movl       AspectAdjustmentCount(%esp),%edx
+  movl       VPlane(%esp),%esi
+
+  cmpl       $1,%edx
+  je         finish
+  movl       %edx,AspectCount(%esp)
+  movl       %edx,AspectBaseCount(%esp)
+  xorl       %eax,%eax
+
+  movl       ChromaLineLen(%esp),%edi
+  movl       %edi,EndOfChromaLine(%esp)
+  movl       CCOCursor(%esp),%edi
+
+  movl       DistanceFromVToU(%esp),%edx
+  movl       YCursor(%esp),%ebp         /* Fetch cursor over luma plane. */
+  movl       FrameWidth(%esp),%ebx
+
+  addl       %ebx,%ebp
+  movl       %ebp,tmpYCursorEven(%esp)
+  movl       YPitch(%esp),%eax
+  addl       %eax,%ebp
+  movl       %ebp,tmpYCursorOdd(%esp)
+
+  sarl       %ebx
+  addl       %ebx,%esi
+  addl       %esi,%edx
+  negl       %ebx
+  movl       %ebx,FrameWidth(%esp)
+
+/*
+ *  Register Usage:
+ */
+
+PrepareChromaLine: 
+  movl       AspectCount(%esp),%ebp
+  movl       FrameWidth(%esp),%ebx
+  subl       $2,%ebp
+  movl       CCOPitch(%esp),%eax
+  movl       %eax,tmpCCOPitch(%esp)
+  ja         continue
+
+  xorl       %eax,%eax
+  addl       AspectAdjustmentCount(%esp),%ebp
+  movl       %eax,tmpCCOPitch(%esp)
+continue: 
+  movl       %ebp,AspectCount(%esp)
+
+do_next_8x2_block: 
+  movl       tmpYCursorEven(%esp),%ebp
+/* here is even line */
+  movd       (%edx,%ebx,),%mm1       /* 4 u values */
+  pxor       %mm0,%mm0               /* mm0=0 */
+  movd       (%esi,%ebx,),%mm2       /* 4 v values */
+  punpcklbw  %mm0,%mm1               /* get 4 unsign u */
+  psubw      Minusg,%mm1             /* get 4 unsign u-128 */
+  punpcklbw  %mm0,%mm2               /* get unsign v */
+  psubw      Minusg,%mm2             /* get unsign v-128 */
+  movq       %mm1,%mm3               /* save the u-128 unsign */
+  movq       %mm1,%mm5               /* save u-128 unsign */
+  punpcklwd  %mm2,%mm1               /* get 2 low u, v unsign pairs */
+  pmaddwd    UVtG,%mm1
+  punpckhwd  %mm2,%mm3               /* create high 2 unsign uv pairs */
+  pmaddwd    UVtG,%mm3
+  movq       %mm2,temp_mmx(%esp)       /* save v-128 */
+  movq       (%ebp,%ebx,2),%mm6      /* mm6 has 8 y pixels */
+  psubusb    Yadd,%mm6               /* mm6 has 8 y-16 pixels */
+  packssdw   %mm3,%mm1               /* packed the results to signed words */
+  movq       %mm6,%mm7               /* save the 8 y-16 pixels */
+  punpcklbw  %mm0,%mm6               /* mm6 has 4 low y-16 unsign */
+  pmullw     Ymul,%mm6
+  punpckhbw  %mm0,%mm7               /* mm7 has 4 high y-16 unsign */
+  pmullw     Ymul,%mm7
+  movq       %mm1,%mm4
+  movq       %mm1,temp_mmx+8(%esp)     /* save 4 chroma G values */
+  punpcklwd  %mm1,%mm1               /* chroma G replicate low 2 */
+  movq       %mm6,%mm0               /* low  y */
+  punpckhwd  %mm4,%mm4               /* chroma G replicate high 2 */
+  movq       %mm7,%mm3               /* high y */
+  psubw      %mm1,%mm6               /* 4 low G */
+  psraw      GRightShift(%esp),%mm6
+  psubw      %mm4,%mm7               /* 4 high G values in signed 16 bit */
+  movq       %mm5,%mm2
+  punpcklwd  %mm5,%mm5               /* replicate the 2 low u pixels */
+  pmullw     UtB,%mm5
+  punpckhwd  %mm2,%mm2
+  psraw      GRightShift(%esp),%mm7
+  pmullw     UtB,%mm2
+  packuswb   %mm7,%mm6               /* mm6: G7 G6 G5 G4 G3 G2 G1 G0 */
+  movq       %mm5,temp_mmx+16(%esp)    /* low chroma B */
+  paddw      %mm0,%mm5               /* 4 low B values in signed 16 bit */
+  movq       %mm2,temp_mmx+40(%esp)    /* high chroma B */
+  paddw      %mm3,%mm2               /* 4 high B values in signed 16 bit */
+  psraw      BRightShift(%esp),%mm5  /* low B scaled down by 6+(8-5) */
+  psraw      BRightShift(%esp),%mm2  /* high B scaled down by 6+(8-5) */
+  packuswb   %mm2,%mm5               /* mm5: B7 B6 B5 B4 B3 B2 B1 B0 */
+
+  movq       temp_mmx(%esp),%mm2       /* 4 v values */
+  movq       %mm5,%mm1               /* save B */
+  movq       %mm2,%mm7
+  punpcklwd  %mm2,%mm2               /* replicate the 2 low v pixels */
+  pmullw     VtR,%mm2
+  punpckhwd  %mm7,%mm7
+  pmullw     VtR,%mm7
+  paddusb    BUpperLimit(%esp),%mm1  /* mm1: saturate B+0FF-15 */
+  movq       %mm2,temp_mmx+24(%esp)    /* low chroma R */
+  paddw      %mm0,%mm2               /* 4 low R values in signed 16 bit */
+  psraw      RRightShift(%esp),%mm2  /* low R scaled down by 6+(8-5) */
+  pxor       %mm4,%mm4               /* mm4=0 for 8->16 conversion */
+  movq       %mm7,temp_mmx+32(%esp)    /* high chroma R */
+  paddw      %mm3,%mm7               /* 4 high R values in signed 16 bit */
+  psraw      RRightShift(%esp),%mm7  /* high R scaled down by 6+(8-5) */
+  psubusb    BUpperLimit(%esp),%mm1
+  packuswb   %mm7,%mm2               /* mm2: R7 R6 R5 R4 R3 R2 R1 R0 */
+  paddusb    GUpperLimit(%esp),%mm6  /* G fast patch ih */
+  psubusb    GUpperLimit(%esp),%mm6  /* fast patch ih */
+  paddusb    RUpperLimit(%esp),%mm2  /* R */
+  psubusb    RUpperLimit(%esp),%mm2
+
+/*
+ * here we are packing from RGB24 to RGB16
+ * input:
+ *         mm6: G7 G6 G5 G4 G3 G2 G1 G0
+ *         mm1: B7 B6 B5 B4 B3 B2 B1 B0
+ *         mm2: R7 R6 R5 R4 R3 R2 R1 R0
+ * assuming 8 original pixels in 0-H representation on mm6, mm5, mm2
+ * when  H=2**xBITS-1 (x is for R G B)
+ * output:
+ *        mm1- result: 4 low RGB16
+ *        mm7- result: 4 high RGB16
+ * using: mm0- zero register
+ *        mm3- temporary results
+ * algorithm:
+ *   for (i=0; i<8; i++) {
+ *     RGB[i]=256*(R[i]<<(8-5))+(G[i]<<5)+B[i];
+ *   }
+ */
+
+  psllq      RLeftShift(%esp),%mm2   /* position R in the most significant
+                                        part of the byte */
+  movq       %mm1,%mm7               /* mm1: Save B */
+
+/*
+ * note: no need for shift to place B on the least significant part of the byte
+ *   R in left position, B in the right position so they can be combined
+ */
+
+  punpcklbw  %mm2,%mm1               /* mm1: 4 low 16 bit RB */
+  pxor       %mm0,%mm0               /* mm0: 0 */
+  punpckhbw  %mm2,%mm7               /* mm7: 4 high 16 bit RB */
+  movq       %mm6,%mm3               /* mm3: G */
+  punpcklbw  %mm0,%mm6               /* mm6: low 4 G 16 bit */
+  psllw      GLeftShift(%esp),%mm6   /* shift low G 5 positions */
+  punpckhbw  %mm0,%mm3               /* mm3: high 4 G 16 bit */
+  por        %mm6,%mm1               /* mm1: low RBG16 */
+  psllw      GLeftShift(%esp),%mm3   /* shift high G 5 positions */
+  por        %mm3,%mm7               /* mm7: high RBG16 */
+
+  movl       tmpYCursorOdd(%esp),%ebp  /* moved to here to save cycles 
+                                           before odd line */
+  movq       %mm1,(%edi)             /* !! aligned */
+
+/*- start odd line */
+  movq       (%ebp,%ebx,2),%mm1      /* mm1 has 8 y pixels */
+  pxor       %mm2,%mm2
+  psubusb    Yadd,%mm1               /* mm1 has 8 pixels y-16 */
+  movq       %mm1,%mm5
+  punpcklbw  %mm2,%mm1               /* get 4 low y-16 unsign pixels word */
+  pmullw     Ymul,%mm1               /* low 4 luminance contribution */
+  punpckhbw  %mm2,%mm5               /* 4 high y-16 */
+  pmullw     Ymul,%mm5               /* high 4 luminance contribution */
+  movq       %mm7,8(%edi)            /* !! aligned */
+  movq       %mm1,%mm0
+  paddw      temp_mmx+24(%esp),%mm0    /* low 4 R */
+  movq       %mm5,%mm6
+  psraw      RRightShift(%esp),%mm0  /* low R scaled down by 6+(8-5) */
+  paddw      temp_mmx+32(%esp),%mm5    /* high 4 R */
+  movq       %mm1,%mm2
+  psraw      RRightShift(%esp),%mm5  /* high R scaled down by 6+(8-5) */
+  paddw      temp_mmx+16(%esp),%mm2    /* low 4 B */
+  packuswb   %mm5,%mm0               /* mm0: R7 R6 R5 R4 R3 R2 R1 R0 */
+  psraw      BRightShift(%esp),%mm2  /* low B scaled down by 6+(8-5) */
+  movq       %mm6,%mm5
+  paddw      temp_mmx+40(%esp),%mm6    /* high 4 B */
+  psraw      BRightShift(%esp),%mm6  /* high B scaled down by 6+(8-5) */
+  movq       temp_mmx+8(%esp),%mm3     /* chroma G  low 4 */
+  packuswb   %mm6,%mm2               /* mm2: B7 B6 B5 B4 B3 B2 B1 B0 */
+  movq       %mm3,%mm4
+  punpcklwd  %mm3,%mm3               /* replicate low 2 */
+  punpckhwd  %mm4,%mm4               /* replicate high 2 */
+  psubw      %mm3,%mm1               /* 4 low G */
+  psraw      GRightShift(%esp),%mm1  /* low G scaled down by 6+(8-5) */
+  psubw      %mm4,%mm5               /* 4 high G values in signed 16 bit */
+  psraw      GRightShift(%esp),%mm5  /* high G scaled down by 6+(8-5) */
+  paddusb    BUpperLimit(%esp),%mm2  /* mm2: saturate B+0FF-15 */
+  packuswb   %mm5,%mm1               /*mm1: G7 G6 G5 G4 G3 G2 G1 G0 */
+  psubusb    BUpperLimit(%esp),%mm2
+  paddusb    GUpperLimit(%esp),%mm1  /* G */
+  psubusb    GUpperLimit(%esp),%mm1
+  paddusb    RUpperLimit(%esp),%mm0  /* R */
+  movl       tmpCCOPitch(%esp),%eax
+  psubusb    RUpperLimit(%esp),%mm0
+
+/*
+ * here we are packing from RGB24 to RGB16
+ *        mm1: G7 G6 G5 G4 G3 G2 G1 G0
+ *        mm2: B7 B6 B5 B4 B3 B2 B1 B0
+ *        mm0: R7 R6 R5 R4 R3 R2 R1 R0
+ * output:
+ *        mm2- result: 4 low RGB16
+ *        mm7- result: 4 high RGB16
+ * using: mm4- zero register
+ *        mm3- temporary results
+ */
+
+  psllq      RLeftShift(%esp),%mm0   /* position R in the most significant 
+                                        part of the byte */
+  movq       %mm2,%mm7               /* mm7: Save B */
+
+/*
+ * note: no need for shift to place B on the least significant part of the byte
+ *   R in left position, B in the right position so they can be combined
+ */
+
+  punpcklbw  %mm0,%mm2               /* mm2: 4 low 16 bit RB */
+  pxor       %mm4,%mm4               /* mm4: 0 */
+  movq       %mm1,%mm3               /* mm3: G */
+  punpckhbw  %mm0,%mm7               /* mm7: 4 high 16 bit RB */
+  punpcklbw  %mm4,%mm1               /* mm1: low 4 G 16 bit */
+  punpckhbw  %mm4,%mm3               /* mm3: high 4 G 16 bit */
+  psllw      GLeftShift(%esp),%mm1   /* shift low G 5 positions */
+  por        %mm1,%mm2               /* mm2: low RBG16 */
+  psllw      GLeftShift(%esp),%mm3   /* shift high G 5 positions */
+  por        %mm3,%mm7               /* mm7: high RBG16 */
+  movq       %mm2,(%edi,%eax,)
+  movq       %mm7,8(%edi,%eax,)      /* aligned */
+  addl       $16,%edi                /* ih take 16 bytes (8 pixels-16 bit) */
+  addl       $4,%ebx                 /* ? to take 4 pixels together
+                                        instead of 2 */
+  jl         do_next_8x2_block
+
+  addl       CCOSkipDistance(%esp),%edi /* go to begin of next line */
+  addl       tmpCCOPitch(%esp),%edi     /* skip odd line (if it is needed) */
+// Leax       AspectCount
+// Lebp       CCOPitch               ; skip odd line
+
+// sub        eax, 2
+// jg         @f
+
+// Addeax     AspectBaseCount
+// xor        ebp, ebp
+
+//@@:
+//  Seax       AspectCount
+//  add        edi, ebp
+
+  movl       YPitch(%esp),%eax
+  movl       tmpYCursorOdd(%esp),%ebp
+  addl       %eax,%ebp               /* skip one line */
+//  lea        ebp, [ebp+2*eax]        /* skip two lines */
+  movl       %ebp,tmpYCursorEven(%esp)
+//  Sebp       tmpYCursorOdd
+
+  addl       %eax,%ebp               /* skip one line */
+  movl       %ebp,tmpYCursorOdd(%esp)
+//  Lebp       tmpYCursorEven
+//  lea        ebp, [ebp+2*eax]
+//  Sebp       tmpYCursorEven
+
+
+  addl       ChromaPitch(%esp),%esi
+  addl       ChromaPitch(%esp),%edx
+
+
+//  Leax       YLimit                  /* Done with last line? */
+//  cmp        ebp, eax
+//  jbe        PrepareChromaLine
+  subw       $2,FrameHeight(%esp)
+  ja         PrepareChromaLine
+
+/******************************************************************************/
+
+finish: 
+  emms
+  addl       $LocalFrameSize,%esp
+
+  popl       %ebx
+  popl       %ebp
+  popl       %edi
+  popl       %esi
+  ret
+