]> git.sesse.net Git - vlc/blob - plugins/yuv/video_yuvmmx.c
c7cf67f79cc3a801ec97a8d95bcfa2248fbdfe91
[vlc] / plugins / yuv / video_yuvmmx.c
1 /*****************************************************************************
2  * video_yuvmmx.c: MMX YUV transformation functions
3  * Provides functions to perform the YUV conversion.
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: video_yuvmmx.c,v 1.7 2001/03/21 13:42:34 sam Exp $
7  *
8  * Authors: Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  *****************************************************************************/
25
26 #define MODULE_NAME yuvmmx
27 #include "modules_inner.h"
28
29 /*****************************************************************************
30  * Preamble
31  *****************************************************************************/
32 #include "defs.h"
33
34 #include <math.h>                                            /* exp(), pow() */
35 #include <errno.h>                                                 /* ENOMEM */
36 #include <stdlib.h>                                                /* free() */
37 #include <string.h>                                            /* strerror() */
38
39 #include "config.h"
40 #include "common.h"
41 #include "threads.h"
42 #include "mtime.h"
43 #include "tests.h"
44
45 #include "modules.h"
46
47 #include "video.h"
48 #include "video_output.h"
49
50 #include "video_common.h"
51
52 #include "intf_msg.h"
53
54 static int     yuv_Probe      ( probedata_t *p_data );
55 static int     yuv_Init       ( vout_thread_t *p_vout );
56 static int     yuv_Reset      ( vout_thread_t *p_vout );
57 static void    yuv_End        ( vout_thread_t *p_vout );
58
59 static void    SetYUV         ( vout_thread_t *p_vout );
60
61 /*****************************************************************************
62  * Functions exported as capabilities. They are declared as static so that
63  * we don't pollute the namespace too much.
64  *****************************************************************************/
65 void _M( yuv_getfunctions )( function_list_t * p_function_list )
66 {
67     p_function_list->pf_probe = yuv_Probe;
68     p_function_list->functions.yuv.pf_init = yuv_Init;
69     p_function_list->functions.yuv.pf_reset = yuv_Reset;
70     p_function_list->functions.yuv.pf_end = yuv_End;
71 }
72
/*****************************************************************************
 * yuv_Probe: test for MMX support and return a score
 *****************************************************************************
 * This function checks whether the CPU supports MMX and returns a score to
 * the plugin manager so that it can choose the most appropriate module.
 *****************************************************************************/
79 static int yuv_Probe( probedata_t *p_data )
80 {
81     /* Test for MMX support in the CPU */
82     if( TestCPU( CPU_CAPABILITY_MMX ) )
83     {
84         if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
85         {
86             return( 999 );
87         }
88         else
89         {
90             return( 100 );
91         }
92     }
93     else
94     {
95         return( 0 );
96     }
97 }
98
/*****************************************************************************
 * yuv_Init: allocate and initialize translation tables
 *****************************************************************************
 * This function allocates memory to store the translation tables, depending
 * on the screen depth, as well as the conversion buffer and the offset array.
 *****************************************************************************/
105 static int yuv_Init( vout_thread_t *p_vout )
106 {
107     size_t      tables_size;                        /* tables size, in bytes */
108
109     /* Computes tables size for 8bbp only */
110     if( p_vout->i_bytes_per_pixel == 1 )
111     {
112         tables_size = sizeof( u8 )
113                 * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
114
115         /* Allocate memory */
116         p_vout->yuv.p_base = malloc( tables_size );
117         if( p_vout->yuv.p_base == NULL )
118         {
119             intf_ErrMsg("error: %s", strerror(ENOMEM));
120             return( 1 );
121         }
122     }
123     else
124     {
125         p_vout->yuv.p_base = NULL;
126     }
127
128     /* Allocate memory for conversion buffer and offset array */
129     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
130     if( p_vout->yuv.p_buffer == NULL )
131     {
132         intf_ErrMsg("error: %s", strerror(ENOMEM));
133         free( p_vout->yuv.p_base );
134         return( 1 );
135     }
136     p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
137     if( p_vout->yuv.p_offset == NULL )
138     {
139         intf_ErrMsg("error: %s", strerror(ENOMEM));
140         free( p_vout->yuv.p_base );
141         free( p_vout->yuv.p_buffer );
142         return( 1 );
143     }
144
145     /* Initialize tables */
146     SetYUV( p_vout );
147     return( 0 );
148 }
149
/*****************************************************************************
 * yuv_End: destroy translation tables
 *****************************************************************************
 * Free memory allocated by yuv_Init.
 *****************************************************************************/
155 static void yuv_End( vout_thread_t *p_vout )
156 {
157     free( p_vout->yuv.p_base );
158     free( p_vout->yuv.p_buffer );
159     free( p_vout->yuv.p_offset );
160 }
161
/*****************************************************************************
 * yuv_Reset: re-initialize translation tables
 *****************************************************************************
 * This function frees the tables and buffers with yuv_End, then allocates
 * and initializes them again with yuv_Init, which also sets the function
 * pointers.
 *****************************************************************************/
168 static int yuv_Reset( vout_thread_t *p_vout )
169 {
170     yuv_End( p_vout );
171     return( yuv_Init( p_vout ) );
172 }
173
174 /*****************************************************************************
175  * SetYUV: compute tables and set function pointers
176  *****************************************************************************/
177 static void SetYUV( vout_thread_t *p_vout )
178 {
179     int         i_index;                                  /* index in tables */
180
181     /*
182      * Set pointers and build YUV tables
183      */
184     if( p_vout->b_grayscale )
185     {
186         /* Grayscale: build gray table */
187         if( p_vout->i_bytes_per_pixel == 1 )
188         {
189             u16 bright[256], transp[256];
190
191             for( i_index = 0; i_index < 256; i_index++)
192             {
193                 bright[ i_index ] = i_index << 8;
194                 transp[ i_index ] = 0;
195             }
196             /* the colors have been allocated, we can set the palette */
197             p_vout->pf_setpalette( p_vout, bright, bright, bright, transp );
198             p_vout->i_white_pixel = 0xff;
199             p_vout->i_black_pixel = 0x00;
200             p_vout->i_gray_pixel = 0x44;
201             p_vout->i_blue_pixel = 0x3b;
202         }
203     }
204     else
205     {
206         /* Color: build red, green and blue tables */
207         if( p_vout->i_bytes_per_pixel == 1 )
208         {
209             #define RGB_MIN 0
210             #define RGB_MAX 255
211             #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
212             #define SHIFT 20
213             #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
214             #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
215             #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
216             #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
217
218             int y,u,v;
219             int r,g,b;
220             int uvr, uvg, uvb;
221             int i = 0, j = 0;
222             u16 red[256], green[256], blue[256], transp[256];
223             unsigned char lookup[PALETTE_TABLE_SIZE];
224
225             p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
226
227             /* this loop calculates the intersection of an YUV box
228              * and the RGB cube. */
229             for ( y = 0; y <= 256; y += 16 )
230             {
231                 for ( u = 0; u <= 256; u += 32 )
232                 for ( v = 0; v <= 256; v += 32 )
233                 {
234                     uvr = (V_RED_COEF*(v-128)) >> SHIFT;
235                     uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
236                     uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
237                     r = y + uvr;
238                     g = y + uvg;
239                     b = y + uvb;
240
241                     if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
242                             && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
243                     {
244                         /* this one should never happen unless someone fscked up my code */
245                         if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette" ); break; }
246
247                         /* clip the colors */
248                         red[j] = CLIP( r );
249                         green[j] = CLIP( g );
250                         blue[j] = CLIP( b );
251                         transp[j] = 0;
252
253                         /* allocate color */
254                         lookup[i] = 1;
255                         p_vout->yuv.yuv.p_rgb8[i++] = j;
256                         j++;
257                     }
258                     else
259                     {
260                         lookup[i] = 0;
261                         p_vout->yuv.yuv.p_rgb8[i++] = 0;
262                     }
263                 }
264                 i += 128-81;
265             }
266
267             /* the colors have been allocated, we can set the palette */
268             /* there will eventually be a way to know which colors
269              * couldn't be allocated and try to find a replacement */
270             p_vout->pf_setpalette( p_vout, red, green, blue, transp );
271
272             p_vout->i_white_pixel = 0xff;
273             p_vout->i_black_pixel = 0x00;
274             p_vout->i_gray_pixel = 0x44;
275             p_vout->i_blue_pixel = 0x3b;
276
277             i = 0;
278             /* this loop allocates colors that got outside
279              * the RGB cube */
280             for ( y = 0; y <= 256; y += 16 )
281             {
282                 for ( u = 0; u <= 256; u += 32 )
283                 {
284                     for ( v = 0; v <= 256; v += 32 )
285                     {
286                         int u2, v2;
287                         int dist, mindist = 100000000;
288
289                         if( lookup[i] || y==0)
290                         {
291                             i++;
292                             continue;
293                         }
294
295                         /* heavy. yeah. */
296                         for( u2 = 0; u2 <= 256; u2 += 32 )
297                         for( v2 = 0; v2 <= 256; v2 += 32 )
298                         {
299                             j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
300                             dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
301                             if( lookup[j] )
302                             /* find the nearest color */
303                             if( dist < mindist )
304                             {
305                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
306                                 mindist = dist;
307                             }
308                             j -= 128;
309                             if( lookup[j] )
310                             /* find the nearest color */
311                             if( dist + 128 < mindist )
312                             {
313                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
314                                 mindist = dist + 128;
315                             }
316                         }
317                         i++;
318                     }
319                 }
320                 i += 128-81;
321             }
322         }
323     }
324
325     /*
326      * Set functions pointers
327      */
328     if( p_vout->b_YCbr)
329     {
330         switch( p_vout->i_bytes_per_pixel)
331         {
332         case 1:
333             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420YCbr8;
334             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422YCbr8;
335             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444YCbr8;
336             break;
337         
338         case 2:
339             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420YCbr16;
340             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422YCbr16;
341             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444YCbr16;
342             break;
343         
344         case 3:
345             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420YCbr24;
346             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422YCbr24;
347             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444YCbr24;
348             break;
349         
350         case 4:
351             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420YCbr32;
352             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422YCbr32;
353             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444YCbr32;
354             break;
355         }
356     }    
357     else if( p_vout->b_grayscale )
358     {
359         /* Grayscale */
360         switch( p_vout->i_bytes_per_pixel )
361         {
362         case 1:
363             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertY4Gray8;
364             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertY4Gray8;
365             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertY4Gray8;
366             break;
367         case 2:
368             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertY4Gray16;
369             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertY4Gray16;
370             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertY4Gray16;
371             break;
372         case 3:
373             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB24;
374             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertY4Gray24;
375             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertY4Gray24;
376             break;
377         case 4:
378             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB32;
379             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertY4Gray32;
380             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertY4Gray32;
381             break;
382         }
383     }
384     else
385     {
386         /* Color */
387         switch( p_vout->i_bytes_per_pixel )
388         {
389         case 1:
390             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB8;
391             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422RGB8;
392             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444RGB8;
393             break;
394         case 2:
395             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB16;
396             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422RGB16;
397             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444RGB16;
398             break;
399         case 3:
400             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB24;
401             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422RGB24;
402             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444RGB24;
403             break;
404         case 4:
405             p_vout->yuv.pf_yuv420 = (vout_yuv_convert_t *) ConvertYUV420RGB32;
406             p_vout->yuv.pf_yuv422 = (vout_yuv_convert_t *) ConvertYUV422RGB32;
407             p_vout->yuv.pf_yuv444 = (vout_yuv_convert_t *) ConvertYUV444RGB32;
408             break;
409         }
410     }
411 }
412
/*****************************************************************************
 * SetOffset: build offset array for conversion functions
 *****************************************************************************
 * This function builds the offset array used by the conversion functions and
 * sets the horizontal and vertical scaling indicators. The b_double
 * parameter is not used by this implementation.
 *****************************************************************************/
void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
                boolean_t *pb_h_scaling, int *pi_v_scaling,
                int *p_offset, boolean_t b_double )
{
    int  i_left;                              /* entries still to be handled */
    int  i_error;                                 /* running error accumulator */
    int *p_cur = p_offset;                          /* write cursor in array */

    (void)b_double;                     /* not used by this implementation */

    /*
     * Horizontal part: i_width is compared against i_pic_width and the
     * offset array is only filled in when the two differ
     */
    if( i_pic_width == i_width )
    {
        /* Same width: no horizontal scaling, the array is left untouched */
        *pb_h_scaling = 0;
    }
    else
    {
        *pb_h_scaling = 1;

        if( i_pic_width > i_width )
        {
            /* Extension: for each of the i_width steps, write a run of 0
             * entries terminated by a 1 */
            i_error = i_pic_width;
            for( i_left = i_width; i_left--; )
            {
                for( i_error -= i_width; i_error > 0; i_error -= i_width )
                {
                    *p_cur++ = 0;
                }
                *p_cur++ = 1;
                i_error += i_pic_width;
            }
        }
        else
        {
            /* Reduction: one entry per i_pic_width step, holding a count
             * that starts at 1 */
            i_error = i_width;
            for( i_left = i_pic_width; i_left--; )
            {
                int i_step = 1;

                for( i_error -= i_pic_width; i_error > 0;
                     i_error -= i_pic_width )
                {
                    i_step++;
                }
                *p_cur++ = i_step;
                i_error += i_width;
            }
        }
    }

    /*
     * Vertical part: 0 when heights match, 1 when i_pic_height is larger
     * than i_height, -1 when it is smaller
     */
    *pi_v_scaling = ( i_pic_height > i_height ) - ( i_pic_height < i_height );
}
483