]> git.sesse.net Git - vlc/blob - plugins/yuv/video_yuvmmx.c
* AC3 IMDCT and downmix functions are now in plugins, --imdct and
[vlc] / plugins / yuv / video_yuvmmx.c
1 /*****************************************************************************
2  * video_yuvmmx.c: MMX YUV transformation functions
3  * Provides functions to perform the YUV conversion.
4  *****************************************************************************
5  * Copyright (C) 1999, 2000 VideoLAN
6  * $Id: video_yuvmmx.c,v 1.9 2001/05/15 16:19:42 sam Exp $
7  *
8  * Authors: Samuel Hocevar <sam@zoy.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  *****************************************************************************/
25
26 #define MODULE_NAME yuvmmx
27 #include "modules_inner.h"
28
29 /*****************************************************************************
30  * Preamble
31  *****************************************************************************/
32 #include "defs.h"
33
34 #include <math.h>                                            /* exp(), pow() */
35 #include <errno.h>                                                 /* ENOMEM */
36 #include <stdlib.h>                                                /* free() */
37 #include <string.h>                                            /* strerror() */
38
39 #include "config.h"
40 #include "common.h"
41 #include "threads.h"
42 #include "mtime.h"
43 #include "tests.h"
44
45 #include "modules.h"
46
47 #include "video.h"
48 #include "video_output.h"
49
50 #include "video_common.h"
51
52 #include "intf_msg.h"
53
54 static int     yuv_Probe      ( probedata_t *p_data );
55 static int     yuv_Init       ( vout_thread_t *p_vout );
56 static int     yuv_Reset      ( vout_thread_t *p_vout );
57 static void    yuv_End        ( vout_thread_t *p_vout );
58
59 static void    SetYUV         ( vout_thread_t *p_vout );
60
61 /*****************************************************************************
62  * Functions exported as capabilities. They are declared as static so that
63  * we don't pollute the namespace too much.
64  *****************************************************************************/
65 void _M( yuv_getfunctions )( function_list_t * p_function_list )
66 {
67     p_function_list->pf_probe = yuv_Probe;
68     p_function_list->functions.yuv.pf_init = yuv_Init;
69     p_function_list->functions.yuv.pf_reset = yuv_Reset;
70     p_function_list->functions.yuv.pf_end = yuv_End;
71 }
72
73 /*****************************************************************************
74  * yuv_Probe: tests probe the audio device and return a score
75  *****************************************************************************
76  * This function tries to open the DSP and returns a score to the plugin
77  * manager so that it can choose the most appropriate one.
78  *****************************************************************************/
79 static int yuv_Probe( probedata_t *p_data )
80 {
81     /* Test for MMX support in the CPU */
82     if( !TestCPU( CPU_CAPABILITY_MMX ) )
83     {
84         return( 0 );
85     }
86
87     if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
88     {
89         return( 999 );
90     }
91
92     return( 100 );
93 }
94
95 /*****************************************************************************
96  * yuv_Init: allocate and initialize translations tables
97  *****************************************************************************
98  * This function will allocate memory to store translation tables, depending
99  * of the screen depth.
100  *****************************************************************************/
101 static int yuv_Init( vout_thread_t *p_vout )
102 {
103     size_t      tables_size;                        /* tables size, in bytes */
104
105     /* Computes tables size for 8bbp only */
106     if( p_vout->i_bytes_per_pixel == 1 )
107     {
108         tables_size = sizeof( u8 )
109                 * (p_vout->b_grayscale ? GRAY_TABLE_SIZE : PALETTE_TABLE_SIZE);
110
111         /* Allocate memory */
112         p_vout->yuv.p_base = malloc( tables_size );
113         if( p_vout->yuv.p_base == NULL )
114         {
115             intf_ErrMsg("error: %s", strerror(ENOMEM));
116             return( 1 );
117         }
118     }
119     else
120     {
121         p_vout->yuv.p_base = NULL;
122     }
123
124     /* Allocate memory for conversion buffer and offset array */
125     p_vout->yuv.p_buffer = malloc( VOUT_MAX_WIDTH * p_vout->i_bytes_per_pixel );
126     if( p_vout->yuv.p_buffer == NULL )
127     {
128         intf_ErrMsg("error: %s", strerror(ENOMEM));
129         free( p_vout->yuv.p_base );
130         return( 1 );
131     }
132     p_vout->yuv.p_offset = malloc( p_vout->i_width * sizeof( int ) );
133     if( p_vout->yuv.p_offset == NULL )
134     {
135         intf_ErrMsg("error: %s", strerror(ENOMEM));
136         free( p_vout->yuv.p_base );
137         free( p_vout->yuv.p_buffer );
138         return( 1 );
139     }
140
141     /* Initialize tables */
142     SetYUV( p_vout );
143     return( 0 );
144 }
145
146 /*****************************************************************************
147  * yuv_End: destroy translations tables
148  *****************************************************************************
149  * Free memory allocated by yuv_CCreate.
150  *****************************************************************************/
151 static void yuv_End( vout_thread_t *p_vout )
152 {
153     free( p_vout->yuv.p_base );
154     free( p_vout->yuv.p_buffer );
155     free( p_vout->yuv.p_offset );
156 }
157
158 /*****************************************************************************
159  * yuv_Reset: re-initialize translations tables
160  *****************************************************************************
161  * This function will initialize the tables allocated by vout_CreateTables and
162  * set functions pointers.
163  *****************************************************************************/
164 static int yuv_Reset( vout_thread_t *p_vout )
165 {
166     yuv_End( p_vout );
167     return( yuv_Init( p_vout ) );
168 }
169
170 /*****************************************************************************
171  * SetYUV: compute tables and set function pointers
172  *****************************************************************************/
173 static void SetYUV( vout_thread_t *p_vout )
174 {
175     int         i_index;                                  /* index in tables */
176
177     /*
178      * Set pointers and build YUV tables
179      */
180     if( p_vout->b_grayscale )
181     {
182         /* Grayscale: build gray table */
183         if( p_vout->i_bytes_per_pixel == 1 )
184         {
185             u16 bright[256], transp[256];
186
187             for( i_index = 0; i_index < 256; i_index++)
188             {
189                 bright[ i_index ] = i_index << 8;
190                 transp[ i_index ] = 0;
191             }
192             /* the colors have been allocated, we can set the palette */
193             p_vout->pf_setpalette( p_vout, bright, bright, bright, transp );
194             p_vout->i_white_pixel = 0xff;
195             p_vout->i_black_pixel = 0x00;
196             p_vout->i_gray_pixel = 0x44;
197             p_vout->i_blue_pixel = 0x3b;
198         }
199     }
200     else
201     {
202         /* Color: build red, green and blue tables */
203         if( p_vout->i_bytes_per_pixel == 1 )
204         {
205             #define RGB_MIN 0
206             #define RGB_MAX 255
207             #define CLIP( x ) ( ((x < 0) ? 0 : (x > 255) ? 255 : x) << 8 )
208             #define SHIFT 20
209             #define U_GREEN_COEF    ((int)(-0.391 * (1<<SHIFT) / 1.164))
210             #define U_BLUE_COEF     ((int)(2.018 * (1<<SHIFT) / 1.164))
211             #define V_RED_COEF      ((int)(1.596 * (1<<SHIFT) / 1.164))
212             #define V_GREEN_COEF    ((int)(-0.813 * (1<<SHIFT) / 1.164))
213
214             int y,u,v;
215             int r,g,b;
216             int uvr, uvg, uvb;
217             int i = 0, j = 0;
218             u16 red[256], green[256], blue[256], transp[256];
219             unsigned char lookup[PALETTE_TABLE_SIZE];
220
221             p_vout->yuv.yuv.p_rgb8 = (u8 *)p_vout->yuv.p_base;
222
223             /* this loop calculates the intersection of an YUV box
224              * and the RGB cube. */
225             for ( y = 0; y <= 256; y += 16 )
226             {
227                 for ( u = 0; u <= 256; u += 32 )
228                 for ( v = 0; v <= 256; v += 32 )
229                 {
230                     uvr = (V_RED_COEF*(v-128)) >> SHIFT;
231                     uvg = (U_GREEN_COEF*(u-128) + V_GREEN_COEF*(v-128)) >> SHIFT;
232                     uvb = (U_BLUE_COEF*(u-128)) >> SHIFT;
233                     r = y + uvr;
234                     g = y + uvg;
235                     b = y + uvb;
236
237                     if( r >= RGB_MIN && g >= RGB_MIN && b >= RGB_MIN
238                             && r <= RGB_MAX && g <= RGB_MAX && b <= RGB_MAX )
239                     {
240                         /* this one should never happen unless someone fscked up my code */
241                         if(j == 256) { intf_ErrMsg( "vout error: no colors left to build palette" ); break; }
242
243                         /* clip the colors */
244                         red[j] = CLIP( r );
245                         green[j] = CLIP( g );
246                         blue[j] = CLIP( b );
247                         transp[j] = 0;
248
249                         /* allocate color */
250                         lookup[i] = 1;
251                         p_vout->yuv.yuv.p_rgb8[i++] = j;
252                         j++;
253                     }
254                     else
255                     {
256                         lookup[i] = 0;
257                         p_vout->yuv.yuv.p_rgb8[i++] = 0;
258                     }
259                 }
260                 i += 128-81;
261             }
262
263             /* the colors have been allocated, we can set the palette */
264             /* there will eventually be a way to know which colors
265              * couldn't be allocated and try to find a replacement */
266             p_vout->pf_setpalette( p_vout, red, green, blue, transp );
267
268             p_vout->i_white_pixel = 0xff;
269             p_vout->i_black_pixel = 0x00;
270             p_vout->i_gray_pixel = 0x44;
271             p_vout->i_blue_pixel = 0x3b;
272
273             i = 0;
274             /* this loop allocates colors that got outside
275              * the RGB cube */
276             for ( y = 0; y <= 256; y += 16 )
277             {
278                 for ( u = 0; u <= 256; u += 32 )
279                 {
280                     for ( v = 0; v <= 256; v += 32 )
281                     {
282                         int u2, v2;
283                         int dist, mindist = 100000000;
284
285                         if( lookup[i] || y==0)
286                         {
287                             i++;
288                             continue;
289                         }
290
291                         /* heavy. yeah. */
292                         for( u2 = 0; u2 <= 256; u2 += 32 )
293                         for( v2 = 0; v2 <= 256; v2 += 32 )
294                         {
295                             j = ((y>>4)<<7) + (u2>>5)*9 + (v2>>5);
296                             dist = (u-u2)*(u-u2) + (v-v2)*(v-v2);
297                             if( lookup[j] )
298                             /* find the nearest color */
299                             if( dist < mindist )
300                             {
301                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
302                                 mindist = dist;
303                             }
304                             j -= 128;
305                             if( lookup[j] )
306                             /* find the nearest color */
307                             if( dist + 128 < mindist )
308                             {
309                                 p_vout->yuv.yuv.p_rgb8[i] = p_vout->yuv.yuv.p_rgb8[j];
310                                 mindist = dist + 128;
311                             }
312                         }
313                         i++;
314                     }
315                 }
316                 i += 128-81;
317             }
318         }
319     }
320
321     /*
322      * Set functions pointers
323      */
324     if( p_vout->b_YCbr)
325     {
326         switch( p_vout->i_bytes_per_pixel)
327         {
328 #define _X( foo ) (vout_yuv_convert_t *) _M( foo )
329         case 1:
330             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420YCbr8 );
331             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422YCbr8 );
332             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444YCbr8 );
333             break;
334         
335         case 2:
336             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420YCbr16 );
337             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422YCbr16 );
338             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444YCbr16 );
339             break;
340         
341         case 3:
342             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420YCbr24 );
343             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422YCbr24 );
344             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444YCbr24 );
345             break;
346         
347         case 4:
348             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420YCbr32 );
349             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422YCbr32 );
350             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444YCbr32 );
351             break;
352 #undef _X
353         }
354     }    
355     else if( p_vout->b_grayscale )
356     {
357         /* Grayscale */
358         switch( p_vout->i_bytes_per_pixel )
359         {
360 #define _X( foo ) (vout_yuv_convert_t *) _M( foo )
361         case 1:
362             p_vout->yuv.pf_yuv420 = _X( ConvertY4Gray8 );
363             p_vout->yuv.pf_yuv422 = _X( ConvertY4Gray8 );
364             p_vout->yuv.pf_yuv444 = _X( ConvertY4Gray8 );
365             break;
366         case 2:
367             p_vout->yuv.pf_yuv420 = _X( ConvertY4Gray16 );
368             p_vout->yuv.pf_yuv422 = _X( ConvertY4Gray16 );
369             p_vout->yuv.pf_yuv444 = _X( ConvertY4Gray16 );
370             break;
371         case 3:
372             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB24 );
373             p_vout->yuv.pf_yuv422 = _X( ConvertY4Gray24 );
374             p_vout->yuv.pf_yuv444 = _X( ConvertY4Gray24 );
375             break;
376         case 4:
377             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB32 );
378             p_vout->yuv.pf_yuv422 = _X( ConvertY4Gray32 );
379             p_vout->yuv.pf_yuv444 = _X( ConvertY4Gray32 );
380             break;
381 #undef _X
382         }
383     }
384     else
385     {
386         /* Color */
387         switch( p_vout->i_bytes_per_pixel )
388         {
389 #define _X( foo ) (vout_yuv_convert_t *) _M( foo )
390         case 1:
391             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB8 );
392             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422RGB8 );
393             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444RGB8 );
394             break;
395         case 2:
396             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB16 );
397             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422RGB16 );
398             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444RGB16 );
399             break;
400         case 3:
401             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB24 );
402             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422RGB24 );
403             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444RGB24 );
404             break;
405         case 4:
406             p_vout->yuv.pf_yuv420 = _X( ConvertYUV420RGB32 );
407             p_vout->yuv.pf_yuv422 = _X( ConvertYUV422RGB32 );
408             p_vout->yuv.pf_yuv444 = _X( ConvertYUV444RGB32 );
409             break;
410 #undef _X
411         }
412     }
413 }
414
415 /*****************************************************************************
416  * SetOffset: build offset array for conversion functions
417  *****************************************************************************
418  * This function will build an offset array used in later conversion functions.
419  * It will also set horizontal and vertical scaling indicators. If b_double
420  * is set, the p_offset structure has interleaved Y and U/V offsets.
421  *****************************************************************************/
422 void _M( SetOffset )( int i_width, int i_height, int i_pic_width,
423                       int i_pic_height, boolean_t *pb_h_scaling,
424                       int *pi_v_scaling, int *p_offset, boolean_t b_double )
425 {
426     int i_x;                                    /* x position in destination */
427     int i_scale_count;                                     /* modulo counter */
428
429     /*
430      * Prepare horizontal offset array
431      */
432     if( i_pic_width - i_width == 0 )
433     {
434         /* No horizontal scaling: YUV conversion is done directly to picture */
435         *pb_h_scaling = 0;
436     }
437     else if( i_pic_width - i_width > 0 )
438     {
439         /* Prepare scaling array for horizontal extension */
440         *pb_h_scaling =  1;
441         i_scale_count =  i_pic_width;
442         for( i_x = i_width; i_x--; )
443         {
444             while( (i_scale_count -= i_width) > 0 )
445             {
446                 *p_offset++ = 0;
447             }
448             *p_offset++ = 1;
449             i_scale_count += i_pic_width;
450         }
451     }
452     else /* if( i_pic_width - i_width < 0 ) */
453     {
454         /* Prepare scaling array for horizontal reduction */
455         *pb_h_scaling =  1;
456         i_scale_count =  i_width;
457         for( i_x = i_pic_width; i_x--; )
458         {
459             *p_offset = 1;
460             while( (i_scale_count -= i_pic_width) > 0 )
461             {
462                 *p_offset += 1;
463             }
464             p_offset++;
465             i_scale_count += i_width;
466         }
467     }
468
469     /*
470      * Set vertical scaling indicator
471      */
472     if( i_pic_height - i_height == 0 )
473     {
474         *pi_v_scaling = 0;
475     }
476     else if( i_pic_height - i_height > 0 )
477     {
478         *pi_v_scaling = 1;
479     }
480     else /* if( i_pic_height - i_height < 0 ) */
481     {
482         *pi_v_scaling = -1;
483     }
484 }
485