]> git.sesse.net Git - vlc/blob - src/video_decoder/video_decoder.c
. SPU decoder now uses Meuuh's GetChunk() code.
[vlc] / src / video_decoder / video_decoder.c
1 /*****************************************************************************
2  * video_decoder.c : video decoder thread
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: video_decoder.c,v 1.43 2000/12/23 03:10:59 sam Exp $
6  *
7  * Authors: Christophe Massiot <massiot@via.ecp.fr>
8  *          Gaël Hendryckx <jimmy@via.ecp.fr>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  * 
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #include "defs.h"
29
#include <errno.h>                                                  /* errno */
#include <stdlib.h>                                      /* malloc(), free() */
#include <string.h>                                            /* strerror() */
#include <unistd.h>                                      /* getpid(), nice() */
#include <sys/types.h>                        /* on BSD, uio.h needs types.h */
#include <sys/uio.h>                                          /* for input.h */
35
36 #include "config.h"
37 #include "common.h"
38 #include "threads.h"
39 #include "mtime.h"
40 #include "plugins.h"
41
42 #include "intf_msg.h"
43
44 #include "stream_control.h"
45 #include "input_ext-dec.h"
46
47 #include "video.h"
48 #include "video_output.h"
49
50 #include "vdec_idct.h"
51 #include "video_decoder.h"
52 #include "vdec_motion.h"
53
54 #include "vpar_blocks.h"
55 #include "vpar_headers.h"
56 #include "vpar_synchro.h"
57 #include "video_parser.h"
58 #include "video_fifo.h"
59
60 /*
61  * Local prototypes
62  */
63 #ifdef VDEC_SMP
64 static int      vdec_InitThread     ( vdec_thread_t *p_vdec );
65 #endif
66 static void     RunThread           ( vdec_thread_t *p_vdec );
67 static void     ErrorThread         ( vdec_thread_t *p_vdec );
68 static void     EndThread           ( vdec_thread_t *p_vdec );
69
70 /*****************************************************************************
71  * vdec_CreateThread: create a video decoder thread
72  *****************************************************************************
73  * This function creates a new video decoder thread, and returns a pointer
74  * to its description. On error, it returns NULL.
75  * Following configuration properties are used:
76  * XXX??
77  *****************************************************************************/
78 vdec_thread_t * vdec_CreateThread( vpar_thread_t *p_vpar /*, int *pi_status */ )
79 {
80     vdec_thread_t *     p_vdec;
81
82     intf_DbgMsg("vdec debug: creating video decoder thread");
83
84     /* Allocate the memory needed to store the thread's structure */
85     if ( (p_vdec = (vdec_thread_t *)malloc( sizeof(vdec_thread_t) )) == NULL )
86     {
87         intf_ErrMsg("vdec error: not enough memory for vdec_CreateThread() to create the new thread");
88         return( NULL );
89     }
90
91     /*
92      * Initialize the thread properties
93      */
94     p_vdec->b_die = 0;
95     p_vdec->b_error = 0;
96
97     /*
98      * Initialize the parser properties
99      */
100     p_vdec->p_vpar = p_vpar;
101
102     /* Spawn the video decoder thread */
103     if ( vlc_thread_create(&p_vdec->thread_id, "video decoder",
104          (vlc_thread_func_t)RunThread, (void *)p_vdec) )
105     {
106         intf_ErrMsg("vdec error: can't spawn video decoder thread");
107         free( p_vdec );
108         return( NULL );
109     }
110
111     intf_DbgMsg("vdec debug: video decoder thread (%p) created", p_vdec);
112     return( p_vdec );
113 }
114
115 /*****************************************************************************
116  * vdec_DestroyThread: destroy a video decoder thread
117  *****************************************************************************
118  * Destroy and terminate thread. This function will return 0 if the thread could
119  * be destroyed, and non 0 else. The last case probably means that the thread
120  * was still active, and another try may succeed.
121  *****************************************************************************/
122 void vdec_DestroyThread( vdec_thread_t *p_vdec /*, int *pi_status */ )
123 {
124     intf_DbgMsg("vdec debug: requesting termination of video decoder thread %p", p_vdec);
125
126     /* Ask thread to kill itself */
127     p_vdec->b_die = 1;
128
129 #ifdef VDEC_SMP
130     /* Make sure the decoder thread leaves the vpar_GetMacroblock() function */
131     vlc_mutex_lock( &(p_vdec->p_vpar->vfifo.lock) );
132     vlc_cond_signal( &(p_vdec->p_vpar->vfifo.wait) );
133     vlc_mutex_unlock( &(p_vdec->p_vpar->vfifo.lock) );
134 #endif
135
136     /* Waiting for the decoder thread to exit */
137     /* Remove this as soon as the "status" flag is implemented */
138     vlc_thread_join( p_vdec->thread_id );
139 }
140
141 /* following functions are local */
142
143 /*****************************************************************************
144  * vdec_InitThread: initialize video decoder thread
145  *****************************************************************************
146  * This function is called from RunThread and performs the second step of the
147  * initialization. It returns 0 on success. Note that the thread's flag are not
148  * modified inside this function.
149  *****************************************************************************/
150 #ifdef VDEC_SMP
151 static int vdec_InitThread( vdec_thread_t *p_vdec )
152 #else
153 int vdec_InitThread( vdec_thread_t *p_vdec )
154 #endif
155 {
156 #ifndef HAVE_MMX
157     int i_dummy;
158 #endif
159
160     intf_DbgMsg("vdec debug: initializing video decoder thread %p", p_vdec);
161
162 #ifndef HAVE_MMX
163     /* Init crop table */
164     p_vdec->pi_crop = p_vdec->pi_crop_buf + (VDEC_CROPRANGE >> 1);
165     for( i_dummy = -(VDEC_CROPRANGE >> 1); i_dummy < 0; i_dummy++ )
166     {
167         p_vdec->pi_crop[i_dummy] = 0;
168     }
169     for( ; i_dummy < 255; i_dummy ++ )
170     {
171         p_vdec->pi_crop[i_dummy] = i_dummy;
172     }
173     for( ; i_dummy < (VDEC_CROPRANGE >> 1) -1; i_dummy++ )
174     {
175         p_vdec->pi_crop[i_dummy] = 255;
176     }
177 #endif
178
179 #ifdef VDEC_SMP
180     /* Re-nice ourself */
181     if( nice(VDEC_NICE) == -1 )
182     {
183         intf_WarnMsg( 2, "vdec warning : couldn't nice() (%s)",
184                       strerror(errno) );
185     }
186 #endif
187
188     /* Mark thread as running and return */
189     intf_DbgMsg("vdec debug: InitThread(%p) succeeded", p_vdec);
190     return( 0 );
191 }
192
193 /*****************************************************************************
194  * ErrorThread: RunThread() error loop
195  *****************************************************************************
196  * This function is called when an error occured during thread main's loop. The
197  * thread can still receive feed, but must be ready to terminate as soon as
198  * possible.
199  *****************************************************************************/
200 static void ErrorThread( vdec_thread_t *p_vdec )
201 {
202     macroblock_t *       p_mb;
203
204     /* Wait until a `die' order */
205     while( !p_vdec->b_die )
206     {
207         p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo );
208         vpar_DestroyMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
209     }
210 }
211
212 /*****************************************************************************
213  * EndThread: thread destruction
214  *****************************************************************************
215  * This function is called when the thread ends after a sucessful
216  * initialization.
217  *****************************************************************************/
218 static void EndThread( vdec_thread_t *p_vdec )
219 {
220     intf_DbgMsg("vdec debug: EndThread(%p)", p_vdec);
221 }
222
223 /*****************************************************************************
224  * AddBlock : add a block
225  *****************************************************************************/
226 #ifndef HAVE_MMX
227 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
228                                  yuv_data_t * p_data, int i_incr )
229 {
230     int i_x, i_y;
231
232     for( i_y = 0; i_y < 8; i_y++ )
233     {
234         for( i_x = 0; i_x < 8; i_x++ )
235         {
236             *p_data = p_vdec->pi_crop[*p_data + *p_block++];
237             p_data++;
238         }
239         p_data += i_incr;
240     }
241 }
242 #else
243 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
244                                           yuv_data_t * p_data, int i_incr )
245 {
246     asm __volatile__ (
247             "pxor       %%mm7,%%mm7\n\t"
248
249             "movq       (%0),%%mm1\n\t"
250             "movq       %%mm1,%%mm2\n\t"
251             "punpckhbw  %%mm7,%%mm1\n\t"
252             "punpcklbw  %%mm7,%%mm2\n\t"
253             "paddw      (%1),%%mm2\n\t"
254             "paddw      8(%1),%%mm1\n\t"
255             "packuswb   %%mm1,%%mm2\n\t"
256             "movq       %%mm2,(%0)\n\t"
257             "addl       %2,%0\n\t"
258
259             "movq       (%0),%%mm1\n\t"
260             "movq       %%mm1,%%mm2\n\t"
261             "punpckhbw  %%mm7,%%mm1\n\t"
262             "punpcklbw  %%mm7,%%mm2\n\t"
263             "paddw      16(%1),%%mm2\n\t"
264             "paddw      24(%1),%%mm1\n\t"
265             "packuswb   %%mm1,%%mm2\n\t"
266             "movq       %%mm2,(%0)\n\t"
267             "addl       %2,%0\n\t"
268
269             "movq       (%0),%%mm1\n\t"
270             "movq       %%mm1,%%mm2\n\t"
271             "punpckhbw  %%mm7,%%mm1\n\t"
272             "punpcklbw  %%mm7,%%mm2\n\t"
273             "paddw      32(%1),%%mm2\n\t"
274             "paddw      40(%1),%%mm1\n\t"
275             "packuswb   %%mm1,%%mm2\n\t"
276             "movq       %%mm2,(%0)\n\t"
277             "addl       %2,%0\n\t"
278
279             "movq       (%0),%%mm1\n\t"
280             "movq       %%mm1,%%mm2\n\t"
281             "punpckhbw  %%mm7,%%mm1\n\t"
282             "punpcklbw  %%mm7,%%mm2\n\t"
283             "paddw      48(%1),%%mm2\n\t"
284             "paddw      56(%1),%%mm1\n\t"
285             "packuswb   %%mm1,%%mm2\n\t"
286             "movq       %%mm2,(%0)\n\t"
287             "addl       %2,%0\n\t"
288
289             "movq       (%0),%%mm1\n\t"
290             "movq       %%mm1,%%mm2\n\t"
291             "punpckhbw  %%mm7,%%mm1\n\t"
292             "punpcklbw  %%mm7,%%mm2\n\t"
293             "paddw      64(%1),%%mm2\n\t"
294             "paddw      72(%1),%%mm1\n\t"
295             "packuswb   %%mm1,%%mm2\n\t"
296             "movq       %%mm2,(%0)\n\t"
297             "addl       %2,%0\n\t"
298
299             "movq       (%0),%%mm1\n\t"
300             "movq       %%mm1,%%mm2\n\t"
301             "punpckhbw  %%mm7,%%mm1\n\t"
302             "punpcklbw  %%mm7,%%mm2\n\t"
303             "paddw      80(%1),%%mm2\n\t"
304             "paddw      88(%1),%%mm1\n\t"
305             "packuswb   %%mm1,%%mm2\n\t"
306             "movq       %%mm2,(%0)\n\t"
307             "addl       %2,%0\n\t"
308
309             "movq       (%0),%%mm1\n\t"
310             "movq       %%mm1,%%mm2\n\t"
311             "punpckhbw  %%mm7,%%mm1\n\t"
312             "punpcklbw  %%mm7,%%mm2\n\t"
313             "paddw      96(%1),%%mm2\n\t"
314             "paddw      104(%1),%%mm1\n\t"
315             "packuswb   %%mm1,%%mm2\n\t"
316             "movq       %%mm2,(%0)\n\t"
317             "addl       %2,%0\n\t"
318
319             "movq       (%0),%%mm1\n\t"
320             "movq       %%mm1,%%mm2\n\t"
321             "punpckhbw  %%mm7,%%mm1\n\t"
322             "punpcklbw  %%mm7,%%mm2\n\t"
323             "paddw      112(%1),%%mm2\n\t"
324             "paddw      120(%1),%%mm1\n\t"
325             "packuswb   %%mm1,%%mm2\n\t"
326             "movq       %%mm2,(%0)\n\t"
327
328             //"emms"
329             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
330 }
331 #endif
332
333
334 /*****************************************************************************
335  * CopyBlock : copy a block
336  *****************************************************************************/
337 #ifndef HAVE_MMX
338 static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
339                                   yuv_data_t * p_data, int i_incr )
340 {
341     int i_x, i_y;
342
343     for( i_y = 0; i_y < 8; i_y++ )
344     {
345         for( i_x = 0; i_x < 8; i_x++ )
346         {
347             *p_data++ = p_vdec->pi_crop[*p_block++];
348         }
349         p_data += i_incr;
350     }
351 }
352 #else
353 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
354                                           yuv_data_t * p_data, int i_incr )
355 {
356     asm __volatile__ (
357             "movq         (%1),%%mm0\n\t"
358             "packuswb   8(%1),%%mm0\n\t"
359             "movq        %%mm0,(%0)\n\t"
360             "addl           %2,%0\n\t"
361
362             "movq        16(%1),%%mm0\n\t"
363             "packuswb   24(%1),%%mm0\n\t"
364             "movq        %%mm0,(%0)\n\t"
365             "addl           %2,%0\n\t"
366
367             "movq        32(%1),%%mm0\n\t"
368             "packuswb   40(%1),%%mm0\n\t"
369             "movq        %%mm0,(%0)\n\t"
370             "addl           %2,%0\n\t"
371
372             "movq        48(%1),%%mm0\n\t"
373             "packuswb   56(%1),%%mm0\n\t"
374             "movq        %%mm0,(%0)\n\t"
375             "addl           %2,%0\n\t"
376
377             "movq        64(%1),%%mm0\n\t"
378             "packuswb   72(%1),%%mm0\n\t"
379             "movq        %%mm0,(%0)\n\t"
380             "addl           %2,%0\n\t"
381
382             "movq        80(%1),%%mm0\n\t"
383             "packuswb   88(%1),%%mm0\n\t"
384             "movq        %%mm0,(%0)\n\t"
385             "addl           %2,%0\n\t"
386
387             "movq        96(%1),%%mm0\n\t"
388             "packuswb   104(%1),%%mm0\n\t"
389             "movq        %%mm0,(%0)\n\t"
390             "addl           %2,%0\n\t"
391
392             "movq        112(%1),%%mm0\n\t"
393             "packuswb   120(%1),%%mm0\n\t"
394             "movq        %%mm0,(%0)\n\t"
395             //"emms"
396             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
397 }
398 #endif
399
400
/*****************************************************************************
 * vdec_DecodeMacroblock : decode a macroblock of a picture
 *****************************************************************************
 * The macros below are expanded inside the vdec_DecodeMacroblock*()
 * functions and use the `p_vdec' and `p_mb' variables found at the expansion
 * site. DECODEBLOCKSC and DECODEBLOCKSBW expand to a brace-enclosed block and
 * are therefore written without a trailing semicolon. OPBLOCK is the
 * function (AddBlock or CopyBlock) applied to every coded block.
 *****************************************************************************/

/*
 * DECODEBLOCKSRUN: process the blocks I_FIRST <= i_b < I_END of p_mb.
 * Requires `i_b' and `i_mask' to be declared by the enclosing macro; i_mask
 * is shifted right once per block, whether the block is coded or not.
 */
#define DECODEBLOCKSRUN( OPBLOCK, I_FIRST, I_END, I_STRIDE )            \
    for( i_b = (I_FIRST); i_b < (I_END); i_b++, i_mask >>= 1 )          \
    {                                                                   \
        if( p_mb->i_coded_block_pattern & i_mask )                      \
        {                                                               \
            /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */         \
            (p_mb->pf_idct[i_b])( p_vdec, p_mb->ppi_blocks[i_b],        \
                                  p_mb->pi_sparse_pos[i_b] );           \
                                                                        \
            /* Adding prediction and coefficient data               */  \
            /* (ISO/IEC 13818-2 section 7.6.8)                      */  \
            OPBLOCK( p_vdec, p_mb->ppi_blocks[i_b],                     \
                     p_mb->p_data[i_b], (I_STRIDE) );                   \
        }                                                               \
    }

/* Color version: the 4 luminance blocks, then the chrominance blocks */
#define DECODEBLOCKSC( OPBLOCK )                                        \
{                                                                       \
    int             i_b;                                                \
    int             i_mask = 1 << (3 + p_mb->i_chroma_nb_blocks);       \
                                                                        \
    /* luminance */                                                     \
    DECODEBLOCKSRUN( OPBLOCK, 0, 4, p_mb->i_addb_l_stride )             \
                                                                        \
    /* chrominance */                                                   \
    DECODEBLOCKSRUN( OPBLOCK, 4, 4 + p_mb->i_chroma_nb_blocks,          \
                     p_mb->i_addb_c_stride )                            \
}

/* Black and white version: only the 4 luminance blocks are processed */
#define DECODEBLOCKSBW( OPBLOCK )                                       \
{                                                                       \
    int             i_b;                                                \
    int             i_mask = 1 << (3 + p_mb->i_chroma_nb_blocks);       \
                                                                        \
    /* luminance */                                                     \
    DECODEBLOCKSRUN( OPBLOCK, 0, 4, p_mb->i_addb_l_stride )             \
}
478
479 void vdec_DecodeMacroblockC ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
480 {
481     if( !(p_mb->i_mb_type & MB_INTRA) )
482     {
483         /*
484          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
485          */
486         if( p_mb->pf_motion == 0 )
487         {
488             intf_ErrMsg( "vdec error: pf_motion set to NULL" );
489         }
490         else
491         {
492             p_mb->pf_motion( p_mb );
493         }
494
495         DECODEBLOCKSC( AddBlock )
496     }
497     else
498     {
499         DECODEBLOCKSC( CopyBlock )
500     }
501
502     /*
503      * Decoding is finished, release the macroblock and free
504      * unneeded memory.
505      */
506     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
507 }
508
509 void vdec_DecodeMacroblockBW ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
510 {
511     if( !(p_mb->i_mb_type & MB_INTRA) )
512     {
513         /*
514          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
515          */
516         if( p_mb->pf_motion == 0 )
517         {
518             intf_ErrMsg( "vdec error: pf_motion set to NULL" );
519         }
520         else
521         {
522             p_mb->pf_motion( p_mb );
523         }
524
525         DECODEBLOCKSBW( AddBlock )
526     }
527     else
528     {
529         DECODEBLOCKSBW( CopyBlock )
530     }
531
532     /*
533      * Decoding is finished, release the macroblock and free
534      * unneeded memory.
535      */
536     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
537 }
538
539
540
541 /*****************************************************************************
542  * RunThread: video decoder thread
543  *****************************************************************************
544  * Video decoder thread. This function does only return when the thread is
545  * terminated.
546  *****************************************************************************/
547 static void RunThread( vdec_thread_t *p_vdec )
548 {
549     intf_DbgMsg("vdec debug: running video decoder thread (%p) (pid == %i)",
550                 p_vdec, getpid());
551
552     /*
553      * Initialize thread and free configuration
554      */
555     p_vdec->b_error = vdec_InitThread( p_vdec );
556     if( p_vdec->b_error )
557     {
558         return;
559     }
560     p_vdec->b_run = 1;
561
562     /*
563      * Main loop - it is not executed if an error occured during
564      * initialization
565      */
566     while( (!p_vdec->b_die) && (!p_vdec->b_error) )
567     {
568         macroblock_t *          p_mb;
569
570         if( (p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo )) != NULL )
571         {
572             vdec_DecodeMacroblockC ( p_vdec, p_mb );
573         }
574     }
575
576     /*
577      * Error loop
578      */
579     if( p_vdec->b_error )
580     {
581         ErrorThread( p_vdec );
582     }
583
584     /* End of thread */
585     EndThread( p_vdec );
586     p_vdec->b_run = 0;
587 }