]> git.sesse.net Git - vlc/blob - src/video_decoder/video_decoder.c
* vlc is now compiled without the STATS mode by default.
[vlc] / src / video_decoder / video_decoder.c
1 /*****************************************************************************
2  * video_decoder.c : video decoder thread
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  *
6  * Authors: Christophe Massiot <massiot@via.ecp.fr>
7  *          Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include "defs.h"
28
29 #include <stdlib.h>                                                /* free() */
30 #include <unistd.h>                                              /* getpid() */
31 #include <sys/types.h>                        /* on BSD, uio.h needs types.h */
32 #include <sys/uio.h>                                          /* for input.h */
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "plugins.h"
39
40 #include "intf_msg.h"
41
42 #include "input.h"
43 #include "decoder_fifo.h"
44 #include "video.h"
45 #include "video_output.h"
46
47 #include "vdec_idct.h"
48 #include "video_decoder.h"
49 #include "vdec_motion.h"
50
51 #include "vpar_blocks.h"
52 #include "vpar_headers.h"
53 #include "vpar_synchro.h"
54 #include "video_parser.h"
55 #include "video_fifo.h"
56
57 /*
58  * Local prototypes
59  */
60 #ifdef VDEC_SMP
61 static int      vdec_InitThread     ( vdec_thread_t *p_vdec );
62 #endif
63 static void     RunThread           ( vdec_thread_t *p_vdec );
64 static void     ErrorThread         ( vdec_thread_t *p_vdec );
65 static void     EndThread           ( vdec_thread_t *p_vdec );
66
67 /*****************************************************************************
68  * vdec_CreateThread: create a video decoder thread
69  *****************************************************************************
70  * This function creates a new video decoder thread, and returns a pointer
71  * to its description. On error, it returns NULL.
72  * Following configuration properties are used:
73  * XXX??
74  *****************************************************************************/
75 vdec_thread_t * vdec_CreateThread( vpar_thread_t *p_vpar /*, int *pi_status */ )
76 {
77     vdec_thread_t *     p_vdec;
78
79     intf_DbgMsg("vdec debug: creating video decoder thread\n");
80
81     /* Allocate the memory needed to store the thread's structure */
82     if ( (p_vdec = (vdec_thread_t *)malloc( sizeof(vdec_thread_t) )) == NULL )
83     {
84         intf_ErrMsg("vdec error: not enough memory for vdec_CreateThread() to create the new thread\n");
85         return( NULL );
86     }
87
88     /*
89      * Initialize the thread properties
90      */
91     p_vdec->b_die = 0;
92     p_vdec->b_error = 0;
93
94     /*
95      * Initialize the parser properties
96      */
97     p_vdec->p_vpar = p_vpar;
98
99     /* Spawn the video decoder thread */
100     if ( vlc_thread_create(&p_vdec->thread_id, "video decoder",
101          (vlc_thread_func_t)RunThread, (void *)p_vdec) )
102     {
103         intf_ErrMsg("vdec error: can't spawn video decoder thread\n");
104         free( p_vdec );
105         return( NULL );
106     }
107
108     intf_DbgMsg("vdec debug: video decoder thread (%p) created\n", p_vdec);
109     return( p_vdec );
110 }
111
112 /*****************************************************************************
113  * vdec_DestroyThread: destroy a video decoder thread
114  *****************************************************************************
115  * Destroy and terminate thread. This function will return 0 if the thread could
116  * be destroyed, and non 0 else. The last case probably means that the thread
117  * was still active, and another try may succeed.
118  *****************************************************************************/
119 void vdec_DestroyThread( vdec_thread_t *p_vdec /*, int *pi_status */ )
120 {
121     intf_DbgMsg("vdec debug: requesting termination of video decoder thread %p\n", p_vdec);
122
123     /* Ask thread to kill itself */
124     p_vdec->b_die = 1;
125
126 #ifdef VDEC_SMP
127     /* Make sure the decoder thread leaves the vpar_GetMacroblock() function */
128     vlc_mutex_lock( &(p_vdec->p_vpar->vfifo.lock) );
129     vlc_cond_signal( &(p_vdec->p_vpar->vfifo.wait) );
130     vlc_mutex_unlock( &(p_vdec->p_vpar->vfifo.lock) );
131 #endif
132
133     /* Waiting for the decoder thread to exit */
134     /* Remove this as soon as the "status" flag is implemented */
135     vlc_thread_join( p_vdec->thread_id );
136 }
137
138 /* following functions are local */
139
140 /*****************************************************************************
141  * vdec_InitThread: initialize video decoder thread
142  *****************************************************************************
143  * This function is called from RunThread and performs the second step of the
144  * initialization. It returns 0 on success. Note that the thread's flag are not
145  * modified inside this function.
146  *****************************************************************************/
147 #ifdef VDEC_SMP
148 static int vdec_InitThread( vdec_thread_t *p_vdec )
149 #else
150 int vdec_InitThread( vdec_thread_t *p_vdec )
151 #endif
152 {
153 #ifndef HAVE_MMX
154     int i_dummy;
155 #endif
156
157     intf_DbgMsg("vdec debug: initializing video decoder thread %p\n", p_vdec);
158
159     /* Initialize other properties */
160 #ifdef STATS
161     p_vdec->c_loops = 0;
162     p_vdec->c_idle_loops = 0;
163     p_vdec->c_decoded_pictures = 0;
164     p_vdec->c_decoded_i_pictures = 0;
165     p_vdec->c_decoded_p_pictures = 0;
166     p_vdec->c_decoded_b_pictures = 0;
167 #endif
168
169 #ifndef HAVE_MMX
170     /* Init crop table */
171     p_vdec->pi_crop = p_vdec->pi_crop_buf + (VDEC_CROPRANGE >> 1);
172     for( i_dummy = -(VDEC_CROPRANGE >> 1); i_dummy < 0; i_dummy++ )
173     {
174         p_vdec->pi_crop[i_dummy] = 0;
175     }
176     for( ; i_dummy < 255; i_dummy ++ )
177     {
178         p_vdec->pi_crop[i_dummy] = i_dummy;
179     }
180     for( ; i_dummy < (VDEC_CROPRANGE >> 1) -1; i_dummy++ )
181     {
182         p_vdec->pi_crop[i_dummy] = 255;
183     }
184 #endif
185
186 #ifdef VDEC_SMP
187     /* Re-nice ourself */
188     if( nice(VDEC_NICE) == -1 )
189     {
190         intf_WarnMsg( 2, "vdec warning : couldn't nice() (%s)\n",
191                       strerror(errno) );
192     }
193 #endif
194
195     /* Mark thread as running and return */
196     intf_DbgMsg("vdec debug: InitThread(%p) succeeded\n", p_vdec);
197     return( 0 );
198 }
199
200 /*****************************************************************************
201  * ErrorThread: RunThread() error loop
202  *****************************************************************************
203  * This function is called when an error occured during thread main's loop. The
204  * thread can still receive feed, but must be ready to terminate as soon as
205  * possible.
206  *****************************************************************************/
207 static void ErrorThread( vdec_thread_t *p_vdec )
208 {
209     macroblock_t *       p_mb;
210
211     /* Wait until a `die' order */
212     while( !p_vdec->b_die )
213     {
214         p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo );
215         vpar_DestroyMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
216     }
217 }
218
219 /*****************************************************************************
220  * EndThread: thread destruction
221  *****************************************************************************
222  * This function is called when the thread ends after a sucessful
223  * initialization.
224  *****************************************************************************/
225 static void EndThread( vdec_thread_t *p_vdec )
226 {
227     intf_DbgMsg("vdec debug: EndThread(%p)\n", p_vdec);
228 }
229
230 /*****************************************************************************
231  * AddBlock : add a block
232  *****************************************************************************/
233 #ifndef HAVE_MMX
234 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
235                                  yuv_data_t * p_data, int i_incr )
236 {
237     int i_x, i_y;
238
239     for( i_y = 0; i_y < 8; i_y++ )
240     {
241         for( i_x = 0; i_x < 8; i_x++ )
242         {
243             *p_data = p_vdec->pi_crop[*p_data + *p_block++];
244             p_data++;
245         }
246         p_data += i_incr;
247     }
248 }
249 #else
250 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
251                                           yuv_data_t * p_data, int i_incr )
252 {
253     asm __volatile__ (
254             "pxor       %%mm7,%%mm7\n\t"
255
256             "movq       (%0),%%mm1\n\t"
257             "movq       %%mm1,%%mm2\n\t"
258             "punpckhbw  %%mm7,%%mm1\n\t"
259             "punpcklbw  %%mm7,%%mm2\n\t"
260             "paddw      (%1),%%mm2\n\t"
261             "paddw      8(%1),%%mm1\n\t"
262             "packuswb   %%mm1,%%mm2\n\t"
263             "movq       %%mm2,(%0)\n\t"
264             "addl       %2,%0\n\t"
265
266             "movq       (%0),%%mm1\n\t"
267             "movq       %%mm1,%%mm2\n\t"
268             "punpckhbw  %%mm7,%%mm1\n\t"
269             "punpcklbw  %%mm7,%%mm2\n\t"
270             "paddw      16(%1),%%mm2\n\t"
271             "paddw      24(%1),%%mm1\n\t"
272             "packuswb   %%mm1,%%mm2\n\t"
273             "movq       %%mm2,(%0)\n\t"
274             "addl       %2,%0\n\t"
275
276             "movq       (%0),%%mm1\n\t"
277             "movq       %%mm1,%%mm2\n\t"
278             "punpckhbw  %%mm7,%%mm1\n\t"
279             "punpcklbw  %%mm7,%%mm2\n\t"
280             "paddw      32(%1),%%mm2\n\t"
281             "paddw      40(%1),%%mm1\n\t"
282             "packuswb   %%mm1,%%mm2\n\t"
283             "movq       %%mm2,(%0)\n\t"
284             "addl       %2,%0\n\t"
285
286             "movq       (%0),%%mm1\n\t"
287             "movq       %%mm1,%%mm2\n\t"
288             "punpckhbw  %%mm7,%%mm1\n\t"
289             "punpcklbw  %%mm7,%%mm2\n\t"
290             "paddw      48(%1),%%mm2\n\t"
291             "paddw      56(%1),%%mm1\n\t"
292             "packuswb   %%mm1,%%mm2\n\t"
293             "movq       %%mm2,(%0)\n\t"
294             "addl       %2,%0\n\t"
295
296             "movq       (%0),%%mm1\n\t"
297             "movq       %%mm1,%%mm2\n\t"
298             "punpckhbw  %%mm7,%%mm1\n\t"
299             "punpcklbw  %%mm7,%%mm2\n\t"
300             "paddw      64(%1),%%mm2\n\t"
301             "paddw      72(%1),%%mm1\n\t"
302             "packuswb   %%mm1,%%mm2\n\t"
303             "movq       %%mm2,(%0)\n\t"
304             "addl       %2,%0\n\t"
305
306             "movq       (%0),%%mm1\n\t"
307             "movq       %%mm1,%%mm2\n\t"
308             "punpckhbw  %%mm7,%%mm1\n\t"
309             "punpcklbw  %%mm7,%%mm2\n\t"
310             "paddw      80(%1),%%mm2\n\t"
311             "paddw      88(%1),%%mm1\n\t"
312             "packuswb   %%mm1,%%mm2\n\t"
313             "movq       %%mm2,(%0)\n\t"
314             "addl       %2,%0\n\t"
315
316             "movq       (%0),%%mm1\n\t"
317             "movq       %%mm1,%%mm2\n\t"
318             "punpckhbw  %%mm7,%%mm1\n\t"
319             "punpcklbw  %%mm7,%%mm2\n\t"
320             "paddw      96(%1),%%mm2\n\t"
321             "paddw      104(%1),%%mm1\n\t"
322             "packuswb   %%mm1,%%mm2\n\t"
323             "movq       %%mm2,(%0)\n\t"
324             "addl       %2,%0\n\t"
325
326             "movq       (%0),%%mm1\n\t"
327             "movq       %%mm1,%%mm2\n\t"
328             "punpckhbw  %%mm7,%%mm1\n\t"
329             "punpcklbw  %%mm7,%%mm2\n\t"
330             "paddw      112(%1),%%mm2\n\t"
331             "paddw      120(%1),%%mm1\n\t"
332             "packuswb   %%mm1,%%mm2\n\t"
333             "movq       %%mm2,(%0)\n\t"
334
335             //"emms"
336             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
337 }
338 #endif
339
340
341 /*****************************************************************************
342  * CopyBlock : copy a block
343  *****************************************************************************/
344 #ifndef HAVE_MMX
345 static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
346                                   yuv_data_t * p_data, int i_incr )
347 {
348     int i_x, i_y;
349
350     for( i_y = 0; i_y < 8; i_y++ )
351     {
352         for( i_x = 0; i_x < 8; i_x++ )
353         {
354             *p_data++ = p_vdec->pi_crop[*p_block++];
355         }
356         p_data += i_incr;
357     }
358 }
359 #else
360 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
361                                           yuv_data_t * p_data, int i_incr )
362 {
363     asm __volatile__ (
364             "movq         (%1),%%mm0\n\t"
365             "packuswb   8(%1),%%mm0\n\t"
366             "movq        %%mm0,(%0)\n\t"
367             "addl           %2,%0\n\t"
368
369             "movq        16(%1),%%mm0\n\t"
370             "packuswb   24(%1),%%mm0\n\t"
371             "movq        %%mm0,(%0)\n\t"
372             "addl           %2,%0\n\t"
373
374             "movq        32(%1),%%mm0\n\t"
375             "packuswb   40(%1),%%mm0\n\t"
376             "movq        %%mm0,(%0)\n\t"
377             "addl           %2,%0\n\t"
378
379             "movq        48(%1),%%mm0\n\t"
380             "packuswb   56(%1),%%mm0\n\t"
381             "movq        %%mm0,(%0)\n\t"
382             "addl           %2,%0\n\t"
383
384             "movq        64(%1),%%mm0\n\t"
385             "packuswb   72(%1),%%mm0\n\t"
386             "movq        %%mm0,(%0)\n\t"
387             "addl           %2,%0\n\t"
388
389             "movq        80(%1),%%mm0\n\t"
390             "packuswb   88(%1),%%mm0\n\t"
391             "movq        %%mm0,(%0)\n\t"
392             "addl           %2,%0\n\t"
393
394             "movq        96(%1),%%mm0\n\t"
395             "packuswb   104(%1),%%mm0\n\t"
396             "movq        %%mm0,(%0)\n\t"
397             "addl           %2,%0\n\t"
398
399             "movq        112(%1),%%mm0\n\t"
400             "packuswb   120(%1),%%mm0\n\t"
401             "movq        %%mm0,(%0)\n\t"
402             //"emms"
403             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
404 }
405 #endif
406
407
408 /*****************************************************************************
409  * vdec_DecodeMacroblock : decode a macroblock of a picture
410  *****************************************************************************/
/*
 * DECODEBLOCKSC: decode every coded block (luminance and chrominance) of the
 * macroblock p_mb, which must be in scope together with p_vdec at the point
 * of expansion. OPBLOCK is the routine (AddBlock or CopyBlock) applied to
 * each block after its inverse DCT. The coded block pattern is scanned from
 * its most significant bit, bit (3 + i_chroma_nb_blocks), one bit per block.
 * The macro expands to a brace-enclosed block; no trailing ';' is needed.
 */
#define DECODEBLOCKSC( OPBLOCK )                                        \
{                                                                       \
    int             i_blk;                                              \
    int             i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);        \
                                                                        \
    /* the four luminance blocks use the luminance stride */            \
    for( i_blk = 0; i_blk < 4; i_blk++, i_bit >>= 1 )                   \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                              /* block not coded */\
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data                       \
         * (ISO/IEC 13818-2 section 7.6.8) */                           \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
                                                                        \
    /* the chrominance blocks follow and use the chrominance stride */  \
    for( i_blk = 4; i_blk < 4 + p_mb->i_chroma_nb_blocks;               \
         i_blk++, i_bit >>= 1 )                                         \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                              /* block not coded */\
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data                       \
         * (ISO/IEC 13818-2 section 7.6.8) */                           \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_c_stride );          \
    }                                                                   \
}
458
/*
 * DECODEBLOCKSBW: luminance-only variant of the block decoding macro. Only
 * the four luminance blocks of p_mb are processed; the chrominance bits of
 * the coded block pattern are ignored. p_mb and p_vdec must be in scope at
 * the point of expansion, and OPBLOCK is the routine (AddBlock or CopyBlock)
 * applied after the inverse DCT. The scan still starts at bit
 * (3 + i_chroma_nb_blocks) of the pattern.
 * The macro expands to a brace-enclosed block; no trailing ';' is needed.
 */
#define DECODEBLOCKSBW( OPBLOCK )                                       \
{                                                                       \
    int             i_blk;                                              \
    int             i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);        \
                                                                        \
    /* luminance blocks only */                                         \
    for( i_blk = 0; i_blk < 4; i_blk++, i_bit >>= 1 )                   \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                              /* block not coded */\
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data                       \
         * (ISO/IEC 13818-2 section 7.6.8) */                           \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
}
485
486 void vdec_DecodeMacroblockC ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
487 {
488     if( !(p_mb->i_mb_type & MB_INTRA) )
489     {
490         /*
491          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
492          */
493         if( p_mb->pf_motion == 0 )
494         {
495             intf_ErrMsg( "vdec error: pf_motion set to NULL\n" );
496         }
497         else
498         {
499             p_mb->pf_motion( p_mb );
500         }
501
502         DECODEBLOCKSC( AddBlock )
503     }
504     else
505     {
506         DECODEBLOCKSC( CopyBlock )
507     }
508
509     /*
510      * Decoding is finished, release the macroblock and free
511      * unneeded memory.
512      */
513     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
514 }
515
516 void vdec_DecodeMacroblockBW ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
517 {
518     if( !(p_mb->i_mb_type & MB_INTRA) )
519     {
520         /*
521          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
522          */
523         if( p_mb->pf_motion == 0 )
524         {
525             intf_ErrMsg( "vdec error: pf_motion set to NULL\n" );
526         }
527         else
528         {
529             p_mb->pf_motion( p_mb );
530         }
531
532         DECODEBLOCKSBW( AddBlock )
533     }
534     else
535     {
536         DECODEBLOCKSBW( CopyBlock )
537     }
538
539     /*
540      * Decoding is finished, release the macroblock and free
541      * unneeded memory.
542      */
543     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
544 }
545
546
547
548 /*****************************************************************************
549  * RunThread: video decoder thread
550  *****************************************************************************
551  * Video decoder thread. This function does only return when the thread is
552  * terminated.
553  *****************************************************************************/
554 static void RunThread( vdec_thread_t *p_vdec )
555 {
556     intf_DbgMsg("vdec debug: running video decoder thread (%p) (pid == %i)\n",
557                 p_vdec, getpid());
558
559     /*
560      * Initialize thread and free configuration
561      */
562     p_vdec->b_error = vdec_InitThread( p_vdec );
563     if( p_vdec->b_error )
564     {
565         return;
566     }
567     p_vdec->b_run = 1;
568
569     /*
570      * Main loop - it is not executed if an error occured during
571      * initialization
572      */
573     while( (!p_vdec->b_die) && (!p_vdec->b_error) )
574     {
575         macroblock_t *          p_mb;
576
577         if( (p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo )) != NULL )
578         {
579             p_vdec->p_vpar->p_vout->vdec_DecodeMacroblock ( p_vdec, p_mb );
580         }
581     }
582
583     /*
584      * Error loop
585      */
586     if( p_vdec->b_error )
587     {
588         ErrorThread( p_vdec );
589     }
590
591     /* End of thread */
592     EndThread( p_vdec );
593     p_vdec->b_run = 0;
594 }