]> git.sesse.net Git - vlc/blob - src/video_decoder/video_decoder.c
* Added more stats
[vlc] / src / video_decoder / video_decoder.c
1 /*****************************************************************************
2  * video_decoder.c : video decoder thread
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  *
6  * Authors: Christophe Massiot <massiot@via.ecp.fr>
7  *          Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include "defs.h"
28
29 #include <stdlib.h>                                                /* free() */
30 #include <unistd.h>                                              /* getpid() */
31 #include <sys/types.h>                        /* on BSD, uio.h needs types.h */
32 #include <sys/uio.h>                                          /* for input.h */
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "plugins.h"
39
40 #include "intf_msg.h"
41
42 #include "input.h"
43 #include "decoder_fifo.h"
44 #include "video.h"
45 #include "video_output.h"
46
47 #include "vdec_idct.h"
48 #include "video_decoder.h"
49 #include "vdec_motion.h"
50
51 #include "vpar_blocks.h"
52 #include "vpar_headers.h"
53 #include "vpar_synchro.h"
54 #include "video_parser.h"
55 #include "video_fifo.h"
56
57 /*
58  * Local prototypes
59  */
60 #ifdef VDEC_SMP
61 static int      vdec_InitThread     ( vdec_thread_t *p_vdec );
62 #endif
63 static void     RunThread           ( vdec_thread_t *p_vdec );
64 static void     ErrorThread         ( vdec_thread_t *p_vdec );
65 static void     EndThread           ( vdec_thread_t *p_vdec );
66
67 /*****************************************************************************
68  * vdec_CreateThread: create a video decoder thread
69  *****************************************************************************
70  * This function creates a new video decoder thread, and returns a pointer
71  * to its description. On error, it returns NULL.
72  * Following configuration properties are used:
73  * XXX??
74  *****************************************************************************/
75 vdec_thread_t * vdec_CreateThread( vpar_thread_t *p_vpar /*, int *pi_status */ )
76 {
77     vdec_thread_t *     p_vdec;
78
79     intf_DbgMsg("vdec debug: creating video decoder thread\n");
80
81     /* Allocate the memory needed to store the thread's structure */
82     if ( (p_vdec = (vdec_thread_t *)malloc( sizeof(vdec_thread_t) )) == NULL )
83     {
84         intf_ErrMsg("vdec error: not enough memory for vdec_CreateThread() to create the new thread\n");
85         return( NULL );
86     }
87
88     /*
89      * Initialize the thread properties
90      */
91     p_vdec->b_die = 0;
92     p_vdec->b_error = 0;
93
94     /*
95      * Initialize the parser properties
96      */
97     p_vdec->p_vpar = p_vpar;
98
99     /* Spawn the video decoder thread */
100     if ( vlc_thread_create(&p_vdec->thread_id, "video decoder",
101          (vlc_thread_func_t)RunThread, (void *)p_vdec) )
102     {
103         intf_ErrMsg("vdec error: can't spawn video decoder thread\n");
104         free( p_vdec );
105         return( NULL );
106     }
107
108     intf_DbgMsg("vdec debug: video decoder thread (%p) created\n", p_vdec);
109     return( p_vdec );
110 }
111
112 /*****************************************************************************
113  * vdec_DestroyThread: destroy a video decoder thread
114  *****************************************************************************
115  * Destroy and terminate thread. This function will return 0 if the thread could
116  * be destroyed, and non 0 else. The last case probably means that the thread
117  * was still active, and another try may succeed.
118  *****************************************************************************/
119 void vdec_DestroyThread( vdec_thread_t *p_vdec /*, int *pi_status */ )
120 {
121     intf_DbgMsg("vdec debug: requesting termination of video decoder thread %p\n", p_vdec);
122
123     /* Ask thread to kill itself */
124     p_vdec->b_die = 1;
125
126 #ifdef VDEC_SMP
127     /* Make sure the decoder thread leaves the vpar_GetMacroblock() function */
128     vlc_mutex_lock( &(p_vdec->p_vpar->vfifo.lock) );
129     vlc_cond_signal( &(p_vdec->p_vpar->vfifo.wait) );
130     vlc_mutex_unlock( &(p_vdec->p_vpar->vfifo.lock) );
131 #endif
132
133     /* Waiting for the decoder thread to exit */
134     /* Remove this as soon as the "status" flag is implemented */
135     vlc_thread_join( p_vdec->thread_id );
136 }
137
138 /* following functions are local */
139
140 /*****************************************************************************
141  * vdec_InitThread: initialize video decoder thread
142  *****************************************************************************
143  * This function is called from RunThread and performs the second step of the
144  * initialization. It returns 0 on success. Note that the thread's flag are not
145  * modified inside this function.
146  *****************************************************************************/
147 #ifdef VDEC_SMP
148 static int vdec_InitThread( vdec_thread_t *p_vdec )
149 #else
150 int vdec_InitThread( vdec_thread_t *p_vdec )
151 #endif
152 {
153 #ifndef HAVE_MMX
154     int i_dummy;
155 #endif
156
157     intf_DbgMsg("vdec debug: initializing video decoder thread %p\n", p_vdec);
158
159 #ifndef HAVE_MMX
160     /* Init crop table */
161     p_vdec->pi_crop = p_vdec->pi_crop_buf + (VDEC_CROPRANGE >> 1);
162     for( i_dummy = -(VDEC_CROPRANGE >> 1); i_dummy < 0; i_dummy++ )
163     {
164         p_vdec->pi_crop[i_dummy] = 0;
165     }
166     for( ; i_dummy < 255; i_dummy ++ )
167     {
168         p_vdec->pi_crop[i_dummy] = i_dummy;
169     }
170     for( ; i_dummy < (VDEC_CROPRANGE >> 1) -1; i_dummy++ )
171     {
172         p_vdec->pi_crop[i_dummy] = 255;
173     }
174 #endif
175
176 #ifdef VDEC_SMP
177     /* Re-nice ourself */
178     if( nice(VDEC_NICE) == -1 )
179     {
180         intf_WarnMsg( 2, "vdec warning : couldn't nice() (%s)\n",
181                       strerror(errno) );
182     }
183 #endif
184
185     /* Mark thread as running and return */
186     intf_DbgMsg("vdec debug: InitThread(%p) succeeded\n", p_vdec);
187     return( 0 );
188 }
189
190 /*****************************************************************************
191  * ErrorThread: RunThread() error loop
192  *****************************************************************************
193  * This function is called when an error occured during thread main's loop. The
194  * thread can still receive feed, but must be ready to terminate as soon as
195  * possible.
196  *****************************************************************************/
197 static void ErrorThread( vdec_thread_t *p_vdec )
198 {
199     macroblock_t *       p_mb;
200
201     /* Wait until a `die' order */
202     while( !p_vdec->b_die )
203     {
204         p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo );
205         vpar_DestroyMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
206     }
207 }
208
209 /*****************************************************************************
210  * EndThread: thread destruction
211  *****************************************************************************
212  * This function is called when the thread ends after a sucessful
213  * initialization.
214  *****************************************************************************/
215 static void EndThread( vdec_thread_t *p_vdec )
216 {
217     intf_DbgMsg("vdec debug: EndThread(%p)\n", p_vdec);
218 }
219
220 /*****************************************************************************
221  * AddBlock : add a block
222  *****************************************************************************/
223 #ifndef HAVE_MMX
224 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
225                                  yuv_data_t * p_data, int i_incr )
226 {
227     int i_x, i_y;
228
229     for( i_y = 0; i_y < 8; i_y++ )
230     {
231         for( i_x = 0; i_x < 8; i_x++ )
232         {
233             *p_data = p_vdec->pi_crop[*p_data + *p_block++];
234             p_data++;
235         }
236         p_data += i_incr;
237     }
238 }
239 #else
240 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
241                                           yuv_data_t * p_data, int i_incr )
242 {
243     asm __volatile__ (
244             "pxor       %%mm7,%%mm7\n\t"
245
246             "movq       (%0),%%mm1\n\t"
247             "movq       %%mm1,%%mm2\n\t"
248             "punpckhbw  %%mm7,%%mm1\n\t"
249             "punpcklbw  %%mm7,%%mm2\n\t"
250             "paddw      (%1),%%mm2\n\t"
251             "paddw      8(%1),%%mm1\n\t"
252             "packuswb   %%mm1,%%mm2\n\t"
253             "movq       %%mm2,(%0)\n\t"
254             "addl       %2,%0\n\t"
255
256             "movq       (%0),%%mm1\n\t"
257             "movq       %%mm1,%%mm2\n\t"
258             "punpckhbw  %%mm7,%%mm1\n\t"
259             "punpcklbw  %%mm7,%%mm2\n\t"
260             "paddw      16(%1),%%mm2\n\t"
261             "paddw      24(%1),%%mm1\n\t"
262             "packuswb   %%mm1,%%mm2\n\t"
263             "movq       %%mm2,(%0)\n\t"
264             "addl       %2,%0\n\t"
265
266             "movq       (%0),%%mm1\n\t"
267             "movq       %%mm1,%%mm2\n\t"
268             "punpckhbw  %%mm7,%%mm1\n\t"
269             "punpcklbw  %%mm7,%%mm2\n\t"
270             "paddw      32(%1),%%mm2\n\t"
271             "paddw      40(%1),%%mm1\n\t"
272             "packuswb   %%mm1,%%mm2\n\t"
273             "movq       %%mm2,(%0)\n\t"
274             "addl       %2,%0\n\t"
275
276             "movq       (%0),%%mm1\n\t"
277             "movq       %%mm1,%%mm2\n\t"
278             "punpckhbw  %%mm7,%%mm1\n\t"
279             "punpcklbw  %%mm7,%%mm2\n\t"
280             "paddw      48(%1),%%mm2\n\t"
281             "paddw      56(%1),%%mm1\n\t"
282             "packuswb   %%mm1,%%mm2\n\t"
283             "movq       %%mm2,(%0)\n\t"
284             "addl       %2,%0\n\t"
285
286             "movq       (%0),%%mm1\n\t"
287             "movq       %%mm1,%%mm2\n\t"
288             "punpckhbw  %%mm7,%%mm1\n\t"
289             "punpcklbw  %%mm7,%%mm2\n\t"
290             "paddw      64(%1),%%mm2\n\t"
291             "paddw      72(%1),%%mm1\n\t"
292             "packuswb   %%mm1,%%mm2\n\t"
293             "movq       %%mm2,(%0)\n\t"
294             "addl       %2,%0\n\t"
295
296             "movq       (%0),%%mm1\n\t"
297             "movq       %%mm1,%%mm2\n\t"
298             "punpckhbw  %%mm7,%%mm1\n\t"
299             "punpcklbw  %%mm7,%%mm2\n\t"
300             "paddw      80(%1),%%mm2\n\t"
301             "paddw      88(%1),%%mm1\n\t"
302             "packuswb   %%mm1,%%mm2\n\t"
303             "movq       %%mm2,(%0)\n\t"
304             "addl       %2,%0\n\t"
305
306             "movq       (%0),%%mm1\n\t"
307             "movq       %%mm1,%%mm2\n\t"
308             "punpckhbw  %%mm7,%%mm1\n\t"
309             "punpcklbw  %%mm7,%%mm2\n\t"
310             "paddw      96(%1),%%mm2\n\t"
311             "paddw      104(%1),%%mm1\n\t"
312             "packuswb   %%mm1,%%mm2\n\t"
313             "movq       %%mm2,(%0)\n\t"
314             "addl       %2,%0\n\t"
315
316             "movq       (%0),%%mm1\n\t"
317             "movq       %%mm1,%%mm2\n\t"
318             "punpckhbw  %%mm7,%%mm1\n\t"
319             "punpcklbw  %%mm7,%%mm2\n\t"
320             "paddw      112(%1),%%mm2\n\t"
321             "paddw      120(%1),%%mm1\n\t"
322             "packuswb   %%mm1,%%mm2\n\t"
323             "movq       %%mm2,(%0)\n\t"
324
325             //"emms"
326             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
327 }
328 #endif
329
330
331 /*****************************************************************************
332  * CopyBlock : copy a block
333  *****************************************************************************/
334 #ifndef HAVE_MMX
335 static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
336                                   yuv_data_t * p_data, int i_incr )
337 {
338     int i_x, i_y;
339
340     for( i_y = 0; i_y < 8; i_y++ )
341     {
342         for( i_x = 0; i_x < 8; i_x++ )
343         {
344             *p_data++ = p_vdec->pi_crop[*p_block++];
345         }
346         p_data += i_incr;
347     }
348 }
349 #else
350 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
351                                           yuv_data_t * p_data, int i_incr )
352 {
353     asm __volatile__ (
354             "movq         (%1),%%mm0\n\t"
355             "packuswb   8(%1),%%mm0\n\t"
356             "movq        %%mm0,(%0)\n\t"
357             "addl           %2,%0\n\t"
358
359             "movq        16(%1),%%mm0\n\t"
360             "packuswb   24(%1),%%mm0\n\t"
361             "movq        %%mm0,(%0)\n\t"
362             "addl           %2,%0\n\t"
363
364             "movq        32(%1),%%mm0\n\t"
365             "packuswb   40(%1),%%mm0\n\t"
366             "movq        %%mm0,(%0)\n\t"
367             "addl           %2,%0\n\t"
368
369             "movq        48(%1),%%mm0\n\t"
370             "packuswb   56(%1),%%mm0\n\t"
371             "movq        %%mm0,(%0)\n\t"
372             "addl           %2,%0\n\t"
373
374             "movq        64(%1),%%mm0\n\t"
375             "packuswb   72(%1),%%mm0\n\t"
376             "movq        %%mm0,(%0)\n\t"
377             "addl           %2,%0\n\t"
378
379             "movq        80(%1),%%mm0\n\t"
380             "packuswb   88(%1),%%mm0\n\t"
381             "movq        %%mm0,(%0)\n\t"
382             "addl           %2,%0\n\t"
383
384             "movq        96(%1),%%mm0\n\t"
385             "packuswb   104(%1),%%mm0\n\t"
386             "movq        %%mm0,(%0)\n\t"
387             "addl           %2,%0\n\t"
388
389             "movq        112(%1),%%mm0\n\t"
390             "packuswb   120(%1),%%mm0\n\t"
391             "movq        %%mm0,(%0)\n\t"
392             //"emms"
393             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
394 }
395 #endif
396
397
398 /*****************************************************************************
399  * vdec_DecodeMacroblock : decode a macroblock of a picture
400  *****************************************************************************/
/* Decodes the 4 luminance blocks and the i_chroma_nb_blocks chrominance
 * blocks of p_mb. Expects `p_vdec' and `p_mb' in the enclosing scope.
 * Each block has one bit in i_coded_block_pattern, tested from bit
 * (3 + i_chroma_nb_blocks) downwards; blocks whose bit is clear are left
 * untouched. OPBLOCK is called with (p_vdec, block, data, stride).
 * The expansion is a brace block: it is used without a trailing `;'. */
#define DECODEBLOCKSC( OPBLOCK )                                        \
{                                                                       \
    int             i_blk;                                              \
    int             i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);        \
                                                                        \
    /* luminance */                                                     \
    for( i_blk = 0; i_blk < 4; i_blk++, i_bit >>= 1 )                   \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
                                                                        \
    /* chrominance */                                                   \
    for( i_blk = 4; i_blk < 4 + p_mb->i_chroma_nb_blocks;               \
         i_blk++, i_bit >>= 1 )                                         \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_c_stride );          \
    }                                                                   \
}
448
/* Luminance-only counterpart of DECODEBLOCKSC: decodes the 4 luminance
 * blocks of p_mb and skips the chrominance ones. Expects `p_vdec' and
 * `p_mb' in the enclosing scope. The pattern bit of the first block is
 * still bit (3 + i_chroma_nb_blocks) of i_coded_block_pattern.
 * The expansion is a brace block: it is used without a trailing `;'. */
#define DECODEBLOCKSBW( OPBLOCK )                                       \
{                                                                       \
    int             i_blk;                                              \
    int             i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);        \
                                                                        \
    /* luminance */                                                     \
    for( i_blk = 0; i_blk < 4; i_blk++, i_bit >>= 1 )                   \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
}
475
476 void vdec_DecodeMacroblockC ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
477 {
478     if( !(p_mb->i_mb_type & MB_INTRA) )
479     {
480         /*
481          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
482          */
483         if( p_mb->pf_motion == 0 )
484         {
485             intf_ErrMsg( "vdec error: pf_motion set to NULL\n" );
486         }
487         else
488         {
489             p_mb->pf_motion( p_mb );
490         }
491
492         DECODEBLOCKSC( AddBlock )
493     }
494     else
495     {
496         DECODEBLOCKSC( CopyBlock )
497     }
498
499     /*
500      * Decoding is finished, release the macroblock and free
501      * unneeded memory.
502      */
503     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
504 }
505
506 void vdec_DecodeMacroblockBW ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
507 {
508     if( !(p_mb->i_mb_type & MB_INTRA) )
509     {
510         /*
511          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
512          */
513         if( p_mb->pf_motion == 0 )
514         {
515             intf_ErrMsg( "vdec error: pf_motion set to NULL\n" );
516         }
517         else
518         {
519             p_mb->pf_motion( p_mb );
520         }
521
522         DECODEBLOCKSBW( AddBlock )
523     }
524     else
525     {
526         DECODEBLOCKSBW( CopyBlock )
527     }
528
529     /*
530      * Decoding is finished, release the macroblock and free
531      * unneeded memory.
532      */
533     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
534 }
535
536
537
538 /*****************************************************************************
539  * RunThread: video decoder thread
540  *****************************************************************************
541  * Video decoder thread. This function does only return when the thread is
542  * terminated.
543  *****************************************************************************/
544 static void RunThread( vdec_thread_t *p_vdec )
545 {
546     intf_DbgMsg("vdec debug: running video decoder thread (%p) (pid == %i)\n",
547                 p_vdec, getpid());
548
549     /*
550      * Initialize thread and free configuration
551      */
552     p_vdec->b_error = vdec_InitThread( p_vdec );
553     if( p_vdec->b_error )
554     {
555         return;
556     }
557     p_vdec->b_run = 1;
558
559     /*
560      * Main loop - it is not executed if an error occured during
561      * initialization
562      */
563     while( (!p_vdec->b_die) && (!p_vdec->b_error) )
564     {
565         macroblock_t *          p_mb;
566
567         if( (p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo )) != NULL )
568         {
569             p_vdec->p_vpar->p_vout->vdec_DecodeMacroblock ( p_vdec, p_mb );
570         }
571     }
572
573     /*
574      * Error loop
575      */
576     if( p_vdec->b_error )
577     {
578         ErrorThread( p_vdec );
579     }
580
581     /* End of thread */
582     EndThread( p_vdec );
583     p_vdec->b_run = 0;
584 }