]> git.sesse.net Git - vlc/blob - src/video_decoder/video_decoder.c
2813d2aaeb8689f61a81adde48bcb07dc1c83bb3
[vlc] / src / video_decoder / video_decoder.c
1 /*****************************************************************************
2  * video_decoder.c : video decoder thread
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: video_decoder.c,v 1.44 2001/01/05 18:46:44 massiot Exp $
6  *
7  * Authors: Christophe Massiot <massiot@via.ecp.fr>
8  *          Gaël Hendryckx <jimmy@via.ecp.fr>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  * 
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
#include "defs.h"

#include <stdlib.h>                                                /* free() */
#include <string.h>                                            /* strerror() */
#include <unistd.h>                                              /* getpid() */
#include <errno.h>                                                  /* errno */

#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "plugins.h"

#include "intf_msg.h"

#include "stream_control.h"
#include "input_ext-dec.h"

#include "video.h"
#include "video_output.h"

#include "vdec_idct.h"
#include "video_decoder.h"
#include "vdec_motion.h"

#include "vpar_blocks.h"
#include "vpar_headers.h"
#include "vpar_synchro.h"
#include "video_parser.h"
#include "video_fifo.h"
57
58 /*
59  * Local prototypes
60  */
61 #ifdef VDEC_SMP
62 static int      vdec_InitThread     ( vdec_thread_t *p_vdec );
63 #endif
64 static void     RunThread           ( vdec_thread_t *p_vdec );
65 static void     ErrorThread         ( vdec_thread_t *p_vdec );
66 static void     EndThread           ( vdec_thread_t *p_vdec );
67
68 /*****************************************************************************
69  * vdec_CreateThread: create a video decoder thread
70  *****************************************************************************
71  * This function creates a new video decoder thread, and returns a pointer
72  * to its description. On error, it returns NULL.
73  * Following configuration properties are used:
74  * XXX??
75  *****************************************************************************/
76 vdec_thread_t * vdec_CreateThread( vpar_thread_t *p_vpar /*, int *pi_status */ )
77 {
78     vdec_thread_t *     p_vdec;
79
80     intf_DbgMsg("vdec debug: creating video decoder thread");
81
82     /* Allocate the memory needed to store the thread's structure */
83     if ( (p_vdec = (vdec_thread_t *)malloc( sizeof(vdec_thread_t) )) == NULL )
84     {
85         intf_ErrMsg("vdec error: not enough memory for vdec_CreateThread() to create the new thread");
86         return( NULL );
87     }
88
89     /*
90      * Initialize the thread properties
91      */
92     p_vdec->b_die = 0;
93     p_vdec->b_error = 0;
94
95     /*
96      * Initialize the parser properties
97      */
98     p_vdec->p_vpar = p_vpar;
99
100     /* Spawn the video decoder thread */
101     if ( vlc_thread_create(&p_vdec->thread_id, "video decoder",
102          (vlc_thread_func_t)RunThread, (void *)p_vdec) )
103     {
104         intf_ErrMsg("vdec error: can't spawn video decoder thread");
105         free( p_vdec );
106         return( NULL );
107     }
108
109     intf_DbgMsg("vdec debug: video decoder thread (%p) created", p_vdec);
110     return( p_vdec );
111 }
112
113 /*****************************************************************************
114  * vdec_DestroyThread: destroy a video decoder thread
115  *****************************************************************************
116  * Destroy and terminate thread. This function will return 0 if the thread could
117  * be destroyed, and non 0 else. The last case probably means that the thread
118  * was still active, and another try may succeed.
119  *****************************************************************************/
120 void vdec_DestroyThread( vdec_thread_t *p_vdec /*, int *pi_status */ )
121 {
122     intf_DbgMsg("vdec debug: requesting termination of video decoder thread %p", p_vdec);
123
124     /* Ask thread to kill itself */
125     p_vdec->b_die = 1;
126
127 #ifdef VDEC_SMP
128     /* Make sure the decoder thread leaves the vpar_GetMacroblock() function */
129     vlc_mutex_lock( &(p_vdec->p_vpar->vfifo.lock) );
130     vlc_cond_signal( &(p_vdec->p_vpar->vfifo.wait) );
131     vlc_mutex_unlock( &(p_vdec->p_vpar->vfifo.lock) );
132 #endif
133
134     /* Waiting for the decoder thread to exit */
135     /* Remove this as soon as the "status" flag is implemented */
136     vlc_thread_join( p_vdec->thread_id );
137 }
138
139 /* following functions are local */
140
141 /*****************************************************************************
142  * vdec_InitThread: initialize video decoder thread
143  *****************************************************************************
144  * This function is called from RunThread and performs the second step of the
145  * initialization. It returns 0 on success. Note that the thread's flag are not
146  * modified inside this function.
147  *****************************************************************************/
148 #ifdef VDEC_SMP
149 static int vdec_InitThread( vdec_thread_t *p_vdec )
150 #else
151 int vdec_InitThread( vdec_thread_t *p_vdec )
152 #endif
153 {
154 #ifndef HAVE_MMX
155     int i_dummy;
156 #endif
157
158     intf_DbgMsg("vdec debug: initializing video decoder thread %p", p_vdec);
159
160 #ifndef HAVE_MMX
161     /* Init crop table */
162     p_vdec->pi_crop = p_vdec->pi_crop_buf + (VDEC_CROPRANGE >> 1);
163     for( i_dummy = -(VDEC_CROPRANGE >> 1); i_dummy < 0; i_dummy++ )
164     {
165         p_vdec->pi_crop[i_dummy] = 0;
166     }
167     for( ; i_dummy < 255; i_dummy ++ )
168     {
169         p_vdec->pi_crop[i_dummy] = i_dummy;
170     }
171     for( ; i_dummy < (VDEC_CROPRANGE >> 1) -1; i_dummy++ )
172     {
173         p_vdec->pi_crop[i_dummy] = 255;
174     }
175 #endif
176
177 #ifdef VDEC_SMP
178     /* Re-nice ourself */
179     if( nice(VDEC_NICE) == -1 )
180     {
181         intf_WarnMsg( 2, "vdec warning : couldn't nice() (%s)",
182                       strerror(errno) );
183     }
184 #endif
185
186     /* Mark thread as running and return */
187     intf_DbgMsg("vdec debug: InitThread(%p) succeeded", p_vdec);
188     return( 0 );
189 }
190
191 /*****************************************************************************
192  * ErrorThread: RunThread() error loop
193  *****************************************************************************
194  * This function is called when an error occured during thread main's loop. The
195  * thread can still receive feed, but must be ready to terminate as soon as
196  * possible.
197  *****************************************************************************/
198 static void ErrorThread( vdec_thread_t *p_vdec )
199 {
200     macroblock_t *       p_mb;
201
202     /* Wait until a `die' order */
203     while( !p_vdec->b_die )
204     {
205         p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo );
206         vpar_DestroyMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
207     }
208 }
209
210 /*****************************************************************************
211  * EndThread: thread destruction
212  *****************************************************************************
213  * This function is called when the thread ends after a sucessful
214  * initialization.
215  *****************************************************************************/
216 static void EndThread( vdec_thread_t *p_vdec )
217 {
218     intf_DbgMsg("vdec debug: EndThread(%p)", p_vdec);
219 }
220
221 /*****************************************************************************
222  * AddBlock : add a block
223  *****************************************************************************/
224 #ifndef HAVE_MMX
225 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
226                                  yuv_data_t * p_data, int i_incr )
227 {
228     int i_x, i_y;
229
230     for( i_y = 0; i_y < 8; i_y++ )
231     {
232         for( i_x = 0; i_x < 8; i_x++ )
233         {
234             *p_data = p_vdec->pi_crop[*p_data + *p_block++];
235             p_data++;
236         }
237         p_data += i_incr;
238     }
239 }
240 #else
241 static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
242                                           yuv_data_t * p_data, int i_incr )
243 {
244     asm __volatile__ (
245             "pxor       %%mm7,%%mm7\n\t"
246
247             "movq       (%0),%%mm1\n\t"
248             "movq       %%mm1,%%mm2\n\t"
249             "punpckhbw  %%mm7,%%mm1\n\t"
250             "punpcklbw  %%mm7,%%mm2\n\t"
251             "paddw      (%1),%%mm2\n\t"
252             "paddw      8(%1),%%mm1\n\t"
253             "packuswb   %%mm1,%%mm2\n\t"
254             "movq       %%mm2,(%0)\n\t"
255             "addl       %2,%0\n\t"
256
257             "movq       (%0),%%mm1\n\t"
258             "movq       %%mm1,%%mm2\n\t"
259             "punpckhbw  %%mm7,%%mm1\n\t"
260             "punpcklbw  %%mm7,%%mm2\n\t"
261             "paddw      16(%1),%%mm2\n\t"
262             "paddw      24(%1),%%mm1\n\t"
263             "packuswb   %%mm1,%%mm2\n\t"
264             "movq       %%mm2,(%0)\n\t"
265             "addl       %2,%0\n\t"
266
267             "movq       (%0),%%mm1\n\t"
268             "movq       %%mm1,%%mm2\n\t"
269             "punpckhbw  %%mm7,%%mm1\n\t"
270             "punpcklbw  %%mm7,%%mm2\n\t"
271             "paddw      32(%1),%%mm2\n\t"
272             "paddw      40(%1),%%mm1\n\t"
273             "packuswb   %%mm1,%%mm2\n\t"
274             "movq       %%mm2,(%0)\n\t"
275             "addl       %2,%0\n\t"
276
277             "movq       (%0),%%mm1\n\t"
278             "movq       %%mm1,%%mm2\n\t"
279             "punpckhbw  %%mm7,%%mm1\n\t"
280             "punpcklbw  %%mm7,%%mm2\n\t"
281             "paddw      48(%1),%%mm2\n\t"
282             "paddw      56(%1),%%mm1\n\t"
283             "packuswb   %%mm1,%%mm2\n\t"
284             "movq       %%mm2,(%0)\n\t"
285             "addl       %2,%0\n\t"
286
287             "movq       (%0),%%mm1\n\t"
288             "movq       %%mm1,%%mm2\n\t"
289             "punpckhbw  %%mm7,%%mm1\n\t"
290             "punpcklbw  %%mm7,%%mm2\n\t"
291             "paddw      64(%1),%%mm2\n\t"
292             "paddw      72(%1),%%mm1\n\t"
293             "packuswb   %%mm1,%%mm2\n\t"
294             "movq       %%mm2,(%0)\n\t"
295             "addl       %2,%0\n\t"
296
297             "movq       (%0),%%mm1\n\t"
298             "movq       %%mm1,%%mm2\n\t"
299             "punpckhbw  %%mm7,%%mm1\n\t"
300             "punpcklbw  %%mm7,%%mm2\n\t"
301             "paddw      80(%1),%%mm2\n\t"
302             "paddw      88(%1),%%mm1\n\t"
303             "packuswb   %%mm1,%%mm2\n\t"
304             "movq       %%mm2,(%0)\n\t"
305             "addl       %2,%0\n\t"
306
307             "movq       (%0),%%mm1\n\t"
308             "movq       %%mm1,%%mm2\n\t"
309             "punpckhbw  %%mm7,%%mm1\n\t"
310             "punpcklbw  %%mm7,%%mm2\n\t"
311             "paddw      96(%1),%%mm2\n\t"
312             "paddw      104(%1),%%mm1\n\t"
313             "packuswb   %%mm1,%%mm2\n\t"
314             "movq       %%mm2,(%0)\n\t"
315             "addl       %2,%0\n\t"
316
317             "movq       (%0),%%mm1\n\t"
318             "movq       %%mm1,%%mm2\n\t"
319             "punpckhbw  %%mm7,%%mm1\n\t"
320             "punpcklbw  %%mm7,%%mm2\n\t"
321             "paddw      112(%1),%%mm2\n\t"
322             "paddw      120(%1),%%mm1\n\t"
323             "packuswb   %%mm1,%%mm2\n\t"
324             "movq       %%mm2,(%0)\n\t"
325
326             //"emms"
327             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
328 }
329 #endif
330
331
332 /*****************************************************************************
333  * CopyBlock : copy a block
334  *****************************************************************************/
335 #ifndef HAVE_MMX
336 static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
337                                   yuv_data_t * p_data, int i_incr )
338 {
339     int i_x, i_y;
340
341     for( i_y = 0; i_y < 8; i_y++ )
342     {
343         for( i_x = 0; i_x < 8; i_x++ )
344         {
345             *p_data++ = p_vdec->pi_crop[*p_block++];
346         }
347         p_data += i_incr;
348     }
349 }
350 #else
351 static  __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
352                                           yuv_data_t * p_data, int i_incr )
353 {
354     asm __volatile__ (
355             "movq         (%1),%%mm0\n\t"
356             "packuswb   8(%1),%%mm0\n\t"
357             "movq        %%mm0,(%0)\n\t"
358             "addl           %2,%0\n\t"
359
360             "movq        16(%1),%%mm0\n\t"
361             "packuswb   24(%1),%%mm0\n\t"
362             "movq        %%mm0,(%0)\n\t"
363             "addl           %2,%0\n\t"
364
365             "movq        32(%1),%%mm0\n\t"
366             "packuswb   40(%1),%%mm0\n\t"
367             "movq        %%mm0,(%0)\n\t"
368             "addl           %2,%0\n\t"
369
370             "movq        48(%1),%%mm0\n\t"
371             "packuswb   56(%1),%%mm0\n\t"
372             "movq        %%mm0,(%0)\n\t"
373             "addl           %2,%0\n\t"
374
375             "movq        64(%1),%%mm0\n\t"
376             "packuswb   72(%1),%%mm0\n\t"
377             "movq        %%mm0,(%0)\n\t"
378             "addl           %2,%0\n\t"
379
380             "movq        80(%1),%%mm0\n\t"
381             "packuswb   88(%1),%%mm0\n\t"
382             "movq        %%mm0,(%0)\n\t"
383             "addl           %2,%0\n\t"
384
385             "movq        96(%1),%%mm0\n\t"
386             "packuswb   104(%1),%%mm0\n\t"
387             "movq        %%mm0,(%0)\n\t"
388             "addl           %2,%0\n\t"
389
390             "movq        112(%1),%%mm0\n\t"
391             "packuswb   120(%1),%%mm0\n\t"
392             "movq        %%mm0,(%0)\n\t"
393             //"emms"
394             :"+r" (p_data): "r" (p_block),"r" (i_incr+8));
395 }
396 #endif
397
398
399 /*****************************************************************************
400  * vdec_DecodeMacroblock : decode a macroblock of a picture
401  *****************************************************************************/
/*
 * DECODEBLOCKSC: decode the luminance and chrominance blocks of p_mb.
 * Expands to a braced block that uses the caller's p_vdec and p_mb. Block 0
 * is matched against bit (3 + i_chroma_nb_blocks) of the coded block
 * pattern, each following block against the next lower bit. A coded block
 * goes through its IDCT function and is then written with OPBLOCK, using
 * the luminance stride for blocks 0-3 and the chrominance stride after.
 */
#define DECODEBLOCKSC( OPBLOCK )                                        \
{                                                                       \
    const int       i_top_bit = 3 + p_mb->i_chroma_nb_blocks;           \
    int             i_blk;                                              \
                                                                        \
    /* luminance */                                                     \
    for( i_blk = 0; i_blk < 4; i_blk++ )                                \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern                               \
               & (1 << (i_top_bit - i_blk))) )                          \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
                                                                        \
    /* chrominance */                                                   \
    for( i_blk = 4; i_blk < 4 + p_mb->i_chroma_nb_blocks; i_blk++ )     \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern                               \
               & (1 << (i_top_bit - i_blk))) )                          \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_c_stride );          \
    }                                                                   \
}
449
/*
 * DECODEBLOCKSBW: decode only the four luminance blocks of p_mb.
 * Expands to a braced block that uses the caller's p_vdec and p_mb. Block 0
 * is matched against bit (3 + i_chroma_nb_blocks) of the coded block
 * pattern, each following block against the next lower bit. A coded block
 * goes through its IDCT function and is then written with OPBLOCK using the
 * luminance stride. Chrominance blocks are left untouched.
 */
#define DECODEBLOCKSBW( OPBLOCK )                                       \
{                                                                       \
    const int       i_top_bit = 3 + p_mb->i_chroma_nb_blocks;           \
    int             i_blk;                                              \
                                                                        \
    /* luminance */                                                     \
    for( i_blk = 0; i_blk < 4; i_blk++ )                                \
    {                                                                   \
        if( !(p_mb->i_coded_block_pattern                               \
               & (1 << (i_top_bit - i_blk))) )                          \
        {                                                               \
            continue;                                                   \
        }                                                               \
                                                                        \
        /* Inverse DCT (ISO/IEC 13818-2 section Annex A) */             \
        (p_mb->pf_idct[i_blk])( p_vdec, p_mb->ppi_blocks[i_blk],        \
                                p_mb->pi_sparse_pos[i_blk] );           \
                                                                        \
        /* Adding prediction and coefficient data (ISO/IEC 13818-2      \
         * section 7.6.8) */                                            \
        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
    }                                                                   \
}
476
477 void vdec_DecodeMacroblockC ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
478 {
479     if( !(p_mb->i_mb_type & MB_INTRA) )
480     {
481         /*
482          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
483          */
484         if( p_mb->pf_motion == 0 )
485         {
486             intf_ErrMsg( "vdec error: pf_motion set to NULL" );
487         }
488         else
489         {
490             p_mb->pf_motion( p_mb );
491         }
492
493         DECODEBLOCKSC( AddBlock )
494     }
495     else
496     {
497         DECODEBLOCKSC( CopyBlock )
498     }
499
500     /*
501      * Decoding is finished, release the macroblock and free
502      * unneeded memory.
503      */
504     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
505 }
506
507 void vdec_DecodeMacroblockBW ( vdec_thread_t *p_vdec, macroblock_t * p_mb )
508 {
509     if( !(p_mb->i_mb_type & MB_INTRA) )
510     {
511         /*
512          * Motion Compensation (ISO/IEC 13818-2 section 7.6)
513          */
514         if( p_mb->pf_motion == 0 )
515         {
516             intf_ErrMsg( "vdec error: pf_motion set to NULL" );
517         }
518         else
519         {
520             p_mb->pf_motion( p_mb );
521         }
522
523         DECODEBLOCKSBW( AddBlock )
524     }
525     else
526     {
527         DECODEBLOCKSBW( CopyBlock )
528     }
529
530     /*
531      * Decoding is finished, release the macroblock and free
532      * unneeded memory.
533      */
534     vpar_ReleaseMacroblock( &p_vdec->p_vpar->vfifo, p_mb );
535 }
536
537
538
539 /*****************************************************************************
540  * RunThread: video decoder thread
541  *****************************************************************************
542  * Video decoder thread. This function does only return when the thread is
543  * terminated.
544  *****************************************************************************/
545 static void RunThread( vdec_thread_t *p_vdec )
546 {
547     intf_DbgMsg("vdec debug: running video decoder thread (%p) (pid == %i)",
548                 p_vdec, getpid());
549
550     /*
551      * Initialize thread and free configuration
552      */
553     p_vdec->b_error = vdec_InitThread( p_vdec );
554     if( p_vdec->b_error )
555     {
556         return;
557     }
558     p_vdec->b_run = 1;
559
560     /*
561      * Main loop - it is not executed if an error occured during
562      * initialization
563      */
564     while( (!p_vdec->b_die) && (!p_vdec->b_error) )
565     {
566         macroblock_t *          p_mb;
567
568         if( (p_mb = vpar_GetMacroblock( &p_vdec->p_vpar->vfifo )) != NULL )
569         {
570             vdec_DecodeMacroblockC ( p_vdec, p_mb );
571         }
572     }
573
574     /*
575      * Error loop
576      */
577     if( p_vdec->b_error )
578     {
579         ErrorThread( p_vdec );
580     }
581
582     /* End of thread */
583     EndThread( p_vdec );
584     p_vdec->b_run = 0;
585 }