From: Sam Hocevar Date: Mon, 26 Feb 2001 17:41:11 +0000 (+0000) Subject: * Optimizations done to the SPU decoder. Now the RLE is expanded at X-Git-Tag: 0.2.70~99 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=a20df577fa18badca1d14d3934056bba0578aae1;p=vlc * Optimizations done to the SPU decoder. Now the RLE is expanded at decoding time, not rendering time. The main loop in vout_RenderSPU is now 20 lines long. --- diff --git a/src/interface/main.c b/src/interface/main.c index 7a82964776..0517de79bc 100644 --- a/src/interface/main.c +++ b/src/interface/main.c @@ -331,14 +331,14 @@ int main( int i_argc, char *ppsz_argv[], char *ppsz_env[] ) } /* - * Open video device and start aout thread + * Open video device and start vout thread */ if( p_main->b_video ) { p_main->p_vout = vout_CreateThread( NULL ); if( p_main->p_vout == NULL ) { - /* On error during video initialization, switch off audio */ + /* On error during video initialization, switch off video */ intf_ErrMsg( "vout error: video initialization failed," " video is deactivated" ); p_main->b_video = 0; diff --git a/src/spu_decoder/spu_decoder.c b/src/spu_decoder/spu_decoder.c index e1a43fab34..a5e9ed2701 100644 --- a/src/spu_decoder/spu_decoder.c +++ b/src/spu_decoder/spu_decoder.c @@ -53,8 +53,8 @@ static void EndThread ( spudec_thread_t * ); static int SyncPacket ( spudec_thread_t * ); static void ParsePacket ( spudec_thread_t * ); -static int ParseRLE ( spudec_thread_t *, subpicture_t * ); static int ParseControlSequences( spudec_thread_t *, subpicture_t * ); +static int ParseRLE ( u8 *, subpicture_t * ); /***************************************************************************** * spudec_CreateThread: create a spu decoder thread @@ -212,8 +212,8 @@ static int SyncPacket( spudec_thread_t *p_spudec ) /* The total SPU packet size, often bigger than a PS packet */ p_spudec->i_spu_size = GetBits( &p_spudec->bit_stream, 16 ); - /* The RLE stuff size */ - p_spudec->i_rle_size = GetBits( &p_spudec->bit_stream, 16 ); + /* The RLE stuff size (remove 4 because we just read 32 bits) */ + p_spudec->i_rle_size = GetBits( &p_spudec->bit_stream, 16 ) - 4; /* If the values we got are a bit strange, skip packet */ if( p_spudec->i_rle_size >= p_spudec->i_spu_size ) @@ -233,34 +233,54 @@ static int SyncPacket( spudec_thread_t *p_spudec ) static void ParsePacket( spudec_thread_t *p_spudec ) { subpicture_t * p_spu; + u8 * p_source; + + /* We cannot display a subpicture with no date */ + if( DECODER_FIFO_START(*p_spudec->p_fifo)->i_pts == 0 ) + { + return; + } /* Allocate the subpicture internal data. */ p_spu = vout_CreateSubPicture( p_spudec->p_vout, DVD_SUBPICTURE, - p_spudec->i_rle_size ); + p_spudec->i_rle_size * 4 ); + /* Rationale for the "p_spudec->i_rle_size * 4": we are going to + * expand the RLE stuff so that we won't need to read nibbles later + * on. This will speed things up a lot. Plus, we won't need to do + * this stupid interlacing stuff. */ if( p_spu == NULL ) { return; } - /* Get display time */ + /* Get display time now. If we do it later, we may miss a PTS. */ p_spu->begin_date = p_spu->end_date = DECODER_FIFO_START(*p_spudec->p_fifo)->i_pts; - if( ParseRLE( p_spudec, p_spu ) ) + /* Allocate the temporary buffer we will parse */ + p_source = malloc( p_spudec->i_rle_size ); + + if( p_source == NULL ) { - /* There was a parse error, delete the subpicture */ + intf_ErrMsg( "spudec error: could not allocate p_source" ); vout_DestroySubPicture( p_spudec->p_vout, p_spu ); return; } + /* Get RLE data */ + GetChunk( &p_spudec->bit_stream, p_source, p_spudec->i_rle_size ); + +#if 0 /* Dump the subtitle info */ - intf_WarnHexDump( 0, p_spu->p_data, p_spudec->i_rle_size - 4 ); + intf_WarnHexDump( 0, p_spu->p_data, p_spudec->i_rle_size ); +#endif /* Getting the control part */ if( ParseControlSequences( p_spudec, p_spu ) ) { /* There was a parse error, delete the subpicture */ + free( p_source ); vout_DestroySubPicture( p_spudec->p_vout, p_spu ); return; } @@ -270,32 +290,22 @@ static void ParsePacket( spudec_thread_t *p_spudec ) p_spu->i_width, p_spu->i_height, p_spu->i_x, p_spu->i_y, p_spu->type.spu.i_offset[0], p_spu->type.spu.i_offset[1] ); - /* SPU is finished - we can tell the video output to display it */ - vout_DisplaySubPicture( p_spudec->p_vout, p_spu ); -} - -/***************************************************************************** - * ParseRLE: parse the RLE part of the subtitle - ***************************************************************************** - * This part parses the subtitle graphical data and stores it in a more - * convenient structure for later decoding. For more information on the - * subtitles format, see http://sam.zoy.org/doc/dvd/subtitles/index.html - * TODO: pre-parse the RLE stuff here. - *****************************************************************************/ -static int ParseRLE( spudec_thread_t *p_spudec, subpicture_t * p_spu ) -{ - /* Get RLE data, skip 4 bytes for the first two read offsets */ - GetChunk( &p_spudec->bit_stream, p_spu->p_data, p_spudec->i_rle_size - 4 ); - - if( p_spudec->p_fifo->b_die ) + if( ParseRLE( p_source, p_spu ) ) { - return( 1 ); + /* There was a parse error, delete the subpicture */ + free( p_source ); + vout_DestroySubPicture( p_spudec->p_vout, p_spu ); + return; } - return( 0 ); + /* SPU is finished - we can tell the video output to display it */ + vout_DisplaySubPicture( p_spudec->p_vout, p_spu ); + + /* Clean up */ + free( p_source ); } - /***************************************************************************** +/***************************************************************************** * ParseControlSequences: parse all SPU control sequences ***************************************************************************** * This is the most important part in SPU decoding. We get dates, palette @@ -305,11 +315,15 @@ static int ParseRLE( spudec_thread_t *p_spudec, subpicture_t * p_spu ) static int ParseControlSequences( spudec_thread_t *p_spudec, subpicture_t * p_spu ) { - int i_index = p_spudec->i_rle_size; + /* Our current index in the SPU packet */ + int i_index = p_spudec->i_rle_size + 4; + + /* The next start-of-control-sequence index and the previous one */ int i_next_index = 0, i_prev_index; - int i_date; + /* Command time and date */ u8 i_command; + int i_date; do { @@ -320,7 +334,7 @@ static int ParseControlSequences( spudec_thread_t *p_spudec, i_prev_index = i_next_index; i_next_index = GetBits( &p_spudec->bit_stream, 16 ); - /* Current offset */ + /* Skip what we just read */ i_index += 4; do @@ -463,3 +477,127 @@ static int ParseControlSequences( spudec_thread_t *p_spudec, return( 0 ); } +/***************************************************************************** + * ParseRLE: parse the RLE part of the subtitle + ***************************************************************************** + * This part parses the subtitle graphical data and stores it in a more + * convenient structure for later decoding. For more information on the + * subtitles format, see http://sam.zoy.org/doc/dvd/subtitles/index.html + *****************************************************************************/ +static int ParseRLE( u8 *p_source, subpicture_t * p_spu ) +{ + int i_code; + int i_id = 0; + + int i_width = p_spu->i_width; + int i_height = p_spu->i_height; + int i_x = 0, i_y = 0; + + u16 *p_dest = (u16 *)p_spu->p_data; + int pi_index[2]; + + pi_index[0] = p_spu->type.spu.i_offset[0] << 1; + pi_index[1] = p_spu->type.spu.i_offset[1] << 1; + + while( i_y < i_height ) + { + i_code = GetNibble( p_source, pi_index + i_id ); + + if( i_code >= 0x04 ) + { + found_code: + + if( ((i_code >> 2) + i_x + i_y * i_width) > i_height * i_width ) + { + intf_ErrMsg( "spudec error: out of bounds, %i at (%i,%i) is " + "out of %ix%i", + i_code >> 2, i_x, i_y, i_width, i_height); + return( 1 ); + } + else + { + /* Store the code */ + *p_dest++ = i_code; + + i_x += i_code >> 2; + } + + if( i_x > i_width ) + { + intf_ErrMsg( "spudec error: i_x overflowed, %i > %i", + i_x, i_width ); + return( 1 ); + } + + if( i_x == i_width ) + { + /* byte-align the stream */ + if( pi_index[i_id] & 0x1 ) + { + pi_index[i_id]++; + } + + i_id = ~i_id & 0x1; + + i_y++; + i_x = 0; + + if( i_y > i_height ) + { + intf_ErrMsg( "spudec error: i_y overflowed at EOL, " + "%i > %i", i_y, i_height ); + return( 1 ); + } + } + + continue; + } + + i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); + + /* 00 11 xx cc */ + if( i_code >= 0x10 ) + { + /* 00 01 xx cc */ + goto found_code; + } + + i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); + + /* 00 00 11 xx xx cc */ + if( i_code >= 0x040 ) + { + goto found_code; /* 00 00 01 xx xx cc */ + } + + i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); + + if( i_code >= 0x0100 ) /* 00 00 00 11 xx xx xx cc */ + { + goto found_code; /* 00 00 00 01 xx xx xx cc */ + } + + if( i_code & ~0x0003 ) + { + /* We have a boo boo ! */ + intf_ErrMsg( "spudec error: unknown code 0x%.4x", i_code ); + return( 1 ); + } + else + { + /* If the 14 first bits are 0, then it's a new line */ + i_code |= ( i_width - i_x ) << 2; + goto found_code; + } + } + + /* FIXME: we shouldn't need these padding bytes */ + while( i_y < i_height ) + { + *p_dest++ = i_width << 2; + i_y++; + } + + return( 0 ); +} + diff --git a/src/spu_decoder/spu_decoder.h b/src/spu_decoder/spu_decoder.h index 12c175334d..47ce700a27 100644 --- a/src/spu_decoder/spu_decoder.h +++ b/src/spu_decoder/spu_decoder.h @@ -64,6 +64,21 @@ typedef struct spudec_thread_s #define SPU_CMD_SET_OFFSETS 0x06 #define SPU_CMD_END 0xff +/***************************************************************************** + * GetNibble: read a nibble from a source packet. + *****************************************************************************/ +static __inline__ u8 GetNibble( u8 *p_source, int *pi_index ) +{ + if( *pi_index & 0x1 ) + { + return( p_source[(*pi_index)++ >> 1] & 0xf ); + } + else + { + return( p_source[(*pi_index)++ >> 1] >> 4 ); + } +} + /***************************************************************************** * Prototypes *****************************************************************************/ diff --git a/src/video_output/video_spu.c b/src/video_output/video_spu.c index e00b392d72..42ec9b586c 100644 --- a/src/video_output/video_spu.c +++ b/src/video_output/video_spu.c @@ -37,157 +37,70 @@ #include "video_output.h" #include "video_spu.h" -#include "intf_msg.h" - /* FIXME: fake palette - the real one has to be sought in the .IFO */ static int p_palette[4] = { 0x0000, 0xffff, 0x5555, 0x8888 }; -static __inline__ u8 GetNibble( u8 *p_source, int *pi_index ) -{ - if( *pi_index & 0x1 ) - { - return( p_source[(*pi_index)++ >> 1] & 0xf ); - } - else - { - return( p_source[(*pi_index)++ >> 1] >> 4 ); - } -} - /***************************************************************************** * vout_RenderSPU: draw an SPU on a picture ***************************************************************************** - * + * This is a fast implementation of the subpicture drawing code. The data + * has been preprocessed once in spu_decoder.c, so we don't need to parse the + * RLE buffer again and again. Most sanity checks are done in spu_decoder.c + * so that this routine can be as fast as possible. *****************************************************************************/ void vout_RenderSPU( vout_buffer_t *p_buffer, subpicture_t *p_spu, int i_bytes_per_pixel, int i_bytes_per_line ) { - int i_code = 0x00; - int i_id = 0; - int i_color; + int i_len, i_color; + u16 *p_source = (u16 *)p_spu->p_data; - /* SPU size */ - int i_width = p_spu->i_width; - int i_height = p_spu->i_height; - - /* Drawing coordinates inside the SPU */ - int i_x = 0, i_y = 0; + /* FIXME: we need a way to get 720 and 576 from the stream */ + int i_xscale = ( p_buffer->i_pic_width << 6 ) / 720; + int i_yscale = ( p_buffer->i_pic_height << 6 ) / 576; - /* FIXME: we need a way to get this information from the stream */ - #define TARGET_WIDTH 720 - #define TARGET_HEIGHT 576 - int i_xscale = ( p_buffer->i_pic_width << 6 ) / TARGET_WIDTH; - int i_yscale = ( p_buffer->i_pic_height << 6 ) / TARGET_HEIGHT; + int i_width = p_spu->i_width * i_xscale; + int i_height = p_spu->i_height * i_yscale; - u8 *p_source = p_spu->p_data; - u8 *p_dest; - int pi_index[2]; - - pi_index[0] = ( p_spu->type.spu.i_offset[0] - 2 ) << 1; - pi_index[1] = ( p_spu->type.spu.i_offset[1] - 2 ) << 1; + int i_x = 0, i_y = 0; - p_dest = p_buffer->p_data - /* add the picture coordinates and the SPU coordinates */ - + ( p_buffer->i_pic_x + ((p_spu->i_x * i_xscale) >> 6)) - * i_bytes_per_pixel - + ( p_buffer->i_pic_y + ((p_spu->i_y * i_yscale) >> 6)) - * i_bytes_per_line; + u8 *p_dest = p_buffer->p_data + /* Add the picture coordinates and the SPU coordinates */ + + ( p_buffer->i_pic_x + ((p_spu->i_x * i_xscale) >> 6)) + * i_bytes_per_pixel + + ( p_buffer->i_pic_y + ((p_spu->i_y * i_yscale) >> 6)) + * i_bytes_per_line; - while( pi_index[0] >> 1 < p_spu->type.spu.i_offset[1] ) + /* Draw until we reach the bottom of the subtitle */ + while( i_y < i_height ) { - i_code = GetNibble( p_source, pi_index + i_id ); + /* Get RLE information */ + i_len = i_xscale * ( *p_source >> 2 ); + i_color = *p_source++ & 0x3; - if( i_code >= 0x04 ) + /* Draw the line */ + if( i_color ) { - found_code: + memset( p_dest + i_bytes_per_pixel * ( i_x >> 6 ) + + i_bytes_per_line * ( i_y >> 6 ), + p_palette[ i_color ], + i_bytes_per_pixel * ( ( i_len >> 6 ) + 1 ) ); - if( ((i_code >> 2) + i_x + i_y * i_width) > i_height * i_width ) + /* Duplicate line if needed */ + if( i_yscale > 1 << 6 ) { - intf_DbgMsg ( "video_spu: invalid draw request ! %d %d", - i_code >> 2, i_height * i_width - - ( (i_code >> 2) + i_x + i_y * i_width ) ); - return; - } - else - { - if( (i_color = i_code & 0x3) ) - { - u8 *p_target = p_dest - + i_bytes_per_pixel * ((i_x * i_xscale) >> 6) - + i_bytes_per_line * ((i_y * i_yscale) >> 6); - - memset( p_target, p_palette[i_color], - ((((i_code >> 2) * i_xscale) >> 6) + 1) - * i_bytes_per_pixel ); - } - i_x += i_code >> 2; + memset( p_dest + i_bytes_per_pixel * ( i_x >> 6 ) + + i_bytes_per_line * ( ( i_y >> 6 ) + 1 ), + p_palette[ i_color ], + i_bytes_per_pixel * ( ( i_len >> 6 ) + 1 ) ); } - - if( i_x >= i_width ) - { - /* byte-align the stream */ - if( pi_index[i_id] & 0x1 ) - { - pi_index[i_id]++; - } - - i_id = ~i_id & 0x1; - - i_y++; - i_x = 0; - - if( i_width <= i_y ) - { - return; - } - } - continue; - } - - i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); - - if( i_code >= 0x10 ) /* 00 11 xx cc */ - { - goto found_code; /* 00 01 xx cc */ - } - - i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); - if( i_code >= 0x040 ) /* 00 00 11 xx xx cc */ - { - goto found_code; /* 00 00 01 xx xx cc */ - } - - i_code = ( i_code << 4 ) + GetNibble( p_source, pi_index + i_id ); - if( i_code >= 0x0100 ) /* 00 00 00 11 xx xx xx cc */ - { - goto found_code; /* 00 00 00 01 xx xx xx cc */ } - if( i_code & ~0x0003 ) - { - /* we have a boo boo ! */ - intf_ErrMsg( "video_spu: unknown code 0x%x " - "(dest %x side %x, x=%d, y=%d)", - i_code, p_source, i_id, i_x, i_y ); - return; - } - else + /* Check for end of line */ + i_x += i_len; + if( i_x >= i_width ) { - /* if the 14 first bits are 0, then it's a new line */ - if( pi_index[i_id] & 0x1 ) - { - pi_index[i_id]++; - } - - i_id = ~i_id & 0x1; - - i_y++; + i_y += i_yscale; i_x = 0; - - if( i_width <= i_y ) - { - return; - } } } }