* fixed 32bpp MMX YUV, made the comments clearer, removed an emms.
* removed all "*vlc" aliases except "gvlc" and "fbvlc". The others sucked.
* new --synchro flag which lets you force which images are decoded.
+ * removed an unused variable in the MMX YUVs.
+ * fixed 32bpp MMX YUV, made the comments clearer, removed an emms.
Tue Aug 8 11:24:01 CEST 2000
0.1.99f :
Urgency: Important
Description: Fix 32bpp MMX YUV
The MMX 32bpp YUV function is buggy.
-Status: Todo
+Status: Done 13 Aug 2000 (sam)
Task: 0x11
Difficulty: Hard
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
void SetOffset( int i_width, int i_height, int i_pic_width, int i_pic_height,
- boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset )
+ boolean_t *pb_h_scaling, int *pi_v_scaling, int *p_offset,
+ boolean_t b_double )
{
int i_x; /* x position in destination */
int i_scale_count; /* modulo counter */
#define PALETTE_TABLE_SIZE 2176 /* YUV -> 8bpp palette lookup table */
/* argument lists for YUV functions */
-#define YUV_ARGS_8BPP p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_y, \
-yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
-int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
+#define YUV_ARGS( word_size ) p_vout_thread_t p_vout, word_size *p_pic, \
+yuv_data_t *p_y, yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, \
+int i_pic_width, int i_pic_height, int i_pic_line_width, \
+int i_matrix_coefficients
-#define YUV_ARGS_16BPP p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *p_y, \
-yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
-int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
-
-#define YUV_ARGS_24BPP p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, \
-yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
-int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
-
-#define YUV_ARGS_32BPP p_vout_thread_t p_vout, u32 *p_pic, yuv_data_t *p_y, \
-yuv_data_t *p_u, yuv_data_t *p_v, int i_width, int i_height, int i_pic_width, \
-int i_pic_height, int i_pic_line_width, int i_matrix_coefficients
+#define YUV_ARGS_8BPP YUV_ARGS( u8 )
+#define YUV_ARGS_16BPP YUV_ARGS( u16 )
+#define YUV_ARGS_24BPP YUV_ARGS( u32 )
+#define YUV_ARGS_32BPP YUV_ARGS( u32 )
/*****************************************************************************
* Local prototypes
void SetYUV ( vout_thread_t *p_vout );
void SetOffset ( int i_width, int i_height, int i_pic_width,
int i_pic_height, boolean_t *pb_h_scaling,
- int *pi_v_scaling, int *p_offset );
+ int *pi_v_scaling, int *p_offset,
+ boolean_t b_double );
void ConvertY4Gray8 ( YUV_ARGS_8BPP );
void ConvertYUV420RGB8 ( YUV_ARGS_8BPP );
int i_x, i_y; /* horizontal and vertical indexes */
int i_scale_count; /* scale modulo counter */
int i_chroma_width; /* chroma width */
- u16 * p_yuv; /* base conversion table */
u16 * p_pic_start; /* beginning of the current line for copy */
u16 * p_buffer_start; /* conversion buffer start */
u16 * p_buffer; /* conversion buffer pointer */
*/
i_pic_line_width -= i_pic_width;
i_chroma_width = i_width / 2;
- p_yuv = p_vout->yuv.yuv.p_rgb16;
p_buffer_start = p_vout->yuv.p_buffer;
p_offset_start = p_vout->yuv.p_offset;
SetOffset( i_width, i_height, i_pic_width, i_pic_height,
- &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
+ &b_horizontal_scaling, &i_vertical_scaling, p_offset_start, 0 );
/*
* Perform conversion
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
}
- __asm__( "emms" );
}
/*****************************************************************************
int i_x, i_y; /* horizontal and vertical indexes */
int i_scale_count; /* scale modulo counter */
int i_chroma_width; /* chroma width */
- u32 * p_yuv; /* base conversion table */
u32 * p_pic_start; /* beginning of the current line for copy */
u32 * p_buffer_start; /* conversion buffer start */
u32 * p_buffer; /* conversion buffer pointer */
*/
i_pic_line_width -= i_pic_width;
i_chroma_width = i_width / 2;
- p_yuv = p_vout->yuv.yuv.p_rgb32;
p_buffer_start = p_vout->yuv.p_buffer;
p_offset_start = p_vout->yuv.p_offset;
SetOffset( i_width, i_height, i_pic_width, i_pic_height,
- &b_horizontal_scaling, &i_vertical_scaling, p_offset_start );
+ &b_horizontal_scaling, &i_vertical_scaling, p_offset_start, 0 );
/*
* Perform conversion
for ( i_x = i_width / 8; i_x--; )
{
- __asm__( ".align 8" MMX_INIT_32
+ __asm__( ".align 8"
+ MMX_INIT_32
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
- __asm__( ".align 8" MMX_YUV_ADD MMX_YUV_MUL MMX_UNPACK_32
+ __asm__( ".align 8"
+ MMX_YUV_MUL
+ MMX_YUV_ADD
+ MMX_UNPACK_32
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
p_y += 8;
}
SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
+ SCALE_HEIGHT( 420, 4 );
}
- __asm__( "emms" );
}
/*****************************************************************************
punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0 \n\
"
+/*
+ * convert RGB plane to RGB 16 bits,
+ * mm0 -> B, mm1 -> R, mm2 -> G on entry,
+ * mm0 -> packed RGB16 pixel 0-3 (stored at 0(%3)),
+ * mm5 -> packed RGB16 pixel 4-7 (stored at 8(%3))
+ */
+
#define MMX_UNPACK_16 " \n\
\n\
# mask unneeded bits off \n\
-pand mmx_redmask, %%mm0 # b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 \n\
-pand mmx_grnmask, %%mm2 # g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 \n\
-pand mmx_redmask, %%mm1 # r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 \n\
-psrlw mmx_blueshift,%%mm0 # 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 \n\
+pand mmx_redmask, %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
+pand mmx_grnmask, %%mm2 # g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\
+pand mmx_redmask, %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
+psrlw mmx_blueshift,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\
pxor %%mm4, %%mm4 # zero mm4 \n\
movq %%mm0, %%mm5 # Copy B7-B0 \n\
movq %%mm2, %%mm7 # Copy G7-G0 \n\
\n\
# convert rgb24 plane to rgb16 pack for pixel 0-3 \n\
-punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 \n\
-punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 \n\
-psllw mmx_blueshift,%%mm2 # 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 \n\
+punpcklbw %%mm4, %%mm2 # ________ ________ g7g6g5g4 g3g2____ \n\
+punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
+psllw mmx_blueshift,%%mm2 # ________ __g7g6g5 g4g3g2__ ________ \n\
por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movq %%mm0, (%3) # store pixel 0-3 \n\
\n\
# convert rgb24 plane to rgb16 pack for pixel 4-7 \n\
-punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 \n\
-punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 \n\
-psllw mmx_blueshift,%%mm7 # 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 \n\
-movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
+punpckhbw %%mm4, %%mm7 # ________ ________ g7g6g5g4 g3g2____ \n\
+punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
+psllw mmx_blueshift,%%mm7 # ________ __g7g6g5 g4g3g2__ ________ \n\
+movd 4(%1), %%mm0 # Load 4 Cb __ __ __ __ u3 u2 u1 u0 \n\
por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
-movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
+movd 4(%2), %%mm1 # Load 4 Cr __ __ __ __ v3 v2 v1 v0 \n\
movq %%mm5, 8(%3) # store pixel 4-7 \n\
"
punpckhbw %%mm2, %%mm4 # G7 B7 G6 B6 G5 B5 G4 B4 \n\
punpckhwd %%mm5, %%mm4 # 00 R7 G7 B7 00 R6 G6 B6 \n\
movq %%mm4, 24(%3) # Store ARGB7 ARGB6 \n\
-movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
-movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
-pxor %%mm4, %%mm4 # zero mm4 \n\
-movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
+ \n\
+#movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
+#movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
+#pxor %%mm4, %%mm4 # zero mm4 \n\
+#movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
"
-