/*****************************************************************************
* Preamble
*****************************************************************************/
-#include <string.h> /* strerror() */
-#include <stdlib.h> /* malloc(), free() */
#include <vlc/vlc.h>
#include <vlc_vout.h>
return 0;
}
-/* Following functions are local */
+#if 0
+/*
+ * read_cycles: return the CPU time-stamp counter (x86 RDTSC instruction).
+ *
+ * RDTSC leaves the low 32 bits of the counter in EAX and the high 32 bits
+ * in EDX. The halves are fetched through separate "=a"/"=d" outputs and
+ * merged by hand: the single "=A" constraint only denotes the EDX:EAX pair
+ * on 32-bit x86, whereas on x86-64 it lets the compiler pick either RAX or
+ * RDX and the upper half of the counter would be lost.
+ */
+static inline unsigned long long read_cycles(void)
+{
+    unsigned int i_low, i_high;
+    __asm__ __volatile__("rdtsc" : "=a" (i_low), "=d" (i_high));
+
+    return ((unsigned long long)i_high << 32) | i_low;
+}
+#endif
+/* Following functions are local */
/*****************************************************************************
* I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
*****************************************************************************/
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- __asm__ __volatile__("emms" :: );
+ /* re-enable FPU registers */
+ MMX_END;
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
/*
- ** SSE2 128 bytes fetch/store instructions are faster
+ ** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
+
if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((int)p_line2|(int)p_y2))) )
+ ((intptr_t)p_line2|(intptr_t)p_y2))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
p_line2 += i_dest_margin;
}
}
+ /* make sure all SSE2 stores are visible thereafter */
+ SSE2_END;
+
#endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
}
MMX_CALL( MMX_YUV420_YVYU );
#endif
}
+ for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+ {
+ C_YUV420_YVYU( );
+ }
p_y1 += i_source_margin;
p_y2 += i_source_margin;
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- __asm__ __volatile__("emms" :: );
+ /* re-enable FPU registers */
+ MMX_END;
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
/*
- ** SSE2 128 bytes fetch/store instructions are faster
+ ** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((int)p_line2|(int)p_y2))) )
+ ((intptr_t)p_line2|(intptr_t)p_y2))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
p_line2 += i_dest_margin;
}
}
+ /* make sure all SSE2 stores are visible thereafter */
+ SSE2_END;
#endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
}
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- __asm__ __volatile__("emms" :: );
+ /* re-enable FPU registers */
+ MMX_END;
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
/*
- ** SSE2 128 bytes fetch/store instructions are faster
+ ** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((int)p_line2|(int)p_y2))) )
+ ((intptr_t)p_line2|(intptr_t)p_y2))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
p_line2 += i_dest_margin;
}
}
+ /* make sure all SSE2 stores are visible thereafter */
+ SSE2_END;
#endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
}
MMX_CALL( MMX_YUV420_UYVY );
#endif
}
+ for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
+ {
+ C_YUV420_UYVY( );
+ }
p_y1 += i_source_margin;
p_y2 += i_source_margin;
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- __asm__ __volatile__("emms" :: );
+ /* re-enable FPU registers */
+ MMX_END;
#endif
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
/*
- ** SSE2 128 bytes fetch/store instructions are faster
+ ** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((int)p_line2|(int)p_y2))) )
+ ((intptr_t)p_line2|(intptr_t)p_y2))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
p_line2 += i_dest_margin;
}
}
+ /* make sure all SSE2 stores are visible thereafter */
+ SSE2_END;
#endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
}
#endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)