+}
+#else
+/*****************************************************************************
+ * CopyBlock : store an 8x8 block of 16-bit values as bytes (MMX version)
+ *****************************************************************************
+ * p_block : source; 8 rows of 16 bytes each are read (offsets 0 to 127),
+ *           every row being handled as eight signed 16-bit words.
+ * p_data  : destination; 8 bytes are written per row.
+ * i_incr  : the destination pointer is advanced by i_incr + 8 bytes after
+ *           each of the first seven rows.
+ * p_vdec  : not used by this implementation.
+ *
+ * Each row is packed to bytes with unsigned saturation (packuswb).
+ * %mm0 is used as a scratch register and no "emms" is issued here.
+ * NOTE(review): presumably the MMX state is cleared elsewhere once a whole
+ * picture or macroblock has been processed - confirm against the callers.
+ * The "addl" on the pointer operand makes this code 32-bit x86 only.
+ *****************************************************************************/
+static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
+                                  yuv_data_t * p_data, int i_incr )
+{
+    __asm__ __volatile__ (
+    /* row 0 */
+    "movq       (%1),%%mm0\n\t"
+    "packuswb   8(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 1 */
+    "movq       16(%1),%%mm0\n\t"
+    "packuswb   24(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 2 */
+    "movq       32(%1),%%mm0\n\t"
+    "packuswb   40(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 3 */
+    "movq       48(%1),%%mm0\n\t"
+    "packuswb   56(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 4 */
+    "movq       64(%1),%%mm0\n\t"
+    "packuswb   72(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 5 */
+    "movq       80(%1),%%mm0\n\t"
+    "packuswb   88(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 6 */
+    "movq       96(%1),%%mm0\n\t"
+    "packuswb   104(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    "addl       %2,%0\n\t"
+
+    /* row 7 : last row, the destination pointer is not advanced any more */
+    "movq       112(%1),%%mm0\n\t"
+    "packuswb   120(%1),%%mm0\n\t"
+    "movq       %%mm0,(%0)\n\t"
+    /* "emms" is intentionally not executed here */
+    : "+r" (p_data)
+    : "r" (p_block), "r" (i_incr+8)
+    /* The asm reads *p_block and writes *p_data through plain register
+     * operands, so the compiler must be told that memory is accessed. */
+    : "memory" );
+}
+#endif
+
+
+/*****************************************************************************
+ * vdec_DecodeMacroblock : decode a macroblock of a picture
+ *****************************************************************************/
+/*
+ * DECODEBLOCKSC( OPBLOCK ) : decode the luminance and chrominance blocks
+ *
+ * Expands to a brace-enclosed compound statement that expects p_vdec and
+ * p_mb to be visible in the enclosing scope. Blocks are visited in index
+ * order; block 0 is tested against bit (3 + p_mb->i_chroma_nb_blocks) of
+ * p_mb->i_coded_block_pattern and every following block against the next
+ * lower bit. For each block whose bit is set, its pf_idct function is called
+ * and then OPBLOCK( p_vdec, block, destination, stride ) is applied, with
+ * i_addb_l_stride for blocks 0 to 3 and i_addb_c_stride for the others.
+ */
+#define DECODEBLOCKSC( OPBLOCK )                                         \
+{                                                                       \
+    int i_blk = 0;                                                      \
+    int i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);                    \
+                                                                        \
+    /* luminance: blocks 0..3, written with the luma stride */          \
+    while( i_blk < 4 )                                                  \
+    {                                                                   \
+        if( p_mb->i_coded_block_pattern & i_bit )                       \
+        {                                                               \
+            /* inverse DCT (ISO/IEC 13818-2 Annex A) */                 \
+            (p_mb->pf_idct[i_blk])( p_vdec,                             \
+                                    p_mb->ppi_blocks[i_blk],            \
+                                    p_mb->pi_sparse_pos[i_blk] );       \
+                                                                        \
+            /* add prediction and coefficients (section 7.6.8) */       \
+            OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                   \
+                     p_mb->p_data[i_blk], p_mb->i_addb_l_stride );      \
+        }                                                               \
+        i_blk++;                                                        \
+        i_bit >>= 1;                                                    \
+    }                                                                   \
+                                                                        \
+    /* chrominance: blocks 4 and up, written with the chroma stride */  \
+    while( i_blk < 4 + p_mb->i_chroma_nb_blocks )                       \
+    {                                                                   \
+        if( p_mb->i_coded_block_pattern & i_bit )                       \
+        {                                                               \
+            /* inverse DCT (ISO/IEC 13818-2 Annex A) */                 \
+            (p_mb->pf_idct[i_blk])( p_vdec,                             \
+                                    p_mb->ppi_blocks[i_blk],            \
+                                    p_mb->pi_sparse_pos[i_blk] );       \
+                                                                        \
+            /* add prediction and coefficients (section 7.6.8) */       \
+            OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                   \
+                     p_mb->p_data[i_blk], p_mb->i_addb_c_stride );      \
+        }                                                               \
+        i_blk++;                                                        \
+        i_bit >>= 1;                                                    \
+    }                                                                   \
+}
+
+/*
+ * DECODEBLOCKSBW( OPBLOCK ) : decode the four luminance blocks only
+ *
+ * Expands to a brace-enclosed compound statement that expects p_vdec and
+ * p_mb to be visible in the enclosing scope. Only blocks 0 to 3 are visited;
+ * the mask nevertheless starts at bit (3 + p_mb->i_chroma_nb_blocks) of
+ * p_mb->i_coded_block_pattern and moves one bit down per block. For each
+ * block whose bit is set, its pf_idct function is called and then
+ * OPBLOCK( p_vdec, block, destination, p_mb->i_addb_l_stride ) is applied.
+ */
+#define DECODEBLOCKSBW( OPBLOCK )                                        \
+{                                                                       \
+    int i_blk;                                                          \
+    int i_bit = 1 << (3 + p_mb->i_chroma_nb_blocks);                    \
+                                                                        \
+    /* luminance only: chroma blocks are never touched here */          \
+    for( i_blk = 0; i_blk < 4; i_blk++, i_bit >>= 1 )                   \
+    {                                                                   \
+        if( !(p_mb->i_coded_block_pattern & i_bit) )                    \
+        {                                                               \
+            continue;   /* block not coded: nothing to do */            \
+        }                                                               \
+                                                                        \
+        /* inverse DCT (ISO/IEC 13818-2 Annex A) */                     \
+        (p_mb->pf_idct[i_blk])( p_vdec,                                 \
+                                p_mb->ppi_blocks[i_blk],                \
+                                p_mb->pi_sparse_pos[i_blk] );           \
+                                                                        \
+        /* add prediction and coefficients (section 7.6.8) */           \
+        OPBLOCK( p_vdec, p_mb->ppi_blocks[i_blk],                       \
+                 p_mb->p_data[i_blk], p_mb->i_addb_l_stride );          \
+    }                                                                   \
+}