/*
- * yuv2rgb_mmx.c, Software YUV to RGB coverter with Intel MMX "technology"
+ * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology"
*
* Copyright (C) 2000, Silicon Integrated System Corp.
- * All Rights Reserved.
*
* Author: Olie Lho <ollie@sis.com.tw>
*
+ * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
+ * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
+ * context / deglobalize stuff by Michael Niedermayer
+ *
* This file is part of mpeg2dec, a free MPEG-2 video decoder
*
* mpeg2dec is free software; you can redistribute it and/or modify
* You should have received a copy of the GNU General Public License
* along with mpeg2dec; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
- * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
- * context / deglobalize stuff by Michael Niedermayer
*/
#undef MOVNTQ
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
for (y= 0; y<srcSliceH; y++ ) {
- uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
- uint8_t *_py = src[0] + y*srcStride[0];
- uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
- uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
+ uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
+ uint8_t *py = src[0] + y*srcStride[0];
+ uint8_t *pu = src[1] + (y>>1)*srcStride[1];
+ uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
b5Dither= dither8[y&1];
"add $4, %0 \n\t"
" js 1b \n\t"
- : "+r" (index), "+r" (_image)
- : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
+ : "+r" (index), "+r" (image)
+ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
for (y= 0; y<srcSliceH; y++ ) {
- uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
- uint8_t *_py = src[0] + y*srcStride[0];
- uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
- uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
+ uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
+ uint8_t *py = src[0] + y*srcStride[0];
+ uint8_t *pu = src[1] + (y>>1)*srcStride[1];
+ uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
b5Dither= dither8[y&1];
"add $16, %1 \n\t"
"add $4, %0 \n\t"
" js 1b \n\t"
- : "+r" (index), "+r" (_image)
- : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
+ : "+r" (index), "+r" (image)
+ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) {
- uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
- uint8_t *_py = src[0] + y*srcStride[0];
- uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
- uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
+ uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
+ uint8_t *py = src[0] + y*srcStride[0];
+ uint8_t *pu = src[1] + (y>>1)*srcStride[1];
+ uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
"add $4, %0 \n\t"
" js 1b \n\t"
- : "+r" (index), "+r" (_image)
- : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
+ : "+r" (index), "+r" (image)
+ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) {
- uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
- uint8_t *_py = src[0] + y*srcStride[0];
- uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
- uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
+ uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
+ uint8_t *py = src[0] + y*srcStride[0];
+ uint8_t *pu = src[1] + (y>>1)*srcStride[1];
+ uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
"add $4, %0 \n\t"
" js 1b \n\t"
- : "+r" (index), "+r" (_image)
- : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
+ : "+r" (index), "+r" (image)
+ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}