"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
-#define YSCALEYUV2RGBX \
+#define YSCALEYUV2PACKEDX \
"xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\
"1: \n\t"\
"movl %1, %%edx \n\t" /* -chrFilterSize */\
- "movl %3, %%ebx \n\t" /* chrMmxFilter+lumFilterSize */\
- "movl %7, %%ecx \n\t" /* chrSrc+lumFilterSize */\
+ "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\
+ "movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\
"pxor %%mm3, %%mm3 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"2: \n\t"\
"paddw %%mm5, %%mm7 \n\t"\
"addl $1, %%edx \n\t"\
" jnz 2b \n\t"\
-\
+
+
+#define YSCALEYUV2RGBX \
+ YSCALEYUV2PACKEDX\
"psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
\
"packuswb %%mm1, %%mm1 \n\t"
+#define YSCALEYUV2PACKED /* asm fragment: vertical two-row blend for packed YUV output; leaves luma words in mm1/mm7 and chroma words in mm3 (U) and mm4 (V); opens loop label 1, whose tail is supplied by WRITEYUY2 */ \
+ "movd %6, %%mm6 \n\t" /* mm6 = yalpha1 (operand %6) in the low dword */\
+ "punpcklwd %%mm6, %%mm6 \n\t" /* the two unpacks replicate the low word ... */\
+ "punpcklwd %%mm6, %%mm6 \n\t" /* ... into all four words of mm6 */\
+ "psraw $3, %%mm6 \n\t" /* keep yalpha1>>3 so that pmulhw below gives (diff*yalpha1)>>19 */\
+ "movq %%mm6, 3968(%2) \n\t" /* park the luma weight at byte offset 3968 from operand %2 (uvbuf0 at the call site) */\
+ "movd %7, %%mm5 \n\t" /* mm5 = uvalpha1 (operand %7) in the low dword */\
+ "punpcklwd %%mm5, %%mm5 \n\t" /* replicate the low word ... */\
+ "punpcklwd %%mm5, %%mm5 \n\t" /* ... into all four words of mm5 */\
+ "psraw $3, %%mm5 \n\t" /* keep uvalpha1>>3, same scaling as the luma weight */\
+ "movq %%mm5, 3976(%2) \n\t" /* park the chroma weight at byte offset 3976 from operand %2 */\
+ "xorl %%eax, %%eax \n\t" /* eax = 0: loop counter, advanced by WRITEYUY2 */\
+ ".balign 16 \n\t" /* align the loop entry */\
+ "1: \n\t" /* loop head; the backward jump to it is in WRITEYUY2 */\
+ "movq (%2, %%eax), %%mm2 \n\t" /* mm2 = 4 words at uvbuf0 + eax bytes (U) */\
+ "movq (%3, %%eax), %%mm3 \n\t" /* mm3 = 4 words at uvbuf1 + eax bytes (U) */\
+ "movq 4096(%2, %%eax), %%mm5 \n\t" /* mm5 = 4 words at uvbuf0 + eax + 4096 bytes, i.e. 2048 words further (V) */\
+ "movq 4096(%3, %%eax), %%mm4 \n\t" /* mm4 = 4 words at uvbuf1 + eax + 4096 bytes (V) */\
+ "psubw %%mm3, %%mm2 \n\t" /* mm2 = uvbuf0 - uvbuf1 (U) */\
+ "psubw %%mm4, %%mm5 \n\t" /* mm5 = uvbuf0 - uvbuf1 (V) */\
+ "movq 3976(%2), %%mm0 \n\t" /* mm0 = uvalpha1>>3, reloaded from the scratch slot written above */\
+ "pmulhw %%mm0, %%mm2 \n\t" /* mm2 = ((uvbuf0 - uvbuf1) * (uvalpha1>>3)) >> 16 (U) */\
+ "pmulhw %%mm0, %%mm5 \n\t" /* mm5 = ((uvbuf0 - uvbuf1) * (uvalpha1>>3)) >> 16 (V) */\
+ "psraw $7, %%mm3 \n\t" /* mm3 = uvbuf1 >> 7 (U) */\
+ "psraw $7, %%mm4 \n\t" /* mm4 = uvbuf1 >> 7 (V) */\
+ "paddw %%mm2, %%mm3 \n\t" /* mm3 = (uvbuf1>>7) + weighted difference = blended U */\
+ "paddw %%mm5, %%mm4 \n\t" /* mm4 = (uvbuf1>>7) + weighted difference = blended V */\
+ "movq (%0, %%eax, 2), %%mm0 \n\t" /* mm0 = buf0[eax..eax+3] (byte offset 2*eax) */\
+ "movq (%1, %%eax, 2), %%mm1 \n\t" /* mm1 = buf1[eax..eax+3] */\
+ "movq 8(%0, %%eax, 2), %%mm6 \n\t" /* mm6 = buf0[eax+4..eax+7] */\
+ "movq 8(%1, %%eax, 2), %%mm7 \n\t" /* mm7 = buf1[eax+4..eax+7] */\
+ "psubw %%mm1, %%mm0 \n\t" /* mm0 = buf0 - buf1, first four pixels */\
+ "psubw %%mm7, %%mm6 \n\t" /* mm6 = buf0 - buf1, next four pixels */\
+ "pmulhw 3968(%2), %%mm0 \n\t" /* mm0 = ((buf0 - buf1) * (yalpha1>>3)) >> 16 */\
+ "pmulhw 3968(%2), %%mm6 \n\t" /* mm6 = ((buf0 - buf1) * (yalpha1>>3)) >> 16 */\
+ "psraw $7, %%mm1 \n\t" /* mm1 = buf1 >> 7, first four pixels */\
+ "psraw $7, %%mm7 \n\t" /* mm7 = buf1 >> 7, next four pixels */\
+ "paddw %%mm0, %%mm1 \n\t" /* mm1 = (buf1>>7) + weighted difference = blended luma, pixels 0..3 */\
+ "paddw %%mm6, %%mm7 \n\t" /* mm7 = (buf1>>7) + weighted difference = blended luma, pixels 4..7 */\
+
#define YSCALEYUV2RGB \
"movd %6, %%mm6 \n\t" /*yalpha1*/\
"punpcklwd %%mm6, %%mm6 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
-
+
+#define YSCALEYUV2PACKED1 /* asm fragment: single-row variant with no vertical blend; reads only operands %0 (buf0) and %2 (uvbuf0), leaves luma in mm1/mm7 and chroma in mm3 (U) and mm4 (V); opens loop label 1, whose tail is supplied by WRITEYUY2 */ \
+ "xorl %%eax, %%eax \n\t" /* eax = 0: loop counter, advanced by WRITEYUY2 */\
+ ".balign 16 \n\t" /* align the loop entry */\
+ "1: \n\t" /* loop head; the backward jump to it is in WRITEYUY2 */\
+ "movq (%2, %%eax), %%mm3 \n\t" /* mm3 = 4 words at uvbuf0 + eax bytes (U) */\
+ "movq 4096(%2, %%eax), %%mm4 \n\t" /* mm4 = 4 words at uvbuf0 + eax + 4096 bytes, i.e. 2048 words further (V) */\
+ "psraw $7, %%mm3 \n\t" /* U >> 7 */ \
+ "psraw $7, %%mm4 \n\t" /* V >> 7 */ \
+ "movq (%0, %%eax, 2), %%mm1 \n\t" /* mm1 = buf0[eax..eax+3] (byte offset 2*eax) */\
+ "movq 8(%0, %%eax, 2), %%mm7 \n\t" /* mm7 = buf0[eax+4..eax+7] */\
+ "psraw $7, %%mm1 \n\t" /* luma >> 7, pixels 0..3 */ \
+ "psraw $7, %%mm7 \n\t" /* luma >> 7, pixels 4..7 */ \
+
#define YSCALEYUV2RGB1 \
"xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"
+#define YSCALEYUV2PACKED1b /* asm fragment: single luma row (operand %0) with chroma taken as the sum of the two chroma rows (operands %2, %3) shifted right by 8; leaves luma in mm1/mm7 and chroma in mm3 (U) and mm4 (V); opens loop label 1, whose tail is supplied by WRITEYUY2 */ \
+ "xorl %%eax, %%eax \n\t" /* eax = 0: loop counter, advanced by WRITEYUY2 */\
+ ".balign 16 \n\t" /* align the loop entry */\
+ "1: \n\t" /* loop head; the backward jump to it is in WRITEYUY2 */\
+ "movq (%2, %%eax), %%mm2 \n\t" /* mm2 = 4 words at uvbuf0 + eax bytes (U) */\
+ "movq (%3, %%eax), %%mm3 \n\t" /* mm3 = 4 words at uvbuf1 + eax bytes (U) */\
+ "movq 4096(%2, %%eax), %%mm5 \n\t" /* mm5 = 4 words at uvbuf0 + eax + 4096 bytes, i.e. 2048 words further (V) */\
+ "movq 4096(%3, %%eax), %%mm4 \n\t" /* mm4 = 4 words at uvbuf1 + eax + 4096 bytes (V) */\
+ "paddw %%mm2, %%mm3 \n\t" /* mm3 = uvbuf0 + uvbuf1 (U) */\
+ "paddw %%mm5, %%mm4 \n\t" /* mm4 = uvbuf0 + uvbuf1 (V) */\
+ "psrlw $8, %%mm3 \n\t" /* logical shift: (sum of both rows) >> 8, i.e. their mean >> 7 (U) */ \
+ "psrlw $8, %%mm4 \n\t" /* same for V */ \
+ "movq (%0, %%eax, 2), %%mm1 \n\t" /* mm1 = buf0[eax..eax+3] (byte offset 2*eax) */\
+ "movq 8(%0, %%eax, 2), %%mm7 \n\t" /* mm7 = buf0[eax+4..eax+7] */\
+ "psraw $7, %%mm1 \n\t" /* luma >> 7, pixels 0..3 */ \
+ "psraw $7, %%mm7 \n\t" /* luma >> 7, pixels 4..7 */
+
// do vertical chrominance interpolation
#define YSCALEYUV2RGB1b \
"xorl %%eax, %%eax \n\t"\
#define WRITEBGR24 WRITEBGR24MMX
#endif
+#define WRITEYUY2 /* asm fragment: packs luma words (mm1, mm7) and chroma words (mm3 = U, mm4 = V) into 16 bytes ordered Y U Y V, stores them through operand %4, and closes the loop opened at label 1 by the preceding macro */ \
+ "packuswb %%mm3, %%mm3 \n\t" /* saturate the 4 U words to unsigned bytes */\
+ "packuswb %%mm4, %%mm4 \n\t" /* saturate the 4 V words to unsigned bytes */\
+ "packuswb %%mm7, %%mm1 \n\t" /* mm1 = 8 luma bytes: Y0..Y3 from mm1, Y4..Y7 from mm7 */\
+ "punpcklbw %%mm4, %%mm3 \n\t" /* mm3 = U0 V0 U1 V1 U2 V2 U3 V3 */\
+ "movq %%mm1, %%mm7 \n\t" /* copy the luma bytes so both halves can be interleaved */\
+ "punpcklbw %%mm3, %%mm1 \n\t" /* mm1 = Y0 U0 Y1 V0 Y2 U1 Y3 V1 */\
+ "punpckhbw %%mm3, %%mm7 \n\t" /* mm7 = Y4 U2 Y5 V2 Y6 U3 Y7 V3 */\
+\
+ MOVNTQ(%%mm1, (%4, %%eax, 2)) /* first 8 output bytes at operand %4 + 2*eax; MOVNTQ is defined elsewhere, presumably an 8-byte store */\
+ MOVNTQ(%%mm7, 8(%4, %%eax, 2)) /* next 8 output bytes */\
+\
+ "addl $8, %%eax \n\t" /* 8 pixels handled per iteration */\
+ "cmpl %5, %%eax \n\t" /* compare the counter with operand %5 (dstW at the call sites) */\
+ " jb 1b \n\t" /* loop while eax < dstW (unsigned compare) */
+
+
static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW,
/**
* vertical scale YV12 to RGB
*/
-static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- uint8_t *dest, int dstW, int dstFormat, int16_t * lumMmxFilter, int16_t * chrMmxFilter)
+ uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY)
{
-/* if(flags&SWS_FULL_UV_IPOL)
- {
-//FIXME
- }//FULL_UV_IPOL
- else*/
+ switch(c->dstFormat)
{
#ifdef HAVE_MMX
- if(dstFormat == IMGFMT_BGR32) //FIXME untested
+ case IMGFMT_BGR32:
{
asm volatile(
YSCALEYUV2RGBX
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
);
}
- else if(dstFormat == IMGFMT_BGR24) //FIXME untested
+ break;
+ case IMGFMT_BGR24:
{
asm volatile(
YSCALEYUV2RGBX
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
);
}
- else if(dstFormat==IMGFMT_BGR15)
+ break;
+ case IMGFMT_BGR15:
{
asm volatile(
YSCALEYUV2RGBX
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
);
}
- else if(dstFormat==IMGFMT_BGR16)
+ break;
+ case IMGFMT_BGR16:
{
asm volatile(
YSCALEYUV2RGBX
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
);
}
-#else
-yuv2rgbXinC(lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrSrc, chrFilterSize,
- dest, dstW, dstFormat);
+ break;
+ case IMGFMT_YUY2:
+ {
+ asm volatile(
+ YSCALEYUV2PACKEDX
+ /* at this point mm1/mm7 hold luma and mm3/mm4 hold U/V words (the inputs WRITEYUY2 expects); drop 3 fraction bits before packing */
+
+ "psraw $3, %%mm3 \n\t"
+ "psraw $3, %%mm4 \n\t"
+ "psraw $3, %%mm1 \n\t"
+ "psraw $3, %%mm7 \n\t"
+ WRITEYUY2
+ :: "m" (-lumFilterSize), "m" (-chrFilterSize),
+ "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
+ "r" (dest), "m" (dstW),
+ "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
+ : "%eax", "%ebx", "%ecx", "%edx", "%esi"
+ );
+ }
+ break;
#endif
- } //!FULL_UV_IPOL
+ default:
+ yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+ chrFilter, chrSrc, chrFilterSize,
+ dest, dstW, dstY);
+ break;
+ }
}
-
/**
* vertical bilinear scale YV12 to RGB
*/
-static inline void RENAME(yuv2rgb2)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstFormat, int flags)
+static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
+ uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
int yalpha1=yalpha^4095;
int uvalpha1=uvalpha^4095;
+ int i;
+#if 0 //isnt used
if(flags&SWS_FULL_CHR_H_INT)
{
-
-#ifdef HAVE_MMX
- if(dstFormat==IMGFMT_BGR32)
+ switch(dstFormat)
{
+#ifdef HAVE_MMX
+ case IMGFMT_BGR32:
asm volatile(
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
+ break;
+ case IMGFMT_BGR24:
asm volatile(
FULL_YSCALEYUV2RGB
"m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx"
);
- }
- else if(dstFormat==IMGFMT_BGR15)
- {
+ break;
+ case IMGFMT_BGR15:
asm volatile(
FULL_YSCALEYUV2RGB
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
+ break;
+ case IMGFMT_BGR16:
asm volatile(
FULL_YSCALEYUV2RGB
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
-#else
+ break;
+#endif
+ case IMGFMT_RGB32:
+#ifndef HAVE_MMX
+ case IMGFMT_BGR32:
+#endif
if(dstFormat==IMGFMT_BGR32)
{
int i;
clip_table15r[(Y + yuvtab_3343[V]) >>13];
}
}
-#endif
}//FULL_UV_IPOL
else
{
+#endif // if 0
#ifdef HAVE_MMX
- if(dstFormat==IMGFMT_BGR32)
- {
+ switch(c->dstFormat)
+ {
+ case IMGFMT_BGR32:
asm volatile(
YSCALEYUV2RGB
WRITEBGR32
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
+ return;
+ case IMGFMT_BGR24:
asm volatile(
"movl %4, %%ebx \n\t"
YSCALEYUV2RGB
"m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx"
);
- }
- else if(dstFormat==IMGFMT_BGR15)
- {
+ return;
+ case IMGFMT_BGR15:
asm volatile(
YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
+ return;
+ case IMGFMT_BGR16:
asm volatile(
YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
-#else
- if(dstFormat==IMGFMT_BGR32)
- {
- int i;
-#ifdef WORDS_BIGENDIAN
- dest++;
-#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
- dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
- dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
- dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
- dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
- }
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
- int i;
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[0]=clip_table[((Y1 + Cb) >>13)];
- dest[1]=clip_table[((Y1 + Cg) >>13)];
- dest[2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[3]=clip_table[((Y2 + Cb) >>13)];
- dest[4]=clip_table[((Y2 + Cg) >>13)];
- dest[5]=clip_table[((Y2 + Cr) >>13)];
- dest+=6;
- }
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
- int i;
-#ifdef DITHER1XBPP
- static int ditherb1=1<<14;
- static int ditherg1=1<<13;
- static int ditherr1=2<<14;
- static int ditherb2=3<<14;
- static int ditherg2=3<<13;
- static int ditherr2=0<<14;
-
- ditherb1 ^= (1^2)<<14;
- ditherg1 ^= (1^2)<<13;
- ditherr1 ^= (1^2)<<14;
- ditherb2 ^= (3^0)<<14;
- ditherg2 ^= (3^0)<<13;
- ditherr2 ^= (3^0)<<14;
-#else
- const int ditherb1=0;
- const int ditherg1=0;
- const int ditherr1=0;
- const int ditherb2=0;
- const int ditherg2=0;
- const int ditherr2=0;
-#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- ((uint16_t*)dest)[i] =
- clip_table16b[(Y1 + Cb + ditherb1) >>13] |
- clip_table16g[(Y1 + Cg + ditherg1) >>13] |
- clip_table16r[(Y1 + Cr + ditherr1) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table16b[(Y2 + Cb + ditherb2) >>13] |
- clip_table16g[(Y2 + Cg + ditherg2) >>13] |
- clip_table16r[(Y2 + Cr + ditherr2) >>13];
- }
- }
- else if(dstFormat==IMGFMT_BGR15)
- {
- int i;
-#ifdef DITHER1XBPP
- static int ditherb1=1<<14;
- static int ditherg1=1<<14;
- static int ditherr1=2<<14;
- static int ditherb2=3<<14;
- static int ditherg2=3<<14;
- static int ditherr2=0<<14;
-
- ditherb1 ^= (1^2)<<14;
- ditherg1 ^= (1^2)<<14;
- ditherr1 ^= (1^2)<<14;
- ditherb2 ^= (3^0)<<14;
- ditherg2 ^= (3^0)<<14;
- ditherr2 ^= (3^0)<<14;
-#else
- const int ditherb1=0;
- const int ditherg1=0;
- const int ditherr1=0;
- const int ditherb2=0;
- const int ditherg2=0;
- const int ditherr2=0;
-#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
+ return;
+ case IMGFMT_YUY2:
+ asm volatile(
+ YSCALEYUV2PACKED
+ WRITEYUY2
- ((uint16_t*)dest)[i] =
- clip_table15b[(Y1 + Cb + ditherb1) >>13] |
- clip_table15g[(Y1 + Cg + ditherg1) >>13] |
- clip_table15r[(Y1 + Cr + ditherr1) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table15b[(Y2 + Cb + ditherb2) >>13] |
- clip_table15g[(Y2 + Cg + ditherg2) >>13] |
- clip_table15r[(Y2 + Cr + ditherr2) >>13];
- }
- }
-#endif
- } //!FULL_UV_IPOL
+ :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
+ "m" (yalpha1), "m" (uvalpha1)
+ : "%eax"
+ );
+ return;
+ default: break;
+ }
+#endif //HAVE_MMX
+YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
}
/**
* YV12 to RGB without scaling or interpolating
*/
-static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags)
+static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
+ uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
int uvalpha1=uvalpha^4095;
const int yalpha1=0;
+ int i;
+
+ uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
+ const int yalpha= 4096; //FIXME ...
if(flags&SWS_FULL_CHR_H_INT)
{
- RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstFormat, flags);
+ RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
return;
}
#ifdef HAVE_MMX
if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
{
- if(dstFormat==IMGFMT_BGR32)
+ switch(dstFormat)
{
+ case IMGFMT_BGR32:
asm volatile(
YSCALEYUV2RGB1
WRITEBGR32
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
+ return;
+ case IMGFMT_BGR24:
asm volatile(
"movl %4, %%ebx \n\t"
YSCALEYUV2RGB1
"m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx"
);
- }
- else if(dstFormat==IMGFMT_BGR15)
- {
+ return;
+ case IMGFMT_BGR15:
asm volatile(
YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
+ return;
+ case IMGFMT_BGR16:
asm volatile(
YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
+ return;
+ case IMGFMT_YUY2:
+ asm volatile(
+ YSCALEYUV2PACKED1
+ WRITEYUY2
+ :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
+ "m" (yalpha1), "m" (uvalpha1)
+ : "%eax"
+ );
+ return;
}
}
else
{
- if(dstFormat==IMGFMT_BGR32)
+ switch(dstFormat)
{
+ case IMGFMT_BGR32:
asm volatile(
YSCALEYUV2RGB1b
WRITEBGR32
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
+ return;
+ case IMGFMT_BGR24:
asm volatile(
"movl %4, %%ebx \n\t"
YSCALEYUV2RGB1b
"m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx"
);
- }
- else if(dstFormat==IMGFMT_BGR15)
- {
+ return;
+ case IMGFMT_BGR15:
asm volatile(
YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
+ return;
+ case IMGFMT_BGR16:
asm volatile(
YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"m" (yalpha1), "m" (uvalpha1)
: "%eax"
);
+ return;
+ case IMGFMT_YUY2:
+ asm volatile(
+ YSCALEYUV2PACKED1b
+ WRITEYUY2
+ :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
+ "m" (yalpha1), "m" (uvalpha1)
+ : "%eax"
+ );
+ return;
}
}
-#else
-//FIXME write 2 versions (for even & odd lines)
-
- if(dstFormat==IMGFMT_BGR32)
- {
- int i;
-#ifdef WORDS_BIGENDIAN
- dest++;
-#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[buf0[i]>>7];
- int Y2=yuvtab_2568[buf0[i+1]>>7];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
- dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
- dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
- dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
- dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
- }
- }
- else if(dstFormat==IMGFMT_BGR24)
- {
- int i;
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[buf0[i]>>7];
- int Y2=yuvtab_2568[buf0[i+1]>>7];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[0]=clip_table[((Y1 + Cb) >>13)];
- dest[1]=clip_table[((Y1 + Cg) >>13)];
- dest[2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[3]=clip_table[((Y2 + Cb) >>13)];
- dest[4]=clip_table[((Y2 + Cg) >>13)];
- dest[5]=clip_table[((Y2 + Cr) >>13)];
- dest+=6;
- }
- }
- else if(dstFormat==IMGFMT_BGR16)
- {
- int i;
-#ifdef DITHER1XBPP
- static int ditherb1=1<<14;
- static int ditherg1=1<<13;
- static int ditherr1=2<<14;
- static int ditherb2=3<<14;
- static int ditherg2=3<<13;
- static int ditherr2=0<<14;
-
- ditherb1 ^= (1^2)<<14;
- ditherg1 ^= (1^2)<<13;
- ditherr1 ^= (1^2)<<14;
- ditherb2 ^= (3^0)<<14;
- ditherg2 ^= (3^0)<<13;
- ditherr2 ^= (3^0)<<14;
-#else
- const int ditherb1=0;
- const int ditherg1=0;
- const int ditherr1=0;
- const int ditherb2=0;
- const int ditherg2=0;
- const int ditherr2=0;
#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[buf0[i]>>7];
- int Y2=yuvtab_2568[buf0[i+1]>>7];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- ((uint16_t*)dest)[i] =
- clip_table16b[(Y1 + Cb + ditherb1) >>13] |
- clip_table16g[(Y1 + Cg + ditherg1) >>13] |
- clip_table16r[(Y1 + Cr + ditherr1) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table16b[(Y2 + Cb + ditherb2) >>13] |
- clip_table16g[(Y2 + Cg + ditherg2) >>13] |
- clip_table16r[(Y2 + Cr + ditherr2) >>13];
- }
- }
- else if(dstFormat==IMGFMT_BGR15)
+ if( uvalpha < 2048 )
{
- int i;
-#ifdef DITHER1XBPP
- static int ditherb1=1<<14;
- static int ditherg1=1<<14;
- static int ditherr1=2<<14;
- static int ditherb2=3<<14;
- static int ditherg2=3<<14;
- static int ditherr2=0<<14;
-
- ditherb1 ^= (1^2)<<14;
- ditherg1 ^= (1^2)<<14;
- ditherr1 ^= (1^2)<<14;
- ditherb2 ^= (3^0)<<14;
- ditherg2 ^= (3^0)<<14;
- ditherr2 ^= (3^0)<<14;
-#else
- const int ditherb1=0;
- const int ditherg1=0;
- const int ditherr1=0;
- const int ditherb2=0;
- const int ditherg2=0;
- const int ditherr2=0;
-#endif
- for(i=0; i<dstW-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[buf0[i]>>7];
- int Y2=yuvtab_2568[buf0[i+1]>>7];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- ((uint16_t*)dest)[i] =
- clip_table15b[(Y1 + Cb + ditherb1) >>13] |
- clip_table15g[(Y1 + Cg + ditherg1) >>13] |
- clip_table15r[(Y1 + Cr + ditherr1) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table15b[(Y2 + Cb + ditherb2) >>13] |
- clip_table15g[(Y2 + Cg + ditherg2) >>13] |
- clip_table15r[(Y2 + Cr + ditherr2) >>13];
- }
+ YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
+ }else{
+ YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
}
-#endif
}
//FIXME yuy2* can read up to 7 samples too many
{
if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
{
- if((dstY&1) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+ const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+ if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
{
int16_t *lumBuf = lumPixBuf[0];
{
int chrAlpha= vChrFilter[2*dstY+1];
- RENAME(yuv2rgb1)(*lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
- dest, dstW, chrAlpha, dstFormat, flags);
+ RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+ dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
{
int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1];
- RENAME(yuv2rgb2)(*lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
- dest, dstW, lumAlpha, chrAlpha, dstFormat, flags);
+ RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+ dest, dstW, lumAlpha, chrAlpha, dstY);
}
else //General RGB
{
- RENAME(yuv2rgbX)(
+ RENAME(yuv2packedX)(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
- dest, dstW, dstFormat,
- lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4);
+ dest, dstW,
+ lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY);
}
}
}
{
int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- if(isPlanarYUV(dstFormat)) //YV12
+ if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
{
- if(dstY&1) uDest=vDest= NULL;
+ const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+ if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
yuv2yuvXinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
{
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
- yuv2rgbXinC(
+ yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
- dest, dstW, dstFormat);
+ dest, dstW, dstY);
}
}
}