#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
-#undef EMMS
-#undef SFENCE
-
-#if HAVE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS "femms"
-#else
-#define EMMS "emms"
-#endif
#if HAVE_AMD3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW " # nop"
#endif
-#if HAVE_MMX2
-#define SFENCE "sfence"
-#else
-#define SFENCE " # nop"
-#endif
-
#if HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif HAVE_AMD3DNOW
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
#if HAVE_ALTIVEC
-#include "swscale_altivec_template.c"
+#include "ppc/swscale_altivec_template.c"
#endif
#define YSCALEYUV2YV12X(x, offset, dest, width) \
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
-static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t **alpSrc,
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
#if HAVE_MMX
#endif //!HAVE_ALTIVEC
}
-static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
}
-static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, int16_t *alpSrc,
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
int i;
if (c->flags & SWS_ACCURATE_RND){
while(p--){
- if (dst[p]){
- __asm__ volatile(
- YSCALEYUV2YV121_ACCURATE
- :: "r" (src[p]), "r" (dst[p] + counter[p]),
- "g" (-counter[p])
- : "%"REG_a
- );
- }
+ if (dst[p]){
+ __asm__ volatile(
+ YSCALEYUV2YV121_ACCURATE
+ :: "r" (src[p]), "r" (dst[p] + counter[p]),
+ "g" (-counter[p])
+ : "%"REG_a
+ );
+ }
}
}else{
while(p--){
- if (dst[p]){
- __asm__ volatile(
- YSCALEYUV2YV121
- :: "r" (src[p]), "r" (dst[p] + counter[p]),
- "g" (-counter[p])
- : "%"REG_a
- );
- }
+ if (dst[p]){
+ __asm__ volatile(
+ YSCALEYUV2YV121
+ :: "r" (src[p]), "r" (dst[p] + counter[p]),
+ "g" (-counter[p])
+ : "%"REG_a
+ );
+ }
}
}
return;
/**
* vertical scale YV12 to RGB
*/
-static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
#if HAVE_MMX
x86_reg dummy=0;
YSCALEYUV2PACKEDX_END
}else{
- YSCALEYUV2PACKEDX_ACCURATE
- YSCALEYUV2RGBX
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ YSCALEYUV2PACKEDX_ACCURATE
+ YSCALEYUV2RGBX
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- YSCALEYUV2PACKEDX_END
+ YSCALEYUV2PACKEDX_END
}
return;
case PIX_FMT_BGR24:
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}else{
- YSCALEYUV2PACKEDX
- YSCALEYUV2RGBX
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- YSCALEYUV2PACKEDX_END
+ YSCALEYUV2PACKEDX
+ YSCALEYUV2RGBX
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ YSCALEYUV2PACKEDX_END
}
return;
case PIX_FMT_BGR24:
#if HAVE_ALTIVEC
/* The following list of supported dstFormat values should
match what's found in the body of ff_yuv2packedX_altivec() */
- if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf
+ if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
(c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
/**
* vertical bilinear scale YV12 to RGB
*/
-static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint16_t *abuf0, uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
int yalpha1=4095- yalpha;
int uvalpha1=4095-uvalpha;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
#if ARCH_X86_64
__asm__ volatile(
- "mov %4, %%"REG_b" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%4, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
"a" (&c->redDither)
,"r" (abuf0), "r" (abuf1)
- : "%"REG_b, "%"REG_BP
+ : "%"REG_BP
);
#else
*(uint16_t **)(&c->u_temp)=abuf0;
);
#endif
}else{
- __asm__ volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB(%%REGBP, %5)
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ __asm__ volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB(%%REGBP, %5)
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
}
return;
case PIX_FMT_BGR24:
/**
* YV12 to RGB without scaling or interpolating
*/
-static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
+static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;
- uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ...
if (flags&SWS_FULL_CHR_H_INT)
{
- RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
+ c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return;
}
"a" (&c->redDither)
);
}else{
- __asm__ volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ __asm__ volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1(%%REGBP, %5)
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
}
return;
case PIX_FMT_BGR24:
"a" (&c->redDither)
);
}else{
- __asm__ volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ __asm__ volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
}
return;
case PIX_FMT_BGR24:
//FIXME yuy2* can read up to 7 samples too many
-static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if HAVE_MMX
__asm__ volatile(
#endif
}
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if HAVE_MMX
__asm__ volatile(
assert(src1 == src2);
}
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{ /* Copies every odd-indexed byte (src[2*i + 1]) of src1 into dstU and of src2 into dstV, width bytes each; "unused" is never read. */
+#if HAVE_MMX
+ __asm__ volatile( /* NOTE(review): handles 8 outputs per pass, so a width that is not a multiple of 8 appears to touch memory past the end of the buffers - confirm they are padded */
+ "mov %0, %%"REG_a" \n\t" /* REG_a = -width; the index counts up towards zero */
+ "1: \n\t"
+ "movq (%1, %%"REG_a",2), %%mm0 \n\t" /* 16 bytes of src1 -> mm0, mm1 */
+ "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
+ "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* 16 bytes of src2 -> mm2, mm3 */
+ "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
+ "psrlw $8, %%mm0 \n\t" /* keep only the upper byte of each 16-bit word */
+ "psrlw $8, %%mm1 \n\t"
+ "psrlw $8, %%mm2 \n\t"
+ "psrlw $8, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t" /* narrow 8 words to 8 bytes (dstU data) */
+ "packuswb %%mm3, %%mm2 \n\t" /* narrow 8 words to 8 bytes (dstV data) */
+ "movq %%mm0, (%3, %%"REG_a") \n\t"
+ "movq %%mm2, (%4, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t" /* 8 output bytes per iteration */
+ " js 1b \n\t" /* repeat while the index is still negative */
+ : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) /* pointers are pre-advanced to the end so the negative index walks forward */
+ : "%"REG_a
+ );
+#else /* !HAVE_MMX: plain C path */
+ int i;
+ for (i=0; i<width; i++)
+ {
+ dstU[i]= src1[2*i + 1];
+ dstV[i]= src2[2*i + 1];
+ }
+#endif /* HAVE_MMX */
+}
+
/* This is almost identical to the previous, and exists only because
* yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if HAVE_MMX
__asm__ volatile(
#endif
}
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if HAVE_MMX
__asm__ volatile(
assert(src1 == src2);
}
-#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
-{\
- int i;\
- for (i=0; i<width; i++)\
- {\
- int b= (((type*)src)[i]>>shb)&maskb;\
- int g= (((type*)src)[i]>>shg)&maskg;\
- int r= (((type*)src)[i]>>shr)&maskr;\
-\
- dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
- }\
-}
-
-BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
-BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
-
-static inline void RENAME(abgrToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{ /* Copies every even-indexed byte (src[2*i]) of src1 into dstU and of src2 into dstV, width bytes each; "unused" is never read. */
+#if HAVE_MMX
+ __asm__ volatile( /* NOTE(review): handles 8 outputs per pass, so a width that is not a multiple of 8 appears to touch memory past the end of the buffers - confirm they are padded */
+ "movq "MANGLE(bm01010101)", %%mm4 \n\t" /* mask constant defined elsewhere; presumably keeps the low byte of each 16-bit word (that is what the C path does) - TODO confirm */
+ "mov %0, %%"REG_a" \n\t" /* REG_a = -width; the index counts up towards zero */
+ "1: \n\t"
+ "movq (%1, %%"REG_a",2), %%mm0 \n\t" /* 16 bytes of src1 -> mm0, mm1 */
+ "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
+ "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* 16 bytes of src2 -> mm2, mm3 */
+ "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
+ "pand %%mm4, %%mm0 \n\t" /* apply the mask to all four registers */
+ "pand %%mm4, %%mm1 \n\t"
+ "pand %%mm4, %%mm2 \n\t"
+ "pand %%mm4, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t" /* narrow 8 words to 8 bytes (dstU data) */
+ "packuswb %%mm3, %%mm2 \n\t" /* narrow 8 words to 8 bytes (dstV data) */
+ "movq %%mm0, (%3, %%"REG_a") \n\t"
+ "movq %%mm2, (%4, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t" /* 8 output bytes per iteration */
+ " js 1b \n\t" /* repeat while the index is still negative */
+ : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) /* pointers are pre-advanced to the end so the negative index walks forward */
+ : "%"REG_a
+ );
+#else /* !HAVE_MMX: plain C path */
int i;
- for (i=0; i<width; i++){
- dst[i]= src[4*i];
+ for (i=0; i<width; i++)
+ {
+ dstU[i]= src1[2*i];
+ dstV[i]= src2[2*i];
}
+#endif /* HAVE_MMX */
}
-#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
-static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
-{\
- int i;\
- for (i=0; i<width; i++)\
- {\
- int b= (((type*)src)[i]&maskb)>>shb;\
- int g= (((type*)src)[i]&maskg)>>shg;\
- int r= (((type*)src)[i]&maskr)>>shr;\
-\
- dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
- dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
- }\
-}\
-static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
-{\
- int i;\
- for (i=0; i<width; i++)\
- {\
- int pix0= ((type*)src)[2*i+0];\
- int pix1= ((type*)src)[2*i+1];\
- int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
- int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
- int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
- g&= maskg|(2*maskg);\
-\
- g>>=shg;\
-\
- dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
- dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
- }\
-}
-
-BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
-BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
-
#if HAVE_MMX
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
{
if(srcFormat == PIX_FMT_BGR24){
);
}
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, int srcFormat)
{
__asm__ volatile(
"movq 24+%4, %%mm6 \n\t"
}
#endif
-static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if HAVE_MMX
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
#endif /* HAVE_MMX */
}
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if HAVE_MMX
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
assert(src1 == src2);
}
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++)
assert(src1 == src2);
}
-static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if HAVE_MMX
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
#endif
}
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if HAVE_MMX
assert(src1==src2);
#endif
}
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
assert(src1==src2);
}
-static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
-{
- int i;
- for (i=0; i<width; i++)
- {
- int d= src[i];
-
- dst[i]= pal[d] & 0xFF;
- }
-}
-
-static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
-{
- int i;
- assert(src1 == src2);
- for (i=0; i<width; i++)
- {
- int p= pal[src1[i]];
-
- dstU[i]= p>>8;
- dstV[i]= p>>16;
- }
-}
-
-static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
- int i, j;
- for (i=0; i<width/8; i++){
- int d= ~src[i];
- for(j=0; j<8; j++)
- dst[8*i+j]= ((d>>(7-j))&1)*255;
- }
-}
-
-static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
- int i, j;
- for (i=0; i<width/8; i++){
- int d= src[i];
- for(j=0; j<8; j++)
- dst[8*i+j]= ((d>>(7-j))&1)*255;
- }
-}
-
// bilinear / bicubic scaling
-static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
- int16_t *filter, int16_t *filterPos, long filterSize)
+static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
+ const int16_t *filter, const int16_t *filterPos, long filterSize)
{
#if HAVE_MMX
assert(filterSize % 4 == 0 && filterSize>0);
#endif /* HAVE_ALTIVEC */
#endif /* HAVE_MMX */
}
- // *** horizontal scale Y line to temp buffer
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
- int flags, int canMMX2BeUsed, int16_t *hLumFilter,
- int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
- int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
- int32_t *mmx2FilterPos, uint32_t *pal, int isAlpha)
+
+static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
+ int dstWidth, const uint8_t *src, int srcW,
+ int xInc) /* Plain C horizontal scaler: each of the dstWidth outputs is a linear blend of two neighbouring src bytes, scaled by 128; c and srcW are not referenced in the body. */
{
- if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
- {
- RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
- {
- RENAME(uyvyToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB32)
- {
- if (isAlpha)
- RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
- else
- RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB32_1)
- {
- if (isAlpha)
- RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
- else
- RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_BGR24)
- {
- RENAME(bgr24ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_BGR565)
- {
- RENAME(bgr16ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_BGR555)
- {
- RENAME(bgr15ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_BGR32)
- {
- if (isAlpha)
- RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
- else
- RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_BGR32_1)
- {
- if (isAlpha)
- RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
- else
- RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB24)
- {
- RENAME(rgb24ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB565)
- {
- RENAME(rgb16ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB555)
- {
- RENAME(rgb15ToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
- }
- else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
+ int i;
+ unsigned int xpos=0; /* source position in 16.16 fixed point */
+ for (i=0;i<dstWidth;i++)
{
- RENAME(palToY)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
+ register unsigned int xx=xpos>>16; /* integer part: index of the left source sample */
+ register unsigned int xalpha=(xpos&0xFFFF)>>9; /* top 7 bits of the fraction, 0..127 */
+ dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; /* src[xx]*128 moved towards src[xx+1]; always reads src[xx+1] as well */
+ xpos+=xInc; /* step to the next output's source position */
}
- else if (srcFormat==PIX_FMT_MONOBLACK)
- {
- RENAME(monoblack2Y)(formatConvBuffer, src, srcW, pal);
- src= formatConvBuffer;
+}
+
+ // *** horizontal scale Y line to temp buffer
+static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
+ int flags, const int16_t *hLumFilter,
+ const int16_t *hLumFilterPos, int hLumFilterSize,
+ int srcFormat, uint8_t *formatConvBuffer,
+ uint32_t *pal, int isAlpha)
+{
+ int32_t av_unused *mmx2FilterPos = c->lumMmx2FilterPos;
+ int16_t av_unused *mmx2Filter = c->lumMmx2Filter;
+ int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
+ void av_unused *funnyYCode = c->funnyYCode;
+ void (*internal_func)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->hascale_internal : c->hyscale_internal;
+
+ if (isAlpha) {
+ if (srcFormat == PIX_FMT_RGB32 || srcFormat == PIX_FMT_BGR32 )
+ src += 3;
+ } else {
+ if (srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
+ src += ALT32_CORR;
}
- else if (srcFormat==PIX_FMT_MONOWHITE)
- {
- RENAME(monowhite2Y)(formatConvBuffer, src, srcW, pal);
+
+ if (srcFormat == PIX_FMT_RGB48LE)
+ src++;
+
+ if (internal_func) {
+ internal_func(formatConvBuffer, src, srcW, pal);
src= formatConvBuffer;
}
if (!(flags&SWS_FAST_BILINEAR))
#endif
{
- RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+ c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
}
else // fast bilinear upscale / crap downscale
{
#if HAVE_MMX2
int i;
#if defined(PIC)
- uint64_t ebxsave __attribute__((aligned(8)));
+ DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
if (canMMX2BeUsed)
{
} //if MMX2 can't be used
#endif
#else
- int i;
- unsigned int xpos=0;
- for (i=0;i<dstWidth;i++)
- {
- register unsigned int xx=xpos>>16;
- register unsigned int xalpha=(xpos&0xFFFF)>>9;
- dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
- xpos+=xInc;
- }
+ c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
#endif /* ARCH_X86 */
}
}
}
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
- int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
- int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
- int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
- int32_t *mmx2FilterPos, uint32_t *pal)
+static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc) /* Plain C horizontal scaler for two planes at once: src1 goes to dst[0..], src2 to dst[VOFW..]; c and srcW are not referenced in the body. */
{
- if (srcFormat==PIX_FMT_YUYV422)
- {
- RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_UYVY422)
- {
- RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_RGB32)
- {
- if(c->chrSrcHSubSample)
- RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_RGB32_1)
- {
- if(c->chrSrcHSubSample)
- RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
- else
- RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_BGR24)
- {
- if(c->chrSrcHSubSample)
- RENAME(bgr24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_BGR565)
- {
- if(c->chrSrcHSubSample)
- RENAME(bgr16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_BGR555)
- {
- if(c->chrSrcHSubSample)
- RENAME(bgr15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_BGR32)
- {
- if(c->chrSrcHSubSample)
- RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_BGR32_1)
- {
- if(c->chrSrcHSubSample)
- RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
- else
- RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_RGB24)
- {
- if(c->chrSrcHSubSample)
- RENAME(rgb24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_RGB565)
- {
- if(c->chrSrcHSubSample)
- RENAME(rgb16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
- }
- else if (srcFormat==PIX_FMT_RGB555)
+ int i;
+ unsigned int xpos=0; /* source position in 16.16 fixed point */
+ for (i=0;i<dstWidth;i++)
{
- if(c->chrSrcHSubSample)
- RENAME(rgb15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- else
- RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
- src1= formatConvBuffer;
- src2= formatConvBuffer+VOFW;
+ register unsigned int xx=xpos>>16; /* integer part: index of the left source sample */
+ register unsigned int xalpha=(xpos&0xFFFF)>>9; /* top 7 bits of the fraction, 0..127 */
+ dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); /* for 0..127, xalpha^127 == 127-xalpha, so the two weights sum to 127 */
+ dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); /* same blend for the second plane, VOFW entries further into dst */
+ /* slower
+ dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
+ dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
+ */
+ xpos+=xInc; /* step to the next output's source position */
}
- else if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
- {
+}
+
+inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
+ int srcW, int xInc, int flags, const int16_t *hChrFilter,
+ const int16_t *hChrFilterPos, int hChrFilterSize,
+ int srcFormat, uint8_t *formatConvBuffer,
+ uint32_t *pal)
+{
+ int32_t av_unused *mmx2FilterPos = c->chrMmx2FilterPos;
+ int16_t av_unused *mmx2Filter = c->chrMmx2Filter;
+ int av_unused canMMX2BeUsed = c->canMMX2BeUsed;
+ void av_unused *funnyUVCode = c->funnyUVCode;
+
+ if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
return;
+
+ if (srcFormat==PIX_FMT_RGB32_1 || srcFormat==PIX_FMT_BGR32_1) {
+ src1 += ALT32_CORR;
+ src2 += ALT32_CORR;
}
- else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
- {
- RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+
+ if (srcFormat==PIX_FMT_RGB48LE) {
+ src1++;
+ src2++;
+ }
+
+ if (c->hcscale_internal) {
+ c->hcscale_internal(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
src1= formatConvBuffer;
src2= formatConvBuffer+VOFW;
}
if (!(flags&SWS_FAST_BILINEAR))
#endif
{
- RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
- RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+ c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+ c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
}
else // fast bilinear upscale / crap downscale
{
#if HAVE_MMX2
int i;
#if defined(PIC)
- uint64_t ebxsave __attribute__((aligned(8)));
+ DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
if (canMMX2BeUsed)
{
} //if MMX2 can't be used
#endif
#else
- int i;
- unsigned int xpos=0;
- for (i=0;i<dstWidth;i++)
- {
- register unsigned int xx=xpos>>16;
- register unsigned int xalpha=(xpos&0xFFFF)>>9;
- dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
- dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
- /* slower
- dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
- dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
- */
- xpos+=xInc;
- }
+ c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
#endif /* ARCH_X86 */
}
if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
const int dstFormat= c->dstFormat;
const int srcFormat= c->srcFormat;
const int flags= c->flags;
- const int canMMX2BeUsed= c->canMMX2BeUsed;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
- uint8_t *funnyYCode= c->funnyYCode;
- uint8_t *funnyUVCode= c->funnyUVCode;
uint8_t *formatConvBuffer= c->formatConvBuffer;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
//printf("%d %d\n", lumBufIndex, vLumBufSize);
RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
- flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
- funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
+ flags, hLumFilter, hLumFilterPos, hLumFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal, 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
- flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
- funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
+ flags, hLumFilter, hLumFilterPos, hLumFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal, 1);
lastInLumBuf++;
}
while(lastInChrBuf < lastChrSrcY)
if (!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
- flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
- funnyUVCode, c->srcFormat, formatConvBuffer,
- c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
+ flags, hChrFilter, hChrFilterPos, hChrFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal);
lastInChrBuf++;
}
//wrap buf index around to stay inside the ring buffer
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
- flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
- funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
+ flags, hLumFilter, hLumFilterPos, hLumFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal, 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
- flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
- funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
+ flags, hLumFilter, hLumFilterPos, hLumFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal, 1);
lastInLumBuf++;
}
while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
if (!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
- flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
- funnyUVCode, c->srcFormat, formatConvBuffer,
- c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
+ flags, hChrFilter, hChrFilterPos, hChrFilterSize,
+ c->srcFormat, formatConvBuffer,
+ pal);
lastInChrBuf++;
}
//wrap buf index around to stay inside the ring buffer
#endif
if (dstY < dstH-2)
{
- int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
#if HAVE_MMX
int i;
if (flags & SWS_ACCURATE_RND){
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
- RENAME(yuv2nv12X)(c,
+ c->yuv2nv12X(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
int16_t *lumBuf = lumPixBuf[0];
int16_t *chrBuf= chrPixBuf[0];
int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
- RENAME(yuv2yuv1)(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
+ c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
}
else //General YV12
{
- RENAME(yuv2yuvX)(c,
+ c->yuv2yuvX(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}else{
- RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+ c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL,
dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}else{
- RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+ c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
dest, dstW, lumAlpha, chrAlpha, dstY);
}
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}else{
- RENAME(yuv2packedX)(c,
+ c->yuv2packedX(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}
else // hmm looks like we can't use MMX here without overwriting this array's tail
{
- int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
#if HAVE_MMX
- __asm__ volatile(SFENCE:::"memory");
- __asm__ volatile(EMMS:::"memory");
+ if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
+ /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+ if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
+ else __asm__ volatile("emms" :::"memory");
#endif
/* store changed local vars back in the context */
c->dstY= dstY;
return dstY - lastDstY;
}
+
+/**
+ * Install the function pointers of a scaler context for this template
+ * instantiation.
+ *
+ * The output writers, the horizontal scaler and the fast-bilinear routines
+ * are always set to this instantiation's RENAME()d functions. The input
+ * converters (hcscale_internal for chroma, hyscale_internal for luma,
+ * hascale_internal for alpha) are selected from c->srcFormat and are left
+ * NULL for every source format that has no entry below.
+ *
+ * @param c scaler context; srcFormat, chrSrcHSubSample and alpPixBuf are
+ *          read, the function-pointer fields are written
+ */
+static void RENAME(sws_init_swScale)(SwsContext *c)
+{
+    const enum PixelFormat fmt = c->srcFormat;
+    /* non-zero: pick the "_half" chroma converters for packed RGB input */
+    const int halfChroma = c->chrSrcHSubSample != 0;
+
+    /* horizontal scaling */
+    c->hScale       = RENAME(hScale      );
+    c->hyscale_fast = RENAME(hyscale_fast);
+    c->hcscale_fast = RENAME(hcscale_fast);
+
+    /* output writers */
+    c->yuv2nv12X   = RENAME(yuv2nv12X  );
+    c->yuv2yuv1    = RENAME(yuv2yuv1   );
+    c->yuv2yuvX    = RENAME(yuv2yuvX   );
+    c->yuv2packed1 = RENAME(yuv2packed1);
+    c->yuv2packed2 = RENAME(yuv2packed2);
+    c->yuv2packedX = RENAME(yuv2packedX);
+
+    /* chroma input converter */
+    c->hcscale_internal = NULL;
+    switch (fmt) {
+    case PIX_FMT_YUYV422:
+        c->hcscale_internal = RENAME(yuy2ToUV);
+        break;
+    case PIX_FMT_UYVY422:
+        c->hcscale_internal = RENAME(uyvyToUV);
+        break;
+    case PIX_FMT_RGB8:
+    case PIX_FMT_BGR8:
+    case PIX_FMT_PAL8:
+    case PIX_FMT_BGR4_BYTE:
+    case PIX_FMT_RGB4_BYTE:
+        c->hcscale_internal = palToUV;
+        break;
+    case PIX_FMT_YUV420PBE:
+    case PIX_FMT_YUV422PBE:
+    case PIX_FMT_YUV444PBE:
+        c->hcscale_internal = RENAME(BEToUV);
+        break;
+    case PIX_FMT_YUV420PLE:
+    case PIX_FMT_YUV422PLE:
+    case PIX_FMT_YUV444PLE:
+        c->hcscale_internal = RENAME(LEToUV);
+        break;
+    case PIX_FMT_RGB48BE:
+    case PIX_FMT_RGB48LE:
+        if (halfChroma) c->hcscale_internal = rgb48ToUV_half;
+        else            c->hcscale_internal = rgb48ToUV;
+        break;
+    case PIX_FMT_RGB32:
+    case PIX_FMT_RGB32_1:
+        if (halfChroma) c->hcscale_internal = bgr32ToUV_half;
+        else            c->hcscale_internal = bgr32ToUV;
+        break;
+    case PIX_FMT_BGR24:
+        if (halfChroma) c->hcscale_internal = RENAME(bgr24ToUV_half);
+        else            c->hcscale_internal = RENAME(bgr24ToUV);
+        break;
+    case PIX_FMT_BGR565:
+        if (halfChroma) c->hcscale_internal = bgr16ToUV_half;
+        else            c->hcscale_internal = bgr16ToUV;
+        break;
+    case PIX_FMT_BGR555:
+        if (halfChroma) c->hcscale_internal = bgr15ToUV_half;
+        else            c->hcscale_internal = bgr15ToUV;
+        break;
+    case PIX_FMT_BGR32:
+    case PIX_FMT_BGR32_1:
+        if (halfChroma) c->hcscale_internal = rgb32ToUV_half;
+        else            c->hcscale_internal = rgb32ToUV;
+        break;
+    case PIX_FMT_RGB24:
+        if (halfChroma) c->hcscale_internal = RENAME(rgb24ToUV_half);
+        else            c->hcscale_internal = RENAME(rgb24ToUV);
+        break;
+    case PIX_FMT_RGB565:
+        if (halfChroma) c->hcscale_internal = rgb16ToUV_half;
+        else            c->hcscale_internal = rgb16ToUV;
+        break;
+    case PIX_FMT_RGB555:
+        if (halfChroma) c->hcscale_internal = rgb15ToUV_half;
+        else            c->hcscale_internal = rgb15ToUV;
+        break;
+    default:
+        break;
+    }
+
+    /* luma input converter */
+    c->hyscale_internal = NULL;
+    switch (fmt) {
+    case PIX_FMT_YUYV422:
+    case PIX_FMT_YUV420PBE:
+    case PIX_FMT_YUV422PBE:
+    case PIX_FMT_YUV444PBE:
+    case PIX_FMT_GRAY16BE:
+        c->hyscale_internal = RENAME(yuy2ToY);
+        break;
+    case PIX_FMT_UYVY422:
+    case PIX_FMT_YUV420PLE:
+    case PIX_FMT_YUV422PLE:
+    case PIX_FMT_YUV444PLE:
+    case PIX_FMT_GRAY16LE:
+        c->hyscale_internal = RENAME(uyvyToY);
+        break;
+    case PIX_FMT_BGR24:
+        c->hyscale_internal = RENAME(bgr24ToY);
+        break;
+    case PIX_FMT_BGR565:
+        c->hyscale_internal = bgr16ToY;
+        break;
+    case PIX_FMT_BGR555:
+        c->hyscale_internal = bgr15ToY;
+        break;
+    case PIX_FMT_RGB24:
+        c->hyscale_internal = RENAME(rgb24ToY);
+        break;
+    case PIX_FMT_RGB565:
+        c->hyscale_internal = rgb16ToY;
+        break;
+    case PIX_FMT_RGB555:
+        c->hyscale_internal = rgb15ToY;
+        break;
+    case PIX_FMT_RGB8:
+    case PIX_FMT_BGR8:
+    case PIX_FMT_PAL8:
+    case PIX_FMT_BGR4_BYTE:
+    case PIX_FMT_RGB4_BYTE:
+        c->hyscale_internal = palToY;
+        break;
+    case PIX_FMT_MONOBLACK:
+        c->hyscale_internal = monoblack2Y;
+        break;
+    case PIX_FMT_MONOWHITE:
+        c->hyscale_internal = monowhite2Y;
+        break;
+    case PIX_FMT_RGB32:
+    case PIX_FMT_RGB32_1:
+        c->hyscale_internal = bgr32ToY;
+        break;
+    case PIX_FMT_BGR32:
+    case PIX_FMT_BGR32_1:
+        c->hyscale_internal = rgb32ToY;
+        break;
+    case PIX_FMT_RGB48BE:
+    case PIX_FMT_RGB48LE:
+        c->hyscale_internal = rgb48ToY;
+        break;
+    default:
+        break;
+    }
+
+    /* alpha input converter: only set when the context has an alpha buffer
+     * and the source is one of the four 32-bit RGB layouts */
+    c->hascale_internal = NULL;
+    if (c->alpPixBuf &&
+        (fmt == PIX_FMT_RGB32 || fmt == PIX_FMT_RGB32_1 ||
+         fmt == PIX_FMT_BGR32 || fmt == PIX_FMT_BGR32_1)) {
+        c->hascale_internal = abgrToA;
+    }
+}