#include <string.h>
#include <math.h>
#include <stdio.h>
-#include <unistd.h>
#include "config.h"
#include <assert.h>
#if HAVE_SYS_MMAN_H
//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
-//#define WORDS_BIGENDIAN
#define DITHER1XBPP
#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
|| (x)==PIX_FMT_YUVA420P \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
+ || (x)==PIX_FMT_RGB48BE \
+ || (x)==PIX_FMT_RGB48LE \
|| (x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_BGR24 \
|| (x)==PIX_FMT_YUV440P \
|| (x)==PIX_FMT_MONOWHITE \
|| (x)==PIX_FMT_MONOBLACK \
+ || (x)==PIX_FMT_YUV420PLE \
+ || (x)==PIX_FMT_YUV422PLE \
+ || (x)==PIX_FMT_YUV444PLE \
+ || (x)==PIX_FMT_YUV420PBE \
+ || (x)==PIX_FMT_YUV422PBE \
+ || (x)==PIX_FMT_YUV444PBE \
)
#define isSupportedOut(x) ( \
(x)==PIX_FMT_YUV420P \
+ || (x)==PIX_FMT_YUVA420P \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
|| (x)==PIX_FMT_YUV440P \
+ || (x)==PIX_FMT_YUV420PLE \
+ || (x)==PIX_FMT_YUV422PLE \
+ || (x)==PIX_FMT_YUV444PLE \
+ || (x)==PIX_FMT_YUV420PBE \
+ || (x)==PIX_FMT_YUV422PBE \
+ || (x)==PIX_FMT_YUV444PBE \
)
#define isPacked(x) ( \
(x)==PIX_FMT_PAL8 \
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
-static const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
+DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4[2][8])={
{ 1, 3, 1, 3, 1, 3, 1, 3, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
};
-static const uint8_t __attribute__((aligned(8))) dither_2x2_8[2][8]={
+DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8[2][8])={
{ 6, 2, 6, 2, 6, 2, 6, 2, },
{ 0, 4, 0, 4, 0, 4, 0, 4, },
};
-const uint8_t __attribute__((aligned(8))) dither_8x8_32[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32[8][8])={
{ 17, 9, 23, 15, 16, 8, 22, 14, },
{ 5, 29, 3, 27, 4, 28, 2, 26, },
{ 21, 13, 19, 11, 20, 12, 18, 10, },
{ 1, 25, 7, 31, 0, 24, 6, 30, },
};
-#if 0
-const uint8_t __attribute__((aligned(8))) dither_8x8_64[8][8]={
-{ 0, 48, 12, 60, 3, 51, 15, 63, },
-{ 32, 16, 44, 28, 35, 19, 47, 31, },
-{ 8, 56, 4, 52, 11, 59, 7, 55, },
-{ 40, 24, 36, 20, 43, 27, 39, 23, },
-{ 2, 50, 14, 62, 1, 49, 13, 61, },
-{ 34, 18, 46, 30, 33, 17, 45, 29, },
-{ 10, 58, 6, 54, 9, 57, 5, 53, },
-{ 42, 26, 38, 22, 41, 25, 37, 21, },
-};
-#endif
-
-const uint8_t __attribute__((aligned(8))) dither_8x8_73[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73[8][8])={
{ 0, 55, 14, 68, 3, 58, 17, 72, },
{ 37, 18, 50, 32, 40, 22, 54, 35, },
{ 9, 64, 5, 59, 13, 67, 8, 63, },
{ 48, 30, 43, 25, 47, 29, 42, 24, },
};
-#if 0
-const uint8_t __attribute__((aligned(8))) dither_8x8_128[8][8]={
-{ 68, 36, 92, 60, 66, 34, 90, 58, },
-{ 20, 116, 12, 108, 18, 114, 10, 106, },
-{ 84, 52, 76, 44, 82, 50, 74, 42, },
-{ 0, 96, 24, 120, 6, 102, 30, 126, },
-{ 64, 32, 88, 56, 70, 38, 94, 62, },
-{ 16, 112, 8, 104, 22, 118, 14, 110, },
-{ 80, 48, 72, 40, 86, 54, 78, 46, },
-{ 4, 100, 28, 124, 2, 98, 26, 122, },
-};
-#endif
-
#if 1
-const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
{117, 62, 158, 103, 113, 58, 155, 100, },
{ 34, 199, 21, 186, 31, 196, 17, 182, },
{144, 89, 131, 76, 141, 86, 127, 72, },
};
#elif 1
// tries to correct a gamma of 1.5
-const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
{ 0, 143, 18, 200, 2, 156, 25, 215, },
{ 78, 28, 125, 64, 89, 36, 138, 74, },
{ 10, 180, 3, 161, 16, 195, 8, 175, },
};
#elif 1
// tries to correct a gamma of 2.0
-const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
{ 0, 124, 8, 193, 0, 140, 12, 213, },
{ 55, 14, 104, 42, 66, 19, 119, 52, },
{ 3, 168, 1, 145, 6, 187, 3, 162, },
};
#else
// tries to correct a gamma of 2.5
-const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
{ 0, 107, 3, 187, 0, 125, 6, 212, },
{ 39, 7, 86, 28, 49, 11, 102, 36, },
{ 1, 158, 0, 131, 3, 180, 1, 151, },
return "rgb4";
case PIX_FMT_RGB4_BYTE:
return "rgb4 byte";
+ case PIX_FMT_RGB48BE:
+ return "rgb48be";
+ case PIX_FMT_RGB48LE:
+ return "rgb48le";
case PIX_FMT_NV12:
return "nv12";
case PIX_FMT_NV21:
return "vdpau_wmv3";
case PIX_FMT_VDPAU_VC1:
return "vdpau_vc1";
+ case PIX_FMT_YUV420PLE:
+ return "yuv420ple";
+ case PIX_FMT_YUV422PLE:
+ return "yuv422ple";
+ case PIX_FMT_YUV444PLE:
+ return "yuv444ple";
+ case PIX_FMT_YUV420PBE:
+ return "yuv420pbe";
+ case PIX_FMT_YUV422PBE:
+ return "yuv422pbe";
+ case PIX_FMT_YUV444PBE:
+ return "yuv444pbe";
default:
return "Unknown format";
}
}
-static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
+static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
{
//FIXME Optimize (just quickly written not optimized..)
int i;
}
-static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
//FIXME Optimize (just quickly written not optimized..)
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
switch(c->dstFormat)\
{\
+ case PIX_FMT_RGB48BE:\
+ case PIX_FMT_RGB48LE:\
+ func(uint8_t,0)\
+ ((uint8_t*)dest)[ 0]= r[Y1];\
+ ((uint8_t*)dest)[ 1]= r[Y1];\
+ ((uint8_t*)dest)[ 2]= g[Y1];\
+ ((uint8_t*)dest)[ 3]= g[Y1];\
+ ((uint8_t*)dest)[ 4]= b[Y1];\
+ ((uint8_t*)dest)[ 5]= b[Y1];\
+ ((uint8_t*)dest)[ 6]= r[Y2];\
+ ((uint8_t*)dest)[ 7]= r[Y2];\
+ ((uint8_t*)dest)[ 8]= g[Y2];\
+ ((uint8_t*)dest)[ 9]= g[Y2];\
+ ((uint8_t*)dest)[10]= b[Y2];\
+ ((uint8_t*)dest)[11]= b[Y2];\
+ dest+=12;\
+ }\
+ break;\
case PIX_FMT_RGBA:\
case PIX_FMT_BGRA:\
if (CONFIG_SMALL){\
}\
-static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
int i;
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
}
-static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
- int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
int i;
int step= fmt_depth(c->dstFormat)/8;
}
}
-//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
+static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width)
+{
+ int i;
+ for (i = 0; i < width; i++) {
+ int r = src[i*6+0];
+ int g = src[i*6+2];
+ int b = src[i*6+4];
+
+ dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
+ uint8_t *src1, uint8_t *src2, int width)
+{
+ int i;
+ assert(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r = src1[6*i + 0];
+ int g = src1[6*i + 2];
+ int b = src1[6*i + 4];
+
+ dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
+ uint8_t *src1, uint8_t *src2, int width)
+{
+ int i;
+ assert(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r= src1[12*i + 0] + src1[12*i + 6];
+ int g= src1[12*i + 2] + src1[12*i + 8];
+ int b= src1[12*i + 4] + src1[12*i + 10];
+
+ dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+ dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+ }
+}
+
+#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
+static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+{\
+ int i;\
+ for (i=0; i<width; i++)\
+ {\
+ int b= (((const type*)src)[i]>>shb)&maskb;\
+ int g= (((const type*)src)[i]>>shg)&maskg;\
+ int r= (((const type*)src)[i]>>shr)&maskr;\
+\
+ dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
+ }\
+}
+
+BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
+BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
+BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
+
+static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused){
+ int i;
+ for (i=0; i<width; i++){
+ dst[i]= src[4*i];
+ }
+}
+
+#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
+static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+{\
+ int i;\
+ for (i=0; i<width; i++)\
+ {\
+ int b= (((const type*)src)[i]&maskb)>>shb;\
+ int g= (((const type*)src)[i]&maskg)>>shg;\
+ int r= (((const type*)src)[i]&maskr)>>shr;\
+\
+ dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
+ dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
+ }\
+}\
+static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+{\
+ int i;\
+ for (i=0; i<width; i++)\
+ {\
+ int pix0= ((const type*)src)[2*i+0];\
+ int pix1= ((const type*)src)[2*i+1];\
+ int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
+ int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
+ int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
+ g&= maskg|(2*maskg);\
+\
+ g>>=shg;\
+\
+ dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
+ dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
+ }\
+}
+
+BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
+BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
+BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
+
+static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+{
+ int i;
+ for (i=0; i<width; i++)
+ {
+ int d= src[i];
+
+ dst[i]= pal[d] & 0xFF;
+ }
+}
+
+static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2,
+ long width, uint32_t *pal)
+{
+ int i;
+ assert(src1 == src2);
+ for (i=0; i<width; i++)
+ {
+ int p= pal[src1[i]];
+
+ dstU[i]= p>>8;
+ dstV[i]= p>>16;
+ }
+}
+
+static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+ int i, j;
+ for (i=0; i<width/8; i++){
+ int d= ~src[i];
+ for(j=0; j<8; j++)
+ dst[8*i+j]= ((d>>(7-j))&1)*255;
+ }
+}
+
+static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+ int i, j;
+ for (i=0; i<width/8; i++){
+ int d= src[i];
+ for(j=0; j<8; j++)
+ dst[8*i+j]= ((d>>(7-j))&1)*255;
+ }
+}
+
+
+//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
-#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
+#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif
#if ARCH_PPC
-#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#undef COMPILE_C
+#if HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_ALTIVEC
#endif
#endif //ARCH_PPC
#if ARCH_X86
-#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX
#endif
-#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX2
#endif
-#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_ALTIVEC
-#define HAVE_MMX 0
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define HAVE_ALTIVEC 0
+#define COMPILE_TEMPLATE_MMX 0
+#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_AMD3DNOW 0
+#define COMPILE_TEMPLATE_ALTIVEC 0
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#ifdef COMPILE_ALTIVEC
#undef RENAME
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 1
+#undef COMPILE_TEMPLATE_ALTIVEC
+#define COMPILE_TEMPLATE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#if ARCH_X86
-//x86 versions
-/*
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define ARCH_X86
-#define RENAME(a) a ## _X86
-#include "swscale_template.c"
-*/
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
+#undef COMPILE_TEMPLATE_MMX
+#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_AMD3DNOW
+#define COMPILE_TEMPLATE_MMX 1
+#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif
//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#define HAVE_AMD3DNOW 0
+#undef COMPILE_TEMPLATE_MMX
+#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_AMD3DNOW
+#define COMPILE_TEMPLATE_MMX 1
+#define COMPILE_TEMPLATE_MMX2 1
+#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif
//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 1
+#undef COMPILE_TEMPLATE_MMX
+#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_AMD3DNOW
+#define COMPILE_TEMPLATE_MMX 1
+#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif
#endif //ARCH_X86
-// minor note: the HAVE_xyz are messed up after this line so don't use them
-
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
// printf("%f %f %f %f %f\n", a,b,c,d,dist);
{
int i;
int xDstInSrc;
- if (flags&SWS_BICUBIC) filterSize= 4;
- else if (flags&SWS_X ) filterSize= 4;
- else filterSize= 2; // SWS_BILINEAR / SWS_AREA
+ filterSize= 2;
filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
}
#ifdef COMPILE_MMX2
-static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
+static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
uint8_t *fragmentA;
x86_reg imm8OfPShufW1A;
int b=((xpos+xInc)>>16) - xx;
int c=((xpos+xInc*2)>>16) - xx;
int d=((xpos+xInc*3)>>16) - xx;
-
+ int inc = (d+1<4);
+ uint8_t *fragment = (d+1<4) ? fragmentB : fragmentA;
+ x86_reg imm8OfPShufW1 = (d+1<4) ? imm8OfPShufW1B : imm8OfPShufW1A;
+ x86_reg imm8OfPShufW2 = (d+1<4) ? imm8OfPShufW2B : imm8OfPShufW2A;
+ x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA;
+ int maxShift= 3-(d+inc);
+ int shift=0;
+
+ if (filterCode) {
filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
filterPos[i/2]= xx;
- if (d+1<4)
- {
- int maxShift= 3-(d+1);
- int shift=0;
-
- memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
+ memcpy(filterCode + fragmentPos, fragment, fragmentLength);
- funnyCode[fragmentPos + imm8OfPShufW1B]=
- (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
- funnyCode[fragmentPos + imm8OfPShufW2B]=
- a | (b<<2) | (c<<4) | (d<<6);
+ filterCode[fragmentPos + imm8OfPShufW1]=
+ (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+ filterCode[fragmentPos + imm8OfPShufW2]=
+ a | (b<<2) | (c<<4) | (d<<6);
- if (i+3>=dstW) shift=maxShift; //avoid overread
- else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
+ if (i+4-inc>=dstW) shift=maxShift; //avoid overread
+ else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
- if (shift && i>=shift)
- {
- funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
- funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
- filterPos[i/2]-=shift;
- }
-
- fragmentPos+= fragmentLengthB;
- }
- else
+ if (shift && i>=shift)
{
- int maxShift= 3-d;
- int shift=0;
-
- memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
-
- funnyCode[fragmentPos + imm8OfPShufW1A]=
- funnyCode[fragmentPos + imm8OfPShufW2A]=
- a | (b<<2) | (c<<4) | (d<<6);
-
- if (i+4>=dstW) shift=maxShift; //avoid overread
- else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
-
- if (shift && i>=shift)
- {
- funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
- funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
- filterPos[i/2]-=shift;
- }
-
- fragmentPos+= fragmentLengthA;
+ filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+ filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
+ filterPos[i/2]-=shift;
+ }
}
- funnyCode[fragmentPos]= RET;
+ fragmentPos+= fragmentLength;
+
+ if (filterCode)
+ filterCode[fragmentPos]= RET;
}
xpos+=xInc;
}
+ if (filterCode)
filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part
+
+ return fragmentPos + 1;
}
#endif /* COMPILE_MMX2 */
}
}
-static SwsFunc getSwsFunc(int flags){
+static SwsFunc getSwsFunc(SwsContext *c)
+{
+#if CONFIG_RUNTIME_CPUDETECT
+ int flags = c->flags;
-#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
-#if ARCH_X86
+#if ARCH_X86 && CONFIG_GPL
// ordered per speed fastest first
- if (flags & SWS_CPU_CAPS_MMX2)
+ if (flags & SWS_CPU_CAPS_MMX2) {
+ sws_init_swScale_MMX2(c);
return swScale_MMX2;
- else if (flags & SWS_CPU_CAPS_3DNOW)
+ } else if (flags & SWS_CPU_CAPS_3DNOW) {
+ sws_init_swScale_3DNow(c);
return swScale_3DNow;
- else if (flags & SWS_CPU_CAPS_MMX)
+ } else if (flags & SWS_CPU_CAPS_MMX) {
+ sws_init_swScale_MMX(c);
return swScale_MMX;
- else
+ } else {
+ sws_init_swScale_C(c);
return swScale_C;
+ }
#else
#if ARCH_PPC
- if (flags & SWS_CPU_CAPS_ALTIVEC)
+ if (flags & SWS_CPU_CAPS_ALTIVEC) {
+ sws_init_swScale_altivec(c);
return swScale_altivec;
- else
+ } else {
+ sws_init_swScale_C(c);
return swScale_C;
+ }
#endif
+ sws_init_swScale_C(c);
return swScale_C;
-#endif /* ARCH_X86 */
-#else //RUNTIME_CPUDETECT
-#if HAVE_MMX2
+#endif /* ARCH_X86 && CONFIG_GPL */
+#else //CONFIG_RUNTIME_CPUDETECT
+#if COMPILE_TEMPLATE_MMX2
+ sws_init_swScale_MMX2(c);
return swScale_MMX2;
-#elif HAVE_AMD3DNOW
+#elif COMPILE_TEMPLATE_AMD3DNOW
+ sws_init_swScale_3DNow(c);
return swScale_3DNow;
-#elif HAVE_MMX
+#elif COMPILE_TEMPLATE_MMX
+ sws_init_swScale_MMX(c);
return swScale_MMX;
-#elif HAVE_ALTIVEC
+#elif COMPILE_TEMPLATE_ALTIVEC
+ sws_init_swScale_altivec(c);
return swScale_altivec;
#else
+ sws_init_swScale_C(c);
return swScale_C;
#endif
-#endif //!RUNTIME_CPUDETECT
+#endif //!CONFIG_RUNTIME_CPUDETECT
}
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
else
{
int i;
- uint8_t *srcPtr= src[0];
+ const uint8_t *srcPtr= src[0];
uint8_t *dstPtr= dst;
for (i=0; i<srcSliceH; i++)
{
yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+ if (dstParam[3])
+ fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+
return srcSliceH;
}
uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+ if (dstParam[3])
+ fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+
return srcSliceH;
}
dst[2]+(srcSliceY>>1)*dstStride[2],
c->srcW, srcSliceH,
dstStride[0], dstStride[1], srcStride[0]);
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
return srcSliceH;
}
}
}
- if (c->dstFormat==PIX_FMT_YUV420P){
- planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
- planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
+ if (c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P){
+ planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
+ srcSliceH >> 2, srcStride[1], dstStride[1]);
+ planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
+ srcSliceH >> 2, srcStride[2], dstStride[2]);
}else{
- planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
- planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
+ planar2x(src[1], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
+ srcSliceH >> 2, srcStride[1], dstStride[2]);
+ planar2x(src[2], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
+ srcSliceH >> 2, srcStride[2], dstStride[1]);
}
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
return srcSliceH;
}
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
{
- int plane;
- for (plane=0; plane<3; plane++)
+ int plane, i, j;
+ for (plane=0; plane<4; plane++)
{
- int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
- int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
- int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
-
- if (dst[plane] && !src[plane])
- fillPlane(dst[plane], dstStride[plane], length, height, y, 128);
- else
+ int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
+ int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
+ int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
+ uint8_t *srcPtr= src[plane];
+ uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+
+ if (!dst[plane]) continue;
+ // ignore palette for GRAY8
+ if (plane == 1 && !dst[2]) continue;
+ if (!src[plane] || (plane == 1 && !src[2])){
+ if(is16BPS(c->dstFormat))
+ length*=2;
+ fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
+ }else
{
- if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
+ if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)){
+ if (!isBE(c->srcFormat)) srcPtr++;
+ for (i=0; i<height; i++){
+ for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
+ srcPtr+= srcStride[plane];
+ dstPtr+= dstStride[plane];
+ }
+ }else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)){
+ for (i=0; i<height; i++){
+ for (j=0; j<length; j++){
+ dstPtr[ j<<1 ] = srcPtr[j];
+ dstPtr[(j<<1)+1] = srcPtr[j];
+ }
+ srcPtr+= srcStride[plane];
+ dstPtr+= dstStride[plane];
+ }
+ }else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
+ && isBE(c->srcFormat) != isBE(c->dstFormat)){
+
+ for (i=0; i<height; i++){
+ for (j=0; j<length; j++)
+ ((uint16_t*)dstPtr)[j] = bswap_16(((uint16_t*)srcPtr)[j]);
+ srcPtr+= srcStride[plane];
+ dstPtr+= dstStride[plane];
+ }
+ } else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
else
{
- int i;
- uint8_t *srcPtr= src[plane];
- uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+ if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
+ length*=2;
for (i=0; i<height; i++)
{
memcpy(dstPtr, srcPtr, length);
return srcSliceH;
}
-static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t* dst[], int dstStride[]){
-
- int length= c->srcW;
- int y= srcSliceY;
- int height= srcSliceH;
- int i, j;
- uint8_t *srcPtr= src[0];
- uint8_t *dstPtr= dst[0] + dstStride[0]*y;
-
- if (!isGray(c->dstFormat)){
- int height= -((-srcSliceH)>>c->chrDstVSubSample);
- memset(dst[1], 128, dstStride[1]*height);
- memset(dst[2], 128, dstStride[2]*height);
- }
- if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
- for (i=0; i<height; i++)
- {
- for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
- srcPtr+= srcStride[0];
- dstPtr+= dstStride[0];
- }
- return srcSliceH;
-}
-
-static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t* dst[], int dstStride[]){
-
- int length= c->srcW;
- int y= srcSliceY;
- int height= srcSliceH;
- int i, j;
- uint8_t *srcPtr= src[0];
- uint8_t *dstPtr= dst[0] + dstStride[0]*y;
- for (i=0; i<height; i++)
- {
- for (j=0; j<length; j++)
- {
- dstPtr[j<<1] = srcPtr[j];
- dstPtr[(j<<1)+1] = srcPtr[j];
- }
- srcPtr+= srcStride[0];
- dstPtr+= dstStride[0];
- }
- return srcSliceH;
-}
-
-static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t* dst[], int dstStride[]){
-
- int length= c->srcW;
- int y= srcSliceY;
- int height= srcSliceH;
- int i, j;
- uint16_t *srcPtr= (uint16_t*)src[0];
- uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y/2);
- for (i=0; i<height; i++)
- {
- for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
- srcPtr+= srcStride[0]/2;
- dstPtr+= dstStride[0]/2;
- }
- return srcSliceH;
-}
-
static void getSubSampleFactors(int *h, int *v, int format){
switch(format){
*v=0;
break;
case PIX_FMT_YUV420P:
+ case PIX_FMT_YUV420PLE:
+ case PIX_FMT_YUV420PBE:
case PIX_FMT_YUVA420P:
case PIX_FMT_GRAY16BE:
case PIX_FMT_GRAY16LE:
*v=2;
break;
case PIX_FMT_YUV444P:
+ case PIX_FMT_YUV444PLE:
+ case PIX_FMT_YUV444PBE:
*h=0;
*v=0;
break;
case PIX_FMT_YUV422P:
+ case PIX_FMT_YUV422PLE:
+ case PIX_FMT_YUV422PBE:
*h=1;
*v=0;
break;
else return r;
}
-/**
- * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
- * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
- * @return -1 if not supported
- */
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
int64_t crv = inv_table[0];
int64_t cbu = inv_table[1];
c->saturation= saturation;
c->srcRange = srcRange;
c->dstRange = dstRange;
- if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
+ if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
c->uOffset= 0x0400040004000400LL;
c->vOffset= 0x0400040004000400LL;
return 0;
}
-/**
- * @return -1 if not supported
- */
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
}
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
- SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
+ SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
+{
SwsContext *c;
int i;
__asm__ volatile("emms\n\t"::: "memory");
#endif
-#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
+#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
-#if HAVE_MMX2
+#if COMPILE_TEMPLATE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
-#elif HAVE_AMD3DNOW
+#elif COMPILE_TEMPLATE_AMD3DNOW
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
-#elif HAVE_MMX
+#elif COMPILE_TEMPLATE_MMX
flags |= SWS_CPU_CAPS_MMX;
-#elif HAVE_ALTIVEC
+#elif COMPILE_TEMPLATE_ALTIVEC
flags |= SWS_CPU_CAPS_ALTIVEC;
#elif ARCH_BFIN
flags |= SWS_CPU_CAPS_BFIN;
#endif
-#endif /* RUNTIME_CPUDETECT */
+#endif /* CONFIG_RUNTIME_CPUDETECT */
if (clip_table[512] != 255) globalInit();
if (!rgb15to16) sws_rgb2rgb_init(flags);
c->param[1] = SWS_PARAM_DEFAULT;
}
- c->chrIntHSubSample= c->chrDstHSubSample;
- c->chrIntVSubSample= c->chrSrcVSubSample;
-
// Note the -((-x)>>y) is so that we always round toward +inf.
c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
c->swScale= ff_yuv2rgb_get_func_ptr(c);
}
- if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT))
+ if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT))
{
c->swScale= yvu9toyv12Wrapper;
}
/* bgr24toYV12 */
- if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
+ if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
c->swScale= bgr24toyv12Wrapper;
/* RGB/BGR -> RGB/BGR (no dither needed forms) */
&& srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
&& dstFormat != PIX_FMT_RGB32_1
&& dstFormat != PIX_FMT_BGR32_1
+ && srcFormat != PIX_FMT_RGB48LE && dstFormat != PIX_FMT_RGB48LE
+ && srcFormat != PIX_FMT_RGB48BE && dstFormat != PIX_FMT_RGB48BE
&& (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
c->swScale= rgb2rgbWrapper;
else if (dstFormat == PIX_FMT_UYVY422)
c->swScale= PlanarToUyvyWrapper;
}
-
- if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV420P)
- c->swScale= YUYV2YUV420Wrapper;
- if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV420P)
- c->swScale= UYVY2YUV420Wrapper;
}
+ if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+ c->swScale= YUYV2YUV420Wrapper;
+ if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+ c->swScale= UYVY2YUV420Wrapper;
if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
c->swScale= YUYV2YUV422Wrapper;
if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
/* simple copy */
if ( srcFormat == dstFormat
|| (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
+ || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
|| (isPlanarYUV(srcFormat) && isGray(dstFormat))
- || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
+ || (isPlanarYUV(dstFormat) && isGray(srcFormat))
+ || (isGray(dstFormat) && isGray(srcFormat))
+ || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
+ && c->chrDstHSubSample == c->chrSrcHSubSample
+ && c->chrDstVSubSample == c->chrSrcVSubSample
+ && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21
+ && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21))
{
if (isPacked(c->srcFormat))
c->swScale= packedCopy;
else /* Planar YUV or gray */
c->swScale= planarCopy;
}
-
- /* gray16{le,be} conversions */
- if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
- {
- c->swScale= gray16togray;
- }
- if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
- {
- c->swScale= graytogray16;
- }
- if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
- {
- c->swScale= gray16swap;
- }
-
#if ARCH_BFIN
if (flags & SWS_CPU_CAPS_BFIN)
ff_bfin_get_unscaled_swscale (c);
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrH, dstFilter->chrH, c->param);
-#define MAX_FUNNY_CODE_SIZE 10000
#if defined(COMPILE_MMX2)
// can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
{
+ c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8);
+ c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, NULL, NULL, NULL, 4);
+
#ifdef MAP_ANONYMOUS
- c->funnyYCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
- c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ c->lumMmx2FilterCode = mmap(NULL, c->lumMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ c->chrMmx2FilterCode = mmap(NULL, c->chrMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
#elif HAVE_VIRTUALALLOC
- c->funnyYCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
- c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ c->lumMmx2FilterCode = VirtualAlloc(NULL, c->lumMmx2FilterCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ c->chrMmx2FilterCode = VirtualAlloc(NULL, c->chrMmx2FilterCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
- c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
- c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
+ c->lumMmx2FilterCode = av_malloc(c->lumMmx2FilterCodeSize);
+ c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize);
#endif
c->lumMmx2Filter = av_malloc((dstW /8+8)*sizeof(int16_t));
c->lumMmx2FilterPos= av_malloc((dstW /2/8+8)*sizeof(int32_t));
c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
- initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
- initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+ initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
+ initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+
+#ifdef MAP_ANONYMOUS
+ mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
+ mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
+#endif
}
#endif /* defined(COMPILE_MMX2) */
} // initialize horizontal stuff
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrV, dstFilter->chrV, c->param);
-#if HAVE_ALTIVEC
+#ifdef COMPILE_ALTIVEC
c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
}
- c->swScale= getSwsFunc(flags);
+ c->swScale= getSwsFunc(c);
return c;
}
+static void reset_ptr(uint8_t* src[], int format){
+ if(!isALPHA(format))
+ src[3]=NULL;
+ if(!isPlanarYUV(format)){
+ src[3]=src[2]=NULL;
+ if( format != PIX_FMT_PAL8
+ && format != PIX_FMT_RGB8
+ && format != PIX_FMT_BGR8
+ && format != PIX_FMT_RGB4_BYTE
+ && format != PIX_FMT_BGR4_BYTE
+ )
+ src[1]= NULL;
+ }
+}
+
/**
* swscale wrapper, so we don't need to export the SwsContext.
* Assumes planar YUV to be in YUV order instead of YVU.
int srcSliceH, uint8_t* dst[], int dstStride[]){
int i;
uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
+ uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
switch(c->dstFormat) {
case PIX_FMT_BGR32:
-#ifndef WORDS_BIGENDIAN
+#if !HAVE_BIGENDIAN
case PIX_FMT_RGB24:
#endif
c->pal_rgb[i]= r + (g<<8) + (b<<16);
break;
case PIX_FMT_BGR32_1:
-#ifdef WORDS_BIGENDIAN
+#if HAVE_BIGENDIAN
case PIX_FMT_BGR24:
#endif
c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
break;
case PIX_FMT_RGB32_1:
-#ifdef WORDS_BIGENDIAN
+#if HAVE_BIGENDIAN
case PIX_FMT_RGB24:
#endif
c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
break;
case PIX_FMT_RGB32:
-#ifndef WORDS_BIGENDIAN
+#if !HAVE_BIGENDIAN
case PIX_FMT_BGR24:
#endif
default:
// slices go from top to bottom
int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
- return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
+
+ reset_ptr(src2, c->srcFormat);
+ reset_ptr(dst2, c->dstFormat);
+
+ return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
} else {
// slices go from bottom to top => we flip the image internally
- uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
- dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
- dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2],
- dst[3] + (c->dstH-1)*dstStride[3]};
int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
src2[3] += (srcSliceH-1)*srcStride[3];
+ dst2[0] += ( c->dstH -1)*dstStride[0];
+ dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
+ dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
+ dst2[3] += ( c->dstH -1)*dstStride[3];
+
+ reset_ptr(src2, c->srcFormat);
+ reset_ptr(dst2, c->dstFormat);
return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
}
av_freep(&c->vChrFilter);
av_freep(&c->hLumFilter);
av_freep(&c->hChrFilter);
-#if HAVE_ALTIVEC
+#ifdef COMPILE_ALTIVEC
av_freep(&c->vYCoeffsBank);
av_freep(&c->vCCoeffsBank);
#endif
#if ARCH_X86 && CONFIG_GPL
#ifdef MAP_ANONYMOUS
- if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
- if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+ if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
+ if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize);
#elif HAVE_VIRTUALALLOC
- if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
- if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
+ if (c->lumMmx2FilterCode) VirtualFree(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, MEM_RELEASE);
+ if (c->chrMmx2FilterCode) VirtualFree(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, MEM_RELEASE);
#else
- av_free(c->funnyYCode );
- av_free(c->funnyUVCode);
+ av_free(c->lumMmx2FilterCode);
+ av_free(c->chrMmx2FilterCode);
#endif
- c->funnyYCode=NULL;
- c->funnyUVCode=NULL;
+ c->lumMmx2FilterCode=NULL;
+ c->chrMmx2FilterCode=NULL;
#endif /* ARCH_X86 && CONFIG_GPL */
av_freep(&c->lumMmx2Filter);
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
int srcW, int srcH, enum PixelFormat srcFormat,
int dstW, int dstH, enum PixelFormat dstFormat, int flags,
- SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
+ SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{
static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};