YVU9 -> YV12
untested special converters
- YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
+ YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
YV12/I420 -> YV12/I420
YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
BGR24 -> BGR32 & RGB24 -> RGB32
#include <unistd.h>
#include "config.h"
#include <assert.h>
-#ifdef HAVE_SYS_MMAN_H
+#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
+#if HAVE_VIRTUALALLOC
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
#undef PAVGB
//#undef HAVE_MMX2
-//#define HAVE_3DNOW
+//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP
-#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
+#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
-#define RET 0xC3 //near return opcode for X86
+#define RET 0xC3 //near return opcode for x86
#ifdef M_PI
#define PI M_PI
|| (x)==PIX_FMT_RGB555 \
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
+ || (x)==PIX_FMT_YUV440P \
|| (x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_BGR4_BYTE \
|| (x)==PIX_FMT_RGB4_BYTE \
|| (x)==PIX_FMT_YUV440P \
+ || (x)==PIX_FMT_MONOWHITE \
+ || (x)==PIX_FMT_MONOBLACK \
)
#define isSupportedOut(x) ( \
(x)==PIX_FMT_YUV420P \
+ || (x)==PIX_FMT_YUVA420P \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_GRAY16LE \
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
+ || (x)==PIX_FMT_YUV440P \
)
#define isPacked(x) ( \
(x)==PIX_FMT_PAL8 \
|| isRGB(x) \
|| isBGR(x) \
)
+#define usePal(x) ( \
+ (x)==PIX_FMT_PAL8 \
+ || (x)==PIX_FMT_BGR4_BYTE \
+ || (x)==PIX_FMT_RGB4_BYTE \
+ || (x)==PIX_FMT_BGR8 \
+ || (x)==PIX_FMT_RGB8 \
+ )
-#define RGB2YUV_SHIFT 16
-#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
-#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
-#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
-#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
-#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
-#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
-#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
+#define RGB2YUV_SHIFT 15
+#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-extern const int32_t Inverse_Table_6_9[8][4];
+extern const int32_t ff_yuv2rgb_coeffs[8][4];
static const double rgb2yuv_table[8][9]={
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
-Optimize C code (yv12 / minmax)
-add support for packed pixel yuv input & output
+optimize C code (YV12 / minmax)
+add support for packed pixel YUV input & output
add support for Y8 output
-optimize bgr24 & bgr32
+optimize BGR24 & BGR32
add BGR4 output support
write special BGR->BGR scaler
*/
-#if defined(ARCH_X86) && defined (CONFIG_GPL)
+#if ARCH_X86 && CONFIG_GPL
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
-static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
-static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
-static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
-static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
-
const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
0x0103010301030103LL,
0x0200020002000200LL,};
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUV[2][4]) = {
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
-#endif /* defined(ARCH_X86) */
+#endif /* ARCH_X86 && CONFIG_GPL */
// clipping helper table for C implementations:
static unsigned char clip_table[768];
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
-extern const uint8_t dither_2x2_4[2][8];
-extern const uint8_t dither_2x2_8[2][8];
-extern const uint8_t dither_8x8_32[8][8];
-extern const uint8_t dither_8x8_73[8][8];
-extern const uint8_t dither_8x8_220[8][8];
+static const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
+{ 1, 3, 1, 3, 1, 3, 1, 3, },
+{ 2, 0, 2, 0, 2, 0, 2, 0, },
+};
+
+static const uint8_t __attribute__((aligned(8))) dither_2x2_8[2][8]={
+{ 6, 2, 6, 2, 6, 2, 6, 2, },
+{ 0, 4, 0, 4, 0, 4, 0, 4, },
+};
+
+const uint8_t __attribute__((aligned(8))) dither_8x8_32[8][8]={
+{ 17, 9, 23, 15, 16, 8, 22, 14, },
+{ 5, 29, 3, 27, 4, 28, 2, 26, },
+{ 21, 13, 19, 11, 20, 12, 18, 10, },
+{ 0, 24, 6, 30, 1, 25, 7, 31, },
+{ 16, 8, 22, 14, 17, 9, 23, 15, },
+{ 4, 28, 2, 26, 5, 29, 3, 27, },
+{ 20, 12, 18, 10, 21, 13, 19, 11, },
+{ 1, 25, 7, 31, 0, 24, 6, 30, },
+};
+
+#if 0
+const uint8_t __attribute__((aligned(8))) dither_8x8_64[8][8]={
+{ 0, 48, 12, 60, 3, 51, 15, 63, },
+{ 32, 16, 44, 28, 35, 19, 47, 31, },
+{ 8, 56, 4, 52, 11, 59, 7, 55, },
+{ 40, 24, 36, 20, 43, 27, 39, 23, },
+{ 2, 50, 14, 62, 1, 49, 13, 61, },
+{ 34, 18, 46, 30, 33, 17, 45, 29, },
+{ 10, 58, 6, 54, 9, 57, 5, 53, },
+{ 42, 26, 38, 22, 41, 25, 37, 21, },
+};
+#endif
+
+const uint8_t __attribute__((aligned(8))) dither_8x8_73[8][8]={
+{ 0, 55, 14, 68, 3, 58, 17, 72, },
+{ 37, 18, 50, 32, 40, 22, 54, 35, },
+{ 9, 64, 5, 59, 13, 67, 8, 63, },
+{ 46, 27, 41, 23, 49, 31, 44, 26, },
+{ 2, 57, 16, 71, 1, 56, 15, 70, },
+{ 39, 21, 52, 34, 38, 19, 51, 33, },
+{ 11, 66, 7, 62, 10, 65, 6, 60, },
+{ 48, 30, 43, 25, 47, 29, 42, 24, },
+};
+
+#if 0
+const uint8_t __attribute__((aligned(8))) dither_8x8_128[8][8]={
+{ 68, 36, 92, 60, 66, 34, 90, 58, },
+{ 20, 116, 12, 108, 18, 114, 10, 106, },
+{ 84, 52, 76, 44, 82, 50, 74, 42, },
+{ 0, 96, 24, 120, 6, 102, 30, 126, },
+{ 64, 32, 88, 56, 70, 38, 94, 62, },
+{ 16, 112, 8, 104, 22, 118, 14, 110, },
+{ 80, 48, 72, 40, 86, 54, 78, 46, },
+{ 4, 100, 28, 124, 2, 98, 26, 122, },
+};
+#endif
+
+#if 1
+const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{117, 62, 158, 103, 113, 58, 155, 100, },
+{ 34, 199, 21, 186, 31, 196, 17, 182, },
+{144, 89, 131, 76, 141, 86, 127, 72, },
+{ 0, 165, 41, 206, 10, 175, 52, 217, },
+{110, 55, 151, 96, 120, 65, 162, 107, },
+{ 28, 193, 14, 179, 38, 203, 24, 189, },
+{138, 83, 124, 69, 148, 93, 134, 79, },
+{ 7, 172, 48, 213, 3, 168, 45, 210, },
+};
+#elif 1
+// tries to correct a gamma of 1.5
+const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{ 0, 143, 18, 200, 2, 156, 25, 215, },
+{ 78, 28, 125, 64, 89, 36, 138, 74, },
+{ 10, 180, 3, 161, 16, 195, 8, 175, },
+{109, 51, 93, 38, 121, 60, 105, 47, },
+{ 1, 152, 23, 210, 0, 147, 20, 205, },
+{ 85, 33, 134, 71, 81, 30, 130, 67, },
+{ 14, 190, 6, 171, 12, 185, 5, 166, },
+{117, 57, 101, 44, 113, 54, 97, 41, },
+};
+#elif 1
+// tries to correct a gamma of 2.0
+const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{ 0, 124, 8, 193, 0, 140, 12, 213, },
+{ 55, 14, 104, 42, 66, 19, 119, 52, },
+{ 3, 168, 1, 145, 6, 187, 3, 162, },
+{ 86, 31, 70, 21, 99, 39, 82, 28, },
+{ 0, 134, 11, 206, 0, 129, 9, 200, },
+{ 62, 17, 114, 48, 58, 16, 109, 45, },
+{ 5, 181, 2, 157, 4, 175, 1, 151, },
+{ 95, 36, 78, 26, 90, 34, 74, 24, },
+};
+#else
+// tries to correct a gamma of 2.5
+const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{ 0, 107, 3, 187, 0, 125, 6, 212, },
+{ 39, 7, 86, 28, 49, 11, 102, 36, },
+{ 1, 158, 0, 131, 3, 180, 1, 151, },
+{ 68, 19, 52, 12, 81, 25, 64, 17, },
+{ 0, 119, 5, 203, 0, 113, 4, 195, },
+{ 45, 9, 96, 33, 42, 8, 91, 30, },
+{ 2, 172, 1, 144, 2, 165, 0, 137, },
+{ 77, 23, 60, 15, 72, 21, 56, 14, },
+};
+#endif
const char *sws_format_name(enum PixelFormat format)
{
return "nv21";
case PIX_FMT_YUV440P:
return "yuv440p";
+ case PIX_FMT_VDPAU_H264:
+ return "vdpau_h264";
+ case PIX_FMT_VDPAU_MPEG1:
+ return "vdpau_mpeg1";
+ case PIX_FMT_VDPAU_MPEG2:
+ return "vdpau_mpeg2";
+ case PIX_FMT_VDPAU_WMV3:
+ return "vdpau_wmv3";
+ case PIX_FMT_VDPAU_VC1:
+ return "vdpau_vc1";
default:
return "Unknown format";
}
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+ int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
{
- //FIXME Optimize (just quickly writen not opti..)
+ //FIXME Optimize (just quickly written not optimized..)
int i;
for (i=0; i<dstW; i++)
{
uDest[i]= av_clip_uint8(u>>19);
vDest[i]= av_clip_uint8(v>>19);
}
+
+ if (CONFIG_SWSCALE_ALPHA && aDest)
+ for (i=0; i<dstW; i++){
+ int val=1<<18;
+ int j;
+ for (j=0; j<lumFilterSize; j++)
+ val += alpSrc[j][i] * lumFilter[j];
+
+ aDest[i]= av_clip_uint8(val>>19);
+ }
+
}
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
- //FIXME Optimize (just quickly writen not opti..)
+ //FIXME Optimize (just quickly written not optimized..)
int i;
for (i=0; i<dstW; i++)
{
}
}
-#define YSCALE_YUV_2_PACKEDX_C(type) \
+#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\
int j;\
int Y1 = 1<<18;\
int Y2 = 1<<18;\
int U = 1<<18;\
int V = 1<<18;\
+ int av_unused A1, A2;\
type av_unused *r, *b, *g;\
const int i2= 2*i;\
\
Y2>>=19;\
U >>=19;\
V >>=19;\
+ if (alpha){\
+ A1 = 1<<18;\
+ A2 = 1<<18;\
+ for (j=0; j<lumFilterSize; j++){\
+ A1 += alpSrc[j][i2 ] * lumFilter[j];\
+ A2 += alpSrc[j][i2+1] * lumFilter[j];\
+ }\
+ A1>>=19;\
+ A2>>=19;\
+ }\
+
+#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
+ YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
if ((Y1|Y2|U|V)&256)\
{\
if (Y1>255) Y1=255; \
else if (U<0) U=0; \
if (V>255) V=255; \
else if (V<0) V=0; \
+ }\
+ if (alpha && ((A1|A2)&256)){\
+ A1=av_clip_uint8(A1);\
+ A2=av_clip_uint8(A2);\
}
-#define YSCALE_YUV_2_GRAY16_C(type) \
+#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
+ for (i=0; i<dstW; i++){\
+ int j;\
+ int Y = 0;\
+ int U = -128<<19;\
+ int V = -128<<19;\
+ int av_unused A;\
+ int R,G,B;\
+ \
+ for (j=0; j<lumFilterSize; j++){\
+ Y += lumSrc[j][i ] * lumFilter[j];\
+ }\
+ for (j=0; j<chrFilterSize; j++){\
+ U += chrSrc[j][i ] * chrFilter[j];\
+ V += chrSrc[j][i+VOFW] * chrFilter[j];\
+ }\
+ Y >>=10;\
+ U >>=10;\
+ V >>=10;\
+ if (alpha){\
+ A = rnd;\
+ for (j=0; j<lumFilterSize; j++)\
+ A += alpSrc[j][i ] * lumFilter[j];\
+ A >>=19;\
+ if (A&256)\
+ A = av_clip_uint8(A);\
+ }\
+
+#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
+ YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
+ Y-= c->yuv2rgb_y_offset;\
+ Y*= c->yuv2rgb_y_coeff;\
+ Y+= rnd;\
+ R= Y + V*c->yuv2rgb_v2r_coeff;\
+ G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
+ B= Y + U*c->yuv2rgb_u2b_coeff;\
+ if ((R|G|B)&(0xC0000000)){\
+ if (R>=(256<<22)) R=(256<<22)-1; \
+ else if (R<0)R=0; \
+ if (G>=(256<<22)) G=(256<<22)-1; \
+ else if (G<0)G=0; \
+ if (B>=(256<<22)) B=(256<<22)-1; \
+ else if (B<0)B=0; \
+ }\
+
+
+#define YSCALE_YUV_2_GRAY16_C \
for (i=0; i<(dstW>>1); i++){\
int j;\
int Y1 = 1<<18;\
int Y2 = 1<<18;\
int U = 1<<18;\
int V = 1<<18;\
- type av_unused *r, *b, *g;\
+ \
const int i2= 2*i;\
\
for (j=0; j<lumFilterSize; j++)\
else if (Y2<0)Y2=0; \
}
-#define YSCALE_YUV_2_RGBX_C(type) \
- YSCALE_YUV_2_PACKEDX_C(type) \
+#define YSCALE_YUV_2_RGBX_C(type,alpha) \
+ YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
r = (type *)c->table_rV[V]; \
g = (type *)(c->table_gU[U] + c->table_gV[V]); \
b = (type *)c->table_bU[U]; \
-#define YSCALE_YUV_2_PACKED2_C \
+#define YSCALE_YUV_2_PACKED2_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){ \
const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \
int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \
+ type av_unused *r, *b, *g; \
+ int av_unused A1, A2; \
+ if (alpha){\
+ A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
+ A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
+ }\
#define YSCALE_YUV_2_GRAY16_2_C \
for (i=0; i<(dstW>>1); i++){ \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; \
-#define YSCALE_YUV_2_RGB2_C(type) \
- YSCALE_YUV_2_PACKED2_C\
- type *r, *b, *g;\
+#define YSCALE_YUV_2_RGB2_C(type,alpha) \
+ YSCALE_YUV_2_PACKED2_C(type,alpha)\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\
-#define YSCALE_YUV_2_PACKED1_C \
+#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf1[i ])>>7;\
int V= (uvbuf1[i+VOFW])>>7;\
+ type av_unused *r, *b, *g;\
+ int av_unused A1, A2;\
+ if (alpha){\
+ A1= abuf0[i2 ]>>7;\
+ A2= abuf0[i2+1]>>7;\
+ }\
#define YSCALE_YUV_2_GRAY16_1_C \
for (i=0; i<(dstW>>1); i++){\
int Y1= buf0[i2 ]<<1;\
int Y2= buf0[i2+1]<<1;\
-#define YSCALE_YUV_2_RGB1_C(type) \
- YSCALE_YUV_2_PACKED1_C\
- type *r, *b, *g;\
+#define YSCALE_YUV_2_RGB1_C(type,alpha) \
+ YSCALE_YUV_2_PACKED1_C(type,alpha)\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\
-#define YSCALE_YUV_2_PACKED1B_C \
+#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
for (i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
+ type av_unused *r, *b, *g;\
+ int av_unused A1, A2;\
+ if (alpha){\
+ A1= abuf0[i2 ]>>7;\
+ A2= abuf0[i2+1]>>7;\
+ }\
-#define YSCALE_YUV_2_RGB1B_C(type) \
- YSCALE_YUV_2_PACKED1B_C\
- type *r, *b, *g;\
+#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
+ YSCALE_YUV_2_PACKED1B_C(type,alpha)\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16)\
+#define YSCALE_YUV_2_MONO2_C \
+ const uint8_t * const d128=dither_8x8_220[y&7];\
+ uint8_t *g= c->table_gU[128] + c->table_gV[128];\
+ for (i=0; i<dstW-7; i+=8){\
+ int acc;\
+ acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
+ acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
+ acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
+ acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
+ acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
+ acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
+ acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
+ acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
+ ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
+ dest++;\
+ }\
+
+
+#define YSCALE_YUV_2_MONOX_C \
+ const uint8_t * const d128=dither_8x8_220[y&7];\
+ uint8_t *g= c->table_gU[128] + c->table_gV[128];\
+ int acc=0;\
+ for (i=0; i<dstW-1; i+=2){\
+ int j;\
+ int Y1=1<<18;\
+ int Y2=1<<18;\
+\
+ for (j=0; j<lumFilterSize; j++)\
+ {\
+ Y1 += lumSrc[j][i] * lumFilter[j];\
+ Y2 += lumSrc[j][i+1] * lumFilter[j];\
+ }\
+ Y1>>=19;\
+ Y2>>=19;\
+ if ((Y1|Y2)&256)\
+ {\
+ if (Y1>255) Y1=255;\
+ else if (Y1<0)Y1=0;\
+ if (Y2>255) Y2=255;\
+ else if (Y2<0)Y2=0;\
+ }\
+ acc+= acc + g[Y1+d128[(i+0)&7]];\
+ acc+= acc + g[Y2+d128[(i+1)&7]];\
+ if ((i&7)==6){\
+ ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
+ dest++;\
+ }\
+ }
+
+
+#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
switch(c->dstFormat)\
{\
- case PIX_FMT_RGB32:\
- case PIX_FMT_BGR32:\
- case PIX_FMT_RGB32_1:\
- case PIX_FMT_BGR32_1:\
- func(uint32_t)\
- ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
- ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
+ case PIX_FMT_RGBA:\
+ case PIX_FMT_BGRA:\
+ if (CONFIG_SMALL){\
+ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
+ func(uint32_t,needAlpha)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
+ }\
+ }else{\
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
+ func(uint32_t,1)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
+ }\
+ }else{\
+ func(uint32_t,0)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
+ }\
+ }\
+ }\
+ break;\
+ case PIX_FMT_ARGB:\
+ case PIX_FMT_ABGR:\
+ if (CONFIG_SMALL){\
+ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
+ func(uint32_t,needAlpha)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
+ }\
+ }else{\
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
+ func(uint32_t,1)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
+ }\
+ }else{\
+ func(uint32_t,0)\
+ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
+ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
+ }\
+ }\
} \
break;\
case PIX_FMT_RGB24:\
- func(uint8_t)\
+ func(uint8_t,0)\
((uint8_t*)dest)[0]= r[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= b[Y1];\
}\
break;\
case PIX_FMT_BGR24:\
- func(uint8_t)\
+ func(uint8_t,0)\
((uint8_t*)dest)[0]= b[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= r[Y1];\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_4[y&1 ][1];\
const int db2= dither_2x2_8[(y&1)^1][1];\
- func(uint16_t)\
+ func(uint16_t,0)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_8[y&1 ][0];\
const int db2= dither_2x2_8[(y&1)^1][1];\
- func(uint16_t)\
+ func(uint16_t,0)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
{\
const uint8_t * const d64= dither_8x8_73[y&7];\
const uint8_t * const d32= dither_8x8_32[y&7];\
- func(uint8_t)\
+ func(uint8_t,0)\
((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
}\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
- func(uint8_t)\
+ func(uint8_t,0)\
((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
+ ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
}\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
- func(uint8_t)\
+ func(uint8_t,0)\
((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
}\
}\
break;\
case PIX_FMT_MONOBLACK:\
+ case PIX_FMT_MONOWHITE:\
{\
- const uint8_t * const d128=dither_8x8_220[y&7];\
- uint8_t *g= c->table_gU[128] + c->table_gV[128];\
- for (i=0; i<dstW-7; i+=8){\
- int acc;\
- acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
- acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
- acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
- acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
- acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
- acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
- acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
- acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
- ((uint8_t*)dest)[0]= acc;\
- dest++;\
- }\
-\
-/*\
-((uint8_t*)dest)-= dstW>>4;\
-{\
- int acc=0;\
- int left=0;\
- static int top[1024];\
- static int last_new[1024][1024];\
- static int last_in3[1024][1024];\
- static int drift[1024][1024];\
- int topLeft=0;\
- int shift=0;\
- int count=0;\
- const uint8_t * const d128=dither_8x8_220[y&7];\
- int error_new=0;\
- int error_in3=0;\
- int f=0;\
- \
- for (i=dstW>>1; i<dstW; i++){\
- int in= ((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19);\
- int in2 = (76309 * (in - 16) + 32768) >> 16;\
- int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\
- int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\
- + (last_new[y][i] - in3)*f/256;\
- int new= old> 128 ? 255 : 0;\
-\
- error_new+= FFABS(last_new[y][i] - new);\
- error_in3+= FFABS(last_in3[y][i] - in3);\
- f= error_new - error_in3*4;\
- if (f<0) f=0;\
- if (f>256) f=256;\
-\
- topLeft= top[i];\
- left= top[i]= old - new;\
- last_new[y][i]= new;\
- last_in3[y][i]= in3;\
-\
- acc+= acc + (new&1);\
- if ((i&7)==6){\
- ((uint8_t*)dest)[0]= acc;\
- ((uint8_t*)dest)++;\
- }\
- }\
-}\
-*/\
+ func_monoblack\
}\
break;\
case PIX_FMT_YUYV422:\
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
- uint8_t *dest, int dstW, int y)
+ int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
int i;
- switch(c->dstFormat)
- {
- case PIX_FMT_BGR32:
- case PIX_FMT_RGB32:
- case PIX_FMT_BGR32_1:
- case PIX_FMT_RGB32_1:
- YSCALE_YUV_2_RGBX_C(uint32_t)
- ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
- ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
- }
- break;
+ YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
+}
+
+static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+ int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+ int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+{
+ int i;
+ int step= fmt_depth(c->dstFormat)/8;
+ int aidx= 3;
+
+ switch(c->dstFormat){
+ case PIX_FMT_ARGB:
+ dest++;
+ aidx= 0;
case PIX_FMT_RGB24:
- YSCALE_YUV_2_RGBX_C(uint8_t)
- ((uint8_t*)dest)[0]= r[Y1];
- ((uint8_t*)dest)[1]= g[Y1];
- ((uint8_t*)dest)[2]= b[Y1];
- ((uint8_t*)dest)[3]= r[Y2];
- ((uint8_t*)dest)[4]= g[Y2];
- ((uint8_t*)dest)[5]= b[Y2];
- dest+=6;
- }
- break;
- case PIX_FMT_BGR24:
- YSCALE_YUV_2_RGBX_C(uint8_t)
- ((uint8_t*)dest)[0]= b[Y1];
- ((uint8_t*)dest)[1]= g[Y1];
- ((uint8_t*)dest)[2]= r[Y1];
- ((uint8_t*)dest)[3]= b[Y2];
- ((uint8_t*)dest)[4]= g[Y2];
- ((uint8_t*)dest)[5]= r[Y2];
- dest+=6;
- }
- break;
- case PIX_FMT_RGB565:
- case PIX_FMT_BGR565:
- {
- const int dr1= dither_2x2_8[y&1 ][0];
- const int dg1= dither_2x2_4[y&1 ][0];
- const int db1= dither_2x2_8[(y&1)^1][0];
- const int dr2= dither_2x2_8[y&1 ][1];
- const int dg2= dither_2x2_4[y&1 ][1];
- const int db2= dither_2x2_8[(y&1)^1][1];
- YSCALE_YUV_2_RGBX_C(uint16_t)
- ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
- ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
- }
- }
- break;
- case PIX_FMT_RGB555:
- case PIX_FMT_BGR555:
- {
- const int dr1= dither_2x2_8[y&1 ][0];
- const int dg1= dither_2x2_8[y&1 ][1];
- const int db1= dither_2x2_8[(y&1)^1][0];
- const int dr2= dither_2x2_8[y&1 ][1];
- const int dg2= dither_2x2_8[y&1 ][0];
- const int db2= dither_2x2_8[(y&1)^1][1];
- YSCALE_YUV_2_RGBX_C(uint16_t)
- ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
- ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
- }
- }
- break;
- case PIX_FMT_RGB8:
- case PIX_FMT_BGR8:
- {
- const uint8_t * const d64= dither_8x8_73[y&7];
- const uint8_t * const d32= dither_8x8_32[y&7];
- YSCALE_YUV_2_RGBX_C(uint8_t)
- ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
- ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
+ aidx--;
+ case PIX_FMT_RGBA:
+ if (CONFIG_SMALL){
+ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
+ dest[aidx]= needAlpha ? A : 255;
+ dest[0]= R>>22;
+ dest[1]= G>>22;
+ dest[2]= B>>22;
+ dest+= step;
}
- }
- break;
- case PIX_FMT_RGB4:
- case PIX_FMT_BGR4:
- {
- const uint8_t * const d64= dither_8x8_73 [y&7];
- const uint8_t * const d128=dither_8x8_220[y&7];
- YSCALE_YUV_2_RGBX_C(uint8_t)
- ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
- +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
+ }else{
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
+ dest[aidx]= A;
+ dest[0]= R>>22;
+ dest[1]= G>>22;
+ dest[2]= B>>22;
+ dest+= step;
+ }
+ }else{
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
+ dest[aidx]= 255;
+ dest[0]= R>>22;
+ dest[1]= G>>22;
+ dest[2]= B>>22;
+ dest+= step;
+ }
}
}
break;
- case PIX_FMT_RGB4_BYTE:
- case PIX_FMT_BGR4_BYTE:
- {
- const uint8_t * const d64= dither_8x8_73 [y&7];
- const uint8_t * const d128=dither_8x8_220[y&7];
- YSCALE_YUV_2_RGBX_C(uint8_t)
- ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
- ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
+ case PIX_FMT_ABGR:
+ dest++;
+ aidx= 0;
+ case PIX_FMT_BGR24:
+ aidx--;
+ case PIX_FMT_BGRA:
+ if (CONFIG_SMALL){
+ int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
+ dest[aidx]= needAlpha ? A : 255;
+ dest[0]= B>>22;
+ dest[1]= G>>22;
+ dest[2]= R>>22;
+ dest+= step;
}
- }
- break;
- case PIX_FMT_MONOBLACK:
- {
- const uint8_t * const d128=dither_8x8_220[y&7];
- uint8_t *g= c->table_gU[128] + c->table_gV[128];
- int acc=0;
- for (i=0; i<dstW-1; i+=2){
- int j;
- int Y1=1<<18;
- int Y2=1<<18;
-
- for (j=0; j<lumFilterSize; j++)
- {
- Y1 += lumSrc[j][i] * lumFilter[j];
- Y2 += lumSrc[j][i+1] * lumFilter[j];
- }
- Y1>>=19;
- Y2>>=19;
- if ((Y1|Y2)&256)
- {
- if (Y1>255) Y1=255;
- else if (Y1<0)Y1=0;
- if (Y2>255) Y2=255;
- else if (Y2<0)Y2=0;
+ }else{
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
+ dest[aidx]= A;
+ dest[0]= B>>22;
+ dest[1]= G>>22;
+ dest[2]= R>>22;
+ dest+= step;
}
- acc+= acc + g[Y1+d128[(i+0)&7]];
- acc+= acc + g[Y2+d128[(i+1)&7]];
- if ((i&7)==6){
- ((uint8_t*)dest)[0]= acc;
- dest++;
+ }else{
+ YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
+ dest[aidx]= 255;
+ dest[0]= B>>22;
+ dest[1]= G>>22;
+ dest[2]= R>>22;
+ dest+= step;
}
}
}
break;
- case PIX_FMT_YUYV422:
- YSCALE_YUV_2_PACKEDX_C(void)
- ((uint8_t*)dest)[2*i2+0]= Y1;
- ((uint8_t*)dest)[2*i2+1]= U;
- ((uint8_t*)dest)[2*i2+2]= Y2;
- ((uint8_t*)dest)[2*i2+3]= V;
- }
- break;
- case PIX_FMT_UYVY422:
- YSCALE_YUV_2_PACKEDX_C(void)
- ((uint8_t*)dest)[2*i2+0]= U;
- ((uint8_t*)dest)[2*i2+1]= Y1;
- ((uint8_t*)dest)[2*i2+2]= V;
- ((uint8_t*)dest)[2*i2+3]= Y2;
- }
- break;
- case PIX_FMT_GRAY16BE:
- YSCALE_YUV_2_GRAY16_C(void)
- ((uint8_t*)dest)[2*i2+0]= Y1>>8;
- ((uint8_t*)dest)[2*i2+1]= Y1;
- ((uint8_t*)dest)[2*i2+2]= Y2>>8;
- ((uint8_t*)dest)[2*i2+3]= Y2;
- }
- break;
- case PIX_FMT_GRAY16LE:
- YSCALE_YUV_2_GRAY16_C(void)
- ((uint8_t*)dest)[2*i2+0]= Y1;
- ((uint8_t*)dest)[2*i2+1]= Y1>>8;
- ((uint8_t*)dest)[2*i2+2]= Y2;
- ((uint8_t*)dest)[2*i2+3]= Y2>>8;
- }
- break;
+ default:
+ assert(0);
}
}
+static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val){
+ int i;
+ uint8_t *ptr = plane + stride*y;
+ for (i=0; i<height; i++){
+ memset(ptr, val, width);
+ ptr += stride;
+ }
+}
-//Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
+//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
-#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
+#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
#define COMPILE_C
#endif
-#ifdef ARCH_POWERPC
-#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
+#if ARCH_PPC
+#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#undef COMPILE_C
#define COMPILE_ALTIVEC
-#endif //HAVE_ALTIVEC
-#endif //ARCH_POWERPC
+#endif
+#endif //ARCH_PPC
-#if defined(ARCH_X86)
+#if ARCH_X86
-#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
+#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_MMX
#endif
-#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
+#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_MMX2
#endif
-#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
+#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_3DNOW
#endif
-#endif //ARCH_X86 || ARCH_X86_64
+#endif //ARCH_X86
#undef HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
+#undef HAVE_ALTIVEC
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_ALTIVEC 0
#ifdef COMPILE_C
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_3DNOW
-#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif
#ifdef COMPILE_ALTIVEC
#undef RENAME
-#define HAVE_ALTIVEC
+#undef HAVE_ALTIVEC
+#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
-#if defined(ARCH_X86)
+#if ARCH_X86
-//X86 versions
+//x86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
-#define HAVE_MMX
+#undef HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif
//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
-#define HAVE_MMX
-#define HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif
//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
-#define HAVE_MMX
+#undef HAVE_MMX
#undef HAVE_MMX2
-#define HAVE_3DNOW
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif
-#endif //ARCH_X86 || ARCH_X86_64
+#endif //ARCH_X86
-// minor note: the HAVE_xyz is messed up after that line so don't use it
+// minor note: the HAVE_xyz are messed up after this line so don't use them
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
int filterSize;
int filter2Size;
int minFilterSize;
- double *filter=NULL;
- double *filter2=NULL;
+ int64_t *filter=NULL;
+ int64_t *filter2=NULL;
+ const int64_t fone= 1LL<<54;
int ret= -1;
-#if defined(ARCH_X86)
+#if ARCH_X86
if (flags & SWS_CPU_CAPS_MMX)
- asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
+ __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif
- // Note the +1 is for the MMXscaler which reads over the end
+ // NOTE: the +1 is for the MMX scaler which reads over the end
*filterPos = av_malloc((dstW+1)*sizeof(int16_t));
if (FFABS(xInc - 0x10000) <10) // unscaled
{
int i;
filterSize= 1;
- filter= av_malloc(dstW*sizeof(double)*filterSize);
- for (i=0; i<dstW*filterSize; i++) filter[i]=0;
+ filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
for (i=0; i<dstW; i++)
{
- filter[i*filterSize]=1;
+ filter[i*filterSize]= fone;
(*filterPos)[i]=i;
}
int i;
int xDstInSrc;
filterSize= 1;
- filter= av_malloc(dstW*sizeof(double)*filterSize);
+ filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++)
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
(*filterPos)[i]= xx;
- filter[i]= 1.0;
+ filter[i]= fone;
xDstInSrc+= xInc;
}
}
if (flags&SWS_BICUBIC) filterSize= 4;
else if (flags&SWS_X ) filterSize= 4;
else filterSize= 2; // SWS_BILINEAR / SWS_AREA
- filter= av_malloc(dstW*sizeof(double)*filterSize);
+ filter= av_malloc(dstW*sizeof(*filter)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++)
int j;
(*filterPos)[i]= xx;
- //Bilinear upscale / linear interpolate / Area averaging
+ //bilinear upscale / linear interpolate / area averaging
for (j=0; j<filterSize; j++)
{
- double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
- double coeff= 1.0 - d;
+ int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
if (coeff<0) coeff=0;
filter[i*filterSize + j]= coeff;
xx++;
}
else
{
- double xDstInSrc;
- double sizeFactor, filterSizeInSrc;
- const double xInc1= (double)xInc / (double)(1<<16);
-
- if (flags&SWS_BICUBIC) sizeFactor= 4.0;
- else if (flags&SWS_X) sizeFactor= 8.0;
- else if (flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear
- else if (flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;)
- else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
- else if (flags&SWS_SINC) sizeFactor= 20.0; // infinite ;)
- else if (flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;)
- else if (flags&SWS_BILINEAR) sizeFactor= 2.0;
+ int xDstInSrc;
+ int sizeFactor;
+
+ if (flags&SWS_BICUBIC) sizeFactor= 4;
+ else if (flags&SWS_X) sizeFactor= 8;
+ else if (flags&SWS_AREA) sizeFactor= 1; //downscale only, for upscale it is bilinear
+ else if (flags&SWS_GAUSS) sizeFactor= 8; // infinite ;)
+ else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
+ else if (flags&SWS_SINC) sizeFactor= 20; // infinite ;)
+ else if (flags&SWS_SPLINE) sizeFactor= 20; // infinite ;)
+ else if (flags&SWS_BILINEAR) sizeFactor= 2;
else {
- sizeFactor= 0.0; //GCC warning killer
+ sizeFactor= 0; //GCC warning killer
assert(0);
}
- if (xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale
- else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
+ if (xInc <= 1<<16) filterSize= 1 + sizeFactor; // upscale
+ else filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
- filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
if (filterSize > srcW-2) filterSize=srcW-2;
- filter= av_malloc(dstW*sizeof(double)*filterSize);
+ filter= av_malloc(dstW*sizeof(*filter)*filterSize);
- xDstInSrc= xInc1 / 2.0 - 0.5;
+ xDstInSrc= xInc - 0x10000;
for (i=0; i<dstW; i++)
{
- int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
+ int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
int j;
(*filterPos)[i]= xx;
for (j=0; j<filterSize; j++)
{
- double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
- double coeff;
+ int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
+ double floatd;
+ int64_t coeff;
+
+ if (xInc > 1<<16)
+ d= d*dstW/srcW;
+ floatd= d * (1.0/(1<<30));
+
if (flags & SWS_BICUBIC)
{
- double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
- double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
-
- if (d<1.0)
- coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
- else if (d<2.0)
- coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
+ int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
+ int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
+ int64_t dd = ( d*d)>>30;
+ int64_t ddd= (dd*d)>>30;
+
+ if (d < 1LL<<30)
+ coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
+ else if (d < 1LL<<31)
+ coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
else
coeff=0.0;
+ coeff *= fone>>(30+24);
}
/* else if (flags & SWS_X)
{
else if (flags & SWS_X)
{
double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
+ double c;
- if (d<1.0)
- coeff = cos(d*PI);
+ if (floatd<1.0)
+ c = cos(floatd*PI);
else
- coeff=-1.0;
- if (coeff<0.0) coeff= -pow(-coeff, A);
- else coeff= pow( coeff, A);
- coeff= coeff*0.5 + 0.5;
+ c=-1.0;
+ if (c<0.0) c= -pow(-c, A);
+ else c= pow( c, A);
+ coeff= (c*0.5 + 0.5)*fone;
}
else if (flags & SWS_AREA)
{
- double srcPixelSize= 1.0/xInc1;
- if (d + srcPixelSize/2 < 0.5) coeff= 1.0;
- else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
+ int64_t d2= d - (1<<29);
+ if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
+ else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
else coeff=0.0;
+ coeff *= fone>>(30+16);
}
else if (flags & SWS_GAUSS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
- coeff = pow(2.0, - p*d*d);
+ coeff = (pow(2.0, - p*floatd*floatd))*fone;
}
else if (flags & SWS_SINC)
{
- coeff = d ? sin(d*PI)/(d*PI) : 1.0;
+ coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
}
else if (flags & SWS_LANCZOS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
- coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
- if (d>p) coeff=0;
+ coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
+ if (floatd>p) coeff=0;
}
else if (flags & SWS_BILINEAR)
{
- coeff= 1.0 - d;
+ coeff= (1<<30) - d;
if (coeff<0) coeff=0;
+ coeff *= fone >> 30;
}
else if (flags & SWS_SPLINE)
{
double p=-2.196152422706632;
- coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
+ coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
}
else {
coeff= 0.0; //GCC warning killer
filter[i*filterSize + j]= coeff;
xx++;
}
- xDstInSrc+= xInc1;
+ xDstInSrc+= 2*xInc;
}
}
if (srcFilter) filter2Size+= srcFilter->length - 1;
if (dstFilter) filter2Size+= dstFilter->length - 1;
assert(filter2Size>0);
- filter2= av_malloc(filter2Size*dstW*sizeof(double));
+ filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
for (i=0; i<dstW; i++)
{
- int j;
- SwsVector scaleFilter;
- SwsVector *outVec;
-
- scaleFilter.coeff= filter + i*filterSize;
- scaleFilter.length= filterSize;
-
- if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
- else outVec= &scaleFilter;
+ int j, k;
- assert(outVec->length == filter2Size);
- //FIXME dstFilter
-
- for (j=0; j<outVec->length; j++)
- {
- filter2[i*filter2Size + j]= outVec->coeff[j];
+ if(srcFilter){
+ for (k=0; k<srcFilter->length; k++){
+ for (j=0; j<filterSize; j++)
+ filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
+ }
+ }else{
+ for (j=0; j<filterSize; j++)
+ filter2[i*filter2Size + j]= filter[i*filterSize + j];
}
+ //FIXME dstFilter
(*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
-
- if (outVec != &scaleFilter) sws_freeVec(outVec);
}
av_freep(&filter);
{
int min= filter2Size;
int j;
- double cutOff=0.0;
+ int64_t cutOff=0.0;
/* get rid off near zero elements on the left by shifting left */
for (j=0; j<filter2Size; j++)
int k;
cutOff += FFABS(filter2[i*filter2Size]);
- if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
/* preserve monotonicity because the core can't handle the filter otherwise */
if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
- // Move filter coeffs left
+ // move filter coefficients left
for (k=1; k<filter2Size; k++)
filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
- filter2[i*filter2Size + k - 1]= 0.0;
+ filter2[i*filter2Size + k - 1]= 0;
(*filterPos)[i]++;
}
- cutOff=0.0;
+ cutOff=0;
/* count near zeros on the right */
for (j=filter2Size-1; j>0; j--)
{
cutOff += FFABS(filter2[i*filter2Size + j]);
- if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
+ if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
min--;
}
if (minFilterSize < 5)
filterAlign = 4;
- // we really don't want to waste our time
- // doing useless computation, so fall-back on
- // the scalar C code for very small filter.
- // vectorizing is worth it only if you have
+ // We really don't want to waste our time
+ // doing useless computation, so fall back on
+ // the scalar C code for very small filters.
+ // Vectorizing is worth it only if you have a
// decent-sized vector.
if (minFilterSize < 3)
filterAlign = 1;
assert(minFilterSize > 0);
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
assert(filterSize > 0);
- filter= av_malloc(filterSize*dstW*sizeof(double));
+ filter= av_malloc(filterSize*dstW*sizeof(*filter));
if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
goto error;
*outFilterSize= filterSize;
for (j=0; j<filterSize; j++)
{
- if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
+ if (j>=filter2Size) filter[i*filterSize + j]= 0;
else filter[i*filterSize + j]= filter2[i*filter2Size + j];
+ if((flags & SWS_BITEXACT) && j>=minFilterSize)
+ filter[i*filterSize + j]= 0;
}
}
- //FIXME try to align filterpos if possible
+ //FIXME try to align filterPos if possible
//fix borders
for (i=0; i<dstW; i++)
int j;
if ((*filterPos)[i] < 0)
{
- // Move filter coeffs left to compensate for filterPos
+ // move filter coefficients left to compensate for filterPos
for (j=1; j<filterSize; j++)
{
int left= FFMAX(j + (*filterPos)[i], 0);
if ((*filterPos)[i] + filterSize > srcW)
{
int shift= (*filterPos)[i] + filterSize - srcW;
- // Move filter coeffs right to compensate for filterPos
+ // move filter coefficients right to compensate for filterPos
for (j=filterSize-2; j>=0; j--)
{
int right= FFMIN(j + shift, filterSize-1);
}
}
- // Note the +1 is for the MMXscaler which reads over the end
+ // Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
*outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
- /* Normalize & Store in outFilter */
+ /* normalize & store in outFilter */
for (i=0; i<dstW; i++)
{
int j;
- double error=0;
- double sum=0;
- double scale= one;
+ int64_t error=0;
+ int64_t sum=0;
for (j=0; j<filterSize; j++)
{
sum+= filter[i*filterSize + j];
}
- scale/= sum;
+ sum= (sum + one/2)/ one;
for (j=0; j<*outFilterSize; j++)
{
- double v= filter[i*filterSize + j]*scale + error;
- int intV= floor(v + 0.5);
+ int64_t v= filter[i*filterSize + j] + error;
+ int intV= ROUNDED_DIV(v, sum);
(*outFilter)[i*(*outFilterSize) + j]= intV;
- error = v - intV;
+ error= v - intV*sum;
}
}
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
uint8_t *fragmentA;
- long imm8OfPShufW1A;
- long imm8OfPShufW2A;
- long fragmentLengthA;
+ x86_reg imm8OfPShufW1A;
+ x86_reg imm8OfPShufW2A;
+ x86_reg fragmentLengthA;
uint8_t *fragmentB;
- long imm8OfPShufW1B;
- long imm8OfPShufW2B;
- long fragmentLengthB;
+ x86_reg imm8OfPShufW1B;
+ x86_reg imm8OfPShufW2B;
+ x86_reg fragmentLengthB;
int fragmentPos;
int xpos, i;
//code fragment
- asm volatile(
+ __asm__ volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
"=r" (fragmentLengthA)
);
- asm volatile(
+ __asm__ volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
}
xpos+=xInc;
}
- filterPos[i/2]= xpos>>16; // needed to jump to the next part
+ filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part
}
#endif /* COMPILE_MMX2 */
static SwsFunc getSwsFunc(int flags){
-#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
-#if defined(ARCH_X86)
+#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
+#if ARCH_X86
// ordered per speed fastest first
if (flags & SWS_CPU_CAPS_MMX2)
return swScale_MMX2;
return swScale_C;
#else
-#ifdef ARCH_POWERPC
+#if ARCH_PPC
if (flags & SWS_CPU_CAPS_ALTIVEC)
return swScale_altivec;
else
return swScale_C;
#endif
return swScale_C;
-#endif /* defined(ARCH_X86) */
+#endif /* ARCH_X86 */
#else //RUNTIME_CPUDETECT
-#ifdef HAVE_MMX2
+#if HAVE_MMX2
return swScale_MMX2;
-#elif defined (HAVE_3DNOW)
+#elif HAVE_AMD3DNOW
return swScale_3DNow;
-#elif defined (HAVE_MMX)
+#elif HAVE_MMX
return swScale_MMX;
-#elif defined (HAVE_ALTIVEC)
+#elif HAVE_ALTIVEC
return swScale_altivec;
#else
return swScale_C;
return srcSliceH;
}
+static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+ uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+ uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+ uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+ yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+ if (dstParam[3])
+ fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+
+ return srcSliceH;
+}
+
+static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+ uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+ uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
+ uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
+
+ yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+ return srcSliceH;
+}
+
+static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+ uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+ uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+ uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+ uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+ if (dstParam[3])
+ fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+
+ return srcSliceH;
+}
+
+static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+ uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+ uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
+ uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
+
+ uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+ return srcSliceH;
+}
+
+static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dst[], int dstStride[]){
+ const enum PixelFormat srcFormat= c->srcFormat;
+ const enum PixelFormat dstFormat= c->dstFormat;
+ void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
+ const uint8_t *palette)=NULL;
+ int i;
+ uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+ uint8_t *srcPtr= src[0];
+
+ if (!usePal(srcFormat))
+ av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+ sws_format_name(srcFormat), sws_format_name(dstFormat));
+
+ switch(dstFormat){
+ case PIX_FMT_RGB32 : conv = palette8topacked32; break;
+ case PIX_FMT_BGR32 : conv = palette8topacked32; break;
+ case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
+ case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
+ case PIX_FMT_RGB24 : conv = palette8topacked24; break;
+ case PIX_FMT_BGR24 : conv = palette8topacked24; break;
+ default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+ sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
+ }
+
+
+ for (i=0; i<srcSliceH; i++) {
+ conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
+ srcPtr+= srcStride[0];
+ dstPtr+= dstStride[0];
+ }
+
+ return srcSliceH;
+}
+
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
- const int srcFormat= c->srcFormat;
- const int dstFormat= c->dstFormat;
+ const enum PixelFormat srcFormat= c->srcFormat;
+ const enum PixelFormat dstFormat= c->dstFormat;
const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
dst[2]+(srcSliceY>>1)*dstStride[2],
c->srcW, srcSliceH,
dstStride[0], dstStride[1], srcStride[0]);
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
return srcSliceH;
}
}
}
- if (c->dstFormat==PIX_FMT_YUV420P){
+ if (c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P){
planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
}else{
planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
}
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
return srcSliceH;
}
int srcSliceH, uint8_t* dst[], int dstStride[])
{
int plane;
- for (plane=0; plane<3; plane++)
+ for (plane=0; plane<4; plane++)
{
- int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
- int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
- int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
-
- if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
- {
- if (!isGray(c->dstFormat))
- memset(dst[plane], 128, dstStride[plane]*height);
- }
+ int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
+ int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
+ int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
+
+ if (!dst[plane]) continue;
+ // ignore palette for GRAY8
+ if (plane == 1 && !dst[2]) continue;
+ if (!src[plane] || (plane == 1 && !src[2]))
+ fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
else
{
if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
srcPtr+= srcStride[0];
dstPtr+= dstStride[0];
}
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], length, height, y, 255);
return srcSliceH;
}
}
/**
- * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
+ * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
* @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
* @return -1 if not supported
*/
c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL;
c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL;
- yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
+ c->yuv2rgb_y_coeff = (int16_t)roundToInt16(cy <<13);
+ c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
+ c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
+ c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
+ c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
+ c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
+
+ ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
//FIXME factorize
#ifdef COMPILE_ALTIVEC
if (c->flags & SWS_CPU_CAPS_ALTIVEC)
- yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
+ ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
#endif
return 0;
}
return 0;
}
-static int handle_jpeg(int *format)
+static int handle_jpeg(enum PixelFormat *format)
{
switch (*format) {
case PIX_FMT_YUVJ420P:
}
}
-SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
+SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
SwsContext *c;
int unscaled, needsDither;
int srcRange, dstRange;
SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
-#if defined(ARCH_X86)
+#if ARCH_X86
if (flags & SWS_CPU_CAPS_MMX)
- asm volatile("emms\n\t"::: "memory");
+ __asm__ volatile("emms\n\t"::: "memory");
#endif
-#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
+#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
-#ifdef HAVE_MMX2
+#if HAVE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
-#elif defined (HAVE_3DNOW)
+#elif HAVE_AMD3DNOW
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
-#elif defined (HAVE_MMX)
+#elif HAVE_MMX
flags |= SWS_CPU_CAPS_MMX;
-#elif defined (HAVE_ALTIVEC)
+#elif HAVE_ALTIVEC
flags |= SWS_CPU_CAPS_ALTIVEC;
-#elif defined (ARCH_BFIN)
+#elif ARCH_BFIN
flags |= SWS_CPU_CAPS_BFIN;
#endif
#endif /* RUNTIME_CPUDETECT */
|SWS_BICUBLIN);
if(!i || (i & (i-1)))
{
- av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be choosen\n");
+ av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
return NULL;
}
-
/* sanity check */
if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
{
return NULL;
}
if(srcW > VOFW || dstW > VOFW){
- av_log(NULL, AV_LOG_ERROR, "swScaler: Compile time max width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
+ av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
return NULL;
}
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
- // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
+ // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
// drop some chroma lines if the user wants it
c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
c->chrSrcVSubSample+= c->vChrDrop;
- // drop every 2. pixel for chroma calculation unless user wants full chroma
+ // drop every other pixel for chroma calculation unless user wants full chroma
if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
&& srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8
&& srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4
c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
- sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
+ sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
- /* unscaled special Cases */
+ /* unscaled special cases */
if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
{
/* yv12_to_nv12 */
- if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
+ if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
{
c->swScale= PlanarToNV12Wrapper;
}
-#ifdef CONFIG_GPL
/* yuv2bgr */
- if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
+ if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
+ && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
{
- c->swScale= yuv2rgb_get_func_ptr(c);
+ c->swScale= ff_yuv2rgb_get_func_ptr(c);
}
-#endif
- if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P)
+ if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT))
{
c->swScale= yvu9toyv12Wrapper;
}
/* bgr24toYV12 */
- if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
+ if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
c->swScale= bgr24toyv12Wrapper;
- /* rgb/bgr -> rgb/bgr (no dither needed forms) */
+ /* RGB/BGR -> RGB/BGR (no dither needed forms) */
if ( (isBGR(srcFormat) || isRGB(srcFormat))
&& (isBGR(dstFormat) || isRGB(dstFormat))
&& srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
&& srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
&& srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
&& srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
+ && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
&& dstFormat != PIX_FMT_RGB32_1
&& dstFormat != PIX_FMT_BGR32_1
&& (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
c->swScale= rgb2rgbWrapper;
+ if ((usePal(srcFormat) && (
+ dstFormat == PIX_FMT_RGB32 ||
+ dstFormat == PIX_FMT_RGB32_1 ||
+ dstFormat == PIX_FMT_RGB24 ||
+ dstFormat == PIX_FMT_BGR32 ||
+ dstFormat == PIX_FMT_BGR32_1 ||
+ dstFormat == PIX_FMT_BGR24)))
+ c->swScale= pal2rgbWrapper;
+
if (srcFormat == PIX_FMT_YUV422P)
{
if (dstFormat == PIX_FMT_YUYV422)
/* LQ converters if -sws 0 or -sws 4*/
if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
/* yv12_to_yuy2 */
- if (srcFormat == PIX_FMT_YUV420P)
+ if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
{
if (dstFormat == PIX_FMT_YUYV422)
c->swScale= PlanarToYuy2Wrapper;
c->swScale= PlanarToUyvyWrapper;
}
}
+ if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+ c->swScale= YUYV2YUV420Wrapper;
+ if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+ c->swScale= UYVY2YUV420Wrapper;
+ if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
+ c->swScale= YUYV2YUV422Wrapper;
+ if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
+ c->swScale= UYVY2YUV422Wrapper;
#ifdef COMPILE_ALTIVEC
if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+ !(c->flags & SWS_BITEXACT) &&
srcFormat == PIX_FMT_YUV420P) {
// unscaled YV12 -> packed YUV, we want speed
if (dstFormat == PIX_FMT_YUYV422)
/* simple copy */
if ( srcFormat == dstFormat
+ || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
+ || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
|| (isPlanarYUV(srcFormat) && isGray(dstFormat))
|| (isPlanarYUV(dstFormat) && isGray(srcFormat)))
{
c->swScale= gray16swap;
}
-#ifdef ARCH_BFIN
+#if ARCH_BFIN
if (flags & SWS_CPU_CAPS_BFIN)
ff_bfin_get_unscaled_swscale (c);
#endif
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
{
if (flags&SWS_PRINT_INFO)
- av_log(c, AV_LOG_INFO, "output Width is not a multiple of 32 -> no MMX2 scaler\n");
+ av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
}
if (usesHFilter) c->canMMX2BeUsed=0;
}
c->lumXInc+= 20;
c->chrXInc+= 20;
}
- //we don't use the x86asm scaler if mmx is available
+ //we don't use the x86 asm scaler if MMX is available
else if (flags & SWS_CPU_CAPS_MMX)
{
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
{
#ifdef MAP_ANONYMOUS
- c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
- c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ c->funnyYCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#elif HAVE_VIRTUALALLOC
+ c->funnyYCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
- c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
+ c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
#endif
initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
}
#endif /* defined(COMPILE_MMX2) */
- } // Init Horizontal stuff
+ } // initialize horizontal stuff
1;
initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
- srcH , dstH, filterAlign, (1<<12)-4,
+ srcH , dstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
srcFilter->lumV, dstFilter->lumV, c->param);
initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
- c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
+ c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrV, dstFilter->chrV, c->param);
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
#endif
}
- // Calculate Buffer Sizes so that they won't run out while handling these damn slices
+ // calculate buffer sizes so that they won't run out while handling these damn slices
c->vLumBufSize= c->vLumFilterSize;
c->vChrBufSize= c->vChrFilterSize;
for (i=0; i<dstH; i++)
// allocate pixbufs (we use dynamic allocation because otherwise we would need to
c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
- //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+ if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
+ c->alpPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
+ //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
/* align at 16 bytes for AltiVec */
for (i=0; i<c->vLumBufSize; i++)
c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
for (i=0; i<c->vChrBufSize; i++)
c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+ for (i=0; i<c->vLumBufSize; i++)
+ c->alpPixBuf[i]= c->alpPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
//try to avoid drawing green stuff between the right end and the stride end
for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
}
else
{
-#if defined(ARCH_X86)
- av_log(c, AV_LOG_VERBOSE, "using X86-Asm scaler for horizontal scaling\n");
+#if ARCH_X86
+ av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
#else
if (flags & SWS_FAST_BILINEAR)
av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
}
if (dstFormat==PIX_FMT_BGR24)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 Converter\n",
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
(flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
else if (dstFormat==PIX_FMT_RGB32)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR565)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR555)
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
}
if (flags & SWS_PRINT_INFO)
{
- av_log(c, AV_LOG_DEBUG, "Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+ av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
- av_log(c, AV_LOG_DEBUG, "Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+ av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
}
/**
* swscale wrapper, so we don't need to export the SwsContext.
- * assumes planar YUV to be in YUV order instead of YVU
+ * Assumes planar YUV to be in YUV order instead of YVU.
*/
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int i;
- uint8_t* src2[4]= {src[0], src[1], src[2]};
- uint32_t pal[256];
- int use_pal= c->srcFormat == PIX_FMT_PAL8
- || c->srcFormat == PIX_FMT_BGR4_BYTE
- || c->srcFormat == PIX_FMT_RGB4_BYTE
- || c->srcFormat == PIX_FMT_BGR8
- || c->srcFormat == PIX_FMT_RGB8;
+ uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
}
- if (use_pal){
+ if (usePal(c->srcFormat)){
for (i=0; i<256; i++){
int p, r, g, b,y,u,v;
if(c->srcFormat == PIX_FMT_PAL8){
r= (i>>3 )*255;
g= ((i>>1)&3)*85;
b= (i&1 )*255;
- }else if(c->srcFormat == PIX_FMT_BGR4_BYTE){
+ }else {
+ assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
b= (i>>3 )*255;
g= ((i>>1)&3)*85;
r= (i&1 )*255;
y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
- pal[i]= y + (u<<8) + (v<<16);
+ c->pal_yuv[i]= y + (u<<8) + (v<<16);
+
+
+ switch(c->dstFormat) {
+ case PIX_FMT_BGR32:
+#ifndef WORDS_BIGENDIAN
+ case PIX_FMT_RGB24:
+#endif
+ c->pal_rgb[i]= r + (g<<8) + (b<<16);
+ break;
+ case PIX_FMT_BGR32_1:
+#ifdef WORDS_BIGENDIAN
+ case PIX_FMT_BGR24:
+#endif
+ c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
+ break;
+ case PIX_FMT_RGB32_1:
+#ifdef WORDS_BIGENDIAN
+ case PIX_FMT_RGB24:
+#endif
+ c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
+ break;
+ case PIX_FMT_RGB32:
+#ifndef WORDS_BIGENDIAN
+ case PIX_FMT_BGR24:
+#endif
+ default:
+ c->pal_rgb[i]= b + (g<<8) + (r<<16);
+ }
}
- src2[1]= (uint8_t*)pal;
}
// copy strides, so they can safely be modified
if (c->sliceDir == 1) {
// slices go from top to bottom
- int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
- int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
+ int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
+ int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
} else {
// slices go from bottom to top => we flip the image internally
uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
- dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
- int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
- int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
+ dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2],
+ dst[3] + (c->dstH-1)*dstStride[3]};
+ int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
+ int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
src2[0] += (srcSliceH-1)*srcStride[0];
- if (!use_pal)
+ if (!usePal(c->srcFormat))
src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
+ src2[3] += (srcSliceH-1)*srcStride[3];
return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
}
}
-/**
- * swscale wrapper, so we don't need to export the SwsContext
- */
+#if LIBSWSCALE_VERSION_MAJOR < 1
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
}
+#endif
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
float lumaSharpen, float chromaSharpen,
sws_normalizeVec(filter->lumH, 1.0);
sws_normalizeVec(filter->lumV, 1.0);
- if (verbose) sws_printVec(filter->chrH);
- if (verbose) sws_printVec(filter->lumH);
+ if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
+ if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
return filter;
}
-/**
- * returns a normalized gaussian curve used to filter stuff
- * quality=3 is high quality, lowwer is lowwer quality
- */
SwsVector *sws_getGaussianVec(double variance, double quality){
const int length= (int)(variance*quality + 0.5) | 1;
int i;
return vec;
}
-void sws_printVec(SwsVector *a){
+void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
int i;
double max=0;
double min=0;
for (i=0; i<a->length; i++)
{
int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
- av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
- for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
- av_log(NULL, AV_LOG_DEBUG, "|\n");
+ av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
+ for (;x>0; x--) av_log(log_ctx, log_level, " ");
+ av_log(log_ctx, log_level, "|\n");
}
}
+#if LIBSWSCALE_VERSION_MAJOR < 1
+void sws_printVec(SwsVector *a){
+ sws_printVec2(a, NULL, AV_LOG_DEBUG);
+}
+#endif
+
void sws_freeVec(SwsVector *a){
if (!a) return;
av_freep(&a->coeff);
av_freep(&c->chrPixBuf);
}
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
+ for (i=0; i<c->vLumBufSize; i++)
+ av_freep(&c->alpPixBuf[i]);
+ av_freep(&c->alpPixBuf);
+ }
+
av_freep(&c->vLumFilter);
av_freep(&c->vChrFilter);
av_freep(&c->hLumFilter);
av_freep(&c->hChrFilter);
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
av_freep(&c->vYCoeffsBank);
av_freep(&c->vCCoeffsBank);
#endif
av_freep(&c->hLumFilterPos);
av_freep(&c->hChrFilterPos);
-#if defined(ARCH_X86) && defined(CONFIG_GPL)
+#if ARCH_X86 && CONFIG_GPL
#ifdef MAP_ANONYMOUS
- if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
+ if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+#elif HAVE_VIRTUALALLOC
+ if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
+ if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
#else
- av_free(c->funnyYCode);
+ av_free(c->funnyYCode );
av_free(c->funnyUVCode);
#endif
c->funnyYCode=NULL;
c->funnyUVCode=NULL;
-#endif /* defined(ARCH_X86) */
+#endif /* ARCH_X86 && CONFIG_GPL */
av_freep(&c->lumMmx2Filter);
av_freep(&c->chrMmx2Filter);
av_free(c);
}
-/**
- * Checks if context is valid or reallocs a new one instead.
- * If context is NULL, just calls sws_getContext() to get a new one.
- * Otherwise, checks if the parameters are the same already saved in context.
- * If that is the case, returns the current context.
- * Otherwise, frees context and gets a new one.
- *
- * Be warned that srcFilter, dstFilter are not checked, they are
- * asumed to remain valid.
- */
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
- int srcW, int srcH, int srcFormat,
- int dstW, int dstH, int dstFormat, int flags,
+ int srcW, int srcH, enum PixelFormat srcFormat,
+ int dstW, int dstH, enum PixelFormat dstFormat, int flags,
SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
{
static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};