diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index c0e4db3b265..6bf7fcfd9d2 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -60,6 +60,7 @@ untested special converters
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "rgb2rgb.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/x86_cpu.h"
 #include "libavutil/cpu.h"
@@ -71,17 +72,13 @@ untested special converters
 #undef MOVNTQ
 #undef PAVGB
 
-//#undef HAVE_MMX2
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
-//#undef ARCH_X86
 #define DITHER1XBPP
 
 #define isPacked(x)         (       \
            (x)==PIX_FMT_PAL8        \
         || (x)==PIX_FMT_YUYV422     \
         || (x)==PIX_FMT_UYVY422     \
-        || (x)==PIX_FMT_Y400A       \
+        || (x)==PIX_FMT_GRAY8A      \
         || isAnyRGB(x)              \
     )
 
@@ -122,63 +119,6 @@ add BGR4 output support
 write special BGR->BGR scaler
 */
 
-#if ARCH_X86
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
-DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
-DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-    0x0103010301030103LL,
-    0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-    0x0602060206020602LL,
-    0x0004000400040004LL,};
-
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
-
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
-
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { - {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, - {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, -}; - -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL; - -#endif /* ARCH_X86 */ - DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={ { 1, 3, 1, 3, 1, 3, 1, 3, }, { 2, 0, 2, 0, 2, 0, 2, 0, }, @@ -267,88 +207,216 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ }; #endif +DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +{ + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, +},{ + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, +},{ + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, +},{ + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, +},{ + { 9, 17, 15, 23, 8, 16, 14, 22,}, + { 25, 1, 31, 7, 24, 0, 30, 6,}, + { 13, 21, 11, 19, 12, 20, 10, 18,}, + { 29, 5, 27, 3, 28, 4, 26, 2,}, + { 8, 16, 14, 22, 9, 17, 15, 23,}, + { 24, 0, 30, 6, 25, 1, 31, 7,}, + { 12, 20, 10, 18, 13, 21, 11, 19,}, + { 28, 4, 26, 2, 29, 5, 27, 3,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 36, 68, 60, 92, 34, 66, 58, 90,}, + { 100, 4,124, 28, 98, 2,122, 26,}, + { 52, 84, 44, 76, 50, 82, 42, 74,}, + { 116, 20,108, 12,114, 18,106, 10,}, + { 32, 64, 56, 88, 38, 70, 62, 94,}, + { 96, 0,120, 24,102, 6,126, 30,}, + { 48, 80, 40, 72, 54, 86, 46, 78,}, + { 112, 16,104, 8,118, 22,110, 14,}, +}}; + +uint16_t dither_scale[15][16]={ +{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, +{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, +{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, +{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, +{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, +{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, +{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, +{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, +{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, +{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 
2047, 4093, 8185,16369,32737,}, +{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, +{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, +{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, +{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, +{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, +}; + static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, - int dstW, int chrDstW, int big_endian) + int dstW, int chrDstW, int big_endian, int output_bits) { //FIXME Optimize (just quickly written not optimized..) int i; - + int shift = 11 + 16 - output_bits; + +#define output_pixel(pos, val) \ + if (big_endian) { \ + if (output_bits == 16) { \ + AV_WB16(pos, av_clip_uint16(val >> shift)); \ + } else { \ + AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } \ + } else { \ + if (output_bits == 16) { \ + AV_WL16(pos, av_clip_uint16(val >> shift)); \ + } else { \ + AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } \ + } for (i = 0; i < dstW; i++) { - int val = 1 << 10; + int val = 1 << (26-output_bits); int j; for (j = 0; j < lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; - if (big_endian) { - AV_WB16(&dest[i], av_clip_uint16(val >> 11)); - } else { - AV_WL16(&dest[i], av_clip_uint16(val >> 11)); - } + output_pixel(&dest[i], val); } if (uDest) { for (i = 0; i < chrDstW; i++) { - int u = 1 << 10; - int v = 1 << 10; + int u = 1 << (26-output_bits); + int v = 1 << (26-output_bits); int j; for (j = 0; j < chrFilterSize; j++) { - u += chrSrc[j][i ] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; + u += chrUSrc[j][i] * chrFilter[j]; + v += chrVSrc[j][i] * chrFilter[j]; } - if (big_endian) { - AV_WB16(&uDest[i], av_clip_uint16(u >> 11)); - AV_WB16(&vDest[i], av_clip_uint16(v >> 11)); - } else { - AV_WL16(&uDest[i], av_clip_uint16(u >> 11)); - AV_WL16(&vDest[i], av_clip_uint16(v >> 11)); - } + output_pixel(&uDest[i], u); + output_pixel(&vDest[i], v); } } if (CONFIG_SWSCALE_ALPHA && aDest) { for (i = 0; i < dstW; i++) { - int val = 1 << 10; + int val = 1 << (26-output_bits); int j; for (j = 0; j < lumFilterSize; j++) val += alpSrc[j][i] * lumFilter[j]; - if (big_endian) { - AV_WB16(&aDest[i], av_clip_uint16(val >> 11)); - } else { - AV_WL16(&aDest[i], av_clip_uint16(val >> 11)); - } + output_pixel(&aDest[i], val); } } } +#define yuv2NBPS(bits, BE_LE, is_be) \ +static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \ + const int16_t **lumSrc, int lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, \ + int chrFilterSize, const int16_t **alpSrc, \ + uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \ + uint16_t *aDest, int dstW, int chrDstW) \ +{ \ + yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, \ + dest, uDest, vDest, aDest, \ + dstW, chrDstW, is_be, bits); \ +} +yuv2NBPS( 9, BE, 1); +yuv2NBPS( 9, LE, 0); +yuv2NBPS(10, BE, 1); +yuv2NBPS(10, LE, 0); +yuv2NBPS(16, BE, 1); +yuv2NBPS(16, LE, 0); + static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, 
const int16_t **chrSrc, int chrFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc, int chrFilterSize,
                                    const int16_t **alpSrc,
                                    uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
                                    uint16_t *aDest, int dstW, int chrDstW,
                                    enum PixelFormat dstFormat)
 {
-    if (isBE(dstFormat)) {
+    if (isNBPS(dstFormat)) {
+        const int depth = av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1+1;
         yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
-                               chrFilter, chrSrc, chrFilterSize,
-                               alpSrc,
-                               dest, uDest, vDest, aDest,
-                               dstW, chrDstW, 1);
+                               chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                               alpSrc,
+                               dest, uDest, vDest, aDest,
+                               dstW, chrDstW, isBE(dstFormat), depth);
     } else {
-        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
-                               chrFilter, chrSrc, chrFilterSize,
-                               alpSrc,
-                               dest, uDest, vDest, aDest,
-                               dstW, chrDstW, 0);
+        if (isBE(dstFormat)) {
+            yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                                   alpSrc,
+                                   dest, uDest, vDest, aDest,
+                                   dstW, chrDstW, 1, 16);
+        } else {
+            yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                                   alpSrc,
+                                   dest, uDest, vDest, aDest,
+                                   dstW, chrDstW, 0, 16);
+        }
     }
 }
 
 static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc, int chrFilterSize,
                                const int16_t **alpSrc,
                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
 {
 //FIXME Optimize (just quickly written not optimized..)
@@ -368,8 +436,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[i]= av_clip_uint8(u>>19);
@@ -389,7 +457,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 }
 
 static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
                                 uint8_t *dest, uint8_t *uDest,
                                 int dstW, int chrDstW, int dstFormat)
 {
 //FIXME Optimize (just quickly written not optimized..)
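
/* Annotation (editor's sketch, not patch code): the output_pixel() and
 * yuv2NBPS() machinery above maps the vertical filter accumulator onto
 * N-bit samples with shift = 11 + 16 - output_bits, and the accumulator's
 * initial value 1 << (26 - output_bits) is exactly half of 1 << shift,
 * i.e. a round-to-nearest bias.  In isolation, with the bias added at the
 * end instead of folded into the initial value (scale_to_bits is a
 * hypothetical name):
 *
 *     static uint16_t scale_to_bits(int val, int output_bits)
 *     {
 *         int shift = 11 + 16 - output_bits;  // 11 for 16-bit, 18 for 9-bit
 *         int max   = (1 << output_bits) - 1; // what av_clip_uintp2() clips to
 *         int v     = (val + (1 << (shift - 1))) >> shift;
 *         return v < 0 ? 0 : v > max ? max : v;
 *     }
 *
 * With output_bits == 16 this reduces to the old hard-coded behaviour:
 * bias 1<<10, shift 11, clip to 0..65535 (av_clip_uint16()). */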
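
/* Annotation (editor's sketch, not patch code): the change repeated across
 * these hunks splits the interleaved chroma buffer -- chrSrc[j][i ] for U
 * and chrSrc[j][i + VOFW] for V -- into two independent planes, which is
 * why the VOFW offset constant disappears everywhere.  One output chroma
 * pair then reads:
 *
 *     int u = 1 << 18, v = 1 << 18, j;       // half of 1 << 19, for rounding
 *     for (j = 0; j < chrFilterSize; j++) {  // vertical FIR down one column
 *         u += chrUSrc[j][i] * chrFilter[j];
 *         v += chrVSrc[j][i] * chrFilter[j];
 *     }
 *     uDest[i] = av_clip_uint8(u >> 19);
 *     vDest[i] = av_clip_uint8(v >> 19);
 */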
@@ -412,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(u>>19);
@@ -425,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(v>>19);
@@ -450,8 +519,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i ] * chrFilter[j];\
-            V += chrSrc[j][i + VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y1>>=19;\
         Y2>>=19;\
@@ -488,9 +557,9 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
         int j;\
         int Y = 0;\
         int U = -128<<19;\
         int V = -128<<19;\
         int R,G,B;\
         for (j=0; j<lumFilterSize; j++) {\
             Y += lumSrc[j][i ] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i ] * chrFilter[j];\
-            V += chrSrc[j][i + VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y >>=10;\
         U >>=10;\
@@ -564,8 +633,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;             \
         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
-        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
-        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
+        int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
+        int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
         type av_unused *r, *b, *g;                                    \
         int av_unused A1, A2;                                         \
         if (alpha) {\
@@ -590,8 +659,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf1[i     ])>>7;\
-        int V= (uvbuf1[i+VOFW])>>7;\
+        int U= (ubuf1[i])>>7;\
+        int V= (vbuf1[i])>>7;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -616,8 +685,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
-        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
+        int U= (ubuf0[i] + ubuf1[i])>>8;\
+        int V= (vbuf0[i] + vbuf1[i])>>8;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -899,16 +968,20 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             break;\
         }
 
-static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter,
+                                  const int16_t **lumSrc, int lumFilterSize,
+                                  const int16_t *chrFilter, const int16_t **chrUSrc,
+                                  const int16_t **chrVSrc, int chrFilterSize,
                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
                                     const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
@@ -1001,96 +1074,58 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
-                            uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int r = src[i*6+0];
-        int g = src[i*6+2];
-        int b = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
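
/* Annotation: the rounding constants in the deleted 8-bit code above (and
 * in the BGR2Y/BGR2UV macros further down) decompose, with
 * S = RGB2YUV_SHIFT, as
 *
 *     33<<(S-1)  == (16  << S) + (1 << (S-1))
 *     257<<(S-1) == (128 << S) + (1 << (S-1))
 *
 * i.e. the limited-range offset (+16 for luma, +128 for chroma) plus a
 * half-unit bias so that the final >> S rounds to nearest.  The replacement
 * code keeps this structure but works on 8 extra input bits and produces 6
 * extra output bits, which is where the (S-1+8) and (S-6+8) exponents in
 * the new RGB48 macro come from. */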
-static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int b = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int b= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
-}
-
-static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
-                            uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src[i*6+0];
-        int g = src[i*6+2];
-        int r = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int r = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
+#define RGB48(name, R, B, READ)\
+static inline void name ## ToY(int16_t *dst, const uint16_t *src, int width, uint32_t *unused)\
+{\
+    int i;\
+    for (i = 0; i < width; i++) {\
+        int r = READ(&src[i*3+R]);\
+        int g = READ(&src[i*3+1]);\
+        int b = READ(&src[i*3+B]);\
+\
+        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+    }\
+}\
+\
+static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
+                                const uint16_t *src1, const uint16_t *src2,\
+                                int width, uint32_t *unused)\
+{\
+    int i;\
+    assert(src1==src2);\
+    for (i = 0; i < width; i++) {\
+        int r = READ(&src1[3*i + R]);\
+        int g = READ(&src1[3*i + 1]);\
+        int b = READ(&src1[3*i + B]);\
+\
+        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+    }\
+}\
+\
+static inline void name ## ToUV_half(int16_t *dstU, int16_t *dstV,\
+                                     const uint16_t *src1, const uint16_t *src2,\
+                                     int width, uint32_t *unused)\
+{\
+    int i;\
+    assert(src1==src2);\
+    for (i = 0; i < width; i++) {\
+        int r= READ(&src1[6*i + R]) + READ(&src1[6*i + 3+R]);\
+        int g= READ(&src1[6*i + 1]) + READ(&src1[6*i + 4]);\
+        int b= READ(&src1[6*i + B]) + READ(&src1[6*i + 3+B]);\
+\
+        dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\
+        dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\
+    }\
 }
 
-static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int r= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
-}
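
/* Annotation (hand expansion, not patch code): instantiating
 * RGB48(rgb48LE, 0, 2, AV_RL16) below yields
 *
 *     static inline void rgb48LEToY(int16_t *dst, const uint16_t *src,
 *                                   int width, uint32_t *unused)
 *     {
 *         int i;
 *         for (i = 0; i < width; i++) {
 *             int r = AV_RL16(&src[i*3+0]);
 *             int g = AV_RL16(&src[i*3+1]);
 *             int b = AV_RL16(&src[i*3+2]);
 *             dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8))
 *                       + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
 *         }
 *     }
 *
 * so a single macro body covers both byte orders (via READ) and both channel
 * orders (via R/B).  Note the int16_t destination: the shift shrank by 6
 * relative to the old 8-bit code, so intermediate samples now carry 6 extra
 * fractional bits (0..255 scales to 0..16320), matching the <<6 in
 * palToY()/palToUV() and the *16383 in monowhite2Y()/monoblack2Y() below. */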
+RGB48(rgb48LE, 0, 2, AV_RL16)
+RGB48(rgb48BE, 0, 2, AV_RB16)
+RGB48(bgr48LE, 2, 0, AV_RL16)
+RGB48(bgr48BE, 2, 0, AV_RB16)
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
         int b= (((const type*)src)[i]>>shb)&maskb;\
         int g= (((const type*)src)[i]>>shg)&maskg;\
         int r= (((const type*)src)[i]>>shr)&maskr;\
 \
-        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
+        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (32<<((S)-1)) + (1<<(S-7)))>>((S)-6));\
     }\
 }
 
@@ -1111,16 +1146,16 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
-        dst[i]= src[4*i];
+        dst[i]= src[4*i]<<6;
     }
 }
 
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
         int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
         int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
         int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
 \
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
+        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
+        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
     }\
 }\
-static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
         int pix0= ((const type*)src)[2*i+0]>>shp;\
         int pix1= ((const type*)src)[2*i+1]>>shp;\
         int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
         int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
         int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
         g>>=shg;\
 \
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
+        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
+        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
     }\
 }
 
@@ -1159,214 +1194,114 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
-static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToA(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
         int d= src[i];
 
-        dst[i]= pal[d] & 0xFF;
+        dst[i]= (pal[d] >> 24)<<6;
     }
 }
 
-static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int d= src[i];
+
+        dst[i]= (pal[d] & 0xFF)<<6;
+    }
+}
+
+static inline void palToUV(int16_t *dstU, int16_t *dstV,
                            const uint8_t *src1, const uint8_t *src2,
-                           long width, uint32_t *pal)
+                           int width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
     for (i=0; i<width; i++) {
         int p= pal[src1[i]];
 
-        dstU[i]= p>>8;
-        dstV[i]= p>>16;
+        dstU[i]= (uint8_t)(p>> 8)<<6;
+        dstV[i]= (uint8_t)(p>>16)<<6;
     }
 }
 
-static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= ~src[i];
         for (j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
-static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= src[i];
         for (j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
 //Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
 //Plain C versions
-#if CONFIG_RUNTIME_CPUDETECT
-# define COMPILE_C 1
-# if ARCH_X86 -# define COMPILE_MMX 1 -# define COMPILE_MMX2 1 -# define COMPILE_3DNOW 1 -# elif ARCH_PPC -# define COMPILE_ALTIVEC HAVE_ALTIVEC -# endif -#else /* CONFIG_RUNTIME_CPUDETECT */ -# if ARCH_X86 -# if HAVE_MMX2 -# define COMPILE_MMX2 1 -# elif HAVE_AMD3DNOW -# define COMPILE_3DNOW 1 -# elif HAVE_MMX -# define COMPILE_MMX 1 -# else -# define COMPILE_C 1 -# endif -# elif ARCH_PPC && HAVE_ALTIVEC -# define COMPILE_ALTIVEC 1 -# else -# define COMPILE_C 1 -# endif -#endif -#ifndef COMPILE_C -# define COMPILE_C 0 -#endif -#ifndef COMPILE_MMX -# define COMPILE_MMX 0 -#endif -#ifndef COMPILE_MMX2 -# define COMPILE_MMX2 0 -#endif -#ifndef COMPILE_3DNOW -# define COMPILE_3DNOW 0 -#endif -#ifndef COMPILE_ALTIVEC -# define COMPILE_ALTIVEC 0 -#endif - -#define COMPILE_TEMPLATE_MMX 0 #define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_ALTIVEC 0 -#if COMPILE_C -#define RENAME(a) a ## _C #include "swscale_template.c" -#endif -#if COMPILE_ALTIVEC +#if HAVE_ALTIVEC #undef RENAME #undef COMPILE_TEMPLATE_ALTIVEC #define COMPILE_TEMPLATE_ALTIVEC 1 #define RENAME(a) a ## _altivec -#include "swscale_template.c" +#include "ppc/swscale_template.c" #endif -#if ARCH_X86 - //MMX versions -#if COMPILE_MMX +#if HAVE_MMX #undef RENAME -#undef COMPILE_TEMPLATE_MMX #undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 #define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define RENAME(a) a ## _MMX -#include "swscale_template.c" +#include "x86/swscale_template.c" #endif //MMX2 versions -#if COMPILE_MMX2 +#if HAVE_MMX2 #undef RENAME -#undef COMPILE_TEMPLATE_MMX #undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 #define COMPILE_TEMPLATE_MMX2 1 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define RENAME(a) a ## _MMX2 -#include "swscale_template.c" +#include "x86/swscale_template.c" #endif -//3DNOW versions -#if COMPILE_3DNOW -#undef RENAME -#undef COMPILE_TEMPLATE_MMX -#undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNow -#include "swscale_template.c" -#endif - -#endif //ARCH_X86 - SwsFunc ff_getSwsFunc(SwsContext *c) { -#if CONFIG_RUNTIME_CPUDETECT - int flags = c->flags; + int cpu_flags = av_get_cpu_flags(); - int cpuflags = av_get_cpu_flags(); + sws_init_swScale_c(c); - flags |= (cpuflags & AV_CPU_FLAG_MMX ? SWS_CPU_CAPS_MMX : 0); - flags |= (cpuflags & AV_CPU_FLAG_MMX2 ? SWS_CPU_CAPS_MMX2 : 0); - flags |= (cpuflags & AV_CPU_FLAG_3DNOW ? 
SWS_CPU_CAPS_3DNOW : 0); - -#if ARCH_X86 - // ordered per speed fastest first - if (flags & SWS_CPU_CAPS_MMX2) { - sws_init_swScale_MMX2(c); - return swScale_MMX2; - } else if (flags & SWS_CPU_CAPS_3DNOW) { - sws_init_swScale_3DNow(c); - return swScale_3DNow; - } else if (flags & SWS_CPU_CAPS_MMX) { +#if HAVE_MMX + if (cpu_flags & AV_CPU_FLAG_MMX) sws_init_swScale_MMX(c); - return swScale_MMX; - } else { - sws_init_swScale_C(c); - return swScale_C; - } - -#else -#if COMPILE_ALTIVEC - if (flags & SWS_CPU_CAPS_ALTIVEC) { - sws_init_swScale_altivec(c); - return swScale_altivec; - } else { - sws_init_swScale_C(c); - return swScale_C; - } #endif - sws_init_swScale_C(c); - return swScale_C; -#endif /* ARCH_X86 */ -#else //CONFIG_RUNTIME_CPUDETECT -#if COMPILE_TEMPLATE_MMX2 - sws_init_swScale_MMX2(c); - return swScale_MMX2; -#elif COMPILE_TEMPLATE_AMD3DNOW - sws_init_swScale_3DNow(c); - return swScale_3DNow; -#elif COMPILE_TEMPLATE_MMX - sws_init_swScale_MMX(c); - return swScale_MMX; -#elif COMPILE_TEMPLATE_ALTIVEC - sws_init_swScale_altivec(c); - return swScale_altivec; -#else - sws_init_swScale_C(c); - return swScale_C; +#if HAVE_MMX2 + if (cpu_flags & AV_CPU_FLAG_MMX2) + sws_init_swScale_MMX2(c); +#endif +#if HAVE_ALTIVEC + if (cpu_flags & AV_CPU_FLAG_ALTIVEC) + sws_init_swScale_altivec(c); #endif -#endif //!CONFIG_RUNTIME_CPUDETECT + + return swScale_c; } static void copyPlane(const uint8_t *src, int srcStride, @@ -1496,24 +1431,24 @@ static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStrid return srcSliceH; } -static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) { - long i; + int i; for (i=0; isrcFormat; const enum PixelFormat dstFormat= c->dstFormat; - void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels, + void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)=NULL; int i; uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; const uint8_t *srcPtr= src[0]; - if (srcFormat == PIX_FMT_Y400A) { + if (srcFormat == PIX_FMT_GRAY8A) { switch (dstFormat) { case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; @@ -1557,7 +1492,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); else { for (i=0; isrcW, (uint8_t *) c->pal_rgb); @@ -1586,7 +1521,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], const int dstBpp= (c->dstFormatBpp + 7) >> 3; const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ const int dstId= c->dstFormatBpp >> 2; - void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL; + void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL; #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst) @@ -1644,7 +1579,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) { av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); } else { const uint8_t *srcPtr= src[0]; uint8_t *dstPtr= dst[0]; @@ -1654,7 +1589,7 @@ static 
int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat)) dstPtr += ALT32_CORR; - if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0) + if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0 && !(srcStride[0]%srcBpp)) conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); else { int i; @@ -1726,6 +1661,28 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ return srcSliceH; } +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for (; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ + } + + static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) { @@ -1745,42 +1702,72 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ length*=2; fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); } else { - if(isNBPS(c->srcFormat)) { - const int depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; - uint16_t *srcPtr2 = (uint16_t*)srcPtr; - - if (is16BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t*)dstPtr; + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { + const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; + const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; + const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; + + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) + } + } else if (src_depth == 8) { for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) - dstPtr2[j] = (srcPtr2[j]<<(16-depth)) | (srcPtr2[j]>>(2*depth-16)); + if(isBE(c->dstFormat)){ + for (j = 0; j < length; j++) + AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); + } else { + for (j = 0; j < length; j++) + AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); + } dstPtr2 += dstStride[plane]/2; - srcPtr2 += srcStride[plane]/2; + srcPtr += srcStride[plane]; } - } else { - // FIXME Maybe dither instead. 
+ } else if (src_depth <= dst_depth) { for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) - dstPtr[j] = srcPtr2[j]>>(depth-8); - dstPtr += dstStride[plane]; +#define COPY_UP(r,w) \ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } + } else { + if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } + } + dstPtr2 += dstStride[plane]/2; srcPtr2 += srcStride[plane]/2; } - } - } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { - if (!isBE(c->srcFormat)) srcPtr++; - for (i=0; isrcFormat) && is16BPS(c->dstFormat)) { - for (i=0; isrcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) + } + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) + } else { + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) + } } - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; } } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && isBE(c->srcFormat) != isBE(c->dstFormat)) { @@ -1809,23 +1796,6 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ return srcSliceH; } -int ff_hardcodedcpuflags(void) -{ - int flags = 0; -#if COMPILE_TEMPLATE_MMX2 - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2; -#elif COMPILE_TEMPLATE_AMD3DNOW - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW; -#elif COMPILE_TEMPLATE_MMX - flags |= SWS_CPU_CAPS_MMX; -#elif COMPILE_TEMPLATE_ALTIVEC - flags |= SWS_CPU_CAPS_ALTIVEC; -#elif ARCH_BFIN - flags |= SWS_CPU_CAPS_BFIN; -#endif - return flags; -} - void ff_get_unscaled_swscale(SwsContext *c) { const enum PixelFormat srcFormat = c->srcFormat; @@ -1909,8 +1879,8 @@ void ff_get_unscaled_swscale(SwsContext *c) if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) c->swScale= uyvyToYuv422Wrapper; -#if COMPILE_ALTIVEC - if ((c->flags & SWS_CPU_CAPS_ALTIVEC) && +#if HAVE_ALTIVEC + if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->flags & SWS_BITEXACT) && srcFormat == PIX_FMT_YUV420P) { // unscaled YV12 -> packed YUV, we want speed @@ -1940,8 +1910,7 @@ void ff_get_unscaled_swscale(SwsContext *c) c->swScale= planarCopyWrapper; } #if ARCH_BFIN - if (flags & SWS_CPU_CAPS_BFIN) - ff_bfin_get_unscaled_swscale (c); + ff_bfin_get_unscaled_swscale (c); #endif } @@ -2006,9 +1975,10 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], if (usePal(c->srcFormat)) { for (i=0; i<256; i++) { - int p, r, g, b,y,u,v; + int p, r, g, b, y, u, v, a = 0xff; if(c->srcFormat == PIX_FMT_PAL8) { p=((const uint32_t*)(src[1]))[i]; + a= (p>>24)&0xFF; r= (p>>16)&0xFF; g= (p>> 8)&0xFF; b= p &0xFF; @@ -2024,7 +1994,7 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], r= (i>>3 )*255; g= ((i>>1)&3)*85; b= (i&1 )*255; - } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) { + } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { r = g = b = i; } else { assert(c->srcFormat == PIX_FMT_BGR4_BYTE); @@ -2035,33 +2005,33 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const 
int srcStride[], y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - c->pal_yuv[i]= y + (u<<8) + (v<<16); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); switch(c->dstFormat) { case PIX_FMT_BGR32: #if !HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= r + (g<<8) + (b<<16); + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); break; case PIX_FMT_BGR32_1: #if HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif - c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8; + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); break; case PIX_FMT_RGB32_1: #if HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8; + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); break; case PIX_FMT_RGB32: #if !HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif default: - c->pal_rgb[i]= b + (g<<8) + (r<<16); + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); } } } @@ -2115,18 +2085,18 @@ int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[] #endif /* Convert the palette to the same packed 32-bit format as the palette */ -void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) { - long i; + int i; for (i=0; i dst format: ABC */ -void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) { - long i; + int i; for (i=0; i
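
Annotation (condensed sketch, not part of the patch): after this change
ff_getSwsFunc() no longer picks one of several complete scaler variants at
build or run time; sws_init_swScale_c() always fills in the C code paths,
each SIMD init that is compiled in and whose CPU flag is set then overrides
just the function pointers it accelerates, and the single swScale_c driver
is returned:

    SwsFunc get_scaler(SwsContext *c)       /* hypothetical name */
    {
        int cpu_flags = av_get_cpu_flags();
        sws_init_swScale_c(c);              /* baseline C paths */
    #if HAVE_MMX
        if (cpu_flags & AV_CPU_FLAG_MMX)
            sws_init_swScale_MMX(c);        /* overrides hot pointers */
    #endif
        return swScale_c;                   /* one driver for every case */
    }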
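
Annotation: a worked example of the DITHER_COPY()/dither_scale[] arithmetic
for a 10-bit to 8-bit plane copy.  scale = dither_scale[8-1][10-1] = 511 and
shift = (10-8) + dither_scale[10-2][8-1] = 2 + 9 = 11, so every sample
becomes (x + dither)*511 >> 11, with dither drawn from the 2-bit ordered
matrix dithers[10-9] (values 0..3, one per pixel position).  The endpoints
stay exact -- 0 maps to 0 and 1023*511 >> 11 = 255 -- so the copy rescales
by 255/1023 to nearest with ordered dithering, where the plain truncating
shift it replaces would band in smooth gradients.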
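
Annotation (sketch, not patch code): COPY_UP() widens samples by shifting up
and replicating the most significant bits into the newly created low bits,
which keeps both endpoints exact, unlike a plain left shift, which can never
reach full scale:

    #include <stdint.h>

    static uint16_t widen(uint16_t v, int src_depth, int dst_depth)
    {
        return (v << (dst_depth - src_depth)) |
               (v >> (2*src_depth - dst_depth));   /* MSBs into the LSBs */
    }

    /* widen(0xFF, 8, 16) == 0xFFFF == 0xFF * 257, widen(0, 8, 16) == 0;
     * for 10 -> 16 bits this is (v << 6) | (v >> 4). */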
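
Annotation (usage sketch with assumed values; the two helpers are public
swscale API): both take a 256-entry palette already laid out in the
destination's packed order, so each output pixel is a plain table lookup:

    enum { N = 4 };
    uint8_t pal[256*4];                    /* ABCD entries, filled elsewhere */
    uint8_t idx[N] = { 0, 1, 2, 255 };     /* PAL8 pixels                    */
    uint8_t out32[N*4], out24[N*3];

    sws_convertPalette8ToPacked32(idx, out32, N, pal);  /* keeps all of ABCD */
    sws_convertPalette8ToPacked24(idx, out24, N, pal);  /* drops component D */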