/*
supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
- supported output formats: YV12, I420/IYUV, YUY2, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
+ supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
{BGR,RGB}{1,4,8,15,16} support dithering
unscaled special converters (YV12=I420=IYUV, Y800=Y8)
#else
#include <stdlib.h>
#endif
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
#include "swscale.h"
#include "swscale_internal.h"
#include "../cpudetect.h"
|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
-#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2\
+#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY /* output-format check: non-zero when x is one of the formats compared in this macro; this version also accepts UYVY */\
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
|| isRGB(x) || isBGR(x)\
|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
int i;
for(i=0; i<dstW; i++)
{
- int val=0;
+ int val=1<<18;
int j;
for(j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
if(uDest != NULL)
for(i=0; i<chrDstW; i++)
{
- int u=0;
- int v=0;
+ int u=1<<18;
+ int v=1<<18;
int j;
for(j=0; j<chrFilterSize; j++)
{
#define YSCALE_YUV_2_PACKEDX_C(type) \
for(i=0; i<(dstW>>1); i++){\
int j;\
- int Y1=0;\
- int Y2=0;\
- int U=0;\
- int V=0;\
+ int Y1=1<<18;\
+ int Y2=1<<18;\
+ int U=1<<18;\
+ int V=1<<18;\
type *r, *b, *g;\
const int i2= 2*i;\
\
((uint8_t*)dest)[2*i2+3]= V;\
} \
break;\
+ case IMGFMT_UYVY:\
+ func2\
+ ((uint8_t*)dest)[2*i2+0]= U;\
+ ((uint8_t*)dest)[2*i2+1]= Y1;\
+ ((uint8_t*)dest)[2*i2+2]= V;\
+ ((uint8_t*)dest)[2*i2+3]= Y2;\
+ } \
+ break;\
}\
int acc=0;
for(i=0; i<dstW-1; i+=2){
int j;
- int Y1=0;
- int Y2=0;
+ int Y1=1<<18;
+ int Y2=1<<18;
for(j=0; j<lumFilterSize; j++)
{
((uint8_t*)dest)[2*i2+3]= V;
}
break;
+ case IMGFMT_UYVY:
+ YSCALE_YUV_2_PACKEDX_C(void)
+ ((uint8_t*)dest)[2*i2+0]= U;
+ ((uint8_t*)dest)[2*i2+1]= Y1;
+ ((uint8_t*)dest)[2*i2+2]= V;
+ ((uint8_t*)dest)[2*i2+3]= Y2;
+ }
+ break;
}
}
#define COMPILE_C
#endif
+#ifdef ARCH_POWERPC
+#ifdef HAVE_ALTIVEC
+#define COMPILE_ALTIVEC // set only when both ARCH_POWERPC and HAVE_ALTIVEC are defined; tested further down before the AltiVec inclusion of swscale_template.c
+#endif //HAVE_ALTIVEC
+#endif //ARCH_POWERPC
+
#ifdef ARCH_X86
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
+#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif
+#ifdef ARCH_POWERPC
+#ifdef COMPILE_ALTIVEC // AltiVec pass over the shared template
+#undef RENAME // drop the previous RENAME definition before redefining it
+#define HAVE_ALTIVEC // defined for the template inclusion below (the _C pass above #undef's it)
+#define RENAME(a) a ## _altivec // RENAME(name) now expands to name_altivec
+#include "swscale_template.c" // presumably this is what provides swScale_altivec, which is returned further down
+#endif // COMPILE_ALTIVEC
+#endif //ARCH_POWERPC
+
#ifdef ARCH_X86
//X86 versions
#endif //ARCH_X86
-// minor note: the HAVE_xyz is messed up after that line so dont use it
+// minor note: the HAVE_xyz is messed up after that line so don't use it
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
- /* preserve Monotonicity because the core cant handle the filter otherwise */
+ /* preserve Monotonicity because the core can't handle the filter otherwise */
if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
// Move filter coeffs left
if(min>minFilterSize) minFilterSize= min;
}
+ if (flags & SWS_CPU_CAPS_ALTIVEC) {
+ // we can handle the special case 4,
+ // so we don't want to go to the full 8
+ if (minFilterSize < 5)
+ filterAlign = 4;
+
+ // we really don't want to waste our time
+ // doing useless computation, so fall-back on
+ // the scalar C code for very small filter.
+ // vectorizing is worth it only if you have
+ // decent-sized vector.
+ if (minFilterSize < 3)
+ filterAlign = 1;
+ }
+
ASSERT(minFilterSize > 0)
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
ASSERT(filterSize > 0)
for(i=0; i<dstW; i++)
{
int j;
+ double error=0;
double sum=0;
double scale= one;
+
for(j=0; j<filterSize; j++)
{
sum+= filter[i*filterSize + j];
scale/= sum;
for(j=0; j<*outFilterSize; j++)
{
- (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
+ double v= filter[i*filterSize + j]*scale + error;
+ int intV= floor(v + 0.5);
+ (*outFilter)[i*(*outFilterSize) + j]= intV;
+ error = v - intV;
}
}
}
#endif // ARCH_X86
-//FIXME remove
-void SwScale_Init(){
-}
-
static void globalInit(){
// generating tables:
int i;
return swScale_C;
#else
+#ifdef ARCH_POWERPC
+ if(flags & SWS_CPU_CAPS_ALTIVEC)
+ return swScale_altivec;
+ else
+ return swScale_C;
+#endif
return swScale_C;
#endif
#else //RUNTIME_CPUDETECT
return swScale_3DNow;
#elif defined (HAVE_MMX)
return swScale_MMX;
+#elif defined (HAVE_ALTIVEC)
+ return swScale_altivec;
#else
return swScale_C;
#endif
return srcSliceH;
}
+static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[], int dstStride[]){ /* slice wrapper around yv12touyvy():
+  * hands the three planes in src[] and a pointer into dstParam[0] to yv12touyvy()
+  * and returns srcSliceH; installed as c->swScale elsewhere in this file for
+  * unscaled, unfiltered YV12 -> UYVY conversion */
+ uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; // start of this slice in plane 0: srcSliceY * dstStride[0] bytes in
+ /* c->srcW and srcSliceH are presumably the width and height arguments
+  * (yv12touyvy() is not visible here - confirm against its prototype);
+  * only srcStride[0], srcStride[1] and dstStride[0] are passed on, srcStride[2] is unused */
+ yv12touyvy( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
+
+ return srcSliceH;
+}
+
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
sortedStride[1]= stride[2];
sortedStride[2]= stride[1];
}
- else if(isPacked(format) || isGray(format))
+ else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
{
sortedP[0]= p[0];
sortedP[1]=
SwsContext *c;
int i;
- int usesFilter;
+ int usesVFilter, usesHFilter;
int unscaled, needsDither;
int srcFormat, dstFormat;
SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#endif
#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
- flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW);
+ flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
flags |= SWS_CPU_CAPS_MMX;
+#elif defined (HAVE_ALTIVEC)
+ flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
if(clip_table[512] != 255) globalInit();
if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);
- /* avoid dupplicate Formats, so we dont need to check to much */
+ /* avoid duplicate Formats, so we don't need to check too much */
srcFormat = remove_dup_fourcc(origSrcFormat);
dstFormat = remove_dup_fourcc(origDstFormat);
c->srcFormat= srcFormat;
c->origDstFormat= origDstFormat;
c->origSrcFormat= origSrcFormat;
-
- usesFilter=0;
- if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
- if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
- if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesFilter=1;
- if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesFilter=1;
- if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesFilter=1;
- if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesFilter=1;
- if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
- if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
+ c->vRounder= 4* 0x0001000100010001ULL;
+
+ usesHFilter= usesVFilter= 0;
+ if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
+ if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
+ if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
+ if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
+ if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
+ if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
+ if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
+ if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);
/* unscaled special Cases */
- if(unscaled && !usesFilter)
+ if(unscaled && !usesHFilter && !usesVFilter)
{
/* yv12_to_nv12 */
if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
c->swScale= rgb2rgbWrapper;
/* yv12_to_yuy2 */
- if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_YUY2)
+ if(srcFormat == IMGFMT_YV12 &&
+ (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
{
- c->swScale= PlanarToYuy2Wrapper;
+ if (dstFormat == IMGFMT_YUY2)
+ c->swScale= PlanarToYuy2Wrapper;
+ else
+ c->swScale= PlanarToUyvyWrapper;
}
}
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
}
+ if(usesHFilter) c->canMMX2BeUsed=0;
}
else
c->canMMX2BeUsed=0;
c->lumXInc+= 20;
c->chrXInc+= 20;
}
- //we dont use the x86asm scaler if mmx is available
+ //we don't use the x86asm scaler if mmx is available
else if(flags & SWS_CPU_CAPS_MMX)
{
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
/* precalculate horizontal scaler filter coefficients */
{
- const int filterAlign= (flags & SWS_CPU_CAPS_MMX) ? 4 : 1;
+ const int filterAlign=
+ (flags & SWS_CPU_CAPS_MMX) ? 4 :
+ (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ 1;
initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
srcW , dstW, filterAlign, 1<<14,
srcFilter->chrH, dstFilter->chrH);
#ifdef ARCH_X86
-// cant downscale !!!
+// can't downscale !!!
if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
{
c->lumMmx2Filter = (int16_t*)memalign(8, (dstW /8+8)*sizeof(int16_t));
/* precalculate vertical scaler filter coefficients */
- initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
- srcH , dstH, 1, (1<<12)-4,
- (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
- srcFilter->lumV, dstFilter->lumV);
- initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
- c->chrSrcH, c->chrDstH, 1, (1<<12)-4,
- (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
- srcFilter->chrV, dstFilter->chrV);
-
- // Calculate Buffer Sizes so that they wont run out while handling these damn slices
+ {
+ const int filterAlign=
+ (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ 1;
+
+ initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
+ srcH , dstH, filterAlign, (1<<12)-4,
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
+ srcFilter->lumV, dstFilter->lumV);
+ initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
+ c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+ srcFilter->chrV, dstFilter->chrV);
+ }
+
+ // Calculate Buffer Sizes so that they won't run out while handling these damn slices
c->vLumBufSize= c->vLumFilterSize;
c->vChrBufSize= c->vChrFilterSize;
for(i=0; i<dstH; i++)
int chrI= i*c->chrDstH / dstH;
int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
- nextSlice&= ~3; // Slices start at boundaries which are divisable through 4
+
+ nextSlice>>= c->chrSrcVSubSample;
+ nextSlice<<= c->chrSrcVSubSample;
if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
MSG_INFO("using 3DNOW\n");
else if(flags & SWS_CPU_CAPS_MMX)
MSG_INFO("using MMX\n");
- else
+ else if(flags & SWS_CPU_CAPS_ALTIVEC)
+ MSG_INFO("using AltiVec\n");
+ else
MSG_INFO("using C\n");
}
}
/**
- * swscale warper, so we dont need to export the SwsContext.
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Forwards the call to the routine stored in c->swScale and returns its result.
+ * Entries 0..2 of srcStride[] and dstStride[] are copied into local arrays first,
+ * so both arrays must provide at least three readable entries.
* assumes planar YUV to be in YUV order instead of YVU
*/
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
- return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+ //copy strides, so they can safely be modified (c->swScale receives the local copies, not the caller's arrays)
+ int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]};
+ int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]};
+ return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
}
/**
- * swscale warper, so we dont need to export the SwsContext
+ * swscale wrapper, so we don't need to export the SwsContext
*/
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
+
return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
}