#else
#include <stdlib.h>
#endif
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
#include "swscale.h"
#include "swscale_internal.h"
#include "../cpudetect.h"
#define COMPILE_C
#endif
+#ifdef ARCH_POWERPC
+#ifdef HAVE_ALTIVEC
+#define COMPILE_ALTIVEC
+#endif //HAVE_ALTIVEC
+#endif //ARCH_POWERPC
+
#ifdef ARCH_X86
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
+#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif
+#ifdef ARCH_POWERPC
+#ifdef COMPILE_ALTIVEC
+#undef RENAME
+#define HAVE_ALTIVEC
+#define RENAME(a) a ## _altivec
+#include "swscale_template.c"
+#endif
+#endif //ARCH_POWERPC
+
#ifdef ARCH_X86
//X86 versions
if(min>minFilterSize) minFilterSize= min;
}
+ if (flags & SWS_CPU_CAPS_ALTIVEC) {
+ // we can handle the special case 4,
+ // so we don't want to go to the full 8
+ if (minFilterSize < 5)
+ filterAlign = 4;
+
+ // we really don't want to waste our time
+ // doing useless computation, so fall back on
+ // the scalar C code for very small filters.
+ // Vectorizing is worth it only if you have
+ // a decent-sized vector.
+ if (minFilterSize < 3)
+ filterAlign = 1;
+ }
+
ASSERT(minFilterSize > 0)
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
ASSERT(filterSize > 0)
return swScale_C;
#else
+#ifdef ARCH_POWERPC
+ if(flags & SWS_CPU_CAPS_ALTIVEC)
+ return swScale_altivec;
+ else
+ return swScale_C;
+#endif
return swScale_C;
#endif
#else //RUNTIME_CPUDETECT
return swScale_3DNow;
#elif defined (HAVE_MMX)
return swScale_MMX;
+#elif defined (HAVE_ALTIVEC)
+ return swScale_altivec;
#else
return swScale_C;
#endif
SwsContext *c;
int i;
- int usesFilter;
+ int usesVFilter, usesHFilter;
int unscaled, needsDither;
int srcFormat, dstFormat;
SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#endif
#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
- flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW);
+ flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
flags |= SWS_CPU_CAPS_MMX;
+#elif defined (HAVE_ALTIVEC)
+ flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
if(clip_table[512] != 255) globalInit();
c->origSrcFormat= origSrcFormat;
c->vRounder= 4* 0x0001000100010001ULL;
- usesFilter=0;
- if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
- if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
- if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesFilter=1;
- if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesFilter=1;
- if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesFilter=1;
- if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesFilter=1;
- if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
- if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
+ usesHFilter= usesVFilter= 0;
+ if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
+ if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
+ if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
+ if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
+ if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
+ if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
+ if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
+ if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);
/* unscaled special Cases */
- if(unscaled && !usesFilter)
+ if(unscaled && !usesHFilter && !usesVFilter)
{
/* yv12_to_nv12 */
if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
}
+ if(usesHFilter) c->canMMX2BeUsed=0;
}
else
c->canMMX2BeUsed=0;
/* precalculate horizontal scaler filter coefficients */
{
- const int filterAlign= (flags & SWS_CPU_CAPS_MMX) ? 4 : 1;
+ const int filterAlign=
+ (flags & SWS_CPU_CAPS_MMX) ? 4 :
+ (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ 1;
initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
srcW , dstW, filterAlign, 1<<14,
/* precalculate vertical scaler filter coefficients */
- initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
- srcH , dstH, 1, (1<<12)-4,
- (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
- srcFilter->lumV, dstFilter->lumV);
- initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
- c->chrSrcH, c->chrDstH, 1, (1<<12)-4,
- (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
- srcFilter->chrV, dstFilter->chrV);
+ {
+ const int filterAlign=
+ (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+ 1;
+
+ initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
+ srcH , dstH, filterAlign, (1<<12)-4,
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
+ srcFilter->lumV, dstFilter->lumV);
+ initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
+ c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+ srcFilter->chrV, dstFilter->chrV);
+ }
// Calculate Buffer Sizes so that they won't run out while handling these damn slices
c->vLumBufSize= c->vLumFilterSize;
int chrI= i*c->chrDstH / dstH;
int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
- if(c->chrSrcVSubSample > 1)
- nextSlice&= ~3; // Slices start at boundaries which are divisable through 4
- else
- nextSlice&= ~1; // Slices start at boundaries which are divisable through 2
+
+ nextSlice>>= c->chrSrcVSubSample;
+ nextSlice<<= c->chrSrcVSubSample;
if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
MSG_INFO("using 3DNOW\n");
else if(flags & SWS_CPU_CAPS_MMX)
MSG_INFO("using MMX\n");
- else
+ else if(flags & SWS_CPU_CAPS_ALTIVEC)
+ MSG_INFO("using AltiVec\n");
+ else
MSG_INFO("using C\n");
}
*/
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
- return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+ //copy the strides into local arrays, so c->swScale can safely modify them without touching the caller's srcStride/dstStride
+ int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]}; // reads entries 0..2, so the caller's array must hold at least 3 strides
+ int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]}; // same for the destination strides
+ return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2); // forwards the local copies; result of c->swScale is returned unchanged
}
/**