#if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)) {
long p= 4;
- uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
+ const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
uint8_t *dst[4]= {aDest, dest, uDest, vDest};
x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
: "%r8"
);
#else
- *(uint16_t **)(&c->u_temp)=abuf0;
- *(uint16_t **)(&c->v_temp)=abuf1;
+ *(const uint16_t **)(&c->u_temp)=abuf0;
+ *(const uint16_t **)(&c->v_temp)=abuf1;
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
#endif
);
} else {
- uint8_t *offset = src+filterSize;
+ const uint8_t *offset = src+filterSize;
x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2;
filterPos-= counter/2;
{
#if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2
- int32_t *mmx2FilterPos = c->lumMmx2FilterPos;
- int16_t *mmx2Filter = c->lumMmx2Filter;
+ int32_t *filterPos = c->hLumFilterPos;
+ int16_t *filter = c->hLumFilter;
int canMMX2BeUsed = c->canMMX2BeUsed;
void *mmx2FilterCode= c->lumMmx2FilterCode;
int i;
#if defined(PIC)
"mov %5, %%"REG_b" \n\t"
#endif
- :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
+ :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode)
#if defined(PIC)
,"m" (ebxsave)
src= formatConvBuffer;
}
- if (!c->hyscale_fast)
- {
+ if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
{
#if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2
- int32_t *mmx2FilterPos = c->chrMmx2FilterPos;
- int16_t *mmx2Filter = c->chrMmx2Filter;
+ int32_t *filterPos = c->hChrFilterPos;
+ int16_t *filter = c->hChrFilter;
int canMMX2BeUsed = c->canMMX2BeUsed;
void *mmx2FilterCode= c->chrMmx2FilterCode;
int i;
#if defined(PIC)
"mov %6, %%"REG_b" \n\t"
#endif
- :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
+ :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode), "m" (src2)
#if defined(PIC)
,"m" (ebxsave)
src2= formatConvBuffer+VOFW;
}
- if (!c->hcscale_fast)
- {
+ if (!c->hcscale_fast) {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
} else { // fast bilinear upscale / crap downscale
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
{
/* load a few things into local vars to make the code more readable? and faster */
int16_t *hChrFilter= c->hChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
- int32_t *alpMmxFilter= c->alpMmxFilter;
+ int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int hLumFilterSize= c->hLumFilterSize;
unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+ const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+ int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last luma line needed as input for the row selected by firstLumSrcY2
int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
int enough_lines;
assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
- // Do we have enough lines in this slice to output the dstY line
- enough_lines = lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
- if (!enough_lines) {
- lastLumSrcY = srcSliceY + srcSliceH - 1;
- lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
- }
-
DEBUG_BUFFERS("dstY: %d\n", dstY);
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
firstLumSrcY, lastLumSrcY, lastInLumBuf);
DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
firstChrSrcY, lastChrSrcY, lastInChrBuf);
+ // Do we have enough lines in this slice to output the dstY line? (luma is checked against lastLumSrcY2, not lastLumSrcY)
+ enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
+
+ if (!enough_lines) {
+ lastLumSrcY = srcSliceY + srcSliceH - 1;
+ lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
+ DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
+ lastLumSrcY, lastChrSrcY);
+ }
+
//Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) {
- uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
- uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
+ const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+ const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++;
- DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
- lumBufIndex, lastInLumBuf);
assert(lumBufIndex < 2*vLumBufSize);
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
formatConvBuffer,
pal, 1);
lastInLumBuf++;
+ DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
+ lumBufIndex, lastInLumBuf);
}
while(lastInChrBuf < lastChrSrcY) {
- uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
- uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+ const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+ const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++;
- DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
- chrBufIndex, lastInChrBuf);
assert(chrBufIndex < 2*vChrBufSize);
assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
formatConvBuffer,
pal);
lastInChrBuf++;
+ DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
+ chrBufIndex, lastInChrBuf);
}
//wrap buf index around to stay inside the ring buffer
if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2) {
- *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
- *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
+ *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
+ *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
- *(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
- *(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
+ *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
+ *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
}
}
for (i=0; i<vChrFilterSize; i+=2) {
- *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
- *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
+ *(const void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
+ *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
- int16_t *lumBuf = lumSrcPtr[0];
- int16_t *chrBuf= chrSrcPtr[0];
- int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
+ const int16_t *lumBuf = lumSrcPtr[0];
+ const int16_t *chrBuf= chrSrcPtr[0];
+ const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} else { //General YV12
c->yuv2yuvX(c,
break;
}
- if (c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
+ if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
if (c->srcRange) {
c->lumConvertRange = RENAME(lumRangeFromJpeg);
c->chrConvertRange = RENAME(chrRangeFromJpeg);