/*
- Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
+ Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#define YSCALEYUV2YV12X(x, offset) \
"xorl %%eax, %%eax \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
".balign 16 \n\t" /* FIXME Unroll? */\
MOVNTQ(%%mm3, (%1, %%eax))\
"addl $8, %%eax \n\t"\
"cmpl %2, %%eax \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
"leal " offset "(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
"jb 1b \n\t"
"1: \n\t"\
"leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
\
"leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
"movl (%%edx), %%esi \n\t"\
- "pxor %%mm1, %%mm1 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
+ "movq %%mm1, %%mm7 \n\t"\
".balign 16 \n\t"\
"2: \n\t"\
"movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
- int dummy=0;
#ifdef HAVE_MMX
if(uDest != NULL)
{
int uvalpha1=uvalpha^4095;
int i;
-#if 0 //isnt used
+#if 0 //isn't used
if(flags&SWS_FULL_CHR_H_INT)
{
switch(dstFormat)
#ifdef HAVE_MMX
switch(c->dstFormat)
{
-//Note 8280 == DSTW_OFFSET but the preprocessor cant handle that there :(
+//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
case IMGFMT_BGR32:
asm volatile(
"movl %%esp, "ESP_OFFSET"(%5) \n\t"
int i;
for(i=0; i<width; i++)
{
- int b= src[i*4+0];
- int g= src[i*4+1];
- int r= src[i*4+2];
+ int b= ((uint32_t*)src)[i]&0xFF;
+ int g= (((uint32_t*)src)[i]>>8)&0xFF;
+ int r= (((uint32_t*)src)[i]>>16)&0xFF;
- dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+ dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
}
#endif
}
int i;
for(i=0; i<width; i++)
{
- int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4];
- int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5];
- int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6];
+ const int a= ((uint32_t*)src1)[2*i+0];
+ const int e= ((uint32_t*)src1)[2*i+1];
+ const int c= ((uint32_t*)src2)[2*i+0];
+ const int d= ((uint32_t*)src2)[2*i+1];
+ const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
+ const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
+ const int b= l&0x3FF;
+ const int g= h>>8;
+ const int r= l>>16;
dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
int g= src[i*3+1];
int r= src[i*3+2];
- dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+ dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
}
#endif
}
int i;
for(i=0; i<width; i++)
{
- int d= src[i*2] + (src[i*2+1]<<8);
+ int d= ((uint16_t*)src)[i];
int b= d&0x1F;
int g= (d>>5)&0x3F;
int r= (d>>11)&0x1F;
int i;
for(i=0; i<width; i++)
{
-#if 1
- int d0= le2me_32( ((uint32_t*)src1)[i] );
- int d1= le2me_32( ((uint32_t*)src2)[i] );
+ int d0= ((uint32_t*)src1)[i];
+ int d1= ((uint32_t*)src2)[i];
int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F);
int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F);
int b= d&0x7F;
int r= (d>>11)&0x7F;
int g= d>>21;
-#else
- int d0= src1[i*4] + (src1[i*4+1]<<8);
- int b0= d0&0x1F;
- int g0= (d0>>5)&0x3F;
- int r0= (d0>>11)&0x1F;
-
- int d1= src1[i*4+2] + (src1[i*4+3]<<8);
- int b1= d1&0x1F;
- int g1= (d1>>5)&0x3F;
- int r1= (d1>>11)&0x1F;
-
- int d2= src2[i*4] + (src2[i*4+1]<<8);
- int b2= d2&0x1F;
- int g2= (d2>>5)&0x3F;
- int r2= (d2>>11)&0x1F;
-
- int d3= src2[i*4+2] + (src2[i*4+3]<<8);
- int b3= d3&0x1F;
- int g3= (d3>>5)&0x3F;
- int r3= (d3>>11)&0x1F;
-
- int b= b0 + b1 + b2 + b3;
- int g= g0 + g1 + g2 + g3;
- int r= r0 + r1 + r2 + r3;
-#endif
dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
}
int i;
for(i=0; i<width; i++)
{
- int d= src[i*2] + (src[i*2+1]<<8);
+ int d= ((uint16_t*)src)[i];
int b= d&0x1F;
int g= (d>>5)&0x1F;
int r= (d>>10)&0x1F;
int i;
for(i=0; i<width; i++)
{
-#if 1
- int d0= le2me_32( ((uint32_t*)src1)[i] );
- int d1= le2me_32( ((uint32_t*)src2)[i] );
+ int d0= ((uint32_t*)src1)[i];
+ int d1= ((uint32_t*)src2)[i];
int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F);
int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F);
int b= d&0x7F;
int r= (d>>10)&0x7F;
int g= d>>21;
-#else
- int d0= src1[i*4] + (src1[i*4+1]<<8);
- int b0= d0&0x1F;
- int g0= (d0>>5)&0x1F;
- int r0= (d0>>10)&0x1F;
-
- int d1= src1[i*4+2] + (src1[i*4+3]<<8);
- int b1= d1&0x1F;
- int g1= (d1>>5)&0x1F;
- int r1= (d1>>10)&0x1F;
-
- int d2= src2[i*4] + (src2[i*4+1]<<8);
- int b2= d2&0x1F;
- int g2= (d2>>5)&0x1F;
- int r2= (d2>>10)&0x1F;
-
- int d3= src2[i*4+2] + (src2[i*4+3]<<8);
- int b3= d3&0x1F;
- int g3= (d3>>5)&0x1F;
- int r3= (d3>>10)&0x1F;
-
- int b= b0 + b1 + b2 + b3;
- int g= g0 + g1 + g2 + g3;
- int r= r0 + r1 + r2 + r3;
-#endif
dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
}
int i;
for(i=0; i<width; i++)
{
- int r= src[i*4+0];
- int g= src[i*4+1];
- int b= src[i*4+2];
+ int r= ((uint32_t*)src)[i]&0xFF;
+ int g= (((uint32_t*)src)[i]>>8)&0xFF;
+ int b= (((uint32_t*)src)[i]>>16)&0xFF;
- dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+ dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
}
}
int i;
for(i=0; i<width; i++)
{
- int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4];
- int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5];
- int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6];
+ const int a= ((uint32_t*)src1)[2*i+0];
+ const int e= ((uint32_t*)src1)[2*i+1];
+ const int c= ((uint32_t*)src2)[2*i+0];
+ const int d= ((uint32_t*)src2)[2*i+1];
+ const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
+ const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
+ const int r= l&0x3FF;
+ const int g= h>>8;
+ const int b= l>>16;
dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
int g= src[i*3+1];
int b= src[i*3+2];
- dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+ dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
}
}
int16_t *filter, int16_t *filterPos, int filterSize)
{
#ifdef HAVE_MMX
+ assert(filterSize % 4 == 0 && filterSize>0);
if(filterSize==4) // allways true for upscaling, sometimes for down too
{
int counter= -2*dstW;
}
#ifdef HAVE_MMX
- // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
+ // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
if(!(flags&SWS_FAST_BILINEAR))
: "%eax", "%ebx", "%ecx", "%edi", "%esi"
);
#ifdef HAVE_MMX2
- } //if MMX2 cant be used
+ } //if MMX2 can't be used
#endif
#else
int i;
}
#ifdef HAVE_MMX
- // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
+ // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
if(!(flags&SWS_FAST_BILINEAR))
: "%eax", "%ebx", "%ecx", "%edi", "%esi"
);
#ifdef HAVE_MMX2
- } //if MMX2 cant be used
+ } //if MMX2 can't be used
#endif
#else
int i;
}
}
-static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
- int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
+static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dst[], int dstStride[]){
/* load a few things into local vars to make the code more readable? and faster */
const int srcW= c->srcW;
uint8_t *formatConvBuffer= c->formatConvBuffer;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
+ int lastDstY;
/* vars whch will change and which we need to storw back in the context */
int dstY= c->dstY;
int chrBufIndex= c->chrBufIndex;
int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf;
- int srcStride[3];
- int dstStride[3];
- uint8_t *src[3];
- uint8_t *dst[3];
- orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam);
- orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam);
-
if(isPacked(c->srcFormat)){
src[0]=
src[1]=
- src[2]= srcParam[0];
+ src[2]= src[0];
srcStride[0]=
srcStride[1]=
- srcStride[2]= srcStrideParam[0];
+ srcStride[2]= srcStride[0];
}
srcStride[1]<<= c->vChrDrop;
srcStride[2]<<= c->vChrDrop;
static int firstTime=1; //FIXME move this into the context perhaps
if(flags & SWS_PRINT_INFO && firstTime)
{
- mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n"
+ MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
"SwScaler: ->cannot do aligned memory acesses anymore\n");
firstTime=0;
}
lastInChrBuf= -1;
}
+ lastDstY= dstY;
+
for(;dstY < dstH; dstY++){
unsigned char *dest =dst[0]+dstStride[0]*dstY;
const int chrDstY= dstY>>c->chrDstVSubSample;
const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
+//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
+// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
//handle holes (FAST_BILINEAR & weird filters)
if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
//wrap buf index around to stay inside the ring buffer
if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
- break; //we cant output a dstY line so lets try with the next slice
+ break; //we can't output a dstY line so let's try with the next slice
}
#ifdef HAVE_MMX
}
}
}
- else // hmm looks like we cant use MMX here without overwriting this arrays tail
+ else // hmm looks like we can't use MMX here without overwriting this array's tail
{
int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
c->chrBufIndex= chrBufIndex;
c->lastInLumBuf= lastInLumBuf;
c->lastInChrBuf= lastInChrBuf;
+
+ return dstY - lastDstY;
}