1000l

[ffmpeg] / postproc / swscale_template.c
diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c

index d4353586d9c3836fb3fcb8f2010b13be627fd67d..ebdb6c33d11d2dba274fb39bbc988d753b038b0d 100644 (file)
--- a/postproc/swscale_template.c
+++ b/postproc/swscale_template.c
@@ -1,5 +1,5 @@
  /*
-    Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
+    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  
      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
@@ -61,8 +61,8 @@
  
  #define YSCALEYUV2YV12X(x, offset) \
                         "xorl %%eax, %%eax              \n\t"\
-                       "pxor %%mm3, %%mm3              \n\t"\
-                       "pxor %%mm4, %%mm4              \n\t"\
+                       "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+                       "movq %%mm3, %%mm4              \n\t"\
                         "leal " offset "(%0), %%edx     \n\t"\
                         "movl (%%edx), %%esi            \n\t"\
                         ".balign 16                     \n\t" /* FIXME Unroll? */\
@@ -84,8 +84,8 @@
                         MOVNTQ(%%mm3, (%1, %%eax))\
                         "addl $8, %%eax                 \n\t"\
                         "cmpl %2, %%eax                 \n\t"\
-                       "pxor %%mm3, %%mm3              \n\t"\
-                       "pxor %%mm4, %%mm4              \n\t"\
+                       "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+                       "movq %%mm3, %%mm4              \n\t"\
                         "leal " offset "(%0), %%edx     \n\t"\
                         "movl (%%edx), %%esi            \n\t"\
                         "jb 1b                          \n\t"
@@ -117,8 +117,8 @@
                 "1:                             \n\t"\
                 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx        \n\t"\
                 "movl (%%edx), %%esi            \n\t"\
-               "pxor %%mm3, %%mm3              \n\t"\
-               "pxor %%mm4, %%mm4              \n\t"\
+               "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+               "movq %%mm3, %%mm4              \n\t"\
                 ".balign 16                     \n\t"\
                 "2:                             \n\t"\
                 "movq 8(%%edx), %%mm0           \n\t" /* filterCoeff */\
@@ -135,8 +135,8 @@
  \
                 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx        \n\t"\
                 "movl (%%edx), %%esi            \n\t"\
-               "pxor %%mm1, %%mm1              \n\t"\
-               "pxor %%mm7, %%mm7              \n\t"\
+               "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
+               "movq %%mm1, %%mm7              \n\t"\
                 ".balign 16                     \n\t"\
                 "2:                             \n\t"\
                 "movq 8(%%edx), %%mm0           \n\t" /* filterCoeff */\
@@ -741,7 +741,6 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t *
                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
  {
-       int dummy=0;
  #ifdef HAVE_MMX
         if(uDest != NULL)
         {
@@ -951,7 +950,7 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *
         int uvalpha1=uvalpha^4095;
         int i;
  
-#if 0 //isnt used
+#if 0 //isn't used
         if(flags&SWS_FULL_CHR_H_INT)
         {
                 switch(dstFormat)
@@ -1170,7 +1169,7 @@ FULL_YSCALEYUV2RGB
  #ifdef HAVE_MMX
         switch(c->dstFormat)
         {
-//Note 8280 == DSTW_OFFSET but the preprocessor cant handle that there :(
+//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
         case IMGFMT_BGR32:
                         asm volatile(
                                 "movl %%esp, "ESP_OFFSET"(%5)           \n\t"
@@ -1584,11 +1583,11 @@ static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
         int i;
         for(i=0; i<width; i++)
         {
-               int b= src[i*4+0];
-               int g= src[i*4+1];
-               int r= src[i*4+2];
+               int b=  ((uint32_t*)src)[i]&0xFF;
+               int g= (((uint32_t*)src)[i]>>8)&0xFF;
+               int r= (((uint32_t*)src)[i]>>16)&0xFF;
  
-               dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+               dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
         }
  #endif
  }
@@ -1600,9 +1599,15 @@ static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
         int i;
         for(i=0; i<width; i++)
         {
-               int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4];
-               int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5];
-               int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6];
+               const int a= ((uint32_t*)src1)[2*i+0];
+               const int e= ((uint32_t*)src1)[2*i+1];
+               const int c= ((uint32_t*)src2)[2*i+0];
+               const int d= ((uint32_t*)src2)[2*i+1];
+               const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
+               const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
+               const int b=  l&0x3FF;
+               const int g=  h>>8;
+               const int r=  l>>16;
  
                 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
                 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
@@ -1690,7 +1695,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width)
                 int g= src[i*3+1];
                 int r= src[i*3+2];
  
-               dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+               dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
         }
  #endif
  }
@@ -1866,7 +1871,7 @@ static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
         int i;
         for(i=0; i<width; i++)
         {
-               int d= src[i*2] + (src[i*2+1]<<8);
+               int d= ((uint16_t*)src)[i];
                 int b= d&0x1F;
                 int g= (d>>5)&0x3F;
                 int r= (d>>11)&0x1F;
@@ -1880,9 +1885,8 @@ static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
         int i;
         for(i=0; i<width; i++)
         {
-#if 1
-               int d0= le2me_32( ((uint32_t*)src1)[i] );
-               int d1= le2me_32( ((uint32_t*)src2)[i] );
+               int d0= ((uint32_t*)src1)[i];
+               int d1= ((uint32_t*)src2)[i];
                 
                 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F);
                 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F);
@@ -1893,31 +1897,6 @@ static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
                 int b= d&0x7F;
                 int r= (d>>11)&0x7F;
                 int g= d>>21;
-#else
-               int d0= src1[i*4] + (src1[i*4+1]<<8);
-               int b0= d0&0x1F;
-               int g0= (d0>>5)&0x3F;
-               int r0= (d0>>11)&0x1F;
-
-               int d1= src1[i*4+2] + (src1[i*4+3]<<8);
-               int b1= d1&0x1F;
-               int g1= (d1>>5)&0x3F;
-               int r1= (d1>>11)&0x1F;
-
-               int d2= src2[i*4] + (src2[i*4+1]<<8);
-               int b2= d2&0x1F;
-               int g2= (d2>>5)&0x3F;
-               int r2= (d2>>11)&0x1F;
-
-               int d3= src2[i*4+2] + (src2[i*4+3]<<8);
-               int b3= d3&0x1F;
-               int g3= (d3>>5)&0x3F;
-               int r3= (d3>>11)&0x1F;
-
-               int b= b0 + b1 + b2 + b3;
-               int g= g0 + g1 + g2 + g3;
-               int r= r0 + r1 + r2 + r3;
-#endif
                 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
                 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
         }
@@ -1928,7 +1907,7 @@ static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
         int i;
         for(i=0; i<width; i++)
         {
-               int d= src[i*2] + (src[i*2+1]<<8);
+               int d= ((uint16_t*)src)[i];
                 int b= d&0x1F;
                 int g= (d>>5)&0x1F;
                 int r= (d>>10)&0x1F;
@@ -1942,9 +1921,8 @@ static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
         int i;
         for(i=0; i<width; i++)
         {
-#if 1
-               int d0= le2me_32( ((uint32_t*)src1)[i] );
-               int d1= le2me_32( ((uint32_t*)src2)[i] );
+               int d0= ((uint32_t*)src1)[i];
+               int d1= ((uint32_t*)src2)[i];
                 
                 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F);
                 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F);
@@ -1955,31 +1933,6 @@ static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
                 int b= d&0x7F;
                 int r= (d>>10)&0x7F;
                 int g= d>>21;
-#else
-               int d0= src1[i*4] + (src1[i*4+1]<<8);
-               int b0= d0&0x1F;
-               int g0= (d0>>5)&0x1F;
-               int r0= (d0>>10)&0x1F;
-
-               int d1= src1[i*4+2] + (src1[i*4+3]<<8);
-               int b1= d1&0x1F;
-               int g1= (d1>>5)&0x1F;
-               int r1= (d1>>10)&0x1F;
-
-               int d2= src2[i*4] + (src2[i*4+1]<<8);
-               int b2= d2&0x1F;
-               int g2= (d2>>5)&0x1F;
-               int r2= (d2>>10)&0x1F;
-
-               int d3= src2[i*4+2] + (src2[i*4+3]<<8);
-               int b3= d3&0x1F;
-               int g3= (d3>>5)&0x1F;
-               int r3= (d3>>10)&0x1F;
-
-               int b= b0 + b1 + b2 + b3;
-               int g= g0 + g1 + g2 + g3;
-               int r= r0 + r1 + r2 + r3;
-#endif
                 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
                 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
         }
@@ -1991,11 +1944,11 @@ static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
         int i;
         for(i=0; i<width; i++)
         {
-               int r= src[i*4+0];
-               int g= src[i*4+1];
-               int b= src[i*4+2];
+               int r=  ((uint32_t*)src)[i]&0xFF;
+               int g= (((uint32_t*)src)[i]>>8)&0xFF;
+               int b= (((uint32_t*)src)[i]>>16)&0xFF;
  
-               dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+               dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
         }
  }
  
@@ -2004,9 +1957,15 @@ static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
         int i;
         for(i=0; i<width; i++)
         {
-               int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4];
-               int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5];
-               int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6];
+               const int a= ((uint32_t*)src1)[2*i+0];
+               const int e= ((uint32_t*)src1)[2*i+1];
+               const int c= ((uint32_t*)src2)[2*i+0];
+               const int d= ((uint32_t*)src2)[2*i+1];
+               const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
+               const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
+               const int r=  l&0x3FF;
+               const int g=  h>>8;
+               const int b=  l>>16;
  
                 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
                 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
@@ -2022,7 +1981,7 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
                 int g= src[i*3+1];
                 int b= src[i*3+2];
  
-               dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+               dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
         }
  }
  
@@ -2046,6 +2005,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
                                   int16_t *filter, int16_t *filterPos, int filterSize)
  {
  #ifdef HAVE_MMX
+       assert(filterSize % 4 == 0 && filterSize>0);
         if(filterSize==4) // allways true for upscaling, sometimes for down too
         {
                 int counter= -2*dstW;
@@ -2251,7 +2211,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
      }
  
  #ifdef HAVE_MMX
-       // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
+       // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
      if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
  #else
      if(!(flags&SWS_FAST_BILINEAR))
@@ -2343,7 +2303,7 @@ FUNNY_Y_CODE
                 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
                 );
  #ifdef HAVE_MMX2
-       } //if MMX2 cant be used
+       } //if MMX2 can't be used
  #endif
  #else
         int i;
@@ -2419,7 +2379,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, u
      }
  
  #ifdef HAVE_MMX
-       // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
+       // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
      if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
  #else
      if(!(flags&SWS_FAST_BILINEAR))
@@ -2522,7 +2482,7 @@ FUNNY_UV_CODE
                 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
                 );
  #ifdef HAVE_MMX2
-       } //if MMX2 cant be used
+       } //if MMX2 can't be used
  #endif
  #else
         int i;
@@ -2543,8 +2503,8 @@ FUNNY_UV_CODE
     }
  }
  
-static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
-             int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
+static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+             int srcSliceH, uint8_t* dst[], int dstStride[]){
  
         /* load a few things into local vars to make the code more readable? and faster */
         const int srcW= c->srcW;
@@ -2581,6 +2541,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
         uint8_t *formatConvBuffer= c->formatConvBuffer;
         const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
         const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
+       int lastDstY;
  
         /* vars whch will change and which we need to storw back in the context */
         int dstY= c->dstY;
@@ -2588,21 +2549,14 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
         int chrBufIndex= c->chrBufIndex;
         int lastInLumBuf= c->lastInLumBuf;
         int lastInChrBuf= c->lastInChrBuf;
-       int srcStride[3];
-       int dstStride[3];
-       uint8_t *src[3];
-       uint8_t *dst[3];
         
-       orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam);
-       orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam);
-
         if(isPacked(c->srcFormat)){
                 src[0]=
                 src[1]=
-               src[2]= srcParam[0];
+               src[2]= src[0];
                 srcStride[0]=
                 srcStride[1]=
-               srcStride[2]= srcStrideParam[0];
+               srcStride[2]= srcStride[0];
         }
         srcStride[1]<<= c->vChrDrop;
         srcStride[2]<<= c->vChrDrop;
@@ -2628,7 +2582,7 @@ i--;
                 static int firstTime=1; //FIXME move this into the context perhaps
                 if(flags & SWS_PRINT_INFO && firstTime)
                 {
-                       mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n"
+                       MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
                                         "SwScaler:          ->cannot do aligned memory acesses anymore\n");
                         firstTime=0;
                 }
@@ -2644,6 +2598,8 @@ i--;
                 lastInChrBuf= -1;
         }
  
+       lastDstY= dstY;
+
         for(;dstY < dstH; dstY++){
                 unsigned char *dest =dst[0]+dstStride[0]*dstY;
                 const int chrDstY= dstY>>c->chrDstVSubSample;
@@ -2655,6 +2611,8 @@ i--;
                 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
                 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
  
+//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
+// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize,  c->chrSrcVSubSample);
                 //handle holes (FAST_BILINEAR & weird filters)
                 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
                 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
@@ -2742,7 +2700,7 @@ i--;
                         //wrap buf index around to stay inside the ring buffer
                         if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
                         if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
-                       break; //we cant output a dstY line so lets try with the next slice
+                       break; //we can't output a dstY line so let's try with the next slice
                 }
  
  #ifdef HAVE_MMX
@@ -2816,7 +2774,7 @@ i--;
                         }
                 }
              }
-           else // hmm looks like we cant use MMX here without overwriting this arrays tail
+           else // hmm looks like we can't use MMX here without overwriting this array's tail
             {
                 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
                 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -2851,4 +2809,6 @@ i--;
         c->chrBufIndex= chrBufIndex;
         c->lastInLumBuf= lastInLumBuf;
         c->lastInChrBuf= lastInChrBuf;
+
+       return dstY - lastDstY;
  }