git.sesse.net Git - ffmpeg/blob - postproc/swscale_template.c

   1
   2 // Software scaling and colorspace conversion routines for MPlayer
   3
   4 // temporary storage for 4 yuv lines:
   5 static unsigned int pix_buf_y[4][2048];
   6 static unsigned int pix_buf_uv[2][2048*2];
   7
   8 // clipping helper table for C implementations:
   9 static unsigned char clip_table[768];
  10
  11 // yuv->rgb conversion tables:
  12 static    int yuvtab_2568[256];
  13 static    int yuvtab_3343[256];
  14 static    int yuvtab_0c92[256];
  15 static    int yuvtab_1a1e[256];
  16 static    int yuvtab_40cf[256];
  17
  18 // *** bilinear scaling and yuv->rgb conversion of yv12 slices:
  19 // *** Note: it's called multiple times while decoding a frame, first time y==0
  20 // *** Designed to upscale, but may work for downscale too.
  21 // s_xinc = (src_width << 8) / dst_width
  22 // s_yinc = (src_height << 16) / dst_height
  23 void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
  24                              unsigned char* dstptr, int dststride, int dstw, int dstbpp,
  25                              unsigned int s_xinc,unsigned int s_yinc){
  26
  27 // scaling factors:
  28 //static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
  29 //static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
  30
  31 unsigned int s_xinc2=s_xinc>>1;
  32
  33 static int s_srcypos;
  34 static int s_ypos;
  35 static int s_last_ypos;
  36
  37   if(y==0){
  38       s_srcypos=-2*s_yinc;
  39       s_ypos=-2;
  40       s_last_ypos=-2;
  41   } // reset counters
  42
  43   while(1){
  44     unsigned char *dest=dstptr+dststride*s_ypos;
  45     int y0=2+(s_srcypos>>16);
  46     int y1=1+(s_srcypos>>17);
  47     int yalpha=(s_srcypos&0xFFFF)>>8;
  48     int yalpha1=yalpha^255;
  49     int uvalpha=((s_srcypos>>1)&0xFFFF)>>8;
  50     int uvalpha1=uvalpha^255;
  51     unsigned int *buf0=pix_buf_y[y0&3];
  52     unsigned int *buf1=pix_buf_y[((y0+1)&3)];
  53     unsigned int *uvbuf0=pix_buf_uv[y1&1];
  54     unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];
  55     int i;
  56
  57     if(y0>=y+h) break;
  58
  59     s_ypos++; s_srcypos+=s_yinc;
  60
  61     if(s_last_ypos!=y0){
  62       unsigned char *src=srcptr[0]+(y0-y)*stride[0];
  63       unsigned int xpos=0;
  64       s_last_ypos=y0;
  65       // *** horizontal scale Y line to temp buffer
  66       // this loop should be rewritten in MMX assembly!!!!
  67       for(i=0;i<dstw;i++){
  68         register unsigned int xx=xpos>>8;
  69         register unsigned int xalpha=xpos&0xFF;
  70         buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha);
  71         xpos+=s_xinc;
  72       }
  73       // *** horizontal scale U and V lines to temp buffer
  74       if(!(y0&1)){
  75         unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];
  76         unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
  77         xpos=0;
  78         // this loop should be rewritten in MMX assembly!!!!
  79         for(i=0;i<dstw;i++){
  80           register unsigned int xx=xpos>>8;
  81           register unsigned int xalpha=xpos&0xFF;
  82           uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);
  83           uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha);
  84           xpos+=s_xinc2;
  85         }
  86       }
  87       if(!y0) continue;
  88     }
  89
  90     // this loop should be rewritten in MMX assembly!!!!
  91     // Note1: this code can be resticted to n*8 (or n*16) width lines to simplify optimization...
  92     // Note2: instead of using lookup tabs, mmx version could do the multiply...
  93     // Note3: maybe we should make separated 15/16, 24 and 32bpp version of this:
  94     for(i=0;i<dstw;i++){
  95         // vertical linear interpolation && yuv2rgb in a single step:
  96         int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)];
  97         int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);
  98         int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);
  99 #if 1
 100         // 24/32 bpp
 101         dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
 102         dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
 103         dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
 104 #else
 105         unsigned short *d=dest;
 106         unsigned int b=clip_table[((Y + yuvtab_3343[U]) >>13)];
 107         unsigned int g=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
 108         unsigned int r=clip_table[((Y + yuvtab_40cf[V]) >>13)];
 109 //      d[0]=((r>>3)<<10)|((g>>3)<<5)|((b>>3)); // 15bpp
 110         d[0]=((r>>3)<<11)|((g>>2)<<5)|((b>>3)); // 16bpp
 111 #endif
 112         dest+=dstbpp;
 113     }
 114
 115   }
 116
 117 }
 118
 119
 120 void SwScale_Init(){
 121     // generating tables:
 122     int i;
 123     for(i=0;i<256;i++){
 124         clip_table[i]=0;
 125         clip_table[i+256]=i;
 126         clip_table[i+512]=255;
 127         yuvtab_2568[i]=(0x2568*(i-16))+(256<<13);
 128         yuvtab_3343[i]=0x3343*(i-128);
 129         yuvtab_0c92[i]=-0x0c92*(i-128);
 130         yuvtab_1a1e[i]=-0x1a1e*(i-128);
 131         yuvtab_40cf[i]=0x40cf*(i-128);
 132     }
 133
 134 }