2 // Software scaling and colorspace conversion routines for MPlayer
4 // temporary storage for 4 horizontally scaled luma (Y) lines; the scaler picks a row with (y0 & 3):
5 static unsigned int pix_buf_y[4][2048];
6 static unsigned int pix_buf_uv[2][2048*2]; // 2 chroma lines; samples from srcptr[1] at [0..2047], from srcptr[2] at [2048..4095]
8 // clipping helper table for C implementations; indexed with the +256 bias folded into yuvtab_2568:
9 static unsigned char clip_table[768];
11 // yuv->rgb conversion tables: 256 entries each, values are pre-multiplied and shifted right by 13 after summing:
12 static int yuvtab_2568[256]; // luma term
13 static int yuvtab_3343[256]; // chroma term,  0x3343*(i-128)
14 static int yuvtab_0c92[256]; // chroma term, -0x0c92*(i-128)
15 static int yuvtab_1a1e[256]; // chroma term, -0x1a1e*(i-128)
16 static int yuvtab_40cf[256]; // chroma term,  0x40cf*(i-128)
18 // *** bilinear scaling and yuv->rgb conversion of yv12 slices:
19 // *** Note: it's called multiple times while decoding a frame, first time y==0
20 // *** Designed to upscale, but may work for downscale too.
21 // s_xinc = (src_width << 8) / dst_width    (horizontal step, 8 fractional bits)
22 // s_yinc = (src_height << 16) / dst_height (vertical step, 16 fractional bits)
//
// Parameters, as used by the lines visible in this excerpt:
//   srcptr[0], stride[0]       - luma plane of the slice and its line stride
//   srcptr[1..2], stride[1..2] - the two chroma planes, addressed at half the luma line number
//   y                          - source line number of the first line in this slice
//   h                          - not referenced in the visible lines; presumably the slice height (TODO confirm)
//   dstptr, dststride          - destination base pointer and byte offset between output lines
//   dstw, dstbpp               - not referenced in the visible lines; presumably output width and
//                                bits per pixel (TODO confirm)
// NOTE(review): this excerpt omits the loop headers, the closing braces and the
// declarations of s_ypos, s_srcypos, xpos and i.
23 void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
24 unsigned char* dstptr, int dststride, int dstw, int dstbpp,
25 unsigned int s_xinc,unsigned int s_yinc){
28 //static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
29 //static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
31 unsigned int s_xinc2=s_xinc>>1; // half the luma step; its use is not visible here (presumably the chroma loop)
35 static int s_last_ypos; // static: keeps its value between calls; its use is not visible here
44 unsigned char *dest=dstptr+dststride*s_ypos; // start of output line s_ypos
45 int y0=2+(s_srcypos>>16); // integer part of the 16.16 source position, offset by 2
46 int y1=1+(s_srcypos>>17); // same position at half vertical resolution (chroma), offset by 1
47 int yalpha=(s_srcypos&0xFFFF)>>8; // top 8 bits of the fraction: vertical blend weight, 0..255
48 int yalpha1=yalpha^255; // complementary weight (255-yalpha for values in 0..255)
49 int uvalpha=((s_srcypos>>1)&0xFFFF)>>8; // vertical blend weight for the chroma lines
50 int uvalpha1=uvalpha^255;
51 unsigned int *buf0=pix_buf_y[y0&3]; // the 4 luma line buffers are addressed modulo 4
52 unsigned int *buf1=pix_buf_y[((y0+1)&3)];
53 unsigned int *uvbuf0=pix_buf_uv[y1&1]; // the 2 chroma line buffers alternate by line parity
54 unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];
59 s_ypos++; s_srcypos+=s_yinc; // step to the next output line and its source position
62 unsigned char *src=srcptr[0]+(y0-y)*stride[0]; // srcptr[0] is row y, so (y0-y) is the row offset inside the slice
65 // *** horizontal scale Y line to temp buffer
66 // this loop should be rewritten in MMX assembly!!!!
68 register unsigned int xx=xpos>>8; // integer source column
69 register unsigned int xalpha=xpos&0xFF; // fractional part: horizontal blend weight
70 buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha); // weights sum to 255; the result stays scaled by 255 until the vertical pass
73 // *** horizontal scale U and V lines to temp buffer
75 unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1]; // chroma rows are addressed relative to y/2
76 unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
78 // this loop should be rewritten in MMX assembly!!!!
80 register unsigned int xx=xpos>>8;
81 register unsigned int xalpha=xpos&0xFF;
82 uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);
83 uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha); // second chroma plane is stored at offset 2048 in the same row
90 // this loop should be rewritten in MMX assembly!!!!
91 // Note1: this code can be restricted to n*8 (or n*16) width lines to simplify optimization...
92 // Note2: instead of using lookup tabs, mmx version could do the multiply...
93 // Note3: maybe we should make separate 15/16, 24 and 32bpp versions of this:
95 // vertical linear interpolation && yuv2rgb in a single step:
96 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)]; // >>16 drops both blend weights (about 255*255), giving a table index
97 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);
98 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);
// Three-byte output: the same expressions are named b, g, r in the 16-bit variant below.
// The condition choosing between the two variants is not visible in this excerpt.
101 dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
102 dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
103 dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
105 unsigned short *d=dest; // NOTE(review): dest is unsigned char*; this assignment has no cast
106 unsigned int b=clip_table[((Y + yuvtab_3343[U]) >>13)];
107 unsigned int g=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
108 unsigned int r=clip_table[((Y + yuvtab_40cf[V]) >>13)];
109 // d[0]=((r>>3)<<10)|((g>>3)<<5)|((b>>3)); // 15bpp
110 d[0]=((r>>3)<<11)|((g>>2)<<5)|((b>>3)); // 16bpp: r in bits 11-15, g in bits 5-10, b in bits 0-4
121 // generating tables:
// NOTE(review): the enclosing function header and loop are not visible in this
// excerpt; i presumably runs over 0..255 (TODO confirm).
126 clip_table[i+512]=255; // entries 512 and up hold 255
127 yuvtab_2568[i]=(0x2568*(i-16))+(256<<13); // luma: subtract 16, scale by 0x2568; 256<<13 becomes a +256 clip_table index bias after >>13
128 yuvtab_3343[i]=0x3343*(i-128); // chroma terms are centred on 128 and scaled by the constant in the table name
129 yuvtab_0c92[i]=-0x0c92*(i-128);
130 yuvtab_1a1e[i]=-0x1a1e*(i-128);
131 yuvtab_40cf[i]=0x40cf*(i-128);