git.sesse.net Git - ffmpeg/blob - libswscale/swscale_template.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
  22                               const int16_t **lumSrc, int lumFilterSize,
  23                               const int16_t *chrFilter, const int16_t **chrUSrc,
  24                               const int16_t **chrVSrc,
  25                               int chrFilterSize, const int16_t **alpSrc,
  26                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
  27                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
  28 {
  29     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
  30                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
  31                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
  32 }
  33
  34 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
  35                                const int16_t **lumSrc, int lumFilterSize,
  36                                const int16_t *chrFilter, const int16_t **chrUSrc,
  37                                const int16_t **chrVSrc,
  38                                int chrFilterSize, uint8_t *dest, uint8_t *uDest,
  39                                int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither)
  40 {
  41     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
  42                  chrFilter, chrUSrc, chrVSrc, chrFilterSize,
  43                  dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither);
  44 }
  45
  46 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
  47                               const int16_t *chrUSrc, const int16_t *chrVSrc,
  48                               const int16_t *alpSrc,
  49                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
  50                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
  51 {
  52     int i;
  53
  54     for (i=0; i<dstW; i++) {
  55         int val= (lumSrc[i]+lumDither[i&7])>>7;
  56         dest[i]= av_clip_uint8(val);
  57     }
  58
  59     if (uDest)
  60         for (i=0; i<chrDstW; i++) {
  61             int u=(chrUSrc[i]+chrDither[i&7])>>7;
  62             int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
  63             uDest[i]= av_clip_uint8(u);
  64             vDest[i]= av_clip_uint8(v);
  65         }
  66
  67     if (CONFIG_SWSCALE_ALPHA && aDest)
  68         for (i=0; i<dstW; i++) {
  69             int val= (alpSrc[i]+lumDither[i&7])>>7;
  70             aDest[i]= av_clip_uint8(val);
  71         }
  72 }
  73
  74
  75 /**
  76  * vertical scale YV12 to RGB
  77  */
  78 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
  79                                  const int16_t **lumSrc, int lumFilterSize,
  80                                  const int16_t *chrFilter, const int16_t **chrUSrc,
  81                                  const int16_t **chrVSrc,
  82                                  int chrFilterSize, const int16_t **alpSrc,
  83                                  uint8_t *dest, int dstW, int dstY)
  84 {
  85         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
  86                        chrFilter, chrUSrc, chrVSrc, chrFilterSize,
  87                        alpSrc, dest, dstW, dstY);
  88 }
  89
  90 /**
  91  * vertical bilinear scale YV12 to RGB
  92  */
  93 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
  94                                  const uint16_t *buf1, const uint16_t *ubuf0,
  95                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
  96                                  const uint16_t *vbuf1, const uint16_t *abuf0,
  97                                  const uint16_t *abuf1, uint8_t *dest, int dstW,
  98                                  int yalpha, int uvalpha, int y)
  99 {
 100     int  yalpha1=4095- yalpha;
 101     int uvalpha1=4095-uvalpha;
 102     int i;
 103
 104     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
 105 }
 106
 107 /**
 108  * YV12 to RGB without scaling or interpolating
 109  */
 110 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
 111                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
 112                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
 113                                  const uint16_t *abuf0, uint8_t *dest, int dstW,
 114                                  int uvalpha, enum PixelFormat dstFormat,
 115                                  int flags, int y)
 116 {
 117     const int yalpha1=0;
 118     int i;
 119
 120     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 121     const int yalpha= 4096; //FIXME ...
 122
 123     if (uvalpha < 2048) {
 124         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
 125     } else {
 126         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
 127     }
 128 }
 129
 130 //FIXME yuy2* can read up to 7 samples too much
 131
 /**
  * Copy every even-offset byte of src into dst.
  *
  * @param dst     output row, width bytes are written
  * @param src     input row, bytes 0, 2, ..., 2*(width-1) are read
  * @param width   number of output samples; nothing happens for width <= 0
  * @param unused  ignored
  */
 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int x = 0;

     while (x < width) {
         dst[x] = src[2 * x];
         x++;
     }
 }
 139
 /**
  * Split the chroma bytes out of a row of 4-byte groups: byte 1 of each
  * group goes to dstU, byte 3 to dstV.
  *
  * Only src1 is read. src2 is expected to be the same pointer, which is
  * checked with an assert after the copy.
  *
  * @param dstU    U output, width bytes
  * @param dstV    V output, width bytes
  * @param src1    input row, 4 bytes consumed per output sample
  * @param src2    must equal src1
  * @param width   number of chroma samples to produce
  * @param unused  ignored
  */
 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int x = 0;

     while (x < width) {
         const int base = 4 * x;

         dstU[x] = src1[base + 1];
         dstV[x] = src1[base + 3];
         x++;
     }
     assert(src1 == src2);
 }
 150
 /**
  * Take the odd-offset byte of every 2-byte pair: from src1 into dstU and
  * from src2 into dstV.
  *
  * @param dstU    U output, width bytes
  * @param dstV    V output, width bytes
  * @param src1    U input, 2 bytes per sample
  * @param src2    V input, 2 bytes per sample
  * @param width   number of samples
  * @param unused  ignored
  */
 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
 {
     int x = 0;

     // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
     // we need to skip each second pixel. Same for BEToUV.
     while (x < width) {
         const int hi = 2 * x + 1;

         dstU[x] = src1[hi];
         dstV[x] = src2[hi];
         x++;
     }
 }
 162
 /* This is almost identical to yuy2ToY_c and exists only because calling
  * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses.
  *
  * Copies every odd-offset byte of src (1, 3, 5, ...) into dst; the unused
  * argument is ignored. */
 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int x = 0;

     while (x < width) {
         dst[x] = src[2 * x + 1];
         x++;
     }
 }
 172
 /**
  * Split the chroma bytes out of a row of 4-byte groups: byte 0 of each
  * group goes to dstU, byte 2 to dstV.
  *
  * Only src1 is read. src2 is expected to be the same pointer, which is
  * checked with an assert after the copy. The unused argument is ignored.
  */
 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int x = 0;

     while (x < width) {
         const int base = 4 * x;

         dstU[x] = src1[base + 0];
         dstV[x] = src1[base + 2];
         x++;
     }
     assert(src1 == src2);
 }
 183
 /**
  * Take the even-offset byte of every 2-byte pair: from src1 into dstU and
  * from src2 into dstV. The unused argument is ignored.
  */
 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
 {
     int x = 0;

     while (x < width) {
         const int lo = 2 * x;

         dstU[x] = src1[lo];
         dstV[x] = src2[lo];
         x++;
     }
 }
 193
 /**
  * De-interleave a row of byte pairs: the first byte of each pair is stored
  * in dst1, the second in dst2.
  *
  * @param dst1   receives src[0], src[2], ...
  * @param dst2   receives src[1], src[3], ...
  * @param src    interleaved input, 2*width bytes are read
  * @param width  number of pairs
  */
 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                               const uint8_t *src, int width)
 {
     int x = 0;

     while (x < width) {
         const int pair = 2 * x;

         dst1[x] = src[pair + 0];
         dst2[x] = src[pair + 1];
         x++;
     }
 }
 203
 // FIXME Maybe dither instead.
 /*
  * YUV_NBPS(depth, endianness, rfunc) generates two row readers named
  * <endianness><depth>ToUV_c and <endianness><depth>ToY_c. Each one loads
  * 16-bit samples through rfunc() and shifts them right by (depth - 8)
  * before storing the result as a byte. The trailing "unused" argument of
  * the generated functions is ignored.
  */
 #ifndef YUV_NBPS
 #define YUV_NBPS(depth, endianness, rfunc) \
 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                                  const uint16_t *srcU, \
                                                  const uint16_t *srcV, \
                                                  int width, uint32_t *unused) \
 { \
     int x = 0; \
     while (x < width) { \
         dstU[x] = rfunc(&srcU[x]) >> (depth - 8); \
         dstV[x] = rfunc(&srcV[x]) >> (depth - 8); \
         x++; \
     } \
 } \
 \
 static inline void endianness ## depth ## ToY_c(uint8_t *dstY, \
                                                 const uint16_t *srcY, \
                                                 int width, uint32_t *unused) \
 { \
     int x = 0; \
     while (x < width) { \
         dstY[x] = rfunc(&srcY[x]) >> (depth - 8); \
         x++; \
     } \
 }

 YUV_NBPS( 9, LE, AV_RL16)
 YUV_NBPS( 9, BE, AV_RB16)
 YUV_NBPS(10, LE, AV_RL16)
 YUV_NBPS(10, BE, AV_RB16)
 #endif // YUV_NBPS
 230
 /**
  * Chroma reader for interleaved byte pairs in U,V order.
  *
  * Hands src1 to nvXXtoUV_c() so that the first byte of each pair lands in
  * dstU and the second in dstV. src2 and unused are ignored.
  */
 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
                               int width, uint32_t *unused)
 {
     /* first byte of each pair -> U, second -> V */
     nvXXtoUV_c(dstU, dstV, src1, width);
 }
 237
 /**
  * Chroma reader for interleaved byte pairs in V,U order.
  *
  * Same as nv12ToUV_c() but with the two destinations swapped in the call to
  * nvXXtoUV_c(): the first byte of each pair lands in dstV and the second in
  * dstU. src2 and unused are ignored.
  */
 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
                               int width, uint32_t *unused)
 {
     /* first byte of each pair -> V, second -> U */
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 244
 // bilinear / bicubic scaling
 /**
  * Horizontal FIR filter over one row of 8-bit samples.
  *
  * For output sample x, filterSize consecutive source bytes starting at
  * src[filterPos[x]] are multiplied with the coefficients
  * filter[filterSize*x .. filterSize*x + filterSize - 1] and summed. The sum
  * is shifted right by 7 and capped at 32767 from above (there is no lower
  * clamp) before being stored in dst[x].
  *
  * @param dst         output row, dstW entries
  * @param dstW        number of output samples
  * @param src         8-bit input row
  * @param srcW        not used by this implementation
  * @param xInc        not used by this implementation
  * @param filter      coefficients, filterSize per output sample
  * @param filterPos   first source index for each output sample
  * @param filterSize  number of taps per output sample
  */
 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                             int srcW, int xInc,
                             const int16_t *filter, const int16_t *filterPos,
                             int filterSize)
 {
     int x = 0;

     while (x < dstW) {
         const int firstSrc  = filterPos[x];
         const int firstCoef = filterSize * x;
         int acc = 0;
         int tap;

         for (tap = 0; tap < filterSize; tap++)
             acc += ((int)src[firstSrc + tap]) * filter[firstCoef + tap];

         // the cubic equation does overflow ...
         dst[x] = FFMIN(acc >> 7, (1 << 15) - 1);
         x++;
     }
 }
 264
 265
 /*
  * Buffer-tracing helpers for swScale_c().
  *
  * DEBUG_SWSCALE_BUFFERS is a compile-time switch; with the value 0 the
  * av_log() call below is guarded by a constant-false condition.
  *
  * DEBUG_BUFFERS(...) logs its printf-style arguments at AV_LOG_DEBUG level
  * and expects a variable named c to be in scope at the call site. It is
  * wrapped in do { } while (0) so that it behaves as exactly one statement:
  * with a bare "if (...) av_log(...)" expansion, an "else" following the
  * macro would silently attach to the macro's hidden "if". Call sites must
  * terminate the invocation with a semicolon.
  */
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) \
     do { \
         if (DEBUG_SWSCALE_BUFFERS) \
             av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
     } while (0)

 #if HAVE_MMX
 /* Forward declaration; swScale_c() calls this once per output line when
  * HAVE_MMX is set. */
 static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
                                   int lastInLumBuf, int lastInChrBuf);
 #endif
 273
 /**
  * Scale one horizontal slice of the source picture into the destination.
  *
  * For every destination line, starting at c->dstY (reset to 0 when
  * srcSliceY is 0), the loop first horizontally scales the source lines that
  * are still missing into the lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf
  * line buffers and then runs the vertical scaler / output writer selected by
  * dstFormat and the vertical filter sizes. When the slice does not contain
  * enough source lines for the current destination line, the lines that are
  * available get buffered and the loop stops. The bookkeeping is stored back
  * into the context at the end of the call.
  *
  * @param c          scaler context; dstY, lumBufIndex, chrBufIndex,
  *                   lastInLumBuf and lastInChrBuf are read on entry and
  *                   written back on exit
  * @param src        source plane pointers; overwritten in place for packed
  *                   source formats (all four entries become src[0])
  * @param srcStride  source strides; overwritten in place (packed aliasing,
  *                   and entries 1 and 2 are shifted left by c->vChrDrop)
  * @param srcSliceY  index of the first source line contained in this slice
  * @param srcSliceH  number of source lines in this slice
  * @param dst        destination plane pointers
  * @param dstStride  destination strides
  * @return dstY - lastDstY, i.e. how far the destination line counter
  *         advanced during this call
  */
 274 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
 275                      int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
 276 {
 277     /* load a few things into local vars to make the code more readable? and faster */
 278     const int srcW= c->srcW;
 279     const int dstW= c->dstW;
 280     const int dstH= c->dstH;
 281     const int chrDstW= c->chrDstW;
 282     const int chrSrcW= c->chrSrcW;
 283     const int lumXInc= c->lumXInc;
 284     const int chrXInc= c->chrXInc;
 285     const enum PixelFormat dstFormat= c->dstFormat;
 286     const int flags= c->flags;
 287     int16_t *vLumFilterPos= c->vLumFilterPos;
 288     int16_t *vChrFilterPos= c->vChrFilterPos;
 289     int16_t *hLumFilterPos= c->hLumFilterPos;
 290     int16_t *hChrFilterPos= c->hChrFilterPos;
 291     int16_t *vLumFilter= c->vLumFilter;
 292     int16_t *vChrFilter= c->vChrFilter;
 293     int16_t *hLumFilter= c->hLumFilter;
 294     int16_t *hChrFilter= c->hChrFilter;
 295     int32_t *lumMmxFilter= c->lumMmxFilter;
 296     int32_t *chrMmxFilter= c->chrMmxFilter;
 297     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
 298     const int vLumFilterSize= c->vLumFilterSize;
 299     const int vChrFilterSize= c->vChrFilterSize;
 300     const int hLumFilterSize= c->hLumFilterSize;
 301     const int hChrFilterSize= c->hChrFilterSize;
 302     int16_t **lumPixBuf= c->lumPixBuf;
 303     int16_t **chrUPixBuf= c->chrUPixBuf;
 304     int16_t **chrVPixBuf= c->chrVPixBuf;
 305     int16_t **alpPixBuf= c->alpPixBuf;
 306     const int vLumBufSize= c->vLumBufSize;
 307     const int vChrBufSize= c->vChrBufSize;
 308     uint8_t *formatConvBuffer= c->formatConvBuffer;
         // Slice start/height in chroma lines; the height is rounded up
         // (negate, shift, negate) so a partial chroma line still counts.
 309     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
 310     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
 311     int lastDstY;
 312     uint32_t *pal=c->pal_yuv;
 313
 314     /* vars which will change and which we need to store back in the context */
 315     int dstY= c->dstY;
 316     int lumBufIndex= c->lumBufIndex;
 317     int chrBufIndex= c->chrBufIndex;
 318     int lastInLumBuf= c->lastInLumBuf;
 319     int lastInChrBuf= c->lastInChrBuf;
 320
         // Packed input: make all four plane pointers and strides refer to plane 0.
 321     if (isPacked(c->srcFormat)) {
 322         src[0]=
 323         src[1]=
 324         src[2]=
 325         src[3]= src[0];
 326         srcStride[0]=
 327         srcStride[1]=
 328         srcStride[2]=
 329         srcStride[3]= srcStride[0];
 330     }
         // Multiply the chroma strides by 2^vChrDrop.
         // NOTE(review): presumably this makes the reader skip chroma lines -- confirm.
 331     srcStride[1]<<= c->vChrDrop;
 332     srcStride[2]<<= c->vChrDrop;
 333
 334     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
 335                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
 336                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
 337     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
 338                    srcSliceY,    srcSliceH,    dstY,    dstH);
 339     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
 340                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
 341
         // A destination stride that is not a multiple of 8 is only reported:
         // at most once (static flag) and only with SWS_PRINT_INFO set.
         // Scaling continues either way.
 342     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
 343         static int warnedAlready=0; //FIXME move this into the context perhaps
 344         if (flags & SWS_PRINT_INFO && !warnedAlready) {
 345             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
 346                    "         ->cannot do aligned memory accesses anymore\n");
 347             warnedAlready=1;
 348         }
 349     }
 350
 351     /* Note the user might start scaling the picture in the middle so this
 352        will not get executed. This is not really intended but works
 353        currently, so people might do it. */
 354     if (srcSliceY ==0) {
 355         lumBufIndex=-1;
 356         chrBufIndex=-1;
 357         dstY=0;
 358         lastInLumBuf= -1;
 359         lastInChrBuf= -1;
 360     }
 361
         // Line this call starts at; needed for the return value and for the
         // alpha fill after the loop.
 362     lastDstY= dstY;
 363
 364     for (;dstY < dstH; dstY++) {
             // Output row pointers; chroma rows are addressed with the
             // vertically subsampled line number chrDstY.
 365         unsigned char *dest =dst[0]+dstStride[0]*dstY;
 366         const int chrDstY= dstY>>c->chrDstVSubSample;
 367         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
 368         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
 369         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
             // Sources matching isNBPS()/is16BPS() get a row of dithers[7],
             // everything else uses flat64.
 370         const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY   &7] : flat64;
 371         const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
 372
 373         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
             // firstLumSrcY2: same lookup for the last luma line (clamped to
             // dstH-1) of the group of 2^chrDstVSubSample lines containing dstY.
 374         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
 375         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
 376         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
 377         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
 378         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
 379         int enough_lines;
 380
 381         //handle holes (FAST_BILINEAR & weird filters)
 382         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
 383         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
 384         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
 385         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
 386
 387         DEBUG_BUFFERS("dstY: %d\n", dstY);
 388         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
 389                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
 390         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
 391                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
 392
 393         // Do we have enough lines in this slice to output the dstY line
 394         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
 395
             // Not enough input: limit the horizontal pass to the lines this
             // slice still provides; the loop is left further down (break).
 396         if (!enough_lines) {
 397             lastLumSrcY = srcSliceY + srcSliceH - 1;
 398             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
 399             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
 400                                             lastLumSrcY, lastChrSrcY);
 401         }
 402
 403         //Do horizontal scaling
 404         while(lastInLumBuf < lastLumSrcY) {
                 // src2 is the row of plane 3; it is only used for the alpha pass below.
 405             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
 406             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
 407             lumBufIndex++;
 408             assert(lumBufIndex < 2*vLumBufSize);
 409             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
 410             assert(lastInLumBuf + 1 - srcSliceY >= 0);
 411             hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
 412                       hLumFilter, hLumFilterPos, hLumFilterSize,
 413                       formatConvBuffer,
 414                       pal, 0);
 415             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
 416                 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
 417                           lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
 418                           formatConvBuffer,
 419                           pal, 1);
 420             lastInLumBuf++;
 421             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
 422                                lumBufIndex,    lastInLumBuf);
 423         }
 424         while(lastInChrBuf < lastChrSrcY) {
 425             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
 426             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
 427             chrBufIndex++;
 428             assert(chrBufIndex < 2*vChrBufSize);
 429             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
 430             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
 431             //FIXME replace parameters through context struct (some at least)
 432
                 // Chroma is scaled only when needs_hcscale is set; the
                 // buffer bookkeeping advances regardless.
 433             if (c->needs_hcscale)
 434                 hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
 435                           chrDstW, src1, src2, chrSrcW, chrXInc,
 436                           hChrFilter, hChrFilterPos, hChrFilterSize,
 437                           formatConvBuffer, pal);
 438             lastInChrBuf++;
 439             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
 440                                chrBufIndex,    lastInChrBuf);
 441         }
 442         //wrap buf index around to stay inside the ring buffer
 443         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
 444         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
 445         if (!enough_lines)
 446             break; //we can't output a dstY line so let's try with the next slice
 447
 448 #if HAVE_MMX
 449         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
 450 #endif
             // All lines except the last two mostly go through the function
             // pointers stored in the context; the else branch further down
             // handles the final two lines with direct calls to the *inC
             // implementations.
 451         if (dstY < dstH-2) {
                 // Position of the first needed line inside each ring buffer.
                 // NOTE(review): the "+ v*BufSize" offset presumably relies on the
                 // pointer arrays holding 2*v*BufSize entries (the asserts in
                 // this function use that bound) -- confirm at the allocation site.
 452             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
 453             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
 454             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
 455             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
 456             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                     // A NULL uDest is passed on luma lines that do not start a chroma line.
 457                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 458                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
 459                 c->yuv2nv12X(c,
 460                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 461                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 462                              dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
 463             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
 464                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 465                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
 466                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
 467                     yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 468                                   vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
 469                                   chrVSrcPtr, vChrFilterSize,
 470                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
 471                                   (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
 472                                   dstFormat);
 473                 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
 474                     const int16_t *lumBuf = lumSrcPtr[0];
 475                     const int16_t *chrUBuf= chrUSrcPtr[0];
 476                     const int16_t *chrVBuf= chrVSrcPtr[0];
 477                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
 478                     c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
 479                                 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
 480                 } else { //General YV12
 481                     c->yuv2yuvX(c,
 482                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 483                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
 484                                 chrVSrcPtr, vChrFilterSize,
 485                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
 486                 }
 487             } else {
                     // Everything else is written by the packed / RGB output
                     // functions; the variant depends on the vertical filter
                     // sizes and on SWS_FULL_CHR_H_INT.
 488                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
 489                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
 490                 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
 491                     int chrAlpha= vChrFilter[2*dstY+1];
 492                     if(flags & SWS_FULL_CHR_H_INT) {
 493                         yuv2rgbXinC_full(c, //FIXME write a packed1_full function
 494                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 495                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
 496                                          chrVSrcPtr, vChrFilterSize,
 497                                          alpSrcPtr, dest, dstW, dstY);
 498                     } else {
 499                         c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
 500                                        *chrVSrcPtr, *(chrVSrcPtr+1),
 501                                        alpPixBuf ? *alpSrcPtr : NULL,
 502                                        dest, dstW, chrAlpha, dstFormat, flags, dstY);
 503                     }
 504                 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
 505                     int lumAlpha= vLumFilter[2*dstY+1];
 506                     int chrAlpha= vChrFilter[2*dstY+1];
                         // The first coefficient of each 2-tap filter is
                         // replicated into both 16-bit halves (*0x10001) of
                         // entries 2 and 3 of the MMX filter tables.
 507                     lumMmxFilter[2]=
 508                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
 509                     chrMmxFilter[2]=
 510                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
 511                     if(flags & SWS_FULL_CHR_H_INT) {
 512                         yuv2rgbXinC_full(c, //FIXME write a packed2_full function
 513                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 514                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 515                                          alpSrcPtr, dest, dstW, dstY);
 516                     } else {
 517                         c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
 518                                        *chrVSrcPtr, *(chrVSrcPtr+1),
 519                                        alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
 520                                        dest, dstW, lumAlpha, chrAlpha, dstY);
 521                     }
 522                 } else { //general RGB
 523                     if(flags & SWS_FULL_CHR_H_INT) {
 524                         yuv2rgbXinC_full(c,
 525                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 526                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 527                                          alpSrcPtr, dest, dstW, dstY);
 528                     } else {
 529                         c->yuv2packedX(c,
 530                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 531                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 532                                        alpSrcPtr, dest, dstW, dstY);
 533                     }
 534                 }
 535             }
 536         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
                 // Same dispatch as above for the last two lines, but every
                 // call goes directly to a *inC function instead of through
                 // the context's function pointers.
 537             const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
 538             const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
 539             const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
 540             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
 541             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
 542                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 543                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
 544                 yuv2nv12XinC(
 545                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 546                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 547                              dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
 548             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
 549                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
 550                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
 551                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
 552                     yuv2yuvX16inC(
 553                                   vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 554                                   vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 555                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
 556                                   dstFormat);
 557                 } else {
 558                     yuv2yuvXinC(
 559                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
 560                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 561                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
 562                 }
 563             } else {
 564                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
 565                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
 566                 if(flags & SWS_FULL_CHR_H_INT) {
 567                     yuv2rgbXinC_full(c,
 568                                      vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 569                                      vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 570                                      alpSrcPtr, dest, dstW, dstY);
 571                 } else {
 572                     yuv2packedXinC(c,
 573                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
 574                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 575                                    alpSrcPtr, dest, dstW, dstY);
 576                 }
 577             }
 578         }
 579     }
 580
         // YUVA420P output without alpha line buffers.
         // NOTE(review): presumably this fills the dstY-lastDstY alpha rows
         // produced by this call with 255 -- confirm fillPlane()'s argument order.
 581     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
 582         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 583
         // The sfence is only issued when the CPU reports the MMX2 flag at run time.
 584 #if HAVE_MMX2
 585     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
 586         __asm__ volatile("sfence":::"memory");
 587 #endif
 588     emms_c();
 589
 590     /* store changed local vars back in the context */
 591     c->dstY= dstY;
 592     c->lumBufIndex= lumBufIndex;
 593     c->chrBufIndex= chrBufIndex;
 594     c->lastInLumBuf= lastInLumBuf;
 595     c->lastInChrBuf= lastInChrBuf;
 596
 597     return dstY - lastDstY;
 598 }
 599
 600 static void sws_init_swScale_c(SwsContext *c)
 601 {
 602     enum PixelFormat srcFormat = c->srcFormat;
 603
 604     c->yuv2nv12X    = yuv2nv12X_c;
 605     c->yuv2yuv1     = yuv2yuv1_c;
 606     c->yuv2yuvX     = yuv2yuvX_c;
 607     c->yuv2packed1  = yuv2packed1_c;
 608     c->yuv2packed2  = yuv2packed2_c;
 609     c->yuv2packedX  = yuv2packedX_c;
 610
 611     c->hScale       = hScale_c;
 612
 613     if (c->flags & SWS_FAST_BILINEAR)
 614     {
 615         c->hyscale_fast = hyscale_fast_c;
 616         c->hcscale_fast = hcscale_fast_c;
 617     }
 618
 619     c->chrToYV12 = NULL;
 620     switch(srcFormat) {
 621         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
 622         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
 623         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
 624         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
 625         case PIX_FMT_RGB8     :
 626         case PIX_FMT_BGR8     :
 627         case PIX_FMT_PAL8     :
 628         case PIX_FMT_BGR4_BYTE:
 629         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
 630         case PIX_FMT_GRAY16BE :
 631         case PIX_FMT_YUV420P9BE:
 632         case PIX_FMT_YUV422P10BE:
 633         case PIX_FMT_YUV420P10BE:
 634         case PIX_FMT_YUV420P16BE:
 635         case PIX_FMT_YUV422P16BE:
 636         case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
 637         case PIX_FMT_GRAY16LE :
 638         case PIX_FMT_YUV420P9LE:
 639         case PIX_FMT_YUV422P10LE:
 640         case PIX_FMT_YUV420P10LE:
 641         case PIX_FMT_YUV420P16LE:
 642         case PIX_FMT_YUV422P16LE:
 643         case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
 644     }
 645     if (c->chrSrcHSubSample) {
 646         switch(srcFormat) {
 647         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break;
 648         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break;
 649         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break;
 650         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break;
 651         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
 652         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
 653         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
 654         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
 655         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
 656         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
 657         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
 658         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
 659         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
 660         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
 661         }
 662     } else {
 663         switch(srcFormat) {
 664         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break;
 665         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break;
 666         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break;
 667         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break;
 668         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
 669         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
 670         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
 671         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
 672         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
 673         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
 674         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
 675         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
 676         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
 677         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
 678         }
 679     }
 680
 681     c->lumToYV12 = NULL;
 682     c->alpToYV12 = NULL;
 683     switch (srcFormat) {
 684     case PIX_FMT_YUYV422  :
 685     case PIX_FMT_GRAY8A   :
 686                             c->lumToYV12 = yuy2ToY_c; break;
 687     case PIX_FMT_UYVY422  :
 688                             c->lumToYV12 = uyvyToY_c; break;
 689     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
 690     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
 691     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
 692     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
 693     case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
 694     case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
 695     case PIX_FMT_RGB8     :
 696     case PIX_FMT_BGR8     :
 697     case PIX_FMT_PAL8     :
 698     case PIX_FMT_BGR4_BYTE:
 699     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
 700     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
 701     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
 702     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
 703     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
 704     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
 705     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
 706     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break;
 707     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break;
 708     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break;
 709     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break;
 710     }
 711     if (c->alpPixBuf) {
 712         switch (srcFormat) {
 713         case PIX_FMT_RGB32  :
 714         case PIX_FMT_RGB32_1:
 715         case PIX_FMT_BGR32  :
 716         case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
 717         case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break;
 718         case PIX_FMT_PAL8   : c->alpToYV12 = palToA; break;
 719         }
 720     }
 721
 722     if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
 723         c->hScale16= hScale16_c;
 724
 725     switch (srcFormat) {
 726     case PIX_FMT_GRAY8A :
 727         c->alpSrcOffset = 1;
 728         break;
 729     case PIX_FMT_RGB32  :
 730     case PIX_FMT_BGR32  :
 731         c->alpSrcOffset = 3;
 732         break;
 733     }
 734
 735     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
 736         if (c->srcRange) {
 737             c->lumConvertRange = lumRangeFromJpeg_c;
 738             c->chrConvertRange = chrRangeFromJpeg_c;
 739         } else {
 740             c->lumConvertRange = lumRangeToJpeg_c;
 741             c->chrConvertRange = chrRangeToJpeg_c;
 742         }
 743     }
 744
 745     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
 746           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
 747         c->needs_hcscale = 1;
 748 }