/* Captured from the git.sesse.net gitweb view: "Git - ffmpeg/blob - libswscale/swscale.c" */

/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  20
/*
  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  x -> x
  YUV9 -> YV12
  YUV9/YV12 -> Y800
  Y800 -> YUV9/YV12
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR15 -> BGR16
*/
  36
/*
tested special converters (most are tested actually, but I did not write it down ...)
 YV12 -> BGR12/BGR16
 YV12 -> YV12
 BGR15 -> BGR16
 BGR16 -> BGR16
 YVU9 -> YV12

untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR24 -> YV12
*/
  53
  54 #include <inttypes.h>
  55 #include <string.h>
  56 #include <math.h>
  57 #include <stdio.h>
  58 #include "config.h"
  59 #include <assert.h>
  60 #include "swscale.h"
  61 #include "swscale_internal.h"
  62 #include "rgb2rgb.h"
  63 #include "libavutil/intreadwrite.h"
  64 #include "libavutil/cpu.h"
  65 #include "libavutil/avutil.h"
  66 #include "libavutil/mathematics.h"
  67 #include "libavutil/bswap.h"
  68 #include "libavutil/pixdesc.h"
  69
  70 #define DITHER1XBPP
  71
  72 #define RGB2YUV_SHIFT 15
  73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  82
  83 static const double rgb2yuv_table[8][9]={
  84     {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
  85     {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
  86     {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
  87     {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
  88     {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
  89     {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
  90     {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
  91     {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
  92 };
  93
/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
optimize C code (YV12 / minmax)
add support for packed pixel YUV input & output
add support for Y8 output
optimize BGR24 & BGR32
add BGR4 output support
write special BGR->BGR scaler
*/
 108
 109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
 110 {  1,   3,   1,   3,   1,   3,   1,   3, },
 111 {  2,   0,   2,   0,   2,   0,   2,   0, },
 112 };
 113
 114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
 115 {  6,   2,   6,   2,   6,   2,   6,   2, },
 116 {  0,   4,   0,   4,   0,   4,   0,   4, },
 117 };
 118
 119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
 120 {  8,   4,  11,   7,   8,   4,  11,   7, },
 121 {  2,  14,   1,  13,   2,  14,   1,  13, },
 122 { 10,   6,   9,   5,  10,   6,   9,   5, },
 123 {  0,  12,   3,  15,   0,  12,   3,  15, },
 124 };
 125
 126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
 127 { 17,   9,  23,  15,  16,   8,  22,  14, },
 128 {  5,  29,   3,  27,   4,  28,   2,  26, },
 129 { 21,  13,  19,  11,  20,  12,  18,  10, },
 130 {  0,  24,   6,  30,   1,  25,   7,  31, },
 131 { 16,   8,  22,  14,  17,   9,  23,  15, },
 132 {  4,  28,   2,  26,   5,  29,   3,  27, },
 133 { 20,  12,  18,  10,  21,  13,  19,  11, },
 134 {  1,  25,   7,  31,   0,  24,   6,  30, },
 135 };
 136
 137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
 138 {  0,  55,  14,  68,   3,  58,  17,  72, },
 139 { 37,  18,  50,  32,  40,  22,  54,  35, },
 140 {  9,  64,   5,  59,  13,  67,   8,  63, },
 141 { 46,  27,  41,  23,  49,  31,  44,  26, },
 142 {  2,  57,  16,  71,   1,  56,  15,  70, },
 143 { 39,  21,  52,  34,  38,  19,  51,  33, },
 144 { 11,  66,   7,  62,  10,  65,   6,  60, },
 145 { 48,  30,  43,  25,  47,  29,  42,  24, },
 146 };
 147
 148 #if 1
 149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 150 {117,  62, 158, 103, 113,  58, 155, 100, },
 151 { 34, 199,  21, 186,  31, 196,  17, 182, },
 152 {144,  89, 131,  76, 141,  86, 127,  72, },
 153 {  0, 165,  41, 206,  10, 175,  52, 217, },
 154 {110,  55, 151,  96, 120,  65, 162, 107, },
 155 { 28, 193,  14, 179,  38, 203,  24, 189, },
 156 {138,  83, 124,  69, 148,  93, 134,  79, },
 157 {  7, 172,  48, 213,   3, 168,  45, 210, },
 158 };
 159 #elif 1
 160 // tries to correct a gamma of 1.5
 161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 162 {  0, 143,  18, 200,   2, 156,  25, 215, },
 163 { 78,  28, 125,  64,  89,  36, 138,  74, },
 164 { 10, 180,   3, 161,  16, 195,   8, 175, },
 165 {109,  51,  93,  38, 121,  60, 105,  47, },
 166 {  1, 152,  23, 210,   0, 147,  20, 205, },
 167 { 85,  33, 134,  71,  81,  30, 130,  67, },
 168 { 14, 190,   6, 171,  12, 185,   5, 166, },
 169 {117,  57, 101,  44, 113,  54,  97,  41, },
 170 };
 171 #elif 1
 172 // tries to correct a gamma of 2.0
 173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 174 {  0, 124,   8, 193,   0, 140,  12, 213, },
 175 { 55,  14, 104,  42,  66,  19, 119,  52, },
 176 {  3, 168,   1, 145,   6, 187,   3, 162, },
 177 { 86,  31,  70,  21,  99,  39,  82,  28, },
 178 {  0, 134,  11, 206,   0, 129,   9, 200, },
 179 { 62,  17, 114,  48,  58,  16, 109,  45, },
 180 {  5, 181,   2, 157,   4, 175,   1, 151, },
 181 { 95,  36,  78,  26,  90,  34,  74,  24, },
 182 };
 183 #else
 184 // tries to correct a gamma of 2.5
 185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 186 {  0, 107,   3, 187,   0, 125,   6, 212, },
 187 { 39,   7,  86,  28,  49,  11, 102,  36, },
 188 {  1, 158,   0, 131,   3, 180,   1, 151, },
 189 { 68,  19,  52,  12,  81,  25,  64,  17, },
 190 {  0, 119,   5, 203,   0, 113,   4, 195, },
 191 { 45,   9,  96,  33,  42,   8,  91,  30, },
 192 {  2, 172,   1, 144,   2, 165,   0, 137, },
 193 { 77,  23,  60,  15,  72,  21,  56,  14, },
 194 };
 195 #endif
 196
 197 static av_always_inline void
 198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
 199                       int lumFilterSize, const int16_t *chrFilter,
 200                       const int16_t **chrUSrc, const int16_t **chrVSrc,
 201                       int chrFilterSize, const int16_t **alpSrc,
 202                       uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
 203                       uint16_t *aDest, int dstW, int chrDstW,
 204                       int big_endian, int output_bits)
 205 {
 206     //FIXME Optimize (just quickly written not optimized..)
 207     int i;
 208     int shift = 11 + 16 - output_bits;
 209
 210 #define output_pixel(pos, val) \
 211     if (big_endian) { \
 212         if (output_bits == 16) { \
 213             AV_WB16(pos, av_clip_uint16(val >> shift)); \
 214         } else { \
 215             AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
 216         } \
 217     } else { \
 218         if (output_bits == 16) { \
 219             AV_WL16(pos, av_clip_uint16(val >> shift)); \
 220         } else { \
 221             AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
 222         } \
 223     }
 224     for (i = 0; i < dstW; i++) {
 225         int val = 1 << (26-output_bits);
 226         int j;
 227
 228         for (j = 0; j < lumFilterSize; j++)
 229             val += lumSrc[j][i] * lumFilter[j];
 230
 231         output_pixel(&dest[i], val);
 232     }
 233
 234     if (uDest) {
 235         for (i = 0; i < chrDstW; i++) {
 236             int u = 1 << (26-output_bits);
 237             int v = 1 << (26-output_bits);
 238             int j;
 239
 240             for (j = 0; j < chrFilterSize; j++) {
 241                 u += chrUSrc[j][i] * chrFilter[j];
 242                 v += chrVSrc[j][i] * chrFilter[j];
 243             }
 244
 245             output_pixel(&uDest[i], u);
 246             output_pixel(&vDest[i], v);
 247         }
 248     }
 249
 250     if (CONFIG_SWSCALE_ALPHA && aDest) {
 251         for (i = 0; i < dstW; i++) {
 252             int val = 1 << (26-output_bits);
 253             int j;
 254
 255             for (j = 0; j < lumFilterSize; j++)
 256                 val += alpSrc[j][i] * lumFilter[j];
 257
 258             output_pixel(&aDest[i], val);
 259         }
 260     }
 261 #undef output_pixel
 262 }
 263
 264 #define yuv2NBPS(bits, BE_LE, is_be) \
 265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
 266                               const int16_t **lumSrc, int lumFilterSize, \
 267                               const int16_t *chrFilter, const int16_t **chrUSrc, \
 268                               const int16_t **chrVSrc, \
 269                               int chrFilterSize, const int16_t **alpSrc, \
 270                               uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
 271                               uint8_t *_aDest, int dstW, int chrDstW) \
 272 { \
 273     uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
 274              *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
 275     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
 276                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
 277                           alpSrc, \
 278                           dest, uDest, vDest, aDest, \
 279                           dstW, chrDstW, is_be, bits); \
 280 }
 281 yuv2NBPS( 9, BE, 1);
 282 yuv2NBPS( 9, LE, 0);
 283 yuv2NBPS(10, BE, 1);
 284 yuv2NBPS(10, LE, 0);
 285 yuv2NBPS(16, BE, 1);
 286 yuv2NBPS(16, LE, 0);
 287
 288 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 289                               const int16_t **lumSrc, int lumFilterSize,
 290                               const int16_t *chrFilter, const int16_t **chrUSrc,
 291                               const int16_t **chrVSrc,
 292                               int chrFilterSize, const int16_t **alpSrc,
 293                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
 294                               uint8_t *aDest, int dstW, int chrDstW)
 295 {
 296     //FIXME Optimize (just quickly written not optimized..)
 297     int i;
 298     for (i=0; i<dstW; i++) {
 299         int val=1<<18;
 300         int j;
 301         for (j=0; j<lumFilterSize; j++)
 302             val += lumSrc[j][i] * lumFilter[j];
 303
 304         dest[i]= av_clip_uint8(val>>19);
 305     }
 306
 307     if (uDest)
 308         for (i=0; i<chrDstW; i++) {
 309             int u=1<<18;
 310             int v=1<<18;
 311             int j;
 312             for (j=0; j<chrFilterSize; j++) {
 313                 u += chrUSrc[j][i] * chrFilter[j];
 314                 v += chrVSrc[j][i] * chrFilter[j];
 315             }
 316
 317             uDest[i]= av_clip_uint8(u>>19);
 318             vDest[i]= av_clip_uint8(v>>19);
 319         }
 320
 321     if (CONFIG_SWSCALE_ALPHA && aDest)
 322         for (i=0; i<dstW; i++) {
 323             int val=1<<18;
 324             int j;
 325             for (j=0; j<lumFilterSize; j++)
 326                 val += alpSrc[j][i] * lumFilter[j];
 327
 328             aDest[i]= av_clip_uint8(val>>19);
 329         }
 330
 331 }
 332
 333 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
 334                                const int16_t **lumSrc, int lumFilterSize,
 335                                const int16_t *chrFilter, const int16_t **chrUSrc,
 336                                const int16_t **chrVSrc, int chrFilterSize,
 337                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
 338                                uint8_t *vDest, uint8_t *aDest,
 339                                int dstW, int chrDstW)
 340 {
 341     enum PixelFormat dstFormat = c->dstFormat;
 342
 343     //FIXME Optimize (just quickly written not optimized..)
 344     int i;
 345     for (i=0; i<dstW; i++) {
 346         int val=1<<18;
 347         int j;
 348         for (j=0; j<lumFilterSize; j++)
 349             val += lumSrc[j][i] * lumFilter[j];
 350
 351         dest[i]= av_clip_uint8(val>>19);
 352     }
 353
 354     if (!uDest)
 355         return;
 356
 357     if (dstFormat == PIX_FMT_NV12)
 358         for (i=0; i<chrDstW; i++) {
 359             int u=1<<18;
 360             int v=1<<18;
 361             int j;
 362             for (j=0; j<chrFilterSize; j++) {
 363                 u += chrUSrc[j][i] * chrFilter[j];
 364                 v += chrVSrc[j][i] * chrFilter[j];
 365             }
 366
 367             uDest[2*i]= av_clip_uint8(u>>19);
 368             uDest[2*i+1]= av_clip_uint8(v>>19);
 369         }
 370     else
 371         for (i=0; i<chrDstW; i++) {
 372             int u=1<<18;
 373             int v=1<<18;
 374             int j;
 375             for (j=0; j<chrFilterSize; j++) {
 376                 u += chrUSrc[j][i] * chrFilter[j];
 377                 v += chrVSrc[j][i] * chrFilter[j];
 378             }
 379
 380             uDest[2*i]= av_clip_uint8(v>>19);
 381             uDest[2*i+1]= av_clip_uint8(u>>19);
 382         }
 383 }
 384
 385 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
 386     for (i=0; i<(dstW>>1); i++) {\
 387         int j;\
 388         int Y1 = 1<<18;\
 389         int Y2 = 1<<18;\
 390         int U  = 1<<18;\
 391         int V  = 1<<18;\
 392         int av_unused A1, A2;\
 393         type av_unused *r, *b, *g;\
 394         const int i2= 2*i;\
 395         \
 396         for (j=0; j<lumFilterSize; j++) {\
 397             Y1 += lumSrc[j][i2] * lumFilter[j];\
 398             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
 399         }\
 400         for (j=0; j<chrFilterSize; j++) {\
 401             U += chrUSrc[j][i] * chrFilter[j];\
 402             V += chrVSrc[j][i] * chrFilter[j];\
 403         }\
 404         Y1>>=19;\
 405         Y2>>=19;\
 406         U >>=19;\
 407         V >>=19;\
 408         if (alpha) {\
 409             A1 = 1<<18;\
 410             A2 = 1<<18;\
 411             for (j=0; j<lumFilterSize; j++) {\
 412                 A1 += alpSrc[j][i2  ] * lumFilter[j];\
 413                 A2 += alpSrc[j][i2+1] * lumFilter[j];\
 414             }\
 415             A1>>=19;\
 416             A2>>=19;\
 417         }
 418
 419 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
 420         YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
 421         if ((Y1|Y2|U|V)&256) {\
 422             if (Y1>255)   Y1=255; \
 423             else if (Y1<0)Y1=0;   \
 424             if (Y2>255)   Y2=255; \
 425             else if (Y2<0)Y2=0;   \
 426             if (U>255)    U=255;  \
 427             else if (U<0) U=0;    \
 428             if (V>255)    V=255;  \
 429             else if (V<0) V=0;    \
 430         }\
 431         if (alpha && ((A1|A2)&256)) {\
 432             A1=av_clip_uint8(A1);\
 433             A2=av_clip_uint8(A2);\
 434         }
 435
 436 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
 437     for (i=0; i<dstW; i++) {\
 438         int j;\
 439         int Y = 0;\
 440         int U = -128<<19;\
 441         int V = -128<<19;\
 442         int av_unused A;\
 443         int R,G,B;\
 444         \
 445         for (j=0; j<lumFilterSize; j++) {\
 446             Y += lumSrc[j][i     ] * lumFilter[j];\
 447         }\
 448         for (j=0; j<chrFilterSize; j++) {\
 449             U += chrUSrc[j][i] * chrFilter[j];\
 450             V += chrVSrc[j][i] * chrFilter[j];\
 451         }\
 452         Y >>=10;\
 453         U >>=10;\
 454         V >>=10;\
 455         if (alpha) {\
 456             A = rnd;\
 457             for (j=0; j<lumFilterSize; j++)\
 458                 A += alpSrc[j][i     ] * lumFilter[j];\
 459             A >>=19;\
 460             if (A&256)\
 461                 A = av_clip_uint8(A);\
 462         }
 463
 464 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
 465     YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
 466         Y-= c->yuv2rgb_y_offset;\
 467         Y*= c->yuv2rgb_y_coeff;\
 468         Y+= rnd;\
 469         R= Y + V*c->yuv2rgb_v2r_coeff;\
 470         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
 471         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
 472         if ((R|G|B)&(0xC0000000)) {\
 473             if (R>=(256<<22))   R=(256<<22)-1; \
 474             else if (R<0)R=0;   \
 475             if (G>=(256<<22))   G=(256<<22)-1; \
 476             else if (G<0)G=0;   \
 477             if (B>=(256<<22))   B=(256<<22)-1; \
 478             else if (B<0)B=0;   \
 479         }
 480
 481 #define YSCALE_YUV_2_GRAY16_C \
 482     for (i=0; i<(dstW>>1); i++) {\
 483         int j;\
 484         int Y1 = 1<<18;\
 485         int Y2 = 1<<18;\
 486         int U  = 1<<18;\
 487         int V  = 1<<18;\
 488         \
 489         const int i2= 2*i;\
 490         \
 491         for (j=0; j<lumFilterSize; j++) {\
 492             Y1 += lumSrc[j][i2] * lumFilter[j];\
 493             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
 494         }\
 495         Y1>>=11;\
 496         Y2>>=11;\
 497         if ((Y1|Y2|U|V)&65536) {\
 498             if (Y1>65535)   Y1=65535; \
 499             else if (Y1<0)Y1=0;   \
 500             if (Y2>65535)   Y2=65535; \
 501             else if (Y2<0)Y2=0;   \
 502         }
 503
 504 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
 505     YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
 506     r = (type *)c->table_rV[V];   \
 507     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
 508     b = (type *)c->table_bU[U];
 509
 510 #define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
 511     for (i=0; i<(dstW>>1); i++) { \
 512         const int i2= 2*i;       \
 513         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
 514         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
 515         int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
 516         int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
 517         type av_unused *r, *b, *g;                                    \
 518         int av_unused A1, A2;                                         \
 519         if (alpha) {\
 520             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
 521             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
 522         }
 523
 524 #define YSCALE_YUV_2_GRAY16_2_C   \
 525     for (i=0; i<(dstW>>1); i++) { \
 526         const int i2= 2*i;       \
 527         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
 528         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
 529
 530 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
 531     YSCALE_YUV_2_PACKED2_C(type,alpha)\
 532     r = (type *)c->table_rV[V];\
 533     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
 534     b = (type *)c->table_bU[U];
 535
 536 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
 537     for (i=0; i<(dstW>>1); i++) {\
 538         const int i2= 2*i;\
 539         int Y1= buf0[i2  ]>>7;\
 540         int Y2= buf0[i2+1]>>7;\
 541         int U= (ubuf1[i])>>7;\
 542         int V= (vbuf1[i])>>7;\
 543         type av_unused *r, *b, *g;\
 544         int av_unused A1, A2;\
 545         if (alpha) {\
 546             A1= abuf0[i2  ]>>7;\
 547             A2= abuf0[i2+1]>>7;\
 548         }
 549
 550 #define YSCALE_YUV_2_GRAY16_1_C \
 551     for (i=0; i<(dstW>>1); i++) {\
 552         const int i2= 2*i;\
 553         int Y1= buf0[i2  ]<<1;\
 554         int Y2= buf0[i2+1]<<1;
 555
 556 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
 557     YSCALE_YUV_2_PACKED1_C(type,alpha)\
 558     r = (type *)c->table_rV[V];\
 559     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
 560     b = (type *)c->table_bU[U];
 561
 562 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
 563     for (i=0; i<(dstW>>1); i++) {\
 564         const int i2= 2*i;\
 565         int Y1= buf0[i2  ]>>7;\
 566         int Y2= buf0[i2+1]>>7;\
 567         int U= (ubuf0[i] + ubuf1[i])>>8;\
 568         int V= (vbuf0[i] + vbuf1[i])>>8;\
 569         type av_unused *r, *b, *g;\
 570         int av_unused A1, A2;\
 571         if (alpha) {\
 572             A1= abuf0[i2  ]>>7;\
 573             A2= abuf0[i2+1]>>7;\
 574         }
 575
 576 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
 577     YSCALE_YUV_2_PACKED1B_C(type,alpha)\
 578     r = (type *)c->table_rV[V];\
 579     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
 580     b = (type *)c->table_bU[U];
 581
 582 #define YSCALE_YUV_2_MONO2_C \
 583     const uint8_t * const d128=dither_8x8_220[y&7];\
 584     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
 585     for (i=0; i<dstW-7; i+=8) {\
 586         int acc;\
 587         acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
 588         acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
 589         acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
 590         acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
 591         acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
 592         acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
 593         acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
 594         acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
 595         ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
 596         dest++;\
 597     }
 598
 599 #define YSCALE_YUV_2_MONOX_C \
 600     const uint8_t * const d128=dither_8x8_220[y&7];\
 601     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
 602     int acc=0;\
 603     for (i=0; i<dstW-1; i+=2) {\
 604         int j;\
 605         int Y1=1<<18;\
 606         int Y2=1<<18;\
 607 \
 608         for (j=0; j<lumFilterSize; j++) {\
 609             Y1 += lumSrc[j][i] * lumFilter[j];\
 610             Y2 += lumSrc[j][i+1] * lumFilter[j];\
 611         }\
 612         Y1>>=19;\
 613         Y2>>=19;\
 614         if ((Y1|Y2)&256) {\
 615             if (Y1>255)   Y1=255;\
 616             else if (Y1<0)Y1=0;\
 617             if (Y2>255)   Y2=255;\
 618             else if (Y2<0)Y2=0;\
 619         }\
 620         acc+= acc + g[Y1+d128[(i+0)&7]];\
 621         acc+= acc + g[Y2+d128[(i+1)&7]];\
 622         if ((i&7)==6) {\
 623             ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
 624             dest++;\
 625         }\
 626     }
 627
 628 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
 629     switch(c->dstFormat) {\
 630     case PIX_FMT_RGB48BE:\
 631     case PIX_FMT_RGB48LE:\
 632         func(uint8_t,0)\
 633             ((uint8_t*)dest)[ 0]= r[Y1];\
 634             ((uint8_t*)dest)[ 1]= r[Y1];\
 635             ((uint8_t*)dest)[ 2]= g[Y1];\
 636             ((uint8_t*)dest)[ 3]= g[Y1];\
 637             ((uint8_t*)dest)[ 4]= b[Y1];\
 638             ((uint8_t*)dest)[ 5]= b[Y1];\
 639             ((uint8_t*)dest)[ 6]= r[Y2];\
 640             ((uint8_t*)dest)[ 7]= r[Y2];\
 641             ((uint8_t*)dest)[ 8]= g[Y2];\
 642             ((uint8_t*)dest)[ 9]= g[Y2];\
 643             ((uint8_t*)dest)[10]= b[Y2];\
 644             ((uint8_t*)dest)[11]= b[Y2];\
 645             dest+=12;\
 646         }\
 647         break;\
 648     case PIX_FMT_BGR48BE:\
 649     case PIX_FMT_BGR48LE:\
 650         func(uint8_t,0)\
 651             ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
 652             ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
 653             ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
 654             ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
 655             ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
 656             ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
 657             dest+=12;\
 658         }\
 659         break;\
 660     case PIX_FMT_RGBA:\
 661     case PIX_FMT_BGRA:\
 662         if (CONFIG_SMALL) {\
 663             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
 664             func(uint32_t,needAlpha)\
 665                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
 666                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
 667             }\
 668         } else {\
 669             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
 670                 func(uint32_t,1)\
 671                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
 672                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
 673                 }\
 674             } else {\
 675                 func(uint32_t,0)\
 676                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
 677                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
 678                 }\
 679             }\
 680         }\
 681         break;\
 682     case PIX_FMT_ARGB:\
 683     case PIX_FMT_ABGR:\
 684         if (CONFIG_SMALL) {\
 685             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
 686             func(uint32_t,needAlpha)\
 687                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
 688                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
 689             }\
 690         } else {\
 691             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
 692                 func(uint32_t,1)\
 693                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
 694                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
 695                 }\
 696             } else {\
 697                 func(uint32_t,0)\
 698                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
 699                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
 700                 }\
 701             }\
 702         }                \
 703         break;\
 704     case PIX_FMT_RGB24:\
 705         func(uint8_t,0)\
 706             ((uint8_t*)dest)[0]= r[Y1];\
 707             ((uint8_t*)dest)[1]= g[Y1];\
 708             ((uint8_t*)dest)[2]= b[Y1];\
 709             ((uint8_t*)dest)[3]= r[Y2];\
 710             ((uint8_t*)dest)[4]= g[Y2];\
 711             ((uint8_t*)dest)[5]= b[Y2];\
 712             dest+=6;\
 713         }\
 714         break;\
 715     case PIX_FMT_BGR24:\
 716         func(uint8_t,0)\
 717             ((uint8_t*)dest)[0]= b[Y1];\
 718             ((uint8_t*)dest)[1]= g[Y1];\
 719             ((uint8_t*)dest)[2]= r[Y1];\
 720             ((uint8_t*)dest)[3]= b[Y2];\
 721             ((uint8_t*)dest)[4]= g[Y2];\
 722             ((uint8_t*)dest)[5]= r[Y2];\
 723             dest+=6;\
 724         }\
 725         break;\
 726     case PIX_FMT_RGB565BE:\
 727     case PIX_FMT_RGB565LE:\
 728     case PIX_FMT_BGR565BE:\
 729     case PIX_FMT_BGR565LE:\
 730         {\
 731             const int dr1= dither_2x2_8[y&1    ][0];\
 732             const int dg1= dither_2x2_4[y&1    ][0];\
 733             const int db1= dither_2x2_8[(y&1)^1][0];\
 734             const int dr2= dither_2x2_8[y&1    ][1];\
 735             const int dg2= dither_2x2_4[y&1    ][1];\
 736             const int db2= dither_2x2_8[(y&1)^1][1];\
 737             func(uint16_t,0)\
 738                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
 739                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
 740             }\
 741         }\
 742         break;\
 743     case PIX_FMT_RGB555BE:\
 744     case PIX_FMT_RGB555LE:\
 745     case PIX_FMT_BGR555BE:\
 746     case PIX_FMT_BGR555LE:\
 747         {\
 748             const int dr1= dither_2x2_8[y&1    ][0];\
 749             const int dg1= dither_2x2_8[y&1    ][1];\
 750             const int db1= dither_2x2_8[(y&1)^1][0];\
 751             const int dr2= dither_2x2_8[y&1    ][1];\
 752             const int dg2= dither_2x2_8[y&1    ][0];\
 753             const int db2= dither_2x2_8[(y&1)^1][1];\
 754             func(uint16_t,0)\
 755                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
 756                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
 757             }\
 758         }\
 759         break;\
 760     case PIX_FMT_RGB444BE:\
 761     case PIX_FMT_RGB444LE:\
 762     case PIX_FMT_BGR444BE:\
 763     case PIX_FMT_BGR444LE:\
 764         {\
 765             const int dr1= dither_4x4_16[y&3    ][0];\
 766             const int dg1= dither_4x4_16[y&3    ][1];\
 767             const int db1= dither_4x4_16[(y&3)^3][0];\
 768             const int dr2= dither_4x4_16[y&3    ][1];\
 769             const int dg2= dither_4x4_16[y&3    ][0];\
 770             const int db2= dither_4x4_16[(y&3)^3][1];\
 771             func(uint16_t,0)\
 772                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
 773                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
 774             }\
 775         }\
 776         break;\
 777     case PIX_FMT_RGB8:\
 778     case PIX_FMT_BGR8:\
 779         {\
 780             const uint8_t * const d64= dither_8x8_73[y&7];\
 781             const uint8_t * const d32= dither_8x8_32[y&7];\
 782             func(uint8_t,0)\
 783                 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
 784                 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
 785             }\
 786         }\
 787         break;\
 788     case PIX_FMT_RGB4:\
 789     case PIX_FMT_BGR4:\
 790         {\
 791             const uint8_t * const d64= dither_8x8_73 [y&7];\
 792             const uint8_t * const d128=dither_8x8_220[y&7];\
 793             func(uint8_t,0)\
 794                 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
 795                                  + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
 796             }\
 797         }\
 798         break;\
 799     case PIX_FMT_RGB4_BYTE:\
 800     case PIX_FMT_BGR4_BYTE:\
 801         {\
 802             const uint8_t * const d64= dither_8x8_73 [y&7];\
 803             const uint8_t * const d128=dither_8x8_220[y&7];\
 804             func(uint8_t,0)\
 805                 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
 806                 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
 807             }\
 808         }\
 809         break;\
 810     case PIX_FMT_MONOBLACK:\
 811     case PIX_FMT_MONOWHITE:\
 812         {\
 813             func_monoblack\
 814         }\
 815         break;\
 816     case PIX_FMT_YUYV422:\
 817         func2\
 818             ((uint8_t*)dest)[2*i2+0]= Y1;\
 819             ((uint8_t*)dest)[2*i2+1]= U;\
 820             ((uint8_t*)dest)[2*i2+2]= Y2;\
 821             ((uint8_t*)dest)[2*i2+3]= V;\
 822         }                \
 823         break;\
 824     case PIX_FMT_UYVY422:\
 825         func2\
 826             ((uint8_t*)dest)[2*i2+0]= U;\
 827             ((uint8_t*)dest)[2*i2+1]= Y1;\
 828             ((uint8_t*)dest)[2*i2+2]= V;\
 829             ((uint8_t*)dest)[2*i2+3]= Y2;\
 830         }                \
 831         break;\
 832     case PIX_FMT_GRAY16BE:\
 833         func_g16\
 834             ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
 835             ((uint8_t*)dest)[2*i2+1]= Y1;\
 836             ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
 837             ((uint8_t*)dest)[2*i2+3]= Y2;\
 838         }                \
 839         break;\
 840     case PIX_FMT_GRAY16LE:\
 841         func_g16\
 842             ((uint8_t*)dest)[2*i2+0]= Y1;\
 843             ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
 844             ((uint8_t*)dest)[2*i2+2]= Y2;\
 845             ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
 846         }                \
 847         break;\
 848     }
 849
/**
 * Write one line of packed output using the "X" variants of the conversion
 * helper macros.
 *
 * The whole body is generated by YSCALE_YUV_2_ANYRGB_C, whose expansion
 * contains the PIX_FMT_* case labels and the per-format store code; this
 * function only selects which helper macros are plugged into it.
 * NOTE(review): how the filter and source arguments are consumed is decided
 * inside those helper macros, which are defined elsewhere in the file.
 */
static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
    int i; /* referenced by name inside the macro expansion */
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
}
 859
/**
 * Write one line of 24/32-bit RGB or BGR output (optionally with alpha).
 *
 * The per-pixel values R, G, B and A and the enclosing loop are supplied by
 * YSCALE_YUV_2_RGBX_FULL_C (defined elsewhere); this function only decides
 * the byte order of the stores and where the alpha byte goes.
 *
 * The case labels deliberately fall through to compute aidx, the offset of
 * the alpha byte relative to dest:
 *   - xRGB/xBGR (alpha first): dest is advanced by one and aidx ends up -1,
 *     so the alpha byte is stored just before the three colour bytes.
 *   - 24-bit formats: aidx ends up 2, so the "alpha" store hits dest[2] and
 *     is then overwritten by the third colour component.
 *   - RGBx/BGRx (alpha last): aidx keeps its initial value 3.
 */
static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
                                   const int16_t **lumSrc, int lumFilterSize,
                                   const int16_t *chrFilter, const int16_t **chrUSrc,
                                   const int16_t **chrVSrc, int chrFilterSize,
                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
    int i;
    int step= c->dstFormatBpp/8; /* bytes per output pixel */
    int aidx= 3;                 /* alpha offset, adjusted by the fallthrough below */

    switch(c->dstFormat) {
    case PIX_FMT_ARGB:
        dest++;
        aidx= 0;
        /* fall through: the decrement below turns aidx into -1 */
    case PIX_FMT_RGB24:
        aidx--;
        /* fall through */
    case PIX_FMT_RGBA:
        if (CONFIG_SMALL) {
            /* size-optimised build: one loop, alpha decided per pixel */
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
                dest[aidx]= needAlpha ? A : 255;
                dest[0]= R>>22;
                dest[1]= G>>22;
                dest[2]= B>>22;
                dest+= step;
            }
        } else {
            /* separate loops for the alpha and the opaque case */
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
                    dest[aidx]= A;
                    dest[0]= R>>22;
                    dest[1]= G>>22;
                    dest[2]= B>>22;
                    dest+= step;
                }
            } else {
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
                    dest[aidx]= 255;
                    dest[0]= R>>22;
                    dest[1]= G>>22;
                    dest[2]= B>>22;
                    dest+= step;
                }
            }
        }
        break;
    case PIX_FMT_ABGR:
        dest++;
        aidx= 0;
        /* fall through: the decrement below turns aidx into -1 */
    case PIX_FMT_BGR24:
        aidx--;
        /* fall through */
    case PIX_FMT_BGRA:
        /* same as the RGB group above with the R and B stores swapped */
        if (CONFIG_SMALL) {
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
                dest[aidx]= needAlpha ? A : 255;
                dest[0]= B>>22;
                dest[1]= G>>22;
                dest[2]= R>>22;
                dest+= step;
            }
        } else {
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
                    dest[aidx]= A;
                    dest[0]= B>>22;
                    dest[1]= G>>22;
                    dest[2]= R>>22;
                    dest+= step;
                }
            } else {
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
                    dest[aidx]= 255;
                    dest[0]= B>>22;
                    dest[1]= G>>22;
                    dest[2]= R>>22;
                    dest+= step;
                }
            }
        }
        break;
    default:
        /* any other destination format is a caller error */
        assert(0);
    }
}
 945
 946 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
 947 {
 948     int i;
 949     uint8_t *ptr = plane + stride*y;
 950     for (i=0; i<height; i++) {
 951         memset(ptr, val, width);
 952         ptr += stride;
 953     }
 954 }
 955
 956 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
 957 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
 958                        uint8_t *dst, const uint8_t *src, int width, \
 959                        uint32_t *unused) \
 960 { \
 961     int i; \
 962     for (i = 0; i < width; i++) { \
 963         int compA = rfunc(&src[i*6+0]) >> 8; \
 964         int compB = rfunc(&src[i*6+2]) >> 8; \
 965         int compC = rfunc(&src[i*6+4]) >> 8; \
 966  \
 967         dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
 968     } \
 969 } \
 970  \
 971 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
 972                         uint8_t *dstU, uint8_t *dstV, \
 973                         const uint8_t *src1, const uint8_t *src2, \
 974                         int width, uint32_t *unused) \
 975 { \
 976     int i; \
 977     assert(src1==src2); \
 978     for (i = 0; i < width; i++) { \
 979         int compA = rfunc(&src1[6*i + 0]) >> 8; \
 980         int compB = rfunc(&src1[6*i + 2]) >> 8; \
 981         int compC = rfunc(&src1[6*i + 4]) >> 8; \
 982  \
 983         dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
 984         dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
 985     } \
 986 } \
 987  \
 988 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
 989                             uint8_t *dstU, uint8_t *dstV, \
 990                             const uint8_t *src1, const uint8_t *src2, \
 991                             int width, uint32_t *unused) \
 992 { \
 993     int i; \
 994     assert(src1==src2); \
 995     for (i = 0; i < width; i++) { \
 996         int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
 997         int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
 998         int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
 999  \
1000         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1001         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1002     } \
1003 }
1004 rgb48funcs(LE, AV_RL16, r, g, b);
1005 rgb48funcs(BE, AV_RB16, r, g, b);
1006 rgb48funcs(LE, AV_RL16, b, g, r);
1007 rgb48funcs(BE, AV_RB16, b, g, r);
1008
1009 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1010 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1011                        int width, uint32_t *unused)\
1012 {\
1013     int i;\
1014     for (i=0; i<width; i++) {\
1015         int b= (((const type*)src)[i]>>shb)&maskb;\
1016         int g= (((const type*)src)[i]>>shg)&maskg;\
1017         int r= (((const type*)src)[i]>>shr)&maskr;\
1018 \
1019         dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1020     }\
1021 }
1022
1023 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1024 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1025 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1026 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1027 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1028 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1029 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1030 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1031
/**
 * Extract the alpha channel from 4-byte pixels whose alpha is the first byte.
 *
 * @param dst    receives one alpha byte per pixel
 * @param src    packed input, 4 bytes per pixel
 * @param width  number of pixels
 * @param unused ignored
 */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        dst[x] = src[x << 2];
        x++;
    }
}
1039
/**
 * Extract the alpha channel from 4-byte pixels whose alpha is the last byte.
 *
 * @param dst    receives one alpha byte per pixel
 * @param src    packed input, 4 bytes per pixel
 * @param width  number of pixels
 * @param unused ignored
 */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        dst[x] = src[(x << 2) + 3];
        x++;
    }
}
1047
1048 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1049 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1050                        const uint8_t *src, const uint8_t *dummy, \
1051                        int width, uint32_t *unused)\
1052 {\
1053     int i;\
1054     for (i=0; i<width; i++) {\
1055         int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1056         int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1057         int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1058 \
1059         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1060         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1061     }\
1062 }\
1063 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1064                             const uint8_t *src, const uint8_t *dummy, \
1065                             int width, uint32_t *unused)\
1066 {\
1067     int i;\
1068     for (i=0; i<width; i++) {\
1069         int pix0= ((const type*)src)[2*i+0]>>shp;\
1070         int pix1= ((const type*)src)[2*i+1]>>shp;\
1071         int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1072         int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1073         int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1074         g&= maskg|(2*maskg);\
1075 \
1076         g>>=shg;\
1077 \
1078         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1079         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1080     }\
1081 }
1082
1083 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1084 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1085 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1086 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1087 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1088 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1089 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1090 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
1091
/**
 * Look up luma for palettised input.
 *
 * @param dst   receives the low byte of the palette entry of each pixel
 * @param src   one palette index per pixel
 * @param width number of pixels
 * @param pal   palette table indexed by the source byte
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int x = 0;
    while (x < width) {
        const int index = src[x];
        dst[x] = pal[index] & 0xFF;
        x++;
    }
}
1101
/**
 * Look up chroma for palettised input.
 *
 * @param dstU  receives bits 8..15 of the palette entry of each pixel
 * @param dstV  receives bits 16..23 of the palette entry of each pixel
 * @param src1  one palette index per pixel
 * @param src2  must be the same pointer as src1
 * @param width number of pixels
 * @param pal   palette table indexed by the source byte
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int x = 0;

    assert(src1 == src2);
    while (x < width) {
        const int entry = pal[src1[x]];

        dstU[x] = entry >> 8;
        dstV[x] = entry >> 16;
        x++;
    }
}
1115
/**
 * Expand 1 bit-per-pixel input where a set bit means black into 8-bit luma.
 *
 * Bits are consumed most-significant first; a cleared bit becomes 255 and a
 * set bit becomes 0.  When width is not a multiple of 8 the leading bits of
 * the final, partially used source byte are converted as well, so that all
 * width output samples are written.
 *
 * @param dst    receives width luma bytes
 * @param src    packed 1 bpp input, 8 pixels per byte
 * @param width  number of pixels
 * @param unused ignored
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    /* pixels in the last, partially used byte (none for non-positive width) */
    const int tail = width > 0 ? (width & 7) : 0;
    int i, j;

    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (tail) {
        /* i == width/8 here: index of the partially used byte */
        int d= ~src[i];
        for(j=0; j<tail; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1126
/**
 * Expand 1 bit-per-pixel input where a set bit means white into 8-bit luma.
 *
 * Bits are consumed most-significant first; a set bit becomes 255 and a
 * cleared bit becomes 0.  When width is not a multiple of 8 the leading bits
 * of the final, partially used source byte are converted as well, so that
 * all width output samples are written.
 *
 * @param dst    receives width luma bytes
 * @param src    packed 1 bpp input, 8 pixels per byte
 * @param width  number of pixels
 * @param unused ignored
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    /* pixels in the last, partially used byte (none for non-positive width) */
    const int tail = width > 0 ? (width & 7) : 0;
    int i, j;

    for (i=0; i<width/8; i++) {
        int d= src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (tail) {
        /* i == width/8 here: index of the partially used byte */
        int d= src[i];
        for(j=0; j<tail; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1137
1138 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1139                        const int16_t *chrUSrc, const int16_t *chrVSrc,
1140                        const int16_t *alpSrc,
1141                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1142                        uint8_t *aDest, int dstW, int chrDstW)
1143 {
1144     int i;
1145     for (i=0; i<dstW; i++) {
1146         int val= (lumSrc[i]+64)>>7;
1147         dest[i]= av_clip_uint8(val);
1148     }
1149
1150     if (uDest)
1151         for (i=0; i<chrDstW; i++) {
1152             int u=(chrUSrc[i]+64)>>7;
1153             int v=(chrVSrc[i]+64)>>7;
1154             uDest[i]= av_clip_uint8(u);
1155             vDest[i]= av_clip_uint8(v);
1156         }
1157
1158     if (CONFIG_SWSCALE_ALPHA && aDest)
1159         for (i=0; i<dstW; i++) {
1160             int val= (alpSrc[i]+64)>>7;
1161             aDest[i]= av_clip_uint8(val);
1162         }
1163 }
1164
/**
 * vertical bilinear scale YV12 to RGB
 *
 * The body is generated by YSCALE_YUV_2_ANYRGB_C, instantiated with the
 * two-line ("2") helper macros.  yalpha and uvalpha are the blend weights;
 * their complements to 4095 are precomputed below.
 * NOTE(review): which of the two buffers each weight applies to is decided
 * inside the helper macros, which are defined elsewhere in the file.
 */
static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *buf1, const uint16_t *ubuf0,
                          const uint16_t *ubuf1, const uint16_t *vbuf0,
                          const uint16_t *vbuf1, const uint16_t *abuf0,
                          const uint16_t *abuf1, uint8_t *dest, int dstW,
                          int yalpha, int uvalpha, int y)
{
    /* complementary weights; presumably referenced by name in the macros */
    int  yalpha1=4095- yalpha;
    int uvalpha1=4095-uvalpha;
    int i; /* referenced by name inside the macro expansion */

    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
}
1181
/**
 * YV12 to RGB without scaling or interpolating
 *
 * The body is generated by YSCALE_YUV_2_ANYRGB_C.  Two instantiations exist
 * and uvalpha selects between them: below 2048 the "1" helper macros are
 * used, otherwise the "1B" ones.
 * NOTE(review): presumably the "1" variants read chroma from one buffer only
 * and the "1B" variants combine both; confirm against the macro definitions.
 */
static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *ubuf0, const uint16_t *ubuf1,
                          const uint16_t *vbuf0, const uint16_t *vbuf1,
                          const uint16_t *abuf0, uint8_t *dest, int dstW,
                          int uvalpha, enum PixelFormat dstFormat,
                          int flags, int y)
{
    const int yalpha1=0;
    int i; /* referenced by name inside the macro expansion */

    /* The mono path is shared with the two-line version (YSCALE_YUV_2_MONO2_C),
     * so a second luma buffer and blend weight are faked here. */
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
    const int yalpha= 4096; //FIXME ...

    if (uvalpha < 2048) {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
    } else {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
    }
}
1204
1205 //FIXME yuy2* can read up to 7 samples too much
1206
/**
 * Extract luma from a packed line whose luma samples sit at even byte
 * offsets (every second byte, starting with the first).
 *
 * @param dst    receives width luma bytes
 * @param src    packed input line
 * @param width  number of luma samples
 * @param unused ignored
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        dst[x] = src[x << 1];
        x++;
    }
}
1214
/**
 * Extract chroma from a packed line made of 4-byte groups in which byte 1
 * is U and byte 3 is V.
 *
 * @param dstU   receives width U bytes
 * @param dstV   receives width V bytes
 * @param src1   packed input line
 * @param src2   must be the same pointer as src1
 * @param width  number of chroma pairs
 * @param unused ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        const int base = x << 2;
        dstU[x] = src1[base + 1];
        dstV[x] = src1[base + 3];
        x++;
    }
    assert(src1 == src2);
}
1225
/**
 * Reduce two planes of 2-byte samples to 8 bits by keeping the second byte
 * of every sample (the high byte of a little-endian value).
 *
 * @param dstU   receives width bytes taken from src1
 * @param dstV   receives width bytes taken from src2
 * @param src1   first input plane line
 * @param src2   second input plane line
 * @param width  number of samples per plane
 * @param unused ignored
 */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        const int hi = (x << 1) + 1;
        dstU[x] = src1[hi];
        dstV[x] = src2[hi];
        x++;
    }
}
1235
/* This is almost identical to yuy2ToY_c and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses.
 * Luma samples sit at odd byte offsets of the packed line. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        dst[x] = src[(x << 1) + 1];
        x++;
    }
}
1245
/**
 * Extract chroma from a packed line made of 4-byte groups in which byte 0
 * is U and byte 2 is V.
 *
 * @param dstU   receives width U bytes
 * @param dstV   receives width V bytes
 * @param src1   packed input line
 * @param src2   must be the same pointer as src1
 * @param width  number of chroma pairs
 * @param unused ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        const int base = x << 2;
        dstU[x] = src1[base];
        dstV[x] = src1[base + 2];
        x++;
    }
    assert(src1 == src2);
}
1256
/**
 * Reduce two planes of 2-byte samples to 8 bits by keeping the first byte
 * of every sample (the high byte of a big-endian value).
 *
 * @param dstU   receives width bytes taken from src1
 * @param dstV   receives width bytes taken from src2
 * @param src1   first input plane line
 * @param src2   second input plane line
 * @param width  number of samples per plane
 * @param unused ignored
 */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;
    while (x < width) {
        const int hi = x << 1;
        dstU[x] = src1[hi];
        dstV[x] = src2[hi];
        x++;
    }
}
1266
1267 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1268                                         const uint8_t *src, int width)
1269 {
1270     int i;
1271     for (i = 0; i < width; i++) {
1272         dst1[i] = src[2*i+0];
1273         dst2[i] = src[2*i+1];
1274     }
1275 }
1276
/**
 * Split an interleaved chroma line: the first byte of every pair goes to
 * dstU and the second to dstV.  Only src1 is read; src2 and unused are
 * ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
1283
/**
 * Split an interleaved chroma line with the opposite byte order of
 * nv12ToUV_c: the first byte of every pair goes to dstV and the second to
 * dstU.  Only src1 is read; src2 and unused are ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* destinations are swapped on purpose */
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1290
1291 // FIXME Maybe dither instead.
/*
 * Generate <endianness><depth>ToUV_c() and <endianness><depth>ToY_c(),
 * which reduce planar samples stored in 16-bit words to 8 bits by dropping
 * the (depth - 8) least significant bits.  rfunc reads one 16-bit word in
 * the required byte order.
 */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
{ \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    int x = 0; \
    while (x < width) { \
        dstU[x] = rfunc(&srcU[x])>>(depth-8); \
        dstV[x] = rfunc(&srcV[x])>>(depth-8); \
        x++; \
    } \
} \
\
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
{ \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    int x = 0; \
    while (x < width) { \
        dstY[x] = rfunc(&srcY[x])>>(depth-8); \
        x++; \
    } \
}

YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
1319
1320 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1321                        int width, uint32_t *unused)
1322 {
1323     int i;
1324     for (i=0; i<width; i++) {
1325         int b= src[i*3+0];
1326         int g= src[i*3+1];
1327         int r= src[i*3+2];
1328
1329         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1330     }
1331 }
1332
1333 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1334                         const uint8_t *src2, int width, uint32_t *unused)
1335 {
1336     int i;
1337     for (i=0; i<width; i++) {
1338         int b= src1[3*i + 0];
1339         int g= src1[3*i + 1];
1340         int r= src1[3*i + 2];
1341
1342         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1343         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1344     }
1345     assert(src1 == src2);
1346 }
1347
1348 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1349                              const uint8_t *src2, int width, uint32_t *unused)
1350 {
1351     int i;
1352     for (i=0; i<width; i++) {
1353         int b= src1[6*i + 0] + src1[6*i + 3];
1354         int g= src1[6*i + 1] + src1[6*i + 4];
1355         int r= src1[6*i + 2] + src1[6*i + 5];
1356
1357         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1358         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1359     }
1360     assert(src1 == src2);
1361 }
1362
1363 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1364                        uint32_t *unused)
1365 {
1366     int i;
1367     for (i=0; i<width; i++) {
1368         int r= src[i*3+0];
1369         int g= src[i*3+1];
1370         int b= src[i*3+2];
1371
1372         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1373     }
1374 }
1375
1376 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1377                         const uint8_t *src2, int width, uint32_t *unused)
1378 {
1379     int i;
1380     assert(src1==src2);
1381     for (i=0; i<width; i++) {
1382         int r= src1[3*i + 0];
1383         int g= src1[3*i + 1];
1384         int b= src1[3*i + 2];
1385
1386         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1387         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1388     }
1389 }
1390
1391 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1392                              const uint8_t *src2, int width, uint32_t *unused)
1393 {
1394     int i;
1395     assert(src1==src2);
1396     for (i=0; i<width; i++) {
1397         int r= src1[6*i + 0] + src1[6*i + 3];
1398         int g= src1[6*i + 1] + src1[6*i + 4];
1399         int b= src1[6*i + 2] + src1[6*i + 5];
1400
1401         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1402         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1403     }
1404 }
1405
1406
/**
 * Horizontal FIR scaler (bilinear / bicubic and other filters).
 *
 * For every output sample, filterSize consecutive input bytes starting at
 * filterPos[i] are weighted with the i-th group of filterSize coefficients.
 * The sum is scaled down by 7 bits and limited to 32767 from above only.
 *
 * @param dst        receives dstW samples
 * @param dstW       number of output samples
 * @param src        8-bit input line
 * @param srcW       not used by this implementation
 * @param xInc       not used by this implementation
 * @param filter     dstW groups of filterSize coefficients
 * @param filterPos  first input index for each output sample
 * @param filterSize number of taps per output sample
 */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int out;
    for (out = 0; out < dstW; out++) {
        const int first = filterPos[out];
        const int coeff = filterSize*out;
        int sum = 0;
        int tap;

        for (tap = 0; tap < filterSize; tap++)
            sum += ((int)src[first + tap])*filter[coeff + tap];

        /* the cubic equation does overflow, hence the upper limit */
        dst[out] = FFMIN(sum>>7, (1<<15)-1);
    }
}
1426
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Rescale two lines of intermediate chroma samples in place ("to JPEG"
 * direction).  Inputs are limited to 30775 before the linear transform.
 */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int x = 0;
    while (x < width) {
        dstU[x] = (FFMIN(dstU[x],30775)*4663 - 9289992)>>12; //-264
        dstV[x] = (FFMIN(dstV[x],30775)*4663 - 9289992)>>12; //-264
        x++;
    }
}
/**
 * Rescale two lines of intermediate chroma samples in place ("from JPEG"
 * direction): every sample x becomes (x*1799 + 4081085) >> 11.
 */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int x = 0;
    while (x < width) {
        dstU[x] = (dstU[x]*1799 + 4081085)>>11; //1469
        dstV[x] = (dstV[x]*1799 + 4081085)>>11; //1469
        x++;
    }
}
/**
 * Rescale a line of intermediate luma samples in place ("to JPEG"
 * direction).  Inputs are limited to 30189 before the linear transform.
 */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int x = 0;
    while (x < width) {
        dst[x] = (FFMIN(dst[x],30189)*19077 - 39057361)>>14;
        x++;
    }
}
/**
 * Rescale a line of intermediate luma samples in place ("from JPEG"
 * direction): every sample x becomes (x*14071 + 33561947) >> 14.
 */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int x = 0;
    while (x < width) {
        dst[x] = (dst[x]*14071 + 33561947)>>14;
        x++;
    }
}
1457
1458 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1459                            const uint8_t *src, int srcW, int xInc)
1460 {
1461     int i;
1462     unsigned int xpos=0;
1463     for (i=0;i<dstWidth;i++) {
1464         register unsigned int xx=xpos>>16;
1465         register unsigned int xalpha=(xpos&0xFFFF)>>9;
1466         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1467         xpos+=xInc;
1468     }
1469 }
1470
1471 // *** horizontal scale Y line to temp buffer
1472 static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1473                            const uint8_t *src, int srcW, int xInc,
1474                            const int16_t *hLumFilter,
1475                            const int16_t *hLumFilterPos, int hLumFilterSize,
1476                            uint8_t *formatConvBuffer,
1477                            uint32_t *pal, int isAlpha)
1478 {
1479     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1480     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1481
1482     if (toYV12) {
1483         toYV12(formatConvBuffer, src, srcW, pal);
1484         src= formatConvBuffer;
1485     }
1486
1487     if (!c->hyscale_fast) {
1488         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1489     } else { // fast bilinear upscale / crap downscale
1490         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1491     }
1492
1493     if (convertRange)
1494         convertRange(dst, dstWidth);
1495 }
1496
1497 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1498                            int dstWidth, const uint8_t *src1,
1499                            const uint8_t *src2, int srcW, int xInc)
1500 {
1501     int i;
1502     unsigned int xpos=0;
1503     for (i=0;i<dstWidth;i++) {
1504         register unsigned int xx=xpos>>16;
1505         register unsigned int xalpha=(xpos&0xFFFF)>>9;
1506         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1507         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
1508         xpos+=xInc;
1509     }
1510 }
1511
1512 static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1513                            const uint8_t *src1, const uint8_t *src2,
1514                            int srcW, int xInc, const int16_t *hChrFilter,
1515                            const int16_t *hChrFilterPos, int hChrFilterSize,
1516                            uint8_t *formatConvBuffer, uint32_t *pal)
1517 {
1518     if (c->chrToYV12) {
1519         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1520         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1521         src1= formatConvBuffer;
1522         src2= buf2;
1523     }
1524
1525     if (!c->hcscale_fast) {
1526         c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1527         c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1528     } else { // fast bilinear upscale / crap downscale
1529         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1530     }
1531
1532     if (c->chrConvertRange)
1533         c->chrConvertRange(dst1, dst2, dstWidth);
1534 }
1535
1536 static av_always_inline void
1537 find_c_packed_planar_out_funcs(SwsContext *c,
1538                                yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
1539                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1540                                yuv2packedX_fn *yuv2packedX)
1541 {
1542     enum PixelFormat dstFormat = c->dstFormat;
1543
1544     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1545         *yuv2yuvX     = yuv2nv12X_c;
1546     } else if (is16BPS(dstFormat)) {
1547         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
1548     } else if (is9_OR_10BPS(dstFormat)) {
1549         if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
1550             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
1551         } else {
1552             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1553         }
1554     } else {
1555         *yuv2yuv1     = yuv2yuv1_c;
1556         *yuv2yuvX     = yuv2yuvX_c;
1557     }
1558     if(c->flags & SWS_FULL_CHR_H_INT) {
1559         *yuv2packedX = yuv2rgbX_c_full;
1560     } else {
1561         *yuv2packed1  = yuv2packed1_c;
1562         *yuv2packed2  = yuv2packed2_c;
1563         *yuv2packedX  = yuv2packedX_c;
1564     }
1565 }
1566
/* Set to 1 to log buffer bookkeeping at AV_LOG_DEBUG level. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Log through av_log() when DEBUG_SWSCALE_BUFFERS is non-zero.  Expects a
 * variable named c to be in scope at the call site.  Wrapped in
 * do { } while (0) so that the macro behaves as a single statement and
 * cannot capture a following "else".
 */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
1569
/**
 * Scale one horizontal slice of the source picture into the destination.
 *
 * For every output line, starting at the dstY stored in the context, the
 * function horizontally scales the source lines that are still missing into
 * the luma/chroma (and optional alpha) line buffers and then calls the
 * vertical scaler / output function for that line. If the current slice does
 * not contain all source lines an output line needs, the available lines are
 * buffered, the loop stops, and the progress is saved in the context so that
 * a later call with the next slice can continue.
 *
 * Side effects on the arguments: for packed source formats src[1..3] and
 * srcStride[1..3] are overwritten with src[0] / srcStride[0]; srcStride[1]
 * and srcStride[2] are additionally left-shifted by c->vChrDrop.
 *
 * @param c         scaler context; filters, line buffers and progress state
 *                  (dstY, lumBufIndex, chrBufIndex, lastInLumBuf,
 *                  lastInChrBuf) are read from it, and the progress state is
 *                  written back before returning
 * @param src       plane pointers; each points at source line srcSliceY
 * @param srcStride byte strides of the source planes (modified, see above)
 * @param srcSliceY index of the first source line contained in this slice;
 *                  0 resets the progress state
 * @param srcSliceH number of source lines in this slice
 * @param dst       plane pointers to line 0 of the destination picture
 * @param dstStride byte strides of the destination planes
 * @return number of destination lines produced by this call
 */
1570 static int swScale(SwsContext *c, const uint8_t* src[],
1571                    int srcStride[], int srcSliceY,
1572                    int srcSliceH, uint8_t* dst[], int dstStride[])
1573 {
1574     /* load a few things into local vars to make the code more readable? and faster */
1575     const int srcW= c->srcW;
1576     const int dstW= c->dstW;
1577     const int dstH= c->dstH;
1578     const int chrDstW= c->chrDstW;
1579     const int chrSrcW= c->chrSrcW;
1580     const int lumXInc= c->lumXInc;
1581     const int chrXInc= c->chrXInc;
1582     const enum PixelFormat dstFormat= c->dstFormat;
1583     const int flags= c->flags;
1584     int16_t *vLumFilterPos= c->vLumFilterPos;
1585     int16_t *vChrFilterPos= c->vChrFilterPos;
1586     int16_t *hLumFilterPos= c->hLumFilterPos;
1587     int16_t *hChrFilterPos= c->hChrFilterPos;
1588     int16_t *vLumFilter= c->vLumFilter;
1589     int16_t *vChrFilter= c->vChrFilter;
1590     int16_t *hLumFilter= c->hLumFilter;
1591     int16_t *hChrFilter= c->hChrFilter;
1592     int32_t *lumMmxFilter= c->lumMmxFilter;
1593     int32_t *chrMmxFilter= c->chrMmxFilter;
1594     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1595     const int vLumFilterSize= c->vLumFilterSize;
1596     const int vChrFilterSize= c->vChrFilterSize;
1597     const int hLumFilterSize= c->hLumFilterSize;
1598     const int hChrFilterSize= c->hChrFilterSize;
1599     int16_t **lumPixBuf= c->lumPixBuf;
1600     int16_t **chrUPixBuf= c->chrUPixBuf;
1601     int16_t **chrVPixBuf= c->chrVPixBuf;
1602     int16_t **alpPixBuf= c->alpPixBuf;
1603     const int vLumBufSize= c->vLumBufSize;
1604     const int vChrBufSize= c->vChrBufSize;
1605     uint8_t *formatConvBuffer= c->formatConvBuffer;
         /* slice position and height expressed in chroma lines */
1606     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
         /* -((-x) >> n) rounds the division by 2^n up instead of down
          * (relies on >> of a negative int being an arithmetic shift) */
1607     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
         /* value of dstY on entry; used for the return value and the alpha fill */
1608     int lastDstY;
1609     uint32_t *pal=c->pal_yuv;
         /* local copies of the output function pointers: they may be swapped
          * for the C versions near the bottom of the picture without touching
          * the pointers stored in the context */
1610     yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
1611     yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
1612     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
1613     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
1614     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
1615
1616     /* vars which will change and which we need to store back in the context */
1617     int dstY= c->dstY;
1618     int lumBufIndex= c->lumBufIndex;
1619     int chrBufIndex= c->chrBufIndex;
1620     int lastInLumBuf= c->lastInLumBuf;
1621     int lastInChrBuf= c->lastInChrBuf;
1622
         /* Packed input: make every plane pointer/stride refer to plane 0.
          * Note that this writes into the arrays passed in by the caller. */
1623     if (isPacked(c->srcFormat)) {
1624         src[0]=
1625         src[1]=
1626         src[2]=
1627         src[3]= src[0];
1628         srcStride[0]=
1629         srcStride[1]=
1630         srcStride[2]=
1631         srcStride[3]= srcStride[0];
1632     }
         /* multiply the chroma strides by 2^vChrDrop, so that advancing one
          * chroma line steps over 2^vChrDrop lines in memory */
1633     srcStride[1]<<= c->vChrDrop;
1634     srcStride[2]<<= c->vChrDrop;
1635
1636     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1637                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1638                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1639     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1640                    srcSliceY,    srcSliceH,    dstY,    dstH);
1641     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1642                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
1643
         /* Unaligned destination strides only trigger a one-time warning (and
          * only with SWS_PRINT_INFO); processing continues regardless. The
          * flag is a function-level static, i.e. shared by all contexts. */
1644     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1645         static int warnedAlready=0; //FIXME move this into the context perhaps
1646         if (flags & SWS_PRINT_INFO && !warnedAlready) {
1647             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1648                    "         ->cannot do aligned memory accesses anymore\n");
1649             warnedAlready=1;
1650         }
1651     }
1652
1653     /* Note the user might start scaling the picture in the middle so this
1654        will not get executed. This is not really intended but works
1655        currently, so people might do it. */
1656     if (srcSliceY ==0) {
1657         lumBufIndex=-1;
1658         chrBufIndex=-1;
1659         dstY=0;
1660         lastInLumBuf= -1;
1661         lastInChrBuf= -1;
1662     }
1663
1664     lastDstY= dstY;
1665
         /* one iteration per output line; leaves early via break when the
          * slice runs out of input lines */
1666     for (;dstY < dstH; dstY++) {
1667         unsigned char *dest =dst[0]+dstStride[0]*dstY;
1668         const int chrDstY= dstY>>c->chrDstVSubSample;
1669         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1670         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1671         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1672
1673         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
             /* same, but for the last luma output line that shares this line's
              * chroma row (dstY with the subsampling bits set), clamped to the
              * last output line; used only for the enough_lines test below */
1674         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1675         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1676         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1677         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1678         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1679         int enough_lines;
1680
1681         //handle holes (FAST_BILINEAR & weird filters)
1682         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1683         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
             /* the first line needed must still be inside the ring buffer */
1684         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1685         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1686
1687         DEBUG_BUFFERS("dstY: %d\n", dstY);
1688         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1689                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
1690         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1691                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
1692
1693         // Do we have enough lines in this slice to output the dstY line
             /* the chroma bound is the slice end (srcSliceY + srcSliceH)
              * divided by 2^chrSrcVSubSample, rounded up */
1694         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
1695
             /* not enough input: buffer everything up to the end of the slice
              * so it is available when the next slice arrives */
1696         if (!enough_lines) {
1697             lastLumSrcY = srcSliceY + srcSliceH - 1;
1698             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1699             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1700                                             lastLumSrcY, lastChrSrcY);
1701         }
1702
1703         //Do horizontal scaling
1704         while(lastInLumBuf < lastLumSrcY) {
                 /* src1: luma line, src2: alpha line (plane 3), both addressed
                  * relative to the start of the slice */
1705             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1706             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1707             lumBufIndex++;
1708             assert(lumBufIndex < 2*vLumBufSize);
1709             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1710             assert(lastInLumBuf + 1 - srcSliceY >= 0);
1711             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1712                     hLumFilter, hLumFilterPos, hLumFilterSize,
1713                     formatConvBuffer,
1714                     pal, 0);
1715             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1716                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1717                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1718                         formatConvBuffer,
1719                         pal, 1);
1720             lastInLumBuf++;
1721             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1722                                lumBufIndex,    lastInLumBuf);
1723         }
1724         while(lastInChrBuf < lastChrSrcY) {
                 /* src1: U plane line, src2: V plane line */
1725             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1726             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1727             chrBufIndex++;
1728             assert(chrBufIndex < 2*vChrBufSize);
1729             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1730             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1731             //FIXME replace parameters through context struct (some at least)
1732
                 /* the buffer bookkeeping below advances even when chroma
                  * scaling itself is skipped */
1733             if (c->needs_hcscale)
1734                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1735                           chrDstW, src1, src2, chrSrcW, chrXInc,
1736                           hChrFilter, hChrFilterPos, hChrFilterSize,
1737                           formatConvBuffer, pal);
1738             lastInChrBuf++;
1739             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1740                                chrBufIndex,    lastInChrBuf);
1741         }
1742         //wrap buf index around to stay inside the ring buffer
1743         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1744         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1745         if (!enough_lines)
1746             break; //we can't output a dstY line so let's try with the next slice
1747
1748 #if HAVE_MMX
1749         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
1750 #endif
             /* for the last two output lines replace the local function
              * pointers with the C implementations (see the note below) */
1751         if (dstY >= dstH-2) {
1752             // hmm looks like we can't use MMX here without overwriting this array's tail
1753             find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
1754                                            &yuv2packed1, &yuv2packed2,
1755                                            &yuv2packedX);
1756         }
1757
1758         {
                 /* Pointers to the first buffered line the vertical filter
                  * needs. xxxBufIndex is the slot of source line lastInXxxBuf,
                  * so adding (firstXxxSrcY - lastInXxxBuf) steps back to the
                  * slot of the first needed line; + vXxxBufSize keeps the
                  * offset non-negative.
                  * NOTE(review): this relies on the pointer arrays having
                  * 2*vXxxBufSize entries (cf. the asserts above and below) --
                  * confirm against the allocation code. */
1759             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1760             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1761             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1762             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1763             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                     /* chroma is written only on output lines whose low
                      * chrDstVSubSample bits are all zero, and never for gray
                      * output */
1764                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1765                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                     /* note: the availability test looks at c->yuv2yuv1 while
                      * the call goes through the local (possibly C-only) copy */
1766                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1767                     const int16_t *lumBuf = lumSrcPtr[0];
1768                     const int16_t *chrUBuf= chrUSrcPtr[0];
1769                     const int16_t *chrVBuf= chrVSrcPtr[0];
1770                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1771                     yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1772                                 uDest, vDest, aDest, dstW, chrDstW);
1773                 } else { //General YV12
1774                     yuv2yuvX(c,
1775                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
1776                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1777                                 chrVSrcPtr, vChrFilterSize,
1778                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1779                 }
1780             } else {
                     /* packed output: the whole filter window must lie inside
                      * the pointer arrays */
1781                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
1782                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1783                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                         /* second tap of this line's 2-tap chroma filter.
                          * NOTE(review): indexed with dstY rather than chrDstY;
                          * the two are equal only if chrDstVSubSample is 0 for
                          * packed output -- confirm. */
1784                     int chrAlpha= vChrFilter[2*dstY+1];
1785                     yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1786                                    *chrVSrcPtr, *(chrVSrcPtr+1),
1787                                    alpPixBuf ? *alpSrcPtr : NULL,
1788                                    dest, dstW, chrAlpha, dstFormat, flags, dstY);
1789                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                         /* second taps of the 2-tap luma / chroma filters */
1790                     int lumAlpha= vLumFilter[2*dstY+1];
1791                     int chrAlpha= vChrFilter[2*dstY+1];
                         /* first taps, stored as coeff * 0x10001, i.e.
                          * coeff + (coeff << 16), in entries 2 and 3 of the
                          * MMX filter tables */
1792                     lumMmxFilter[2]=
1793                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
1794                     chrMmxFilter[2]=
1795                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1796                     yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1797                                    *chrVSrcPtr, *(chrVSrcPtr+1),
1798                                    alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1799                                    dest, dstW, lumAlpha, chrAlpha, dstY);
1800                 } else { //general RGB
1801                     yuv2packedX(c,
1802                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1803                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1804                                    alpSrcPtr, dest, dstW, dstY);
1805                 }
1806             }
1807         }
1808     }
1809
         /* YUVA420P output without alpha buffers: fill the alpha plane of the
          * lines produced by this call with 255 */
1810     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1811         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
1812
         /* NOTE(review): the store fence presumably pairs with non-temporal
          * stores in the MMX2 output code -- confirm */
1813 #if HAVE_MMX2
1814     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1815         __asm__ volatile("sfence":::"memory");
1816 #endif
1817     emms_c();
1818
1819     /* store changed local vars back in the context */
1820     c->dstY= dstY;
1821     c->lumBufIndex= lumBufIndex;
1822     c->chrBufIndex= chrBufIndex;
1823     c->lastInLumBuf= lastInLumBuf;
1824     c->lastInChrBuf= lastInChrBuf;
1825
         /* number of output lines produced by this call */
1826     return dstY - lastDstY;
1827 }
1828
1829 static void sws_init_swScale_c(SwsContext *c)
1830 {
1831     enum PixelFormat srcFormat = c->srcFormat;
1832
1833     find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
1834                                    &c->yuv2packed1, &c->yuv2packed2,
1835                                    &c->yuv2packedX);
1836
1837     c->hScale       = hScale_c;
1838
1839     if (c->flags & SWS_FAST_BILINEAR) {
1840         c->hyscale_fast = hyscale_fast_c;
1841         c->hcscale_fast = hcscale_fast_c;
1842     }
1843
1844     c->chrToYV12 = NULL;
1845     switch(srcFormat) {
1846         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
1847         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
1848         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
1849         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
1850         case PIX_FMT_RGB8     :
1851         case PIX_FMT_BGR8     :
1852         case PIX_FMT_PAL8     :
1853         case PIX_FMT_BGR4_BYTE:
1854         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1855         case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1856         case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1857         case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1858         case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1859         case PIX_FMT_YUV420P16BE:
1860         case PIX_FMT_YUV422P16BE:
1861         case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1862         case PIX_FMT_YUV420P16LE:
1863         case PIX_FMT_YUV422P16LE:
1864         case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
1865     }
1866     if (c->chrSrcHSubSample) {
1867         switch(srcFormat) {
1868         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
1869         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
1870         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
1871         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
1872         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
1873         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1874         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
1875         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1876         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1877         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
1878         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1879         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
1880         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1881         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1882         }
1883     } else {
1884         switch(srcFormat) {
1885         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
1886         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
1887         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
1888         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
1889         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
1890         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1891         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
1892         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1893         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1894         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
1895         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
1896         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
1897         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
1898         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
1899         }
1900     }
1901
1902     c->lumToYV12 = NULL;
1903     c->alpToYV12 = NULL;
1904     switch (srcFormat) {
1905     case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
1906     case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
1907     case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
1908     case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
1909     case PIX_FMT_YUYV422  :
1910     case PIX_FMT_YUV420P16BE:
1911     case PIX_FMT_YUV422P16BE:
1912     case PIX_FMT_YUV444P16BE:
1913     case PIX_FMT_Y400A    :
1914     case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
1915     case PIX_FMT_UYVY422  :
1916     case PIX_FMT_YUV420P16LE:
1917     case PIX_FMT_YUV422P16LE:
1918     case PIX_FMT_YUV444P16LE:
1919     case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
1920     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
1921     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
1922     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
1923     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
1924     case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
1925     case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
1926     case PIX_FMT_RGB8     :
1927     case PIX_FMT_BGR8     :
1928     case PIX_FMT_PAL8     :
1929     case PIX_FMT_BGR4_BYTE:
1930     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
1931     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
1932     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
1933     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
1934     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
1935     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
1936     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
1937     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
1938     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
1939     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
1940     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
1941     }
1942     if (c->alpPixBuf) {
1943         switch (srcFormat) {
1944         case PIX_FMT_BGRA:
1945         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
1946         case PIX_FMT_ABGR:
1947         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
1948         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
1949         }
1950     }
1951
1952     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
1953         if (c->srcRange) {
1954             c->lumConvertRange = lumRangeFromJpeg_c;
1955             c->chrConvertRange = chrRangeFromJpeg_c;
1956         } else {
1957             c->lumConvertRange = lumRangeToJpeg_c;
1958             c->chrConvertRange = chrRangeToJpeg_c;
1959         }
1960     }
1961
1962     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
1963           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
1964         c->needs_hcscale = 1;
1965 }
1966
1967 SwsFunc ff_getSwsFunc(SwsContext *c)
1968 {
1969     sws_init_swScale_c(c);
1970
1971     if (HAVE_MMX)
1972         ff_sws_init_swScale_mmx(c);
1973     if (HAVE_ALTIVEC)
1974         ff_sws_init_swScale_altivec(c);
1975
1976     return swScale;
1977 }