git.sesse.net Git - ffmpeg/blob - libswscale/rgb2rgb_template.c

   1 /*
   2  * software RGB to RGB converter
   3  * pluralize by software PAL8 to RGB converter
   4  *              software YUV to YUV converter
   5  *              software YUV to RGB converter
   6  * Written by Nick Kurshev.
   7  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
   8  * lot of big-endian byte order fixes by Alex Beregszaszi
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 #include <stddef.h>
  28
  29 static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
  30 {
  31     uint8_t *dest = dst;
  32     const uint8_t *s = src;
  33     const uint8_t *end;
  34     end = s + src_size;
  35
  36     while (s < end) {
  37 #if HAVE_BIGENDIAN
  38         /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
  39         *dest++ = 255;
  40         *dest++ = s[2];
  41         *dest++ = s[1];
  42         *dest++ = s[0];
  43         s+=3;
  44 #else
  45         *dest++ = *s++;
  46         *dest++ = *s++;
  47         *dest++ = *s++;
  48         *dest++ = 255;
  49 #endif
  50     }
  51 }
  52
  53 static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
  54 {
  55     uint8_t *dest = dst;
  56     const uint8_t *s = src;
  57     const uint8_t *end;
  58
  59     end = s + src_size;
  60
  61     while (s < end) {
  62 #if HAVE_BIGENDIAN
  63         /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
  64         s++;
  65         dest[2] = *s++;
  66         dest[1] = *s++;
  67         dest[0] = *s++;
  68         dest += 3;
  69 #else
  70         *dest++ = *s++;
  71         *dest++ = *s++;
  72         *dest++ = *s++;
  73         s++;
  74 #endif
  75     }
  76 }
  77
  78 /*
  79  original by Strepto/Astral
  80  ported to gcc & bugfixed: A'rpi
  81  MMX2, 3DNOW optimization by Nick Kurshev
  82  32-bit C version, and and&add trick by Michael Niedermayer
  83 */
  84 static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
  85 {
  86     register const uint8_t* s=src;
  87     register uint8_t* d=dst;
  88     register const uint8_t *end;
  89     const uint8_t *mm_end;
  90     end = s + src_size;
  91     mm_end = end - 3;
  92     while (s < mm_end) {
  93         register unsigned x= *((const uint32_t *)s);
  94         *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
  95         d+=4;
  96         s+=4;
  97     }
  98     if (s < end) {
  99         register unsigned short x= *((const uint16_t *)s);
 100         *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
 101     }
 102 }
 103
 104 static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 105 {
 106     register const uint8_t* s=src;
 107     register uint8_t* d=dst;
 108     register const uint8_t *end;
 109     const uint8_t *mm_end;
 110     end = s + src_size;
 111
 112     mm_end = end - 3;
 113     while (s < mm_end) {
 114         register uint32_t x= *((const uint32_t*)s);
 115         *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
 116         s+=4;
 117         d+=4;
 118     }
 119     if (s < end) {
 120         register uint16_t x= *((const uint16_t*)s);
 121         *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
 122     }
 123 }
 124
 125 static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
 126 {
 127     const uint8_t *s = src;
 128     const uint8_t *end;
 129     uint16_t *d = (uint16_t *)dst;
 130     end = s + src_size;
 131
 132     while (s < end) {
 133         register int rgb = *(const uint32_t*)s; s += 4;
 134         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
 135     }
 136 }
 137
 138 static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
 139 {
 140     const uint8_t *s = src;
 141     const uint8_t *end;
 142     uint16_t *d = (uint16_t *)dst;
 143     end = s + src_size;
 144     while (s < end) {
 145         register int rgb = *(const uint32_t*)s; s += 4;
 146         *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
 147     }
 148 }
 149
 150 static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 151 {
 152     const uint8_t *s = src;
 153     const uint8_t *end;
 154     uint16_t *d = (uint16_t *)dst;
 155     end = s + src_size;
 156     while (s < end) {
 157         register int rgb = *(const uint32_t*)s; s += 4;
 158         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
 159     }
 160 }
 161
 162 static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
 163 {
 164     const uint8_t *s = src;
 165     const uint8_t *end;
 166     uint16_t *d = (uint16_t *)dst;
 167     end = s + src_size;
 168     while (s < end) {
 169         register int rgb = *(const uint32_t*)s; s += 4;
 170         *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
 171     }
 172 }
 173
 174 static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
 175 {
 176     const uint8_t *s = src;
 177     const uint8_t *end;
 178     uint16_t *d = (uint16_t *)dst;
 179     end = s + src_size;
 180     while (s < end) {
 181         const int b = *s++;
 182         const int g = *s++;
 183         const int r = *s++;
 184         *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
 185     }
 186 }
 187
 188 static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
 189 {
 190     const uint8_t *s = src;
 191     const uint8_t *end;
 192     uint16_t *d = (uint16_t *)dst;
 193     end = s + src_size;
 194     while (s < end) {
 195         const int r = *s++;
 196         const int g = *s++;
 197         const int b = *s++;
 198         *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
 199     }
 200 }
 201
 202 static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
 203 {
 204     const uint8_t *s = src;
 205     const uint8_t *end;
 206     uint16_t *d = (uint16_t *)dst;
 207     end = s + src_size;
 208     while (s < end) {
 209         const int b = *s++;
 210         const int g = *s++;
 211         const int r = *s++;
 212         *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
 213     }
 214 }
 215
 216 static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 217 {
 218     const uint8_t *s = src;
 219     const uint8_t *end;
 220     uint16_t *d = (uint16_t *)dst;
 221     end = s + src_size;
 222     while (s < end) {
 223         const int r = *s++;
 224         const int g = *s++;
 225         const int b = *s++;
 226         *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
 227     }
 228 }
 229
 230 /*
 231   I use less accurate approximation here by simply left-shifting the input
 232   value and filling the low order bits with zeroes. This method improves PNG
 233   compression but this scheme cannot reproduce white exactly, since it does
 234   not generate an all-ones maximum value; the net effect is to darken the
 235   image slightly.
 236
 237   The better method should be "left bit replication":
 238
 239    4 3 2 1 0
 240    ---------
 241    1 1 0 1 1
 242
 243    7 6 5 4 3  2 1 0
 244    ----------------
 245    1 1 0 1 1  1 1 0
 246    |=======|  |===|
 247        |      leftmost bits repeated to fill open bits
 248        |
 249    original bits
 250 */
 251 static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 252 {
 253     const uint16_t *end;
 254     uint8_t *d = dst;
 255     const uint16_t *s = (const uint16_t*)src;
 256     end = s + src_size/2;
 257     while (s < end) {
 258         register uint16_t bgr;
 259         bgr = *s++;
 260         *d++ = (bgr&0x1F)<<3;
 261         *d++ = (bgr&0x3E0)>>2;
 262         *d++ = (bgr&0x7C00)>>7;
 263     }
 264 }
 265
 266 static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 267 {
 268     const uint16_t *end;
 269     uint8_t *d = (uint8_t *)dst;
 270     const uint16_t *s = (const uint16_t *)src;
 271     end = s + src_size/2;
 272     while (s < end) {
 273         register uint16_t bgr;
 274         bgr = *s++;
 275         *d++ = (bgr&0x1F)<<3;
 276         *d++ = (bgr&0x7E0)>>3;
 277         *d++ = (bgr&0xF800)>>8;
 278     }
 279 }
 280
 281 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
 282 {
 283     const uint16_t *end;
 284     uint8_t *d = dst;
 285     const uint16_t *s = (const uint16_t *)src;
 286     end = s + src_size/2;
 287     while (s < end) {
 288         register uint16_t bgr;
 289         bgr = *s++;
 290 #if HAVE_BIGENDIAN
 291         *d++ = 255;
 292         *d++ = (bgr&0x7C00)>>7;
 293         *d++ = (bgr&0x3E0)>>2;
 294         *d++ = (bgr&0x1F)<<3;
 295 #else
 296         *d++ = (bgr&0x1F)<<3;
 297         *d++ = (bgr&0x3E0)>>2;
 298         *d++ = (bgr&0x7C00)>>7;
 299         *d++ = 255;
 300 #endif
 301     }
 302 }
 303
 304 static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
 305 {
 306     const uint16_t *end;
 307     uint8_t *d = dst;
 308     const uint16_t *s = (const uint16_t*)src;
 309     end = s + src_size/2;
 310     while (s < end) {
 311         register uint16_t bgr;
 312         bgr = *s++;
 313 #if HAVE_BIGENDIAN
 314         *d++ = 255;
 315         *d++ = (bgr&0xF800)>>8;
 316         *d++ = (bgr&0x7E0)>>3;
 317         *d++ = (bgr&0x1F)<<3;
 318 #else
 319         *d++ = (bgr&0x1F)<<3;
 320         *d++ = (bgr&0x7E0)>>3;
 321         *d++ = (bgr&0xF800)>>8;
 322         *d++ = 255;
 323 #endif
 324     }
 325 }
 326
 327 static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
 328 {
 329     int idx = 15 - src_size;
 330     const uint8_t *s = src-idx;
 331     uint8_t *d = dst-idx;
 332     for (; idx<15; idx+=4) {
 333         register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
 334         v &= 0xff00ff;
 335         *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
 336     }
 337 }
 338
 339 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 340 {
 341     unsigned i;
 342     for (i=0; i<src_size; i+=3) {
 343         register uint8_t x;
 344         x          = src[i + 2];
 345         dst[i + 1] = src[i + 1];
 346         dst[i + 2] = src[i + 0];
 347         dst[i + 0] = x;
 348     }
 349 }
 350
 351 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 352                                      const uint8_t *vsrc, uint8_t *dst,
 353                                      long width, long height,
 354                                      long lumStride, long chromStride,
 355                                      long dstStride, long vertLumPerChroma)
 356 {
 357     long y;
 358     const int chromWidth = width >> 1;
 359     for (y=0; y<height; y++) {
 360 #if HAVE_FAST_64BIT
 361         int i;
 362         uint64_t *ldst = (uint64_t *) dst;
 363         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 364         for (i = 0; i < chromWidth; i += 2) {
 365             uint64_t k, l;
 366             k = yc[0] + (uc[0] << 8) +
 367                 (yc[1] << 16) + (vc[0] << 24);
 368             l = yc[2] + (uc[1] << 8) +
 369                 (yc[3] << 16) + (vc[1] << 24);
 370             *ldst++ = k + (l << 32);
 371             yc += 4;
 372             uc += 2;
 373             vc += 2;
 374         }
 375
 376 #else
 377         int i, *idst = (int32_t *) dst;
 378         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 379         for (i = 0; i < chromWidth; i++) {
 380 #if HAVE_BIGENDIAN
 381             *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
 382                 (yc[1] << 8) + (vc[0] << 0);
 383 #else
 384             *idst++ = yc[0] + (uc[0] << 8) +
 385                 (yc[1] << 16) + (vc[0] << 24);
 386 #endif
 387             yc += 2;
 388             uc++;
 389             vc++;
 390         }
 391 #endif
 392         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 393             usrc += chromStride;
 394             vsrc += chromStride;
 395         }
 396         ysrc += lumStride;
 397         dst  += dstStride;
 398     }
 399 }
 400
 401 /**
 402  * Height should be a multiple of 2 and width should be a multiple of 16.
 403  * (If this is a problem for anyone then tell me, and I will fix it.)
 404  */
 405 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 406                                 const uint8_t *vsrc, uint8_t *dst,
 407                                 long width, long height,
 408                                 long lumStride, long chromStride,
 409                                 long dstStride)
 410 {
 411     //FIXME interpolate chroma
 412     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 413                       chromStride, dstStride, 2);
 414 }
 415
 416 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 417                                      const uint8_t *vsrc, uint8_t *dst,
 418                                      long width, long height,
 419                                      long lumStride, long chromStride,
 420                                      long dstStride, long vertLumPerChroma)
 421 {
 422     long y;
 423     const int chromWidth = width >> 1;
 424     for (y=0; y<height; y++) {
 425 #if HAVE_FAST_64BIT
 426         int i;
 427         uint64_t *ldst = (uint64_t *) dst;
 428         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 429         for (i = 0; i < chromWidth; i += 2) {
 430             uint64_t k, l;
 431             k = uc[0] + (yc[0] << 8) +
 432                 (vc[0] << 16) + (yc[1] << 24);
 433             l = uc[1] + (yc[2] << 8) +
 434                 (vc[1] << 16) + (yc[3] << 24);
 435             *ldst++ = k + (l << 32);
 436             yc += 4;
 437             uc += 2;
 438             vc += 2;
 439         }
 440
 441 #else
 442         int i, *idst = (int32_t *) dst;
 443         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 444         for (i = 0; i < chromWidth; i++) {
 445 #if HAVE_BIGENDIAN
 446             *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
 447                 (vc[0] << 8) + (yc[1] << 0);
 448 #else
 449             *idst++ = uc[0] + (yc[0] << 8) +
 450                (vc[0] << 16) + (yc[1] << 24);
 451 #endif
 452             yc += 2;
 453             uc++;
 454             vc++;
 455         }
 456 #endif
 457         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 458             usrc += chromStride;
 459             vsrc += chromStride;
 460         }
 461         ysrc += lumStride;
 462         dst += dstStride;
 463     }
 464 }
 465
 466 /**
 467  * Height should be a multiple of 2 and width should be a multiple of 16
 468  * (If this is a problem for anyone then tell me, and I will fix it.)
 469  */
 470 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 471                                 const uint8_t *vsrc, uint8_t *dst,
 472                                 long width, long height,
 473                                 long lumStride, long chromStride,
 474                                 long dstStride)
 475 {
 476     //FIXME interpolate chroma
 477     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 478                       chromStride, dstStride, 2);
 479 }
 480
 481 /**
 482  * Width should be a multiple of 16.
 483  */
 484 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 485                                    const uint8_t *vsrc, uint8_t *dst,
 486                                    long width, long height,
 487                                    long lumStride, long chromStride,
 488                                    long dstStride)
 489 {
 490     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 491                       chromStride, dstStride, 1);
 492 }
 493
 494 /**
 495  * Width should be a multiple of 16.
 496  */
 497 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 498                                    const uint8_t *vsrc, uint8_t *dst,
 499                                    long width, long height,
 500                                    long lumStride, long chromStride,
 501                                    long dstStride)
 502 {
 503     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 504                       chromStride, dstStride, 1);
 505 }
 506
 507 /**
 508  * Height should be a multiple of 2 and width should be a multiple of 16.
 509  * (If this is a problem for anyone then tell me, and I will fix it.)
 510  */
 511 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
 512                                 uint8_t *udst, uint8_t *vdst,
 513                                 long width, long height,
 514                                 long lumStride, long chromStride,
 515                                 long srcStride)
 516 {
 517     long y;
 518     const int chromWidth = width >> 1;
 519     for (y=0; y<height; y+=2) {
 520         long i;
 521         for (i=0; i<chromWidth; i++) {
 522             ydst[2*i+0]     = src[4*i+0];
 523             udst[i]     = src[4*i+1];
 524             ydst[2*i+1]     = src[4*i+2];
 525             vdst[i]     = src[4*i+3];
 526         }
 527         ydst += lumStride;
 528         src  += srcStride;
 529
 530         for (i=0; i<chromWidth; i++) {
 531             ydst[2*i+0]     = src[4*i+0];
 532             ydst[2*i+1]     = src[4*i+2];
 533         }
 534         udst += chromStride;
 535         vdst += chromStride;
 536         ydst += lumStride;
 537         src  += srcStride;
 538     }
 539 }
 540
 541 static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
 542                               long srcHeight, long srcStride, long dstStride)
 543 {
 544     long x,y;
 545
 546     dst[0]= src[0];
 547
 548     // first line
 549     for (x=0; x<srcWidth-1; x++) {
 550         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
 551         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
 552     }
 553     dst[2*srcWidth-1]= src[srcWidth-1];
 554
 555     dst+= dstStride;
 556
 557     for (y=1; y<srcHeight; y++) {
 558         const int mmxSize = 1;
 559
 560         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
 561         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
 562
 563         for (x=mmxSize-1; x<srcWidth-1; x++) {
 564             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
 565             dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
 566             dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
 567             dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
 568         }
 569         dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
 570         dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
 571
 572         dst+=dstStride*2;
 573         src+=srcStride;
 574     }
 575
 576     // last line
 577 #if 1
 578     dst[0]= src[0];
 579
 580     for (x=0; x<srcWidth-1; x++) {
 581         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
 582         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
 583     }
 584     dst[2*srcWidth-1]= src[srcWidth-1];
 585 #else
 586     for (x=0; x<srcWidth; x++) {
 587         dst[2*x+0]=
 588         dst[2*x+1]= src[x];
 589     }
 590 #endif
 591 }
 592
 593 /**
 594  * Height should be a multiple of 2 and width should be a multiple of 16.
 595  * (If this is a problem for anyone then tell me, and I will fix it.)
 596  * Chrominance data is only taken from every second line, others are ignored.
 597  * FIXME: Write HQ version.
 598  */
 599 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
 600                                 uint8_t *udst, uint8_t *vdst,
 601                                 long width, long height,
 602                                 long lumStride, long chromStride,
 603                                 long srcStride)
 604 {
 605     long y;
 606     const int chromWidth = width >> 1;
 607     for (y=0; y<height; y+=2) {
 608         long i;
 609         for (i=0; i<chromWidth; i++) {
 610             udst[i]     = src[4*i+0];
 611             ydst[2*i+0] = src[4*i+1];
 612             vdst[i]     = src[4*i+2];
 613             ydst[2*i+1] = src[4*i+3];
 614         }
 615         ydst += lumStride;
 616         src  += srcStride;
 617
 618         for (i=0; i<chromWidth; i++) {
 619             ydst[2*i+0] = src[4*i+1];
 620             ydst[2*i+1] = src[4*i+3];
 621         }
 622         udst += chromStride;
 623         vdst += chromStride;
 624         ydst += lumStride;
 625         src  += srcStride;
 626     }
 627 }
 628
 629 /**
 630  * Height should be a multiple of 2 and width should be a multiple of 2.
 631  * (If this is a problem for anyone then tell me, and I will fix it.)
 632  * Chrominance data is only taken from every second line,
 633  * others are ignored in the C version.
 634  * FIXME: Write HQ version.
 635  */
 636 static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
 637                                  uint8_t *udst, uint8_t *vdst,
 638                                  long width, long height,
 639                                  long lumStride, long chromStride,
 640                                  long srcStride)
 641 {
 642     long y;
 643     const int chromWidth = width >> 1;
 644     y=0;
 645     for (; y<height; y+=2) {
 646         long i;
 647         for (i=0; i<chromWidth; i++) {
 648             unsigned int b = src[6*i+0];
 649             unsigned int g = src[6*i+1];
 650             unsigned int r = src[6*i+2];
 651
 652             unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
 653             unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
 654             unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
 655
 656             udst[i]     = U;
 657             vdst[i]     = V;
 658             ydst[2*i]   = Y;
 659
 660             b = src[6*i+3];
 661             g = src[6*i+4];
 662             r = src[6*i+5];
 663
 664             Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
 665             ydst[2*i+1]     = Y;
 666         }
 667         ydst += lumStride;
 668         src  += srcStride;
 669
 670         if(y+1 == height)
 671             break;
 672
 673         for (i=0; i<chromWidth; i++) {
 674             unsigned int b = src[6*i+0];
 675             unsigned int g = src[6*i+1];
 676             unsigned int r = src[6*i+2];
 677
 678             unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
 679
 680             ydst[2*i]     = Y;
 681
 682             b = src[6*i+3];
 683             g = src[6*i+4];
 684             r = src[6*i+5];
 685
 686             Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
 687             ydst[2*i+1]     = Y;
 688         }
 689         udst += chromStride;
 690         vdst += chromStride;
 691         ydst += lumStride;
 692         src  += srcStride;
 693     }
 694 }
 695
 696 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
 697                               uint8_t *dest, long width,
 698                               long height, long src1Stride,
 699                               long src2Stride, long dstStride)
 700 {
 701     long h;
 702
 703     for (h=0; h < height; h++) {
 704         long w;
 705         for (w=0; w < width; w++) {
 706             dest[2*w+0] = src1[w];
 707             dest[2*w+1] = src2[w];
 708         }
 709         dest += dstStride;
 710         src1 += src1Stride;
 711         src2 += src2Stride;
 712     }
 713 }
 714
 715 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
 716                                  uint8_t *dst1, uint8_t *dst2,
 717                                  long width, long height,
 718                                  long srcStride1, long srcStride2,
 719                                  long dstStride1, long dstStride2)
 720 {
 721     int y;
 722     long x,w,h;
 723     w=width/2; h=height/2;
 724     for (y=0;y<h;y++) {
 725         const uint8_t* s1=src1+srcStride1*(y>>1);
 726         uint8_t* d=dst1+dstStride1*y;
 727         x=0;
 728         for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
 729     }
 730     for (y=0;y<h;y++) {
 731         const uint8_t* s2=src2+srcStride2*(y>>1);
 732         uint8_t* d=dst2+dstStride2*y;
 733         x=0;
 734         for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
 735     }
 736 }
 737
 738 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
 739                                   const uint8_t *src3, uint8_t *dst,
 740                                   long width, long height,
 741                                   long srcStride1, long srcStride2,
 742                                   long srcStride3, long dstStride)
 743 {
 744     int x;
 745     long y,w,h;
 746     w=width/2; h=height;
 747     for (y=0;y<h;y++) {
 748         const uint8_t* yp=src1+srcStride1*y;
 749         const uint8_t* up=src2+srcStride2*(y>>2);
 750         const uint8_t* vp=src3+srcStride3*(y>>2);
 751         uint8_t* d=dst+dstStride*y;
 752         x=0;
 753         for (; x<w; x++) {
 754             const long x2 = x<<2;
 755             d[8*x+0] = yp[x2];
 756             d[8*x+1] = up[x];
 757             d[8*x+2] = yp[x2+1];
 758             d[8*x+3] = vp[x];
 759             d[8*x+4] = yp[x2+2];
 760             d[8*x+5] = up[x];
 761             d[8*x+6] = yp[x2+3];
 762             d[8*x+7] = vp[x];
 763         }
 764     }
 765 }
 766
 767 static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
 768 {
 769     dst +=   count;
 770     src += 2*count;
 771     count= - count;
 772
 773     while(count<0) {
 774         dst[count]= src[2*count];
 775         count++;
 776     }
 777 }
 778
 779 static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 780                             int count)
 781 {
 782     dst0+=   count;
 783     dst1+=   count;
 784     src += 4*count;
 785     count= - count;
 786     while(count<0) {
 787         dst0[count]= src[4*count+0];
 788         dst1[count]= src[4*count+2];
 789         count++;
 790     }
 791 }
 792
 793 static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
 794                                uint8_t *dst0, uint8_t *dst1, int count)
 795 {
 796     dst0 +=   count;
 797     dst1 +=   count;
 798     src0 += 4*count;
 799     src1 += 4*count;
 800     count= - count;
 801     while(count<0) {
 802         dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
 803         dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
 804         count++;
 805     }
 806 }
 807
 808 static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 809                            int count)
 810 {
 811     dst0+=   count;
 812     dst1+=   count;
 813     src += 4*count;
 814     count= - count;
 815     src++;
 816     while(count<0) {
 817         dst0[count]= src[4*count+0];
 818         dst1[count]= src[4*count+2];
 819         count++;
 820     }
 821 }
 822
 823 static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
 824                               uint8_t *dst0, uint8_t *dst1, int count)
 825 {
 826     dst0 +=   count;
 827     dst1 +=   count;
 828     src0 += 4*count;
 829     src1 += 4*count;
 830     count= - count;
 831     src0++;
 832     src1++;
 833     while(count<0) {
 834         dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
 835         dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
 836         count++;
 837     }
 838 }
 839
 840 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 841                            const uint8_t *src, long width, long height,
 842                            long lumStride, long chromStride, long srcStride)
 843 {
 844     long y;
 845     const long chromWidth= -((-width)>>1);
 846
 847     for (y=0; y<height; y++) {
 848         extract_even_c(src, ydst, width);
 849         if(y&1) {
 850             extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
 851             udst+= chromStride;
 852             vdst+= chromStride;
 853         }
 854
 855         src += srcStride;
 856         ydst+= lumStride;
 857     }
 858 }
 859
 860 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 861                            const uint8_t *src, long width, long height,
 862                            long lumStride, long chromStride, long srcStride)
 863 {
 864     long y;
 865     const long chromWidth= -((-width)>>1);
 866
 867     for (y=0; y<height; y++) {
 868         extract_even_c(src, ydst, width);
 869         extract_odd2_c(src, udst, vdst, chromWidth);
 870
 871         src += srcStride;
 872         ydst+= lumStride;
 873         udst+= chromStride;
 874         vdst+= chromStride;
 875     }
 876 }
 877
 878 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 879                            const uint8_t *src, long width, long height,
 880                            long lumStride, long chromStride, long srcStride)
 881 {
 882     long y;
 883     const long chromWidth= -((-width)>>1);
 884
 885     for (y=0; y<height; y++) {
 886         extract_even_c(src + 1, ydst, width);
 887         if(y&1) {
 888             extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
 889             udst+= chromStride;
 890             vdst+= chromStride;
 891         }
 892
 893         src += srcStride;
 894         ydst+= lumStride;
 895     }
 896 }
 897
 898 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 899                            const uint8_t *src, long width, long height,
 900                            long lumStride, long chromStride, long srcStride)
 901 {
 902     long y;
 903     const long chromWidth= -((-width)>>1);
 904
 905     for (y=0; y<height; y++) {
 906         extract_even_c(src + 1, ydst, width);
 907         extract_even2_c(src, udst, vdst, chromWidth);
 908
 909         src += srcStride;
 910         ydst+= lumStride;
 911         udst+= chromStride;
 912         vdst+= chromStride;
 913     }
 914 }
 915
 916 static inline void rgb2rgb_init_c(void)
 917 {
 918     rgb15to16          = rgb15to16_c;
 919     rgb15tobgr24       = rgb15tobgr24_c;
 920     rgb15to32          = rgb15to32_c;
 921     rgb16tobgr24       = rgb16tobgr24_c;
 922     rgb16to32          = rgb16to32_c;
 923     rgb16to15          = rgb16to15_c;
 924     rgb24tobgr16       = rgb24tobgr16_c;
 925     rgb24tobgr15       = rgb24tobgr15_c;
 926     rgb24tobgr32       = rgb24tobgr32_c;
 927     rgb32to16          = rgb32to16_c;
 928     rgb32to15          = rgb32to15_c;
 929     rgb32tobgr24       = rgb32tobgr24_c;
 930     rgb24to15          = rgb24to15_c;
 931     rgb24to16          = rgb24to16_c;
 932     rgb24tobgr24       = rgb24tobgr24_c;
 933     shuffle_bytes_2103 = shuffle_bytes_2103_c;
 934     rgb32tobgr16       = rgb32tobgr16_c;
 935     rgb32tobgr15       = rgb32tobgr15_c;
 936     yv12toyuy2         = yv12toyuy2_c;
 937     yv12touyvy         = yv12touyvy_c;
 938     yuv422ptoyuy2      = yuv422ptoyuy2_c;
 939     yuv422ptouyvy      = yuv422ptouyvy_c;
 940     yuy2toyv12         = yuy2toyv12_c;
 941     planar2x           = planar2x_c;
 942     rgb24toyv12        = rgb24toyv12_c;
 943     interleaveBytes    = interleaveBytes_c;
 944     vu9_to_vu12        = vu9_to_vu12_c;
 945     yvu9_to_yuy2       = yvu9_to_yuy2_c;
 946
 947     uyvytoyuv420       = uyvytoyuv420_c;
 948     uyvytoyuv422       = uyvytoyuv422_c;
 949     yuyvtoyuv420       = yuyvtoyuv420_c;
 950     yuyvtoyuv422       = yuyvtoyuv422_c;
 951 }