git.sesse.net Git - ffmpeg/blob - libswscale/rgb2rgb_template.c

   1 /*
   2  * software RGB to RGB converter
   3  * pluralize by software PAL8 to RGB converter
   4  *              software YUV to YUV converter
   5  *              software YUV to RGB converter
   6  * Written by Nick Kurshev.
   7  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
   8  * lot of big-endian byte order fixes by Alex Beregszaszi
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 #include <stddef.h>
  28
  29 #include "libavutil/attributes.h"
  30
  31 static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
  32                                   int src_size)
  33 {
  34     uint8_t *dest      = dst;
  35     const uint8_t *s   = src;
  36     const uint8_t *end = s + src_size;
  37
  38     while (s < end) {
  39 #if HAVE_BIGENDIAN
  40         /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
  41         *dest++  = 255;
  42         *dest++  = s[2];
  43         *dest++  = s[1];
  44         *dest++  = s[0];
  45         s       += 3;
  46 #else
  47         *dest++  = *s++;
  48         *dest++  = *s++;
  49         *dest++  = *s++;
  50         *dest++  = 255;
  51 #endif
  52     }
  53 }
  54
  55 static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
  56                                   int src_size)
  57 {
  58     uint8_t *dest      = dst;
  59     const uint8_t *s   = src;
  60     const uint8_t *end = s + src_size;
  61
  62     while (s < end) {
  63 #if HAVE_BIGENDIAN
  64         /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
  65         s++;
  66         dest[2]  = *s++;
  67         dest[1]  = *s++;
  68         dest[0]  = *s++;
  69         dest    += 3;
  70 #else
  71         *dest++  = *s++;
  72         *dest++  = *s++;
  73         *dest++  = *s++;
  74         s++;
  75 #endif
  76     }
  77 }
  78
  79 /*
  80  * original by Strepto/Astral
  81  * ported to gcc & bugfixed: A'rpi
  82  * MMXEXT, 3DNOW optimization by Nick Kurshev
  83  * 32-bit C version, and and&add trick by Michael Niedermayer
  84  */
  85 static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
  86 {
  87     register uint8_t *d         = dst;
  88     register const uint8_t *s   = src;
  89     register const uint8_t *end = s + src_size;
  90     const uint8_t *mm_end       = end - 3;
  91
  92     while (s < mm_end) {
  93         register unsigned x = *((const uint32_t *)s);
  94         *((uint32_t *)d)    = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
  95         d += 4;
  96         s += 4;
  97     }
  98     if (s < end) {
  99         register unsigned short x = *((const uint16_t *)s);
 100         *((uint16_t *)d)          = (x & 0x7FFF) + (x & 0x7FE0);
 101     }
 102 }
 103
 104 static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 105 {
 106     register uint8_t *d         = dst;
 107     register const uint8_t *s   = src;
 108     register const uint8_t *end = s + src_size;
 109     const uint8_t *mm_end       = end - 3;
 110
 111     while (s < mm_end) {
 112         register uint32_t x  = *((const uint32_t *)s);
 113         *((uint32_t *)d)     = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
 114         s                   += 4;
 115         d                   += 4;
 116     }
 117     if (s < end) {
 118         register uint16_t x = *((const uint16_t *)s);
 119         *((uint16_t *)d)    = ((x >> 1) & 0x7FE0) | (x & 0x001F);
 120     }
 121 }
 122
 123 static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 124 {
 125     uint16_t *d        = (uint16_t *)dst;
 126     const uint8_t *s   = src;
 127     const uint8_t *end = s + src_size;
 128
 129     while (s < end) {
 130         register int rgb  = *(const uint32_t *)s;
 131         s                += 4;
 132         *d++              = ((rgb & 0xFF)     >> 3) +
 133                             ((rgb & 0xFC00)   >> 5) +
 134                             ((rgb & 0xF80000) >> 8);
 135     }
 136 }
 137
 138 static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
 139                                   int src_size)
 140 {
 141     uint16_t *d        = (uint16_t *)dst;
 142     const uint8_t *s   = src;
 143     const uint8_t *end = s + src_size;
 144
 145     while (s < end) {
 146         register int rgb  = *(const uint32_t *)s;
 147         s                += 4;
 148         *d++              = ((rgb & 0xF8)     << 8) +
 149                             ((rgb & 0xFC00)   >> 5) +
 150                             ((rgb & 0xF80000) >> 19);
 151     }
 152 }
 153
 154 static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 155 {
 156     uint16_t *d        = (uint16_t *)dst;
 157     const uint8_t *s   = src;
 158     const uint8_t *end = s + src_size;
 159
 160     while (s < end) {
 161         register int rgb  = *(const uint32_t *)s;
 162         s                += 4;
 163         *d++              = ((rgb & 0xFF)     >> 3) +
 164                             ((rgb & 0xF800)   >> 6) +
 165                             ((rgb & 0xF80000) >> 9);
 166     }
 167 }
 168
 169 static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
 170                                   int src_size)
 171 {
 172     uint16_t *d        = (uint16_t *)dst;
 173     const uint8_t *s   = src;
 174     const uint8_t *end = s + src_size;
 175
 176     while (s < end) {
 177         register int rgb  = *(const uint32_t *)s;
 178         s                += 4;
 179         *d++              = ((rgb & 0xF8)     <<  7) +
 180                             ((rgb & 0xF800)   >>  6) +
 181                             ((rgb & 0xF80000) >> 19);
 182     }
 183 }
 184
 185 static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
 186                                   int src_size)
 187 {
 188     uint16_t *d        = (uint16_t *)dst;
 189     const uint8_t *s   = src;
 190     const uint8_t *end = s + src_size;
 191
 192     while (s < end) {
 193         const int b = *s++;
 194         const int g = *s++;
 195         const int r = *s++;
 196         *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
 197     }
 198 }
 199
 200 static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 201 {
 202     uint16_t *d        = (uint16_t *)dst;
 203     const uint8_t *s   = src;
 204     const uint8_t *end = s + src_size;
 205
 206     while (s < end) {
 207         const int r = *s++;
 208         const int g = *s++;
 209         const int b = *s++;
 210         *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
 211     }
 212 }
 213
 214 static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
 215                                   int src_size)
 216 {
 217     uint16_t *d        = (uint16_t *)dst;
 218     const uint8_t *s   = src;
 219     const uint8_t *end = s + src_size;
 220
 221     while (s < end) {
 222         const int b = *s++;
 223         const int g = *s++;
 224         const int r = *s++;
 225         *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
 226     }
 227 }
 228
 229 static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 230 {
 231     uint16_t *d        = (uint16_t *)dst;
 232     const uint8_t *s   = src;
 233     const uint8_t *end = s + src_size;
 234
 235     while (s < end) {
 236         const int r = *s++;
 237         const int g = *s++;
 238         const int b = *s++;
 239         *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
 240     }
 241 }
 242
 243 static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
 244                                   int src_size)
 245 {
 246     uint8_t *d          = dst;
 247     const uint16_t *s   = (const uint16_t *)src;
 248     const uint16_t *end = s + src_size / 2;
 249
 250     while (s < end) {
 251         register uint16_t bgr = *s++;
 252         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 253         *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
 254         *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
 255     }
 256 }
 257
 258 static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
 259                                   int src_size)
 260 {
 261     uint8_t *d          = (uint8_t *)dst;
 262     const uint16_t *s   = (const uint16_t *)src;
 263     const uint16_t *end = s + src_size / 2;
 264
 265     while (s < end) {
 266         register uint16_t bgr = *s++;
 267         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 268         *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
 269         *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
 270     }
 271 }
 272
 273 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 274 {
 275     uint8_t *d          = dst;
 276     const uint16_t *s   = (const uint16_t *)src;
 277     const uint16_t *end = s + src_size / 2;
 278
 279     while (s < end) {
 280         register uint16_t bgr = *s++;
 281 #if HAVE_BIGENDIAN
 282         *d++ = 255;
 283         *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
 284         *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
 285         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 286 #else
 287         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 288         *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
 289         *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
 290         *d++ = 255;
 291 #endif
 292     }
 293 }
 294
 295 static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 296 {
 297     uint8_t *d          = dst;
 298     const uint16_t *s   = (const uint16_t *)src;
 299     const uint16_t *end = s + src_size / 2;
 300
 301     while (s < end) {
 302         register uint16_t bgr = *s++;
 303 #if HAVE_BIGENDIAN
 304         *d++ = 255;
 305         *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
 306         *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
 307         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 308 #else
 309         *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
 310         *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
 311         *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
 312         *d++ = 255;
 313 #endif
 314     }
 315 }
 316
 317 static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
 318                                         int src_size)
 319 {
 320     int idx          = 15  - src_size;
 321     const uint8_t *s = src - idx;
 322     uint8_t *d       = dst - idx;
 323
 324     for (; idx < 15; idx += 4) {
 325         register int v        = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
 326         v                    &= 0xff00ff;
 327         *(uint32_t *)&d[idx]  = (v >> 16) + g + (v << 16);
 328     }
 329 }
 330
 331 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 332 {
 333     unsigned i;
 334
 335     for (i = 0; i < src_size; i += 3) {
 336         register uint8_t x = src[i + 2];
 337         dst[i + 1]         = src[i + 1];
 338         dst[i + 2]         = src[i + 0];
 339         dst[i + 0]         = x;
 340     }
 341 }
 342
 343 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 344                                      const uint8_t *vsrc, uint8_t *dst,
 345                                      int width, int height,
 346                                      int lumStride, int chromStride,
 347                                      int dstStride, int vertLumPerChroma)
 348 {
 349     int y, i;
 350     const int chromWidth = width >> 1;
 351
 352     for (y = 0; y < height; y++) {
 353 #if HAVE_FAST_64BIT
 354         uint64_t *ldst = (uint64_t *)dst;
 355         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 356         for (i = 0; i < chromWidth; i += 2) {
 357             uint64_t k = yc[0] + (uc[0] << 8) +
 358                          (yc[1] << 16) + (unsigned)(vc[0] << 24);
 359             uint64_t l = yc[2] + (uc[1] << 8) +
 360                          (yc[3] << 16) + (unsigned)(vc[1] << 24);
 361             *ldst++ = k + (l << 32);
 362             yc     += 4;
 363             uc     += 2;
 364             vc     += 2;
 365         }
 366
 367 #else
 368         int *idst = (int32_t *)dst;
 369         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 370
 371         for (i = 0; i < chromWidth; i++) {
 372 #if HAVE_BIGENDIAN
 373             *idst++ = (yc[0] << 24) + (uc[0] << 16) +
 374                       (yc[1] <<  8) + (vc[0] <<  0);
 375 #else
 376             *idst++ = yc[0] + (uc[0] << 8) +
 377                       (yc[1] << 16) + (vc[0] << 24);
 378 #endif
 379             yc += 2;
 380             uc++;
 381             vc++;
 382         }
 383 #endif
 384         if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
 385             usrc += chromStride;
 386             vsrc += chromStride;
 387         }
 388         ysrc += lumStride;
 389         dst  += dstStride;
 390     }
 391 }
 392
 393 /**
 394  * Height should be a multiple of 2 and width should be a multiple of 16.
 395  * (If this is a problem for anyone then tell me, and I will fix it.)
 396  */
 397 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 398                                 const uint8_t *vsrc, uint8_t *dst,
 399                                 int width, int height, int lumStride,
 400                                 int chromStride, int dstStride)
 401 {
 402     //FIXME interpolate chroma
 403     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 404                       chromStride, dstStride, 2);
 405 }
 406
 407 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 408                                      const uint8_t *vsrc, uint8_t *dst,
 409                                      int width, int height,
 410                                      int lumStride, int chromStride,
 411                                      int dstStride, int vertLumPerChroma)
 412 {
 413     int y, i;
 414     const int chromWidth = width >> 1;
 415
 416     for (y = 0; y < height; y++) {
 417 #if HAVE_FAST_64BIT
 418         uint64_t *ldst = (uint64_t *)dst;
 419         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 420         for (i = 0; i < chromWidth; i += 2) {
 421             uint64_t k = uc[0] + (yc[0] << 8) +
 422                          (vc[0] << 16) + (unsigned)(yc[1] << 24);
 423             uint64_t l = uc[1] + (yc[2] << 8) +
 424                          (vc[1] << 16) + (unsigned)(yc[3] << 24);
 425             *ldst++ = k + (l << 32);
 426             yc     += 4;
 427             uc     += 2;
 428             vc     += 2;
 429         }
 430
 431 #else
 432         int *idst = (int32_t *)dst;
 433         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
 434
 435         for (i = 0; i < chromWidth; i++) {
 436 #if HAVE_BIGENDIAN
 437             *idst++ = (uc[0] << 24) + (yc[0] << 16) +
 438                       (vc[0] <<  8) + (yc[1] <<  0);
 439 #else
 440             *idst++ = uc[0] + (yc[0] << 8) +
 441                       (vc[0] << 16) + (yc[1] << 24);
 442 #endif
 443             yc += 2;
 444             uc++;
 445             vc++;
 446         }
 447 #endif
 448         if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
 449             usrc += chromStride;
 450             vsrc += chromStride;
 451         }
 452         ysrc += lumStride;
 453         dst  += dstStride;
 454     }
 455 }
 456
 457 /**
 458  * Height should be a multiple of 2 and width should be a multiple of 16
 459  * (If this is a problem for anyone then tell me, and I will fix it.)
 460  */
 461 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 462                                 const uint8_t *vsrc, uint8_t *dst,
 463                                 int width, int height, int lumStride,
 464                                 int chromStride, int dstStride)
 465 {
 466     //FIXME interpolate chroma
 467     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 468                       chromStride, dstStride, 2);
 469 }
 470
 471 /**
 472  * Width should be a multiple of 16.
 473  */
 474 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 475                                    const uint8_t *vsrc, uint8_t *dst,
 476                                    int width, int height, int lumStride,
 477                                    int chromStride, int dstStride)
 478 {
 479     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 480                       chromStride, dstStride, 1);
 481 }
 482
 483 /**
 484  * Width should be a multiple of 16.
 485  */
 486 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 487                                    const uint8_t *vsrc, uint8_t *dst,
 488                                    int width, int height, int lumStride,
 489                                    int chromStride, int dstStride)
 490 {
 491     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
 492                       chromStride, dstStride, 1);
 493 }
 494
 495 /**
 496  * Height should be a multiple of 2 and width should be a multiple of 16.
 497  * (If this is a problem for anyone then tell me, and I will fix it.)
 498  */
 499 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
 500                                 uint8_t *udst, uint8_t *vdst,
 501                                 int width, int height, int lumStride,
 502                                 int chromStride, int srcStride)
 503 {
 504     int y;
 505     const int chromWidth = width >> 1;
 506
 507     for (y = 0; y < height; y += 2) {
 508         int i;
 509         for (i = 0; i < chromWidth; i++) {
 510             ydst[2 * i + 0] = src[4 * i + 0];
 511             udst[i]         = src[4 * i + 1];
 512             ydst[2 * i + 1] = src[4 * i + 2];
 513             vdst[i]         = src[4 * i + 3];
 514         }
 515         ydst += lumStride;
 516         src  += srcStride;
 517
 518         for (i = 0; i < chromWidth; i++) {
 519             ydst[2 * i + 0] = src[4 * i + 0];
 520             ydst[2 * i + 1] = src[4 * i + 2];
 521         }
 522         udst += chromStride;
 523         vdst += chromStride;
 524         ydst += lumStride;
 525         src  += srcStride;
 526     }
 527 }
 528
 529 static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
 530                               int srcHeight, int srcStride, int dstStride)
 531 {
 532     int x, y;
 533
 534     dst[0] = src[0];
 535
 536     // first line
 537     for (x = 0; x < srcWidth - 1; x++) {
 538         dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
 539         dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
 540     }
 541     dst[2 * srcWidth - 1] = src[srcWidth - 1];
 542
 543     dst += dstStride;
 544
 545     for (y = 1; y < srcHeight; y++) {
 546         const int mmxSize = 1;
 547
 548         dst[0]         = (src[0] * 3 + src[srcStride]) >> 2;
 549         dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
 550
 551         for (x = mmxSize - 1; x < srcWidth - 1; x++) {
 552             dst[2 * x + 1]             = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
 553             dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
 554             dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride])     >> 2;
 555             dst[2 * x + 2]             = (src[x + 1] * 3 + src[x + srcStride])     >> 2;
 556         }
 557         dst[srcWidth * 2 - 1]             = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
 558         dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
 559
 560         dst += dstStride * 2;
 561         src += srcStride;
 562     }
 563
 564     // last line
 565     dst[0] = src[0];
 566
 567     for (x = 0; x < srcWidth - 1; x++) {
 568         dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
 569         dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
 570     }
 571     dst[2 * srcWidth - 1] = src[srcWidth - 1];
 572 }
 573
 574 /**
 575  * Height should be a multiple of 2 and width should be a multiple of 16.
 576  * (If this is a problem for anyone then tell me, and I will fix it.)
 577  * Chrominance data is only taken from every second line, others are ignored.
 578  * FIXME: Write HQ version.
 579  */
 580 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
 581                                 uint8_t *udst, uint8_t *vdst,
 582                                 int width, int height, int lumStride,
 583                                 int chromStride, int srcStride)
 584 {
 585     int y;
 586     const int chromWidth = width >> 1;
 587
 588     for (y = 0; y < height; y += 2) {
 589         int i;
 590         for (i = 0; i < chromWidth; i++) {
 591             udst[i]         = src[4 * i + 0];
 592             ydst[2 * i + 0] = src[4 * i + 1];
 593             vdst[i]         = src[4 * i + 2];
 594             ydst[2 * i + 1] = src[4 * i + 3];
 595         }
 596         ydst += lumStride;
 597         src  += srcStride;
 598
 599         for (i = 0; i < chromWidth; i++) {
 600             ydst[2 * i + 0] = src[4 * i + 1];
 601             ydst[2 * i + 1] = src[4 * i + 3];
 602         }
 603         udst += chromStride;
 604         vdst += chromStride;
 605         ydst += lumStride;
 606         src  += srcStride;
 607     }
 608 }
 609
 610 /**
 611  * Height should be a multiple of 2 and width should be a multiple of 2.
 612  * (If this is a problem for anyone then tell me, and I will fix it.)
 613  * Chrominance data is only taken from every second line,
 614  * others are ignored in the C version.
 615  * FIXME: Write HQ version.
 616  */
 617 void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 618                    uint8_t *vdst, int width, int height, int lumStride,
 619                    int chromStride, int srcStride, int32_t *rgb2yuv)
 620 {
 621     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
 622     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
 623     int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
 624     int y;
 625     const int chromWidth = width >> 1;
 626
 627     for (y = 0; y < height; y += 2) {
 628         int i;
 629         for (i = 0; i < chromWidth; i++) {
 630             unsigned int b = src[6 * i + 0];
 631             unsigned int g = src[6 * i + 1];
 632             unsigned int r = src[6 * i + 2];
 633
 634             unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
 635             unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
 636             unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
 637
 638             udst[i]     = U;
 639             vdst[i]     = V;
 640             ydst[2 * i] = Y;
 641
 642             b = src[6 * i + 3];
 643             g = src[6 * i + 4];
 644             r = src[6 * i + 5];
 645
 646             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
 647             ydst[2 * i + 1] = Y;
 648         }
 649         ydst += lumStride;
 650         src  += srcStride;
 651
 652         if (y+1 == height)
 653             break;
 654
 655         for (i = 0; i < chromWidth; i++) {
 656             unsigned int b = src[6 * i + 0];
 657             unsigned int g = src[6 * i + 1];
 658             unsigned int r = src[6 * i + 2];
 659
 660             unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
 661
 662             ydst[2 * i] = Y;
 663
 664             b = src[6 * i + 3];
 665             g = src[6 * i + 4];
 666             r = src[6 * i + 5];
 667
 668             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
 669             ydst[2 * i + 1] = Y;
 670         }
 671         udst += chromStride;
 672         vdst += chromStride;
 673         ydst += lumStride;
 674         src  += srcStride;
 675     }
 676 }
 677
 678 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
 679                               uint8_t *dest, int width, int height,
 680                               int src1Stride, int src2Stride, int dstStride)
 681 {
 682     int h;
 683
 684     for (h = 0; h < height; h++) {
 685         int w;
 686         for (w = 0; w < width; w++) {
 687             dest[2 * w + 0] = src1[w];
 688             dest[2 * w + 1] = src2[w];
 689         }
 690         dest += dstStride;
 691         src1 += src1Stride;
 692         src2 += src2Stride;
 693     }
 694 }
 695
 696 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
 697                                  uint8_t *dst1, uint8_t *dst2,
 698                                  int width, int height,
 699                                  int srcStride1, int srcStride2,
 700                                  int dstStride1, int dstStride2)
 701 {
 702     int x, y;
 703     int w = width  / 2;
 704     int h = height / 2;
 705
 706     for (y = 0; y < h; y++) {
 707         const uint8_t *s1 = src1 + srcStride1 * (y >> 1);
 708         uint8_t *d        = dst1 + dstStride1 *  y;
 709         for (x = 0; x < w; x++)
 710             d[2 * x] = d[2 * x + 1] = s1[x];
 711     }
 712     for (y = 0; y < h; y++) {
 713         const uint8_t *s2 = src2 + srcStride2 * (y >> 1);
 714         uint8_t *d        = dst2 + dstStride2 *  y;
 715         for (x = 0; x < w; x++)
 716             d[2 * x] = d[2 * x + 1] = s2[x];
 717     }
 718 }
 719
 720 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
 721                                   const uint8_t *src3, uint8_t *dst,
 722                                   int width, int height,
 723                                   int srcStride1, int srcStride2,
 724                                   int srcStride3, int dstStride)
 725 {
 726     int x, y;
 727     int w = width / 2;
 728     int h = height;
 729
 730     for (y = 0; y < h; y++) {
 731         const uint8_t *yp = src1 + srcStride1 *  y;
 732         const uint8_t *up = src2 + srcStride2 * (y >> 2);
 733         const uint8_t *vp = src3 + srcStride3 * (y >> 2);
 734         uint8_t *d        = dst  + dstStride  *  y;
 735         for (x = 0; x < w; x++) {
 736             const int x2 = x << 2;
 737             d[8 * x + 0] = yp[x2];
 738             d[8 * x + 1] = up[x];
 739             d[8 * x + 2] = yp[x2 + 1];
 740             d[8 * x + 3] = vp[x];
 741             d[8 * x + 4] = yp[x2 + 2];
 742             d[8 * x + 5] = up[x];
 743             d[8 * x + 6] = yp[x2 + 3];
 744             d[8 * x + 7] = vp[x];
 745         }
 746     }
 747 }
 748
 749 static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
 750 {
 751     dst   +=  count;
 752     src   +=  count * 2;
 753     count  = -count;
 754     while (count < 0) {
 755         dst[count] = src[2 * count];
 756         count++;
 757     }
 758 }
 759
 760 static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 761                             int count)
 762 {
 763     dst0  +=  count;
 764     dst1  +=  count;
 765     src   +=  count * 4;
 766     count  = -count;
 767     while (count < 0) {
 768         dst0[count] = src[4 * count + 0];
 769         dst1[count] = src[4 * count + 2];
 770         count++;
 771     }
 772 }
 773
 774 static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
 775                                uint8_t *dst0, uint8_t *dst1, int count)
 776 {
 777     dst0  +=  count;
 778     dst1  +=  count;
 779     src0  +=  count * 4;
 780     src1  +=  count * 4;
 781     count  = -count;
 782     while (count < 0) {
 783         dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
 784         dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
 785         count++;
 786     }
 787 }
 788
 789 static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 790                            int count)
 791 {
 792     dst0  +=  count;
 793     dst1  +=  count;
 794     src   +=  count * 4;
 795     count  = -count;
 796     src++;
 797     while (count < 0) {
 798         dst0[count] = src[4 * count + 0];
 799         dst1[count] = src[4 * count + 2];
 800         count++;
 801     }
 802 }
 803
 804 static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
 805                               uint8_t *dst0, uint8_t *dst1, int count)
 806 {
 807     dst0  +=  count;
 808     dst1  +=  count;
 809     src0  +=  count * 4;
 810     src1  +=  count * 4;
 811     count  = -count;
 812     src0++;
 813     src1++;
 814     while (count < 0) {
 815         dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
 816         dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
 817         count++;
 818     }
 819 }
 820
 821 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 822                            const uint8_t *src, int width, int height,
 823                            int lumStride, int chromStride, int srcStride)
 824 {
 825     int y;
 826     const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 827
 828     for (y = 0; y < height; y++) {
 829         extract_even_c(src, ydst, width);
 830         if (y & 1) {
 831             extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
 832             udst += chromStride;
 833             vdst += chromStride;
 834         }
 835
 836         src  += srcStride;
 837         ydst += lumStride;
 838     }
 839 }
 840
 841 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 842                            const uint8_t *src, int width, int height,
 843                            int lumStride, int chromStride, int srcStride)
 844 {
 845     int y;
 846     const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 847
 848     for (y = 0; y < height; y++) {
 849         extract_even_c(src, ydst, width);
 850         extract_odd2_c(src, udst, vdst, chromWidth);
 851
 852         src  += srcStride;
 853         ydst += lumStride;
 854         udst += chromStride;
 855         vdst += chromStride;
 856     }
 857 }
 858
 859 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 860                            const uint8_t *src, int width, int height,
 861                            int lumStride, int chromStride, int srcStride)
 862 {
 863     int y;
 864     const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 865
 866     for (y = 0; y < height; y++) {
 867         extract_even_c(src + 1, ydst, width);
 868         if (y & 1) {
 869             extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
 870             udst += chromStride;
 871             vdst += chromStride;
 872         }
 873
 874         src  += srcStride;
 875         ydst += lumStride;
 876     }
 877 }
 878
 879 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 880                            const uint8_t *src, int width, int height,
 881                            int lumStride, int chromStride, int srcStride)
 882 {
 883     int y;
 884     const int chromWidth = FF_CEIL_RSHIFT(width, 1);
 885
 886     for (y = 0; y < height; y++) {
 887         extract_even_c(src + 1, ydst, width);
 888         extract_even2_c(src, udst, vdst, chromWidth);
 889
 890         src  += srcStride;
 891         ydst += lumStride;
 892         udst += chromStride;
 893         vdst += chromStride;
 894     }
 895 }
 896
 897 static av_cold void rgb2rgb_init_c(void)
 898 {
 899     rgb15to16          = rgb15to16_c;
 900     rgb15tobgr24       = rgb15tobgr24_c;
 901     rgb15to32          = rgb15to32_c;
 902     rgb16tobgr24       = rgb16tobgr24_c;
 903     rgb16to32          = rgb16to32_c;
 904     rgb16to15          = rgb16to15_c;
 905     rgb24tobgr16       = rgb24tobgr16_c;
 906     rgb24tobgr15       = rgb24tobgr15_c;
 907     rgb24tobgr32       = rgb24tobgr32_c;
 908     rgb32to16          = rgb32to16_c;
 909     rgb32to15          = rgb32to15_c;
 910     rgb32tobgr24       = rgb32tobgr24_c;
 911     rgb24to15          = rgb24to15_c;
 912     rgb24to16          = rgb24to16_c;
 913     rgb24tobgr24       = rgb24tobgr24_c;
 914     shuffle_bytes_2103 = shuffle_bytes_2103_c;
 915     rgb32tobgr16       = rgb32tobgr16_c;
 916     rgb32tobgr15       = rgb32tobgr15_c;
 917     yv12toyuy2         = yv12toyuy2_c;
 918     yv12touyvy         = yv12touyvy_c;
 919     yuv422ptoyuy2      = yuv422ptoyuy2_c;
 920     yuv422ptouyvy      = yuv422ptouyvy_c;
 921     yuy2toyv12         = yuy2toyv12_c;
 922     planar2x           = planar2x_c;
 923     ff_rgb24toyv12     = ff_rgb24toyv12_c;
 924     interleaveBytes    = interleaveBytes_c;
 925     vu9_to_vu12        = vu9_to_vu12_c;
 926     yvu9_to_yuy2       = yvu9_to_yuy2_c;
 927
 928     uyvytoyuv420       = uyvytoyuv420_c;
 929     uyvytoyuv422       = uyvytoyuv422_c;
 930     yuyvtoyuv420       = yuyvtoyuv420_c;
 931     yuyvtoyuv422       = yuyvtoyuv422_c;
 932 }