git.sesse.net Git - ffmpeg/blob - postproc/yuv2rgb.c

   1 /*
   2  * yuv2rgb.c, Software YUV to RGB converter
   3  *
   4  *  Copyright (C) 1999, Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
   5  *  All Rights Reserved.
   6  *
   7  *  Functions broken out from display_x11.c and several new modes
   8  *  added by Håkan Hjort <d95hjort@dtek.chalmers.se>
   9  *
  10  *  15 & 16 bpp support by Franck Sicard <Franck.Sicard@solsoft.fr>
  11  *
  12  *  This file is part of mpeg2dec, a free MPEG-2 video decoder
  13  *
  14  *  mpeg2dec is free software; you can redistribute it and/or modify
  15  *  it under the terms of the GNU General Public License as published by
  16  *  the Free Software Foundation; either version 2, or (at your option)
  17  *  any later version.
  18  *
  19  *  mpeg2dec is distributed in the hope that it will be useful,
  20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  *  GNU General Public License for more details.
  23  *
  24  *  You should have received a copy of the GNU General Public License
  25  *  along with GNU Make; see the file COPYING.  If not, write to
  26  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  27  *
  28  * MMX/MMX2 Template stuff from Michael Niedermayer (michaelni@gmx.at) (needed for fast movntq support)
  29  */
  30
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <inttypes.h>
  34
  35 #include "config.h"
  36 //#include "video_out.h"
  37 #include "rgb2rgb.h"
  38 #include "../cpudetect.h"
  39 #include "../mangle.h"
  40 #include "../mp_msg.h"
  41
  42 #ifdef HAVE_MLIB
  43 #include "yuv2rgb_mlib.c"
  44 #endif
  45
  46 #define DITHER1XBPP // only for mmx
  47
  48 #ifdef ARCH_X86
  49 #define CAN_COMPILE_X86_ASM
  50 #endif
  51
  52 #ifdef CAN_COMPILE_X86_ASM
  53
  54 /* hope these constant values are cache line aligned */
  55 uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;
  56 uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;
  57 uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff;
  58 uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f;
  59
  60 /* hope these constant values are cache line aligned */
  61 uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d;
  62 uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093;
  63 uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312;
  64 uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc;
  65
  66 /* hope these constant values are cache line aligned */
  67 uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8;
  68 uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
  69
  70 uint64_t __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
  71 uint64_t __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
  72 uint64_t __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;
  73
  74 // the volatile is required because gcc otherwise optimizes some writes away not knowing that these
  75 // are read in the asm block
  76 volatile uint64_t __attribute__((aligned(8))) b5Dither;
  77 volatile uint64_t __attribute__((aligned(8))) g5Dither;
  78 volatile uint64_t __attribute__((aligned(8))) g6Dither;
  79 volatile uint64_t __attribute__((aligned(8))) r5Dither;
  80
  81 uint64_t __attribute__((aligned(8))) dither4[2]={
  82         0x0103010301030103LL,
  83         0x0200020002000200LL,};
  84
  85 uint64_t __attribute__((aligned(8))) dither8[2]={
  86         0x0602060206020602LL,
  87         0x0004000400040004LL,};
  88
  89 #undef HAVE_MMX
  90 #undef ARCH_X86
  91
     /*
      * yuv2rgb_template.c is included twice below.  Before each inclusion the
      * feature macros are reset and RENAME() is redefined so that identifiers
      * wrapped in RENAME() get a distinct suffix (_MMX, then _MMX2).  The
      * order of the #undef/#define lines matters: each inclusion sees exactly
      * the macro state set up immediately above it.
      * NOTE(review): yuv2rgb_init_MMX / yuv2rgb_init_MMX2, called from
      * yuv2rgb_init(), are presumably produced by these inclusions; confirm
      * against yuv2rgb_template.c.
      */
  92 //MMX versions
     /* First instance: HAVE_MMX defined, HAVE_MMX2 and HAVE_3DNOW undefined. */
  93 #undef RENAME
  94 #define HAVE_MMX
  95 #undef HAVE_MMX2
  96 #undef HAVE_3DNOW
  97 #define ARCH_X86
  98 #define RENAME(a) a ## _MMX
  99 #include "yuv2rgb_template.c"
 100
 101 //MMX2 versions
     /* Second instance: HAVE_MMX and HAVE_MMX2 defined, HAVE_3DNOW undefined. */
 102 #undef RENAME
 103 #define HAVE_MMX
 104 #define HAVE_MMX2
 105 #undef HAVE_3DNOW
 106 #define ARCH_X86
 107 #define RENAME(a) a ## _MMX2
 108 #include "yuv2rgb_template.c"
 109
 110 #endif // CAN_COMPILE_X86_ASM
 111
 112
 113 uint32_t matrix_coefficients = 6;
 114
 115 const int32_t Inverse_Table_6_9[8][4] = {
 116     {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
 117     {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
 118     {104597, 132201, 25675, 53279}, /* unspecified */
 119     {104597, 132201, 25675, 53279}, /* reserved */
 120     {104448, 132798, 24759, 53109}, /* FCC */
 121     {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
 122     {104597, 132201, 25675, 53279}, /* SMPTE 170M */
 123     {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
 124 };
 125
 126 static void yuv2rgb_c_init (int bpp, int mode);
 127
 128 yuv2rgb_fun yuv2rgb;
 129
 130 static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *,
 131                                     uint8_t *, uint8_t *,
 132                                     void *, void *, int);
 133
 134 static void yuv2rgb_c (void * dst, uint8_t * py,
 135                        uint8_t * pu, uint8_t * pv,
 136                        int h_size, int v_size,
 137                        int rgb_stride, int y_stride, int uv_stride)
 138 {
 139     v_size >>= 1;
 140
 141     while (v_size--) {
 142         yuv2rgb_c_internal (py, py + y_stride, pu, pv, dst, dst + rgb_stride,
 143                             h_size);
 144
 145         py += 2 * y_stride;
 146         pu += uv_stride;
 147         pv += uv_stride;
 148         dst += 2 * rgb_stride;
 149     }
 150 }
 151
 152 void yuv2rgb_init (int bpp, int mode)
 153 {
 154     yuv2rgb = NULL;
 155 #ifdef CAN_COMPILE_X86_ASM
 156     if(gCpuCaps.hasMMX2)
 157     {
 158         if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
 159                 yuv2rgb = yuv2rgb_init_MMX2 (bpp, mode);
 160                 if (yuv2rgb != NULL)
 161                         mp_msg(MSGT_SWS,MSGL_INFO,"Using MMX2 for colorspace transform\n");
 162                 else
 163                         mp_msg(MSGT_SWS,MSGL_WARN,"Cannot init MMX2 colorspace transform\n");
 164         }
 165     }
 166     else if(gCpuCaps.hasMMX)
 167     {
 168         if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
 169                 yuv2rgb = yuv2rgb_init_MMX (bpp, mode);
 170                 if (yuv2rgb != NULL)
 171                         mp_msg(MSGT_SWS,MSGL_INFO,"Using MMX for colorspace transform\n");
 172                 else
 173                         mp_msg(MSGT_SWS,MSGL_WARN,"Cannot init MMX colorspace transform\n");
 174         }
 175     }
 176 #endif
 177 #ifdef HAVE_MLIB
 178     if (yuv2rgb == NULL /*&& (config.flags & VO_MLIB_ENABLE)*/) {
 179         yuv2rgb = yuv2rgb_init_mlib (bpp, mode);
 180         if (yuv2rgb != NULL)
 181             mp_msg(MSGT_SWS,MSGL_INFO,"Using mlib for colorspace transform\n");
 182     }
 183 #endif
 184     if (yuv2rgb == NULL) {
 185         mp_msg(MSGT_SWS,MSGL_INFO,"No accelerated colorspace conversion found\n");
 186         yuv2rgb_c_init (bpp, mode);
 187         yuv2rgb = (yuv2rgb_fun)yuv2rgb_c;
 188     }
 189 }
 190
 191 void * table_rV[256];
 192 void * table_gU[256];
 193 int table_gV[256];
 194 void * table_bU[256];
 195
 196 #define RGB(i)                                  \
 197         U = pu[i];                              \
 198         V = pv[i];                              \
 199         r = table_rV[V];                        \
 200         g = table_gU[U] + table_gV[V];          \
 201         b = table_bU[U];
 202
 203 #define DST1(i)                                 \
 204         Y = py_1[2*i];                          \
 205         dst_1[2*i] = r[Y] + g[Y] + b[Y];        \
 206         Y = py_1[2*i+1];                        \
 207         dst_1[2*i+1] = r[Y] + g[Y] + b[Y];
 208
 209 #define DST2(i)                                 \
 210         Y = py_2[2*i];                          \
 211         dst_2[2*i] = r[Y] + g[Y] + b[Y];        \
 212         Y = py_2[2*i+1];                        \
 213         dst_2[2*i+1] = r[Y] + g[Y] + b[Y];
 214
 215 #define DST1RGB(i)                                                      \
 216         Y = py_1[2*i];                                                  \
 217         dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y];    \
 218         Y = py_1[2*i+1];                                                \
 219         dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y];
 220
 221 #define DST2RGB(i)                                                      \
 222         Y = py_2[2*i];                                                  \
 223         dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y];    \
 224         Y = py_2[2*i+1];                                                \
 225         dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y];
 226
 227 #define DST1BGR(i)                                                      \
 228         Y = py_1[2*i];                                                  \
 229         dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y];    \
 230         Y = py_1[2*i+1];                                                \
 231         dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y];
 232
 233 #define DST2BGR(i)                                                      \
 234         Y = py_2[2*i];                                                  \
 235         dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y];    \
 236         Y = py_2[2*i+1];                                                \
 237         dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y];
 238
 239 static void yuv2rgb_c_32 (uint8_t * py_1, uint8_t * py_2,
 240                           uint8_t * pu, uint8_t * pv,
 241                           void * _dst_1, void * _dst_2, int h_size)
 242 {
 243     int U, V, Y;
 244     uint32_t * r, * g, * b;
 245     uint32_t * dst_1, * dst_2;
 246
 247     h_size >>= 3;
 248     dst_1 = _dst_1;
 249     dst_2 = _dst_2;
 250
 251     while (h_size--) {
 252         RGB(0);
 253         DST1(0);
 254         DST2(0);
 255
 256         RGB(1);
 257         DST2(1);
 258         DST1(1);
 259
 260         RGB(2);
 261         DST1(2);
 262         DST2(2);
 263
 264         RGB(3);
 265         DST2(3);
 266         DST1(3);
 267
 268         pu += 4;
 269         pv += 4;
 270         py_1 += 8;
 271         py_2 += 8;
 272         dst_1 += 8;
 273         dst_2 += 8;
 274     }
 275 }
 276
 277 // This is very near from the yuv2rgb_c_32 code
 278 static void yuv2rgb_c_24_rgb (uint8_t * py_1, uint8_t * py_2,
 279                               uint8_t * pu, uint8_t * pv,
 280                               void * _dst_1, void * _dst_2, int h_size)
 281 {
 282     int U, V, Y;
 283     uint8_t * r, * g, * b;
 284     uint8_t * dst_1, * dst_2;
 285
 286     h_size >>= 3;
 287     dst_1 = _dst_1;
 288     dst_2 = _dst_2;
 289
 290     while (h_size--) {
 291         RGB(0);
 292         DST1RGB(0);
 293         DST2RGB(0);
 294
 295         RGB(1);
 296         DST2RGB(1);
 297         DST1RGB(1);
 298
 299         RGB(2);
 300         DST1RGB(2);
 301         DST2RGB(2);
 302
 303         RGB(3);
 304         DST2RGB(3);
 305         DST1RGB(3);
 306
 307         pu += 4;
 308         pv += 4;
 309         py_1 += 8;
 310         py_2 += 8;
 311         dst_1 += 24;
 312         dst_2 += 24;
 313     }
 314 }
 315
 316 // only trivial mods from yuv2rgb_c_24_rgb
 317 static void yuv2rgb_c_24_bgr (uint8_t * py_1, uint8_t * py_2,
 318                               uint8_t * pu, uint8_t * pv,
 319                               void * _dst_1, void * _dst_2, int h_size)
 320 {
 321     int U, V, Y;
 322     uint8_t * r, * g, * b;
 323     uint8_t * dst_1, * dst_2;
 324
 325     h_size >>= 3;
 326     dst_1 = _dst_1;
 327     dst_2 = _dst_2;
 328
 329     while (h_size--) {
 330         RGB(0);
 331         DST1BGR(0);
 332         DST2BGR(0);
 333
 334         RGB(1);
 335         DST2BGR(1);
 336         DST1BGR(1);
 337
 338         RGB(2);
 339         DST1BGR(2);
 340         DST2BGR(2);
 341
 342         RGB(3);
 343         DST2BGR(3);
 344         DST1BGR(3);
 345
 346         pu += 4;
 347         pv += 4;
 348         py_1 += 8;
 349         py_2 += 8;
 350         dst_1 += 24;
 351         dst_2 += 24;
 352     }
 353 }
 354
 355 // This is exactly the same code as yuv2rgb_c_32 except for the types of
 356 // r, g, b, dst_1, dst_2
 357 static void yuv2rgb_c_16 (uint8_t * py_1, uint8_t * py_2,
 358                           uint8_t * pu, uint8_t * pv,
 359                           void * _dst_1, void * _dst_2, int h_size)
 360 {
 361     int U, V, Y;
 362     uint16_t * r, * g, * b;
 363     uint16_t * dst_1, * dst_2;
 364
 365     h_size >>= 3;
 366     dst_1 = _dst_1;
 367     dst_2 = _dst_2;
 368
 369     while (h_size--) {
 370         RGB(0);
 371         DST1(0);
 372         DST2(0);
 373
 374         RGB(1);
 375         DST2(1);
 376         DST1(1);
 377
 378         RGB(2);
 379         DST1(2);
 380         DST2(2);
 381
 382         RGB(3);
 383         DST2(3);
 384         DST1(3);
 385
 386         pu += 4;
 387         pv += 4;
 388         py_1 += 8;
 389         py_2 += 8;
 390         dst_1 += 8;
 391         dst_2 += 8;
 392     }
 393 }
 394
 395 // This is exactly the same code as yuv2rgb_c_32 except for the types of
 396 // r, g, b, dst_1, dst_2
 397 static void yuv2rgb_c_8  (uint8_t * py_1, uint8_t * py_2,
 398                           uint8_t * pu, uint8_t * pv,
 399                           void * _dst_1, void * _dst_2, int h_size)
 400 {
 401     int U, V, Y;
 402     uint8_t * r, * g, * b;
 403     uint8_t * dst_1, * dst_2;
 404
 405     h_size >>= 3;
 406     dst_1 = _dst_1;
 407     dst_2 = _dst_2;
 408
 409     while (h_size--) {
 410         RGB(0);
 411         DST1(0);
 412         DST2(0);
 413
 414         RGB(1);
 415         DST2(1);
 416         DST1(1);
 417
 418         RGB(2);
 419         DST1(2);
 420         DST2(2);
 421
 422         RGB(3);
 423         DST2(3);
 424         DST1(3);
 425
 426         pu += 4;
 427         pv += 4;
 428         py_1 += 8;
 429         py_2 += 8;
 430         dst_1 += 8;
 431         dst_2 += 8;
 432     }
 433 }
 434
 435
 436 static int div_round (int dividend, int divisor)
 437 {
 438     if (dividend > 0)
 439         return (dividend + (divisor>>1)) / divisor;
 440     else
 441         return -((-dividend + (divisor>>1)) / divisor);
 442 }
 443
 444 static void yuv2rgb_c_init (int bpp, int mode)
 445 {
 446     int i;
 447     uint8_t table_Y[1024];
 448     uint32_t *table_32 = 0;
 449     uint16_t *table_16 = 0;
 450     uint8_t *table_8 = 0;
 451     uint8_t *table_332 = 0;
 452     int entry_size = 0;
 453     void *table_r = 0, *table_g = 0, *table_b = 0;
 454
 455     int crv = Inverse_Table_6_9[matrix_coefficients][0];
 456     int cbu = Inverse_Table_6_9[matrix_coefficients][1];
 457     int cgu = -Inverse_Table_6_9[matrix_coefficients][2];
 458     int cgv = -Inverse_Table_6_9[matrix_coefficients][3];
 459
 460     for (i = 0; i < 1024; i++) {
 461         int j;
 462
 463         j = (76309 * (i - 384 - 16) + 32768) >> 16;
 464         j = (j < 0) ? 0 : ((j > 255) ? 255 : j);
 465         table_Y[i] = j;
 466     }
 467
 468     switch (bpp) {
 469     case 32:
 470         yuv2rgb_c_internal = yuv2rgb_c_32;
 471
 472         table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t));
 473
 474         entry_size = sizeof (uint32_t);
 475         table_r = table_32 + 197;
 476         table_b = table_32 + 197 + 685;
 477         table_g = table_32 + 197 + 2*682;
 478
 479         for (i = -197; i < 256+197; i++)
 480             ((uint32_t *)table_r)[i] = table_Y[i+384] << ((mode==MODE_RGB) ? 16 : 0);
 481         for (i = -132; i < 256+132; i++)
 482             ((uint32_t *)table_g)[i] = table_Y[i+384] << 8;
 483         for (i = -232; i < 256+232; i++)
 484             ((uint32_t *)table_b)[i] = table_Y[i+384] << ((mode==MODE_RGB) ? 0 : 16);
 485         break;
 486
 487     case 24:
 488 //      yuv2rgb_c_internal = (mode==MODE_RGB) ? yuv2rgb_c_24_rgb : yuv2rgb_c_24_bgr;
 489         yuv2rgb_c_internal = (mode!=MODE_RGB) ? yuv2rgb_c_24_rgb : yuv2rgb_c_24_bgr;
 490
 491         table_8 = malloc ((256 + 2*232) * sizeof (uint8_t));
 492
 493         entry_size = sizeof (uint8_t);
 494         table_r = table_g = table_b = table_8 + 232;
 495
 496         for (i = -232; i < 256+232; i++)
 497             ((uint8_t * )table_b)[i] = table_Y[i+384];
 498         break;
 499
 500     case 15:
 501     case 16:
 502         yuv2rgb_c_internal = yuv2rgb_c_16;
 503
 504         table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t));
 505
 506         entry_size = sizeof (uint16_t);
 507         table_r = table_16 + 197;
 508         table_b = table_16 + 197 + 685;
 509         table_g = table_16 + 197 + 2*682;
 510
 511         for (i = -197; i < 256+197; i++) {
 512             int j = table_Y[i+384] >> 3;
 513
 514             if (mode == MODE_RGB)
 515                 j <<= ((bpp==16) ? 11 : 10);
 516
 517             ((uint16_t *)table_r)[i] = j;
 518         }
 519         for (i = -132; i < 256+132; i++) {
 520             int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);
 521
 522             ((uint16_t *)table_g)[i] = j << 5;
 523         }
 524         for (i = -232; i < 256+232; i++) {
 525             int j = table_Y[i+384] >> 3;
 526
 527             if (mode == MODE_BGR)
 528                 j <<= ((bpp==16) ? 11 : 10);
 529
 530             ((uint16_t *)table_b)[i] = j;
 531         }
 532         break;
 533
 534     case 8:
 535         yuv2rgb_c_internal = yuv2rgb_c_8;
 536
 537         table_332 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
 538
 539         entry_size = sizeof (uint8_t);
 540         table_r = table_332 + 197;
 541         table_b = table_332 + 197 + 685;
 542         table_g = table_332 + 197 + 2*682;
 543
 544         for (i = -197; i < 256+197; i++) {
 545             int j = table_Y[i+384] >> 5;
 546
 547             if (mode == MODE_RGB)
 548                 j <<= 5;
 549
 550             ((uint8_t *)table_r)[i] = j;
 551         }
 552         for (i = -132; i < 256+132; i++) {
 553             int j = table_Y[i+384] >> 5;
 554
 555             if (mode == MODE_BGR)
 556                 j <<= 1;
 557
 558             ((uint8_t *)table_g)[i] = j << 2;
 559         }
 560         for (i = -232; i < 256+232; i++) {
 561             int j = table_Y[i+384] >> 6;
 562
 563             if (mode == MODE_BGR)
 564                 j <<= 6;
 565
 566             ((uint8_t *)table_b)[i] = j;
 567         }
 568         break;
 569
 570     default:
 571         mp_msg(MSGT_SWS,MSGL_ERR,"%ibpp not supported by yuv2rgb\n", bpp);
 572         //exit (1);
 573     }
 574
 575     for (i = 0; i < 256; i++) {
 576         table_rV[i] = table_r + entry_size * div_round (crv * (i-128), 76309);
 577         table_gU[i] = table_g + entry_size * div_round (cgu * (i-128), 76309);
 578         table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
 579         table_bU[i] = table_b + entry_size * div_round (cbu * (i-128), 76309);
 580     }
 581 }