2 * software YUV to RGB converter
4 * Copyright (C) 2009 Konstantin Shishkov
6 * MMX/MMX2 template stuff (needed for fast movntq support),
7 * 1,4,8bpp support and context / deglobalize stuff
8 * by Michael Niedermayer (michaelni@gmx.at)
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "swscale_internal.h"
37 #define DITHER1XBPP // only for MMX
/* 8x8 dither matrices. They are only declared here; their definitions are
 * not part of this listing. The dithering converters further down pick one
 * row per loop iteration with [y&7]. */
39 extern const uint8_t dither_8x8_32[8][8];
40 extern const uint8_t dither_8x8_73[8][8];
41 extern const uint8_t dither_8x8_220[8][8];
/* MMX code path, compiled only when both HAVE_MMX and CONFIG_GPL are set.
 * yuv2rgb_template.c is included twice, each time with a different RENAME()
 * suffix, which yields the *_MMX and *_MMX2 functions that
 * sws_yuv2rgb_get_func_ptr() returns.
 * NOTE(review): the lines between the two includes that reset RENAME and the
 * feature macros are not visible in this listing. */
43 #if HAVE_MMX && CONFIG_GPL
45 /* hope these constant values are cache line aligned */
/* 0x00ff in every 16-bit lane. */
46 DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
/* 0xf8 in every byte: keeps the upper five bits. */
47 DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
/* 0xfc in every byte: keeps the upper six bits. */
48 DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
55 #define HAVE_AMD3DNOW 0
/* First instantiation: template names receive the _MMX suffix. */
56 #define RENAME(a) a ## _MMX
57 #include "yuv2rgb_template.c"
/* Second instantiation: template names receive the _MMX2 suffix. */
63 #define RENAME(a) a ## _MMX2
64 #include "yuv2rgb_template.c"
66 #endif /* HAVE_MMX && CONFIG_GPL */
/* Conversion coefficients: eight rows of four int32_t values, one row per
 * colour standard named in the trailing comment. Rows 0 and 1 are identical,
 * as are rows 2, 3, 5 and 6.
 * NOTE(review): sws_yuv2rgb_c_init_tables() reads a four-entry inv_table as
 * crv, cbu, cgu, cgv (negating entries 2 and 3); presumably a row of this
 * table is what callers pass there -- confirm against the callers. */
68 const int32_t ff_yuv2rgb_coeffs[8][4] = {
69 {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
70 {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
71 {104597, 132201, 25675, 53279}, /* unspecified */
72 {104597, 132201, 25675, 53279}, /* reserved */
73 {104448, 132798, 24759, 53109}, /* FCC */
74 {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
75 {104597, 132201, 25675, 53279}, /* SMPTE 170M */
76 {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
/* LOADCHROMA(i): point r, g and b at the lookup tables for the current
 * chroma pair. r is taken from table_rV[V], b from table_bU[U], and g from
 * table_gU[U] displaced by the offset stored in table_gV[V]. The casts to
 * void * let the result be assigned to pointers of the converter's own
 * element type.
 * The statements of this macro that set U and V are not visible in this
 * listing; presumably they read the i-th chroma samples -- confirm. */
79 #define LOADCHROMA(i) \
82 r = (void *)c->table_rV[V]; \
83 g = (void *)(c->table_gU[U] + c->table_gV[V]); \
84 b = (void *)c->table_bU[U];
/* PUTRGB(dst,src,i,o): store the two pixels of pair i at dst[2*i] and
 * dst[2*i+1]. Each pixel is the sum of the r, g and b table entries at index
 * Y, so the tables must already hold the per-channel bit fields.
 * The statements that load Y are not visible in this listing, and the
 * argument o is not used in the visible lines. */
86 #define PUTRGB(dst,src,i,o) \
88 dst[2*i ] = r[Y] + g[Y] + b[Y]; \
90 dst[2*i+1] = r[Y] + g[Y] + b[Y];
/* PUTRGB24(dst,src,i): store the two pixels of pair i as six consecutive
 * elements starting at dst[6*i], in r, g, b order for each pixel.
 * The statements that load Y are not visible in this listing. */
92 #define PUTRGB24(dst,src,i) \
94 dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
96 dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
/* PUTBGR24(dst,src,i): same layout as PUTRGB24 but each pixel is stored in
 * b, g, r order.
 * The statements that load Y are not visible in this listing. */
98 #define PUTBGR24(dst,src,i) \
100 dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
102 dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
/* YUV2RGBFUNC(func_name, dst_type): opens the definition of a static
 * converter named func_name whose output elements have type dst_type.
 *
 * The generated function walks the slice two rows per iteration (y += 2):
 *   dst_1 / dst_2  output rows y+srcSliceY and y+srcSliceY+1 of dst[0]
 *   py_1  / py_2   the two matching rows of src[0]
 *   pu    / pv     row y>>1 of src[1] and src[2], shared by both rows
 *   h_size         c->dstW >> 3, i.e. the number of 8-pixel groups
 *
 * Several lines of this macro are not visible in this listing, including
 * the body of the PIX_FMT_YUV422P branch and the inner loop header. */
104 #define YUV2RGBFUNC(func_name, dst_type) \
105 static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
106 int srcSliceH, uint8_t* dst[], int dstStride[]){\
109 if (c->srcFormat == PIX_FMT_YUV422P) {\
113 for (y=0; y<srcSliceH; y+=2) {\
114 dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY )*dstStride[0]);\
115 dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
116 dst_type av_unused *r, *b;\
118 uint8_t *py_1 = src[0] + y*srcStride[0];\
119 uint8_t *py_2 = py_1 + srcStride[0];\
120 uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
121 uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
122 unsigned int h_size = c->dstW>>3;\
/* Companion macros to YUV2RGBFUNC. Most of their bodies are not visible in
 * this listing. What can be seen:
 *   ENDYUV2RGBLINE(dst_delta)   declares a fresh set of Y, U, V variables.
 *   ENDYUV2RGBFUNC()            no body lines visible.
 *   CLOSEYUV2RGBFUNC(dst_delta) starts by expanding ENDYUV2RGBLINE(dst_delta).
 * NOTE(review): judging by the names, these presumably end the 8-pixel loop
 * body, open a shorter tail section and close the function -- confirm
 * against the complete file. */
127 #define ENDYUV2RGBLINE(dst_delta)\
136 int av_unused Y, U, V;\
138 #define ENDYUV2RGBFUNC()\
144 #define CLOSEYUV2RGBFUNC(dst_delta)\
145 ENDYUV2RGBLINE(dst_delta)\
/* Converter writing uint32_t output elements via PUTRGB. Each call writes
 * pixel pair <i> of one of the two output rows; the row order alternates
 * between pairs. The LOADCHROMA calls and the macros separating the
 * sections are not visible in this listing. */
148 YUV2RGBFUNC(yuv2rgb_c_32, uint32_t)
150 PUTRGB(dst_1,py_1,0,0);
151 PUTRGB(dst_2,py_2,0,1);
154 PUTRGB(dst_2,py_2,1,1);
155 PUTRGB(dst_1,py_1,1,0);
/* NOTE(review): pair 1 is written a second time with identical arguments;
 * the other converters in this file do not repeat it. Looks redundant. */
157 PUTRGB(dst_2,py_2,1,1);
158 PUTRGB(dst_1,py_1,1,0);
161 PUTRGB(dst_1,py_1,2,0);
162 PUTRGB(dst_2,py_2,2,1);
165 PUTRGB(dst_2,py_2,3,1);
166 PUTRGB(dst_1,py_1,3,0);
/* Second run over pairs 0 and 1 only; presumably the tail section for
 * widths that are not a multiple of 8 -- the separating macro is not
 * visible here. */
169 PUTRGB(dst_1,py_1,0,0);
170 PUTRGB(dst_2,py_2,0,1);
173 PUTRGB(dst_2,py_2,1,1);
174 PUTRGB(dst_1,py_1,1,0);
/* Converter writing uint8_t output elements via PUTRGB24, i.e. three
 * elements per pixel in r, g, b order. Pairs 0..3 are written to both
 * output rows, alternating which row goes first. The LOADCHROMA calls and
 * the macros separating the sections are not visible in this listing. */
177 YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t)
179 PUTRGB24(dst_1,py_1,0);
180 PUTRGB24(dst_2,py_2,0);
183 PUTRGB24(dst_2,py_2,1);
184 PUTRGB24(dst_1,py_1,1);
187 PUTRGB24(dst_1,py_1,2);
188 PUTRGB24(dst_2,py_2,2);
191 PUTRGB24(dst_2,py_2,3);
192 PUTRGB24(dst_1,py_1,3);
/* Second run over pairs 0 and 1 only; presumably the tail section -- the
 * separating macro is not visible here. */
195 PUTRGB24(dst_1,py_1,0);
196 PUTRGB24(dst_2,py_2,0);
199 PUTRGB24(dst_2,py_2,1);
200 PUTRGB24(dst_1,py_1,1);
203 // only trivial mods from yuv2rgb_c_24_rgb
/* Same call sequence as yuv2rgb_c_24_rgb, but using PUTBGR24 so each pixel
 * is stored in b, g, r order. */
204 YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t)
206 PUTBGR24(dst_1,py_1,0);
207 PUTBGR24(dst_2,py_2,0);
210 PUTBGR24(dst_2,py_2,1);
211 PUTBGR24(dst_1,py_1,1);
214 PUTBGR24(dst_1,py_1,2);
215 PUTBGR24(dst_2,py_2,2);
218 PUTBGR24(dst_2,py_2,3);
219 PUTBGR24(dst_1,py_1,3);
/* Second run over pairs 0 and 1 only; presumably the tail section -- the
 * separating macro is not visible here. */
222 PUTBGR24(dst_1,py_1,0);
223 PUTBGR24(dst_2,py_2,0);
226 PUTBGR24(dst_2,py_2,1);
227 PUTBGR24(dst_1,py_1,1);
230 // This is exactly the same code as yuv2rgb_c_32 except for the types of
231 // r, g, b, dst_1, dst_2
/* Output elements are uint16_t; one element per pixel, written by PUTRGB.
 * Only the four pixel pairs of the 8-pixel section are visible in this
 * listing. */
232 YUV2RGBFUNC(yuv2rgb_c_16, uint16_t)
234 PUTRGB(dst_1,py_1,0,0);
235 PUTRGB(dst_2,py_2,0,1);
238 PUTRGB(dst_2,py_2,1,1);
239 PUTRGB(dst_1,py_1,1,0);
242 PUTRGB(dst_1,py_1,2,0);
243 PUTRGB(dst_2,py_2,2,1);
246 PUTRGB(dst_2,py_2,3,1);
247 PUTRGB(dst_1,py_1,3,0);
250 // This is exactly the same code as yuv2rgb_c_32 except for the types of
251 // r, g, b, dst_1, dst_2
/* Output elements are uint8_t; one element per pixel, written by PUTRGB
 * without dithering.
 * NOTE(review): none of the visible cases in sws_yuv2rgb_get_func_ptr()
 * returns this function; the dithered variant is returned for BGR8. */
252 YUV2RGBFUNC(yuv2rgb_c_8, uint8_t)
254 PUTRGB(dst_1,py_1,0,0);
255 PUTRGB(dst_2,py_2,0,1);
258 PUTRGB(dst_2,py_2,1,1);
259 PUTRGB(dst_1,py_1,1,0);
262 PUTRGB(dst_1,py_1,2,0);
263 PUTRGB(dst_2,py_2,2,1);
266 PUTRGB(dst_2,py_2,3,1);
267 PUTRGB(dst_1,py_1,3,0);
270 // r, g, b, dst_1, dst_2
/* 8-bit-per-pixel converter with ordered dithering. A dither value is added
 * to the table index Y before the lookup: r and g use the row taken from
 * dither_8x8_32, b uses the row taken from dither_8x8_73. */
271 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t)
272 const uint8_t *d32 = dither_8x8_32[y&7];
273 const uint8_t *d64 = dither_8x8_73[y&7];
/* PUTRGB8(dst,src,i,o): like PUTRGB, with o selecting the dither column for
 * the first pixel of the pair and o+1 for the second. The statements that
 * load Y are not visible in this listing. */
274 #define PUTRGB8(dst,src,i,o) \
276 dst[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
278 dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
/* The second output row passes o+8, which indexes past the 8 entries of the
 * row selected by y&7 and so lands in the following row of the matrix.
 * y is always even (the outer loop starts at 0 and steps by 2), so that
 * following row always exists. */
281 PUTRGB8(dst_1,py_1,0,0);
282 PUTRGB8(dst_2,py_2,0,0+8);
285 PUTRGB8(dst_2,py_2,1,2+8);
286 PUTRGB8(dst_1,py_1,1,2);
289 PUTRGB8(dst_1,py_1,2,4);
290 PUTRGB8(dst_2,py_2,2,4+8);
293 PUTRGB8(dst_2,py_2,3,6+8);
294 PUTRGB8(dst_1,py_1,3,6);
298 // This is exactly the same code as yuv2rgb_c_32 except for the types of
299 // r, g, b, dst_1, dst_2
/* NOTE(review): unlike yuv2rgb_c_32 this converter uses its own PUTRGB4
 * macro, which combines two pixels into one value, so the comment above is
 * only approximately true. */
300 YUV2RGBFUNC(yuv2rgb_c_4, uint8_t)
/* PUTRGB4(dst,src,i): acc receives the first pixel of pair i in its low
 * bits and the second pixel shifted left by four. The statements that load
 * Y, declare acc and store acc into dst are not visible in this listing. */
302 #define PUTRGB4(dst,src,i) \
304 acc = r[Y] + g[Y] + b[Y]; \
306 acc |= (r[Y] + g[Y] + b[Y])<<4; \
310 PUTRGB4(dst_1,py_1,0);
311 PUTRGB4(dst_2,py_2,0);
314 PUTRGB4(dst_2,py_2,1);
315 PUTRGB4(dst_1,py_1,1);
318 PUTRGB4(dst_1,py_1,2);
319 PUTRGB4(dst_2,py_2,2);
322 PUTRGB4(dst_2,py_2,3);
323 PUTRGB4(dst_1,py_1,3);
/* Dithered counterpart of yuv2rgb_c_4: two pixels are combined into one
 * value, with a dither value added to the table index before each lookup.
 * r and b use the row from dither_8x8_220, g uses the row from
 * dither_8x8_73. */
326 YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t)
327 const uint8_t *d64 = dither_8x8_73[y&7];
328 const uint8_t *d128 = dither_8x8_220[y&7];
/* PUTRGB4D(dst,src,i,o): o selects the dither column for the first pixel of
 * the pair, o+1 for the second. The statements that load Y, declare acc and
 * store acc into dst are not visible in this listing. */
331 #define PUTRGB4D(dst,src,i,o) \
333 acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
335 acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \
/* The second output row passes o+8, which reaches into the matrix row after
 * the one selected by y&7; y is always even, so that row exists. */
339 PUTRGB4D(dst_1,py_1,0,0);
340 PUTRGB4D(dst_2,py_2,0,0+8);
343 PUTRGB4D(dst_2,py_2,1,2+8);
344 PUTRGB4D(dst_1,py_1,1,2);
347 PUTRGB4D(dst_1,py_1,2,4);
348 PUTRGB4D(dst_2,py_2,2,4+8);
351 PUTRGB4D(dst_2,py_2,3,6+8);
352 PUTRGB4D(dst_1,py_1,3,6);
355 // This is exactly the same code as yuv2rgb_c_32 except for the types of
356 // r, g, b, dst_1, dst_2
/* Output elements are uint8_t and PUTRGB stores one element per pixel, so
 * pixels are not packed two to a byte here.
 * NOTE(review): none of the visible cases in sws_yuv2rgb_get_func_ptr()
 * returns this function; the dithered variant is returned for the *4_BYTE
 * formats. */
357 YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t)
359 PUTRGB(dst_1,py_1,0,0);
360 PUTRGB(dst_2,py_2,0,1);
363 PUTRGB(dst_2,py_2,1,1);
364 PUTRGB(dst_1,py_1,1,0);
367 PUTRGB(dst_1,py_1,2,0);
368 PUTRGB(dst_2,py_2,2,1);
371 PUTRGB(dst_2,py_2,3,1);
372 PUTRGB(dst_1,py_1,3,0);
/* Dithered counterpart of yuv2rgb_c_4b: one uint8_t element per pixel, with
 * a dither value added to the table index before each lookup. r and b use
 * the row from dither_8x8_220, g uses the row from dither_8x8_73. */
375 YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t)
376 const uint8_t *d64 = dither_8x8_73[y&7];
377 const uint8_t *d128 = dither_8x8_220[y&7];
/* PUTRGB4DB(dst,src,i,o): o selects the dither column for the first pixel
 * of the pair, o+1 for the second. The statements that load Y are not
 * visible in this listing. */
379 #define PUTRGB4DB(dst,src,i,o) \
381 dst[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
383 dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
/* The second output row passes o+8, which reaches into the matrix row after
 * the one selected by y&7; y is always even, so that row exists. */
386 PUTRGB4DB(dst_1,py_1,0,0);
387 PUTRGB4DB(dst_2,py_2,0,0+8);
390 PUTRGB4DB(dst_2,py_2,1,2+8);
391 PUTRGB4DB(dst_1,py_1,1,2);
394 PUTRGB4DB(dst_1,py_1,2,4);
395 PUTRGB4DB(dst_2,py_2,2,4+8);
398 PUTRGB4DB(dst_2,py_2,3,6+8);
399 PUTRGB4DB(dst_1,py_1,3,6);
/* Dithered converter that accumulates one value per output row (out_1 and
 * out_2) over the eight pixels of a group. Only the g table is consulted,
 * and it is selected with the fixed chroma index 128 rather than with the
 * source chroma samples. */
402 YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t)
403 const uint8_t *d128 = dither_8x8_220[y&7];
404 char out_1 = 0, out_2 = 0;
405 g= c->table_gU[128] + c->table_gV[128];
/* PUTRGB1(out,src,i,o): "out += out + x" doubles out and then adds x, so
 * every pixel shifts the accumulator left by one and appends the looked-up
 * value. NOTE(review): this yields one bit per pixel only if the table
 * entries are 0 or 1; the first table built in sws_yuv2rgb_c_init_tables()
 * stores values shifted right by 7, which presumably is the matching
 * table -- confirm. The statements that load Y and that store out_1/out_2
 * into the destination are not visible in this listing. */
407 #define PUTRGB1(out,src,i,o) \
409 out+= out + g[Y+d128[0+o]]; \
411 out+= out + g[Y+d128[1+o]];
413 PUTRGB1(out_1,py_1,0,0);
414 PUTRGB1(out_2,py_2,0,0+8);
416 PUTRGB1(out_2,py_2,1,2+8);
417 PUTRGB1(out_1,py_1,1,2);
419 PUTRGB1(out_1,py_1,2,4);
420 PUTRGB1(out_2,py_2,2,4+8);
422 PUTRGB1(out_2,py_2,3,6+8);
423 PUTRGB1(out_1,py_1,3,6);
/**
 * Pick the conversion routine for the destination format of a context.
 *
 * Accelerated implementations are considered first, depending on build
 * configuration and on the CPU capability bits in c->flags; the final
 * switch maps c->dstFormat to one of the C converters defined in this file.
 *
 * @param c  scaler context; c->flags and c->dstFormat are read
 * @return   the selected SwsFunc
 *
 * NOTE(review): many lines of this function (braces, preprocessor guards,
 * some case labels, the handling of t and the final fallback) are not
 * visible in this listing.
 */
429 SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c)
432 #if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
/* MMX2 flag set: return the _MMX2 template instance for the format. */
433 if (c->flags & SWS_CPU_CAPS_MMX2) {
434 switch (c->dstFormat) {
435 case PIX_FMT_RGB32: return yuv420_rgb32_MMX2;
436 case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
437 case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
438 case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
/* MMX flag set: same four formats, served by the _MMX instances. */
441 if (c->flags & SWS_CPU_CAPS_MMX) {
442 switch (c->dstFormat) {
443 case PIX_FMT_RGB32: return yuv420_rgb32_MMX;
444 case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
445 case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
446 case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
/* Other back ends store their candidate in t; the guards around the first
 * two calls and the code that acts on t are not visible here. */
451 t = sws_yuv2rgb_init_vis(c);
454 t = sws_yuv2rgb_init_mlib(c);
456 #if HAVE_ALTIVEC && CONFIG_GPL
457 if (c->flags & SWS_CPU_CAPS_ALTIVEC)
458 t = sws_yuv2rgb_init_altivec(c);
462 if (c->flags & SWS_CPU_CAPS_BFIN)
463 t = sws_ff_bfin_yuv2rgb_get_func_ptr(c);
469 av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
/* Plain C converters. Of the 8- and 4-bit converters, only the ordered-dither
 * variants are returned by the visible cases. */
471 switch (c->dstFormat) {
472 case PIX_FMT_BGR32_1:
473 case PIX_FMT_RGB32_1:
475 case PIX_FMT_RGB32: return yuv2rgb_c_32;
476 case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb;
477 case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr;
481 case PIX_FMT_BGR555: return yuv2rgb_c_16;
483 case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither;
485 case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither;
486 case PIX_FMT_RGB4_BYTE:
487 case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither;
488 case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither;
/**
 * Fill a 256-entry table of pointers into a luma lookup table.
 *
 * @param table     receives 256 pointers
 * @param elemsize  size in bytes of one y_table element
 * @param inc       fixed-point step; the base is moved back by inc >> 9 elements
 * @param y_table   byte pointer to the position in the luma table the
 *                  entries are based on
 *
 * NOTE(review): the declaration and per-iteration update of cb are not
 * visible in this listing. Presumably cb starts at 0 and grows by inc each
 * iteration, which would make the initial (inc >> 9) adjustment put the zero
 * displacement at index 128 -- confirm.
 */
495 static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table)
/* Move the base back by (inc >> 9) elements. */
500 y_table -= elemsize * (inc >> 9);
502 for (i = 0; i < 256; i++) {
/* Entry i points (cb >> 16) elements past the adjusted base. */
503 table[i] = y_table + elemsize * (cb >> 16);
/**
 * Fill a 256-entry table of byte offsets, the integer counterpart of
 * fill_table(). LOADCHROMA adds an entry of this table to a pointer taken
 * from table_gU.
 *
 * @param table     receives 256 offsets, already multiplied by elemsize
 * @param elemsize  size in bytes of one lookup-table element
 * @param inc       fixed-point step; the starting offset is -(inc >> 9)
 *
 * NOTE(review): the declaration and per-iteration update of cb are not
 * visible in this listing; presumably cb grows by inc each iteration --
 * confirm.
 */
508 static void fill_gv_table(int table[256], const int elemsize, const int inc)
512 int off = -(inc >> 9);
514 for (i = 0; i < 256; i++) {
515 table[i] = elemsize * (off + (cb >> 16));
/**
 * Build the lookup tables used by the C converters: c->yuvTable plus the
 * chroma-indexed tables c->table_rV, c->table_gU, c->table_gV and
 * c->table_bU.
 *
 * @param c           scaler context; c->dstFormat is read, c->yuvTable is
 *                    freed and reallocated, the four tables are filled
 * @param inv_table   four coefficients, read as crv, cbu, cgu, cgv; the last
 *                    two are negated
 * @param fullRange   selects the base offset into the luma table (384 when
 *                    non-zero, otherwise 326)
 * @param brightness  subtracted from the luma offset, scaled by 256
 * @param contrast    multiplier applied with a 16-bit right shift
 * @param saturation  multiplier applied together with contrast to the
 *                    chroma coefficients (32-bit right shift in total)
 *
 * NOTE(review): many lines of this function are not visible in this listing,
 * among them the switch on bpp with its case labels, the declarations of
 * cy/oy/yb and the table pointers, the per-iteration update of yb, and the
 * return statements. Labels given to the sections below are inferred from
 * the bit widths and hedged accordingly.
 */
520 av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
521 int brightness, int contrast, int saturation)
/* isRgb selects which of the two channel orders the shift bases use. */
523 const int isRgb = c->dstFormat==PIX_FMT_RGB32
524 || c->dstFormat==PIX_FMT_RGB32_1
525 || c->dstFormat==PIX_FMT_BGR24
526 || c->dstFormat==PIX_FMT_RGB565
527 || c->dstFormat==PIX_FMT_RGB555
528 || c->dstFormat==PIX_FMT_RGB8
529 || c->dstFormat==PIX_FMT_RGB4
530 || c->dstFormat==PIX_FMT_RGB4_BYTE
531 || c->dstFormat==PIX_FMT_MONOBLACK;
532 const int bpp = fmt_depth(c->dstFormat);
536 int i, base, rbase, gbase, bbase, abase;
/* Position inside each luma table that fill_table() uses as its base. */
537 const int yoffs = fullRange ? 384 : 326;
539 int64_t crv = inv_table[0];
540 int64_t cbu = inv_table[1];
541 int64_t cgu = -inv_table[2];
542 int64_t cgv = -inv_table[3];
/* Rescale the chroma coefficients by 224/255; the condition guarding these
 * lines is not visible in this listing. */
552 crv = (crv*224) / 255;
553 cbu = (cbu*224) / 255;
554 cgu = (cgu*224) / 255;
555 cgv = (cgv*224) / 255;
/* Apply contrast to luma, contrast*saturation to chroma, and brightness to
 * the luma offset. */
558 cy = (cy *contrast ) >> 16;
559 crv = (crv*contrast * saturation) >> 32;
560 cbu = (cbu*contrast * saturation) >> 32;
561 cgu = (cgu*contrast * saturation) >> 32;
562 cgv = (cgv*contrast * saturation) >> 32;
563 oy -= 256*brightness;
565 //scale coefficients by cy
/* Division by cy with 0x8000 added to the shifted numerator for rounding. */
566 crv = ((crv << 16) + 0x8000) / cy;
567 cbu = ((cbu << 16) + 0x8000) / cy;
568 cgu = ((cgu << 16) + 0x8000) / cy;
569 cgv = ((cgv << 16) + 0x8000) / cy;
/* Drop any table left over from an earlier call before allocating anew. */
571 av_free(c->yuvTable);
/* One-bit table (presumably the 1 bpp case): a single 1024-byte table whose
 * entries from index 110 upward hold the clipped luma shifted right by 7,
 * i.e. 0 or 1. Only the gU/gV tables are filled.
 * NOTE(review): here and in every section below, the av_malloc() result is
 * written through without a NULL check. */
575 c->yuvTable = av_malloc(1024);
576 y_table = c->yuvTable;
577 yb = -(384<<16) - oy;
578 for (i = 0; i < 1024-110; i++) {
579 y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
582 fill_table(c->table_gU, 1, cgu, y_table + yoffs);
583 fill_gv_table(c->table_gV, 1, cgv);
/* Three byte tables of 1024 entries each (presumably the 4 bpp case):
 * r and b keep one bit (yval >> 7), g keeps (yval + 43) / 85, i.e. 0..3.
 * The assignment of gbase is not visible in this listing. */
587 rbase = isRgb ? 3 : 0;
589 bbase = isRgb ? 0 : 3;
590 c->yuvTable = av_malloc(1024*3);
591 y_table = c->yuvTable;
592 yb = -(384<<16) - oy;
593 for (i = 0; i < 1024-110; i++) {
594 int yval = av_clip_uint8((yb + 0x8000) >> 16);
595 y_table[i+110 ] = (yval >> 7) << rbase;
596 y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
597 y_table[i+110+2048] = (yval >> 7) << bbase;
600 fill_table(c->table_rV, 1, crv, y_table + yoffs);
601 fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
602 fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
603 fill_gv_table(c->table_gV, 1, cgv);
/* Three byte tables (presumably the 8 bpp case): r and g keep
 * (yval + 18) / 36, i.e. 0..7, and b keeps (yval + 43) / 85, i.e. 0..3. */
606 rbase = isRgb ? 5 : 0;
607 gbase = isRgb ? 2 : 3;
608 bbase = isRgb ? 0 : 6;
609 c->yuvTable = av_malloc(1024*3);
610 y_table = c->yuvTable;
611 yb = -(384<<16) - oy;
612 for (i = 0; i < 1024-38; i++) {
613 int yval = av_clip_uint8((yb + 0x8000) >> 16);
614 y_table[i+16 ] = ((yval + 18) / 36) << rbase;
615 y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
616 y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
619 fill_table(c->table_rV, 1, crv, y_table + yoffs);
620 fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
621 fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
622 fill_gv_table(c->table_gV, 1, cgv);
/* Three tables accessed through y_table16 with element size 2 (presumably
 * the 15/16 bpp case): r and b keep five bits (yval >> 3), g is shifted by
 * 18 - bpp. The assignment of gbase is not visible in this listing. */
626 rbase = isRgb ? bpp - 5 : 0;
628 bbase = isRgb ? 0 : (bpp - 5);
629 c->yuvTable = av_malloc(1024*3*2);
630 y_table16 = c->yuvTable;
631 yb = -(384<<16) - oy;
632 for (i = 0; i < 1024; i++) {
633 uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
634 y_table16[i ] = (yval >> 3) << rbase;
635 y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
636 y_table16[i+2048] = (yval >> 3) << bbase;
639 fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
640 fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
641 fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
642 fill_gv_table(c->table_gV, 2, cgv);
/* A single 1024-byte table of clipped 8-bit values shared by the r, g and b
 * lookups (presumably the 24 bpp case). */
645 c->yuvTable = av_malloc(1024);
646 y_table = c->yuvTable;
647 yb = -(384<<16) - oy;
648 for (i = 0; i < 1024; i++) {
649 y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
652 fill_table(c->table_rV, 1, crv, y_table + yoffs);
653 fill_table(c->table_gU, 1, cgu, y_table + yoffs);
654 fill_table(c->table_bU, 1, cbu, y_table + yoffs);
655 fill_gv_table(c->table_gV, 1, cgv);
/* Three tables accessed through y_table32 with element size 4 (presumably
 * the 32 bpp case). base shifts all colour channels up by 8 for the *_1
 * formats; the constant 255 is added to the r entries at bit position abase.
 * The assignment of gbase is not visible in this listing.
 * NOTE(review): with abase == 24, "255 << abase" does not fit in a signed
 * int; an unsigned constant would avoid relying on overflow behaviour. */
658 base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
659 rbase = base + (isRgb ? 16 : 0);
661 bbase = base + (isRgb ? 0 : 16);
662 abase = (c->dstFormat == PIX_FMT_RGBA || c->dstFormat == PIX_FMT_BGRA) ? 24 : 0;
663 c->yuvTable = av_malloc(1024*3*4);
664 y_table32 = c->yuvTable;
665 yb = -(384<<16) - oy;
666 for (i = 0; i < 1024; i++) {
667 uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
668 y_table32[i ] = (yval << rbase) + (255 << abase);
669 y_table32[i+1024] = yval << gbase;
670 y_table32[i+2048] = yval << bbase;
673 fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
674 fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
675 fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
676 fill_gv_table(c->table_gV, 4, cgv);
/* Reached for a bit depth none of the sections above handles. */
680 av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);