2 * software YUV to RGB converter
4 * Copyright (C) 2009 Konstantin Shishkov
6 * MMX/MMX2 template stuff (needed for fast movntq support),
7 * 1,4,8bpp support and context / deglobalize stuff
8 * by Michael Niedermayer (michaelni@gmx.at)
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "swscale_internal.h"
36 #include "libavutil/x86_cpu.h"
38 #define DITHER1XBPP // only for MMX
/* 8x8 dither matrices defined outside this file. The ordered-dither
 * converters in this file select a row with (y & 7) and add entries of that
 * row to the luma value before the table lookup. */
40 extern const uint8_t dither_8x8_32[8][8];
41 extern const uint8_t dither_8x8_73[8][8];
42 extern const uint8_t dither_8x8_220[8][8];
/* MMX / MMX2 code paths; only compiled when both HAVE_MMX and CONFIG_GPL are
 * set. */
44 #if HAVE_MMX && CONFIG_GPL
46 /* hope these constant values are cache line aligned */
47 DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
48 DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
49 DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
/* yuv2rgb_template.c is included once per RENAME definition. Presumably each
 * inclusion produces functions carrying the matching suffix, such as the
 * yuv420_rgb32_MMX and yuv420_rgb32_MMX2 symbols that
 * ff_yuv2rgb_get_func_ptr() returns - the template itself is not part of this
 * file, so confirm there. */
56 #define HAVE_AMD3DNOW 0
57 #define RENAME(a) a ## _MMX
58 #include "yuv2rgb_template.c"
64 #define RENAME(a) a ## _MMX2
65 #include "yuv2rgb_template.c"
67 #endif /* HAVE_MMX && CONFIG_GPL */
/* Eight rows of four int32 coefficients; the comment on each row names the
 * colorimetry it is meant for. Rows 0 and 1 are identical, as are rows 2, 3,
 * 5 and 6.
 * NOTE(review): presumably a row is what gets passed as inv_table to
 * ff_yuv2rgb_c_init_tables(), which reads entries [0..3] as crv, cbu, cgu and
 * cgv - confirm against the callers. */
69 const int32_t ff_yuv2rgb_coeffs[8][4] = {
70 {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
71 {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
72 {104597, 132201, 25675, 53279}, /* unspecified */
73 {104597, 132201, 25675, 53279}, /* reserved */
74 {104448, 132798, 24759, 53109}, /* FCC */
75 {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
76 {104597, 132201, 25675, 53279}, /* SMPTE 170M */
77 {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
/* LOADCHROMA(i): point r, g and b at the lookup tables selected by the chroma
 * values U and V: r = table_rV[V], b = table_bU[U], and g = table_gU[U]
 * advanced by table_gV[V]. How U and V are obtained from i is not visible in
 * this view. */
80 #define LOADCHROMA(i) \
83 r = (void *)c->table_rV[V]; \
84 g = (void *)(c->table_gU[U] + c->table_gV[V]); \
85 b = (void *)c->table_bU[U];
/* PUTRGB(dst,src,i,o): write output pixels dst[2*i] and dst[2*i+1]. Each one
 * is the sum of the r, g and b table entries at index Y, so every table is
 * expected to contribute its own component bits. The lines that set Y (using
 * src and o) are not visible in this view. */
87 #define PUTRGB(dst,src,i,o) \
89 dst[2*i ] = r[Y] + g[Y] + b[Y]; \
91 dst[2*i+1] = r[Y] + g[Y] + b[Y];
/* PUTRGB24(dst,src,i): write two pixels as separate components in r, g, b
 * order, i.e. six dst elements starting at dst[6*i]. */
93 #define PUTRGB24(dst,src,i) \
95 dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
97 dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
/* PUTBGR24(dst,src,i): same as PUTRGB24 with the component order reversed to
 * b, g, r. */
99 #define PUTBGR24(dst,src,i) \
101 dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
103 dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
/*
 * Shared skeleton for the C converters in this file.
 *
 * YUV2RGBFUNC(func_name, dst_type, alpha) opens a static function
 *   int func_name(SwsContext *c, uint8_t *src[], int srcStride[],
 *                 int srcSliceY, int srcSliceH,
 *                 uint8_t *dst[], int dstStride[])
 * together with the start of its row loop. The loop advances y by 2, so each
 * iteration works on two rows at once:
 *   dst_1, dst_2 - rows (y+srcSliceY) and (y+srcSliceY+1) of dst[0],
 *                  viewed as dst_type
 *   py_1, py_2   - rows y and y+1 of src[0]
 *   pu, pv       - row (y>>1) of src[1] and src[2]
 *   h_size       - c->dstW >> 3
 * There is a special case for PIX_FMT_YUV422P input whose body is not visible
 * in this view. The alpha argument is not referenced by any visible line.
 *
 * ENDYUV2RGBLINE(dst_delta), ENDYUV2RGBFUNC() and CLOSEYUV2RGBFUNC(dst_delta)
 * close what YUV2RGBFUNC opened; CLOSEYUV2RGBFUNC begins by expanding
 * ENDYUV2RGBLINE. Most of their bodies are not visible here.
 */
105 #define YUV2RGBFUNC(func_name, dst_type, alpha) \
106 static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
107 int srcSliceH, uint8_t* dst[], int dstStride[]){\
110 if (c->srcFormat == PIX_FMT_YUV422P) {\
114 for (y=0; y<srcSliceH; y+=2) {\
115 dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY )*dstStride[0]);\
116 dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
117 dst_type av_unused *r, *b;\
119 uint8_t *py_1 = src[0] + y*srcStride[0];\
120 uint8_t *py_2 = py_1 + srcStride[0];\
121 uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
122 uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
123 unsigned int h_size = c->dstW>>3;\
128 #define ENDYUV2RGBLINE(dst_delta)\
137 int av_unused Y, U, V;\
139 #define ENDYUV2RGBFUNC()\
145 #define CLOSEYUV2RGBFUNC(dst_delta)\
146 ENDYUV2RGBLINE(dst_delta)\
149 YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
/* Converter with uint32_t output pixels. Every PUTRGB(dst_n, py_n, i, o) call
 * writes pixels 2*i and 2*i+1 of one of the two output rows; i runs 0..3 for
 * one group of eight pixels.
 * NOTE(review): the PUTRGB pair for i=1 appears twice in a row below; the
 * second pair looks redundant - confirm before removing.
 * The final calls for i=0 and i=1 come after lines that are not visible in
 * this view; presumably they handle the leftover pixels when the width is not
 * a multiple of 8 - confirm. */
151 PUTRGB(dst_1,py_1,0,0);
152 PUTRGB(dst_2,py_2,0,1);
155 PUTRGB(dst_2,py_2,1,1);
156 PUTRGB(dst_1,py_1,1,0);
158 PUTRGB(dst_2,py_2,1,1);
159 PUTRGB(dst_1,py_1,1,0);
162 PUTRGB(dst_1,py_1,2,0);
163 PUTRGB(dst_2,py_2,2,1);
166 PUTRGB(dst_2,py_2,3,1);
167 PUTRGB(dst_1,py_1,3,0);
170 PUTRGB(dst_1,py_1,0,0);
171 PUTRGB(dst_2,py_2,0,1);
174 PUTRGB(dst_2,py_2,1,1);
175 PUTRGB(dst_1,py_1,1,0);
/* Converter with byte-wise output in r, g, b order. Every PUTRGB24 call
 * writes two pixels (six bytes) of one output row; i runs 0..3 for one group
 * of eight pixels. The final calls for i=0 and i=1 come after lines that are
 * not visible in this view; presumably they cover the leftover pixels when
 * the width is not a multiple of 8 - confirm. */
178 YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
180 PUTRGB24(dst_1,py_1,0);
181 PUTRGB24(dst_2,py_2,0);
184 PUTRGB24(dst_2,py_2,1);
185 PUTRGB24(dst_1,py_1,1);
188 PUTRGB24(dst_1,py_1,2);
189 PUTRGB24(dst_2,py_2,2);
192 PUTRGB24(dst_2,py_2,3);
193 PUTRGB24(dst_1,py_1,3);
196 PUTRGB24(dst_1,py_1,0);
197 PUTRGB24(dst_2,py_2,0);
200 PUTRGB24(dst_2,py_2,1);
201 PUTRGB24(dst_1,py_1,1);
204 // only trivial mods from yuv2rgb_c_24_rgb
/* The difference is the store macro: PUTBGR24 writes each pixel's bytes in
 * b, g, r order instead of r, g, b. The sequence of calls is otherwise the
 * same as in yuv2rgb_c_24_rgb. */
205 YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
207 PUTBGR24(dst_1,py_1,0);
208 PUTBGR24(dst_2,py_2,0);
211 PUTBGR24(dst_2,py_2,1);
212 PUTBGR24(dst_1,py_1,1);
215 PUTBGR24(dst_1,py_1,2);
216 PUTBGR24(dst_2,py_2,2);
219 PUTBGR24(dst_2,py_2,3);
220 PUTBGR24(dst_1,py_1,3);
223 PUTBGR24(dst_1,py_1,0);
224 PUTBGR24(dst_2,py_2,0);
227 PUTBGR24(dst_2,py_2,1);
228 PUTBGR24(dst_1,py_1,1);
231 // This is exactly the same code as yuv2rgb_c_32 except for the types of
232 // r, g, b, dst_1, dst_2
/* Here dst_type is uint16_t, so each PUTRGB store writes one 16-bit pixel
 * formed by adding the r, g and b table entries. */
233 YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
235 PUTRGB(dst_1,py_1,0,0);
236 PUTRGB(dst_2,py_2,0,1);
239 PUTRGB(dst_2,py_2,1,1);
240 PUTRGB(dst_1,py_1,1,0);
243 PUTRGB(dst_1,py_1,2,0);
244 PUTRGB(dst_2,py_2,2,1);
247 PUTRGB(dst_2,py_2,3,1);
248 PUTRGB(dst_1,py_1,3,0);
251 // This is exactly the same code as yuv2rgb_c_32 except for the types of
252 // r, g, b, dst_1, dst_2
/* Here dst_type is uint8_t, so each PUTRGB store writes one byte-sized pixel
 * formed by adding the r, g and b table entries; no dithering is applied in
 * this variant. */
253 YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
255 PUTRGB(dst_1,py_1,0,0);
256 PUTRGB(dst_2,py_2,0,1);
259 PUTRGB(dst_2,py_2,1,1);
260 PUTRGB(dst_1,py_1,1,0);
263 PUTRGB(dst_1,py_1,2,0);
264 PUTRGB(dst_2,py_2,2,1);
267 PUTRGB(dst_2,py_2,3,1);
268 PUTRGB(dst_1,py_1,3,0);
271 // 8-bit output with ordered dithering; uses the same r, g, b, dst_1, dst_2 setup as yuv2rgb_c_8
/* d32 and d64 point at row (y & 7) of dither_8x8_32 and dither_8x8_73. */
272 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
273 const uint8_t *d32 = dither_8x8_32[y&7];
274 const uint8_t *d64 = dither_8x8_73[y&7];
/* PUTRGB8(dst,src,i,o): like PUTRGB, but a dither value is added to Y before
 * each lookup: r and g use d32[o] / d32[o+1], b uses d64[o] / d64[o+1].
 * The dst_2 calls below pass o+8, which indexes past the eight entries of the
 * selected row and therefore into the following row of the matrix. */
275 #define PUTRGB8(dst,src,i,o) \
277 dst[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
279 dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
282 PUTRGB8(dst_1,py_1,0,0);
283 PUTRGB8(dst_2,py_2,0,0+8);
286 PUTRGB8(dst_2,py_2,1,2+8);
287 PUTRGB8(dst_1,py_1,1,2);
290 PUTRGB8(dst_1,py_1,2,4);
291 PUTRGB8(dst_2,py_2,2,4+8);
294 PUTRGB8(dst_2,py_2,3,6+8);
295 PUTRGB8(dst_1,py_1,3,6);
299 // Same call sequence as yuv2rgb_c_32, but stores go through PUTRGB4, which
300 // combines two pixels in acc: the first in the low bits, the second shifted left by 4
/* The line that stores acc into dst is not visible in this view. */
301 YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0)
303 #define PUTRGB4(dst,src,i) \
305 acc = r[Y] + g[Y] + b[Y]; \
307 acc |= (r[Y] + g[Y] + b[Y])<<4; \
311 PUTRGB4(dst_1,py_1,0);
312 PUTRGB4(dst_2,py_2,0);
315 PUTRGB4(dst_2,py_2,1);
316 PUTRGB4(dst_1,py_1,1);
319 PUTRGB4(dst_1,py_1,2);
320 PUTRGB4(dst_2,py_2,2);
323 PUTRGB4(dst_2,py_2,3);
324 PUTRGB4(dst_1,py_1,3);
/* Dithered variant of the packed 4-bit converter. PUTRGB4D combines two
 * pixels in acc (the second one shifted left by 4) and adds a dither value to
 * Y before each lookup: r and b use d128 (a row of dither_8x8_220), g uses
 * d64 (a row of dither_8x8_73). The dst_2 calls pass o+8, which indexes into
 * the following row of the dither matrix. The line that stores acc into dst
 * is not visible in this view. */
327 YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
328 const uint8_t *d64 = dither_8x8_73[y&7];
329 const uint8_t *d128 = dither_8x8_220[y&7];
332 #define PUTRGB4D(dst,src,i,o) \
334 acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
336 acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \
340 PUTRGB4D(dst_1,py_1,0,0);
341 PUTRGB4D(dst_2,py_2,0,0+8);
344 PUTRGB4D(dst_2,py_2,1,2+8);
345 PUTRGB4D(dst_1,py_1,1,2);
348 PUTRGB4D(dst_1,py_1,2,4);
349 PUTRGB4D(dst_2,py_2,2,4+8);
352 PUTRGB4D(dst_2,py_2,3,6+8);
353 PUTRGB4D(dst_1,py_1,3,6);
356 // This is exactly the same code as yuv2rgb_c_32 except for the types of
357 // r, g, b, dst_1, dst_2
/* Unlike yuv2rgb_c_4, this variant stores through plain PUTRGB, so every
 * pixel occupies its own uint8_t element of dst. */
358 YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0)
360 PUTRGB(dst_1,py_1,0,0);
361 PUTRGB(dst_2,py_2,0,1);
364 PUTRGB(dst_2,py_2,1,1);
365 PUTRGB(dst_1,py_1,1,0);
368 PUTRGB(dst_1,py_1,2,0);
369 PUTRGB(dst_2,py_2,2,1);
372 PUTRGB(dst_2,py_2,3,1);
373 PUTRGB(dst_1,py_1,3,0);
/* Dithered variant of the one-pixel-per-byte 4-bit converter. d64 and d128
 * point at row (y & 7) of dither_8x8_73 and dither_8x8_220. */
376 YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
377 const uint8_t *d64 = dither_8x8_73[y&7];
378 const uint8_t *d128 = dither_8x8_220[y&7];
/* PUTRGB4DB(dst,src,i,o): like PUTRGB, but a dither value is added to Y
 * before each lookup: r and b use d128, g uses d64. The dst_2 calls below
 * pass o+8, which indexes into the following row of the dither matrix. */
380 #define PUTRGB4DB(dst,src,i,o) \
382 dst[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
384 dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
387 PUTRGB4DB(dst_1,py_1,0,0);
388 PUTRGB4DB(dst_2,py_2,0,0+8);
391 PUTRGB4DB(dst_2,py_2,1,2+8);
392 PUTRGB4DB(dst_1,py_1,1,2);
395 PUTRGB4DB(dst_1,py_1,2,4);
396 PUTRGB4DB(dst_2,py_2,2,4+8);
399 PUTRGB4DB(dst_2,py_2,3,6+8);
400 PUTRGB4DB(dst_1,py_1,3,6);
/* Dithered converter that accumulates its results in out_1 / out_2 (one per
 * output row). Chroma is not used: g is fixed to the table reached with
 * U = V = 128, and only the luma value plus a dither offset from d128 (a row
 * of dither_8x8_220) selects the table entry.
 * NOTE(review): out_1 and out_2 are plain char, whose signedness is
 * implementation-defined; the lines that write them to dst are not visible in
 * this view. */
403 YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
404 const uint8_t *d128 = dither_8x8_220[y&7];
405 char out_1 = 0, out_2 = 0;
406 g= c->table_gU[128] + c->table_gV[128];
/* PUTRGB1(out,src,i,o): "out += out + x" doubles out and adds x, i.e. it
 * shifts the accumulator left by one and appends the looked-up value; this is
 * done twice per call. Eight values are appended per row by the four calls
 * below. */
408 #define PUTRGB1(out,src,i,o) \
410 out+= out + g[Y+d128[0+o]]; \
412 out+= out + g[Y+d128[1+o]];
414 PUTRGB1(out_1,py_1,0,0);
415 PUTRGB1(out_2,py_2,0,0+8);
417 PUTRGB1(out_2,py_2,1,2+8);
418 PUTRGB1(out_1,py_1,1,2);
420 PUTRGB1(out_1,py_1,2,4);
421 PUTRGB1(out_2,py_2,2,4+8);
423 PUTRGB1(out_2,py_2,3,6+8);
424 PUTRGB1(out_1,py_1,3,6);
/**
 * Select the YUV-to-RGB conversion function for a context.
 *
 * Accelerated implementations are considered first, gated by build-time
 * macros and by capability bits in c->flags; after them a warning is logged
 * and a C implementation is chosen by c->dstFormat.
 *
 * @param c context; flags and dstFormat are read
 * @return  the selected conversion function. What is returned for a
 *          destination format with no matching case is not visible in this
 *          view.
 */
430 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
/* MMX2 is tested before MMX. Both offer the same four destination formats:
 * RGB32, BGR24, RGB565 (the rgb16 routine) and RGB555 (the rgb15 routine). */
433 #if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
434 if (c->flags & SWS_CPU_CAPS_MMX2) {
435 switch (c->dstFormat) {
436 case PIX_FMT_RGB32: return yuv420_rgb32_MMX2;
437 case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
438 case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
439 case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
442 if (c->flags & SWS_CPU_CAPS_MMX) {
443 switch (c->dstFormat) {
444 case PIX_FMT_RGB32: return yuv420_rgb32_MMX;
445 case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
446 case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
447 case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
/* Other platform back ends store their candidate in t. The guards around the
 * first two calls and the code that returns t are not visible in this view. */
452 t = ff_yuv2rgb_init_vis(c);
455 t = ff_yuv2rgb_init_mlib(c);
457 #if HAVE_ALTIVEC && CONFIG_GPL
458 if (c->flags & SWS_CPU_CAPS_ALTIVEC)
459 t = ff_yuv2rgb_init_altivec(c);
463 if (c->flags & SWS_CPU_CAPS_BFIN)
464 t = ff_yuv2rgb_get_func_ptr_bfin(c);
470 av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
/* C converters. Several case labels are missing from this view, so only the
 * mappings shown here are certain. */
472 switch (c->dstFormat) {
473 case PIX_FMT_BGR32_1:
474 case PIX_FMT_RGB32_1:
476 case PIX_FMT_RGB32: return yuv2rgb_c_32;
477 case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb;
478 case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr;
482 case PIX_FMT_BGR555: return yuv2rgb_c_16;
484 case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither;
486 case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither;
487 case PIX_FMT_RGB4_BYTE:
488 case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither;
489 case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither;
/**
 * Fill a 256-entry table of pointers into y_table.
 *
 * @param table    receives one pointer per index 0..255
 * @param elemsize size in bytes of one y_table element
 * @param inc      coefficient; y_table is first moved back by
 *                 elemsize * (inc >> 9)
 * @param y_table  base the pointers are derived from
 *
 * Entry i is y_table + elemsize * (cb >> 16). The initial value of cb and how
 * it changes per iteration are not visible in this view (presumably it is
 * advanced by inc each step - confirm).
 */
496 static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table)
501 y_table -= elemsize * (inc >> 9);
503 for (i = 0; i < 256; i++) {
504 table[i] = y_table + elemsize * (cb >> 16);
/**
 * Fill a 256-entry table of byte offsets (rather than pointers).
 *
 * @param table    receives one offset per index 0..255
 * @param elemsize size in bytes of one element of the table being addressed
 * @param inc      coefficient; the starting offset is -(inc >> 9)
 *
 * Entry i is elemsize * (off + (cb >> 16)). LOADCHROMA adds such an offset to
 * a table_gU pointer. The initial value of cb and how it changes per
 * iteration are not visible in this view (presumably it is advanced by inc
 * each step - confirm).
 */
509 static void fill_gv_table(int table[256], const int elemsize, const int inc)
513 int off = -(inc >> 9);
515 for (i = 0; i < 256; i++) {
516 table[i] = elemsize * (off + (cb >> 16));
/**
 * Build the lookup tables used by the C converters: c->yuvTable plus
 * c->table_rV, c->table_gU, c->table_bU and c->table_gV.
 *
 * @param c          context; dstFormat is read, the previous yuvTable is
 *                   freed and a new one is allocated
 * @param inv_table  four coefficients; [0] and [1] are used as crv and cbu,
 *                   [2] and [3] are negated to give cgu and cgv
 * @param fullRange  selects the offset yoffs into the luma table
 *                   (384 when set, 326 otherwise)
 * @param brightness subtracted from the luma offset as 256*brightness
 * @param contrast   scales cy (>> 16) and, multiplied by saturation, the four
 *                   chroma coefficients (>> 32)
 * @param saturation see contrast
 *
 * The case labels, the per-entry update of yb and the return statements are
 * not visible in this view. An unsupported depth is reported through av_log
 * at AV_LOG_ERROR.
 *
 * NOTE(review): none of the av_malloc results is checked for NULL before the
 * table is written.
 */
521 av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
522 int brightness, int contrast, int saturation)
/* isRgb picks which of the r and b components gets the higher bit position
 * in the packed formats below. */
524 const int isRgb = c->dstFormat==PIX_FMT_RGB32
525 || c->dstFormat==PIX_FMT_RGB32_1
526 || c->dstFormat==PIX_FMT_BGR24
527 || c->dstFormat==PIX_FMT_RGB565
528 || c->dstFormat==PIX_FMT_RGB555
529 || c->dstFormat==PIX_FMT_RGB8
530 || c->dstFormat==PIX_FMT_RGB4
531 || c->dstFormat==PIX_FMT_RGB4_BYTE
532 || c->dstFormat==PIX_FMT_MONOBLACK;
533 const int bpp = fmt_depth(c->dstFormat);
537 int i, base, rbase, gbase, bbase, abase;
538 const int yoffs = fullRange ? 384 : 326;
/* 64-bit so that the products with contrast and saturation below fit. */
540 int64_t crv = inv_table[0];
541 int64_t cbu = inv_table[1];
542 int64_t cgu = -inv_table[2];
543 int64_t cgv = -inv_table[3];
/* Rescale the chroma coefficients by 224/255; the condition guarding this is
 * not visible in this view. */
553 crv = (crv*224) / 255;
554 cbu = (cbu*224) / 255;
555 cgu = (cgu*224) / 255;
556 cgv = (cgv*224) / 255;
/* Apply the user adjustments. The shift amounts imply that contrast and
 * saturation are 16.16 fixed-point factors - presumably 1<<16 is neutral,
 * confirm against the callers. */
559 cy = (cy *contrast ) >> 16;
560 crv = (crv*contrast * saturation) >> 32;
561 cbu = (cbu*contrast * saturation) >> 32;
562 cgu = (cgu*contrast * saturation) >> 32;
563 cgv = (cgv*contrast * saturation) >> 32;
564 oy -= 256*brightness;
566 //scale coefficients by cy
567 crv = ((crv << 16) + 0x8000) / cy;
568 cbu = ((cbu << 16) + 0x8000) / cy;
569 cgu = ((cgu << 16) + 0x8000) / cy;
570 cgv = ((cgv << 16) + 0x8000) / cy;
/* Release the table left over from any earlier call. */
572 av_free(c->yuvTable);
/* Single 1024-byte plane; each written entry keeps only the top bit of the
 * clipped 8-bit value. Writing starts at index 110, so the first 110 bytes
 * are not initialised here. Only the g tables are set up, which matches
 * what yuv2rgb_c_1_ordered_dither reads. */
576 c->yuvTable = av_malloc(1024);
577 y_table = c->yuvTable;
578 yb = -(384<<16) - oy;
579 for (i = 0; i < 1024-110; i++) {
580 y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
583 fill_table(c->table_gU, 1, cgu, y_table + yoffs);
584 fill_gv_table(c->table_gV, 1, cgv);
/* Three 1024-byte planes (r, g, b). r and b keep one bit (yval >> 7); g keeps
 * (yval + 43) / 85, i.e. a value 0..3. Each component is shifted to its bit
 * position by rbase / gbase / bbase (the assignment of gbase is not visible
 * in this view). The r/b planes and the g plane start at different indices
 * (110 and 37). */
588 rbase = isRgb ? 3 : 0;
590 bbase = isRgb ? 0 : 3;
591 c->yuvTable = av_malloc(1024*3);
592 y_table = c->yuvTable;
593 yb = -(384<<16) - oy;
594 for (i = 0; i < 1024-110; i++) {
595 int yval = av_clip_uint8((yb + 0x8000) >> 16);
596 y_table[i+110 ] = (yval >> 7) << rbase;
597 y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
598 y_table[i+110+2048] = (yval >> 7) << bbase;
601 fill_table(c->table_rV, 1, crv, y_table + yoffs);
602 fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
603 fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
604 fill_gv_table(c->table_gV, 1, cgv);
/* Three 1024-byte planes. The first two planes hold (yval + 18) / 36, a value
 * 0..7; the third holds (yval + 43) / 85, a value 0..3. */
607 rbase = isRgb ? 5 : 0;
608 gbase = isRgb ? 2 : 3;
609 bbase = isRgb ? 0 : 6;
610 c->yuvTable = av_malloc(1024*3);
611 y_table = c->yuvTable;
612 yb = -(384<<16) - oy;
613 for (i = 0; i < 1024-38; i++) {
614 int yval = av_clip_uint8((yb + 0x8000) >> 16);
615 y_table[i+16 ] = ((yval + 18) / 36) << rbase;
616 y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
617 y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
620 fill_table(c->table_rV, 1, crv, y_table + yoffs);
621 fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
622 fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
623 fill_gv_table(c->table_gV, 1, cgv);
/* Three planes of 1024 two-byte entries. r and b keep the top five bits
 * (yval >> 3); g keeps yval >> (18 - bpp), so its width depends on bpp. The
 * assignment of gbase is not visible in this view. */
627 rbase = isRgb ? bpp - 5 : 0;
629 bbase = isRgb ? 0 : (bpp - 5);
630 c->yuvTable = av_malloc(1024*3*2);
631 y_table16 = c->yuvTable;
632 yb = -(384<<16) - oy;
633 for (i = 0; i < 1024; i++) {
634 uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
635 y_table16[i ] = (yval >> 3) << rbase;
636 y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
637 y_table16[i+2048] = (yval >> 3) << bbase;
640 fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
641 fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
642 fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
643 fill_gv_table(c->table_gV, 2, cgv);
/* One 1024-byte plane of unshifted clipped values, shared by the r, g and b
 * tables; this suits converters that store each component in its own byte. */
646 c->yuvTable = av_malloc(1024);
647 y_table = c->yuvTable;
648 yb = -(384<<16) - oy;
649 for (i = 0; i < 1024; i++) {
650 y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
653 fill_table(c->table_rV, 1, crv, y_table + yoffs);
654 fill_table(c->table_gU, 1, cgu, y_table + yoffs);
655 fill_table(c->table_bU, 1, cbu, y_table + yoffs);
656 fill_gv_table(c->table_gV, 1, cgv);
/* Three planes of 1024 four-byte entries. base moves all colour components
 * up by 8 bits for the *_1 formats; abase is the bit position of the
 * remaining byte, which is set to 255 through the r plane (presumably an
 * opaque alpha value - confirm). The assignment of gbase is not visible in
 * this view.
 * NOTE(review): when abase is 24, "255 << abase" shifts a signed int past
 * INT_MAX, which is undefined behaviour in ISO C; an unsigned constant would
 * avoid it. */
659 base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
660 rbase = base + (isRgb ? 16 : 0);
662 bbase = base + (isRgb ? 0 : 16);
663 abase = (base + 24) & 31;
664 c->yuvTable = av_malloc(1024*3*4);
665 y_table32 = c->yuvTable;
666 yb = -(384<<16) - oy;
667 for (i = 0; i < 1024; i++) {
668 uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
669 y_table32[i ] = (yval << rbase) + (255 << abase);
670 y_table32[i+1024] = yval << gbase;
671 y_table32[i+2048] = yval << bbase;
674 fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
675 fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
676 fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
677 fill_gv_table(c->table_gV, 4, cgv);
/* Any other depth is rejected with an error message. */
681 av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);