2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most have been tested, but the results were not written down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
72 #define RGB2YUV_SHIFT 15
/* BT.601 RGB->YCbCr coefficients in Q15 fixed point (RGB2YUV_SHIFT).
 * Luma terms (BY/GY/RY) are scaled by 219/255 for the studio-swing
 * 16..235 Y range; chroma terms (xU/xV) by 224/255 for the 16..240
 * Cb/Cr range. The +0.5 rounds to nearest before truncation to int. */
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither matrices used when quantizing to 15/16-bit RGB output.
 * The numeric suffix is the number of distinct threshold levels.
 * NOTE(review): the closing "};" lines of these tables fall on source
 * lines not visible in this view. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 matrix with 8 levels: used for the 5-bit channels (see yuv2rgb_write,
 * which indexes dither_2x2_8 for 565/555 red/blue). */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 matrix with 16 levels: used for the 4-bit channels of RGB444/BGR444
 * (indexed with y & 3 in yuv2rgb_write). Non-static: shared with other
 * compilation units. */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix, 32 levels: red/green dither for RGB8/BGR8
 * (see the d32 lookup in yuv2rgb_write). */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 matrix scaled to ~73: blue dither for RGB8/BGR8 and green dither
 * for RGB4/BGR4 (the d64 lookups in yuv2rgb_write). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 matrix scaled to ~220: used for monochrome output (d128 in the
 * yuv2mono_* writers) and for red/blue in RGB4/BGR4. */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* NOTE(review): the following three tables redefine dither_8x8_220 with
 * gamma-compensated entries (1.5 / 2.0 / 2.5). They must be mutually
 * exclusive alternatives to the table above — presumably selected by
 * #if/#elif/#else preprocessor guards that fall on source lines not
 * visible in this view (the original line numbers jump at each table).
 * Verify against the full file before editing. */
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 ordered-dither matrix scaled to ~128: used by the unscaled and
 * planar 8-bit output paths (lumDither8/chrDither8 in SwsContext are
 * derived from tables like this one — confirm in swscale_internal.h). */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Constant vector of 64s, exported for SIMD code (pb = packed bytes). */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/**
 * Vertical filter writing planar YUV output at 9..16 bits per component.
 *
 * Applies the 16-bit fixed-point vertical filters (lumFilter/chrFilter)
 * to the 32-bit intermediate source rows and stores each plane through
 * the local output_pixel() macro, which picks AV_WB16/AV_WL16 on
 * big_endian and clips either to 16 bits or to output_bits.
 * dest[] holds the Y, U, V and (optional) alpha plane pointers.
 */
198 static av_always_inline void
199 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
200 int lumFilterSize, const int16_t *chrFilter,
201 const int32_t **chrUSrc, const int32_t **chrVSrc,
202 int chrFilterSize, const int32_t **alpSrc,
203 uint16_t *dest[4], int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
209 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* Trailing "- 1" compensates for the ">> 1" applied to every filter
 * product below, which keeps the 32-bit accumulator from overflowing. */
210 int shift = 15 + 16 - output_bits - 1;
212 #define output_pixel(pos, val) \
214 if (output_bits == 16) { \
215 AV_WB16(pos, av_clip_uint16(val >> shift)); \
217 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
220 if (output_bits == 16) { \
221 AV_WL16(pos, av_clip_uint16(val >> shift)); \
223 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Luma plane: seed each accumulator with a rounding constant. */
226 for (i = 0; i < dstW; i++) {
227 int val = 1 << (30-output_bits - 1);
230 for (j = 0; j < lumFilterSize; j++)
231 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
233 output_pixel(&yDest[i], val);
/* Chroma planes share one loop; U and V use the same filter. */
237 for (i = 0; i < chrDstW; i++) {
238 int u = 1 << (30-output_bits - 1);
239 int v = 1 << (30-output_bits - 1);
242 for (j = 0; j < chrFilterSize; j++) {
243 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
244 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
247 output_pixel(&uDest[i], u);
248 output_pixel(&vDest[i], v);
/* Alpha plane (compile-time optional) reuses the luma filter. */
252 if (CONFIG_SWSCALE_ALPHA && aDest) {
253 for (i = 0; i < dstW; i++) {
254 int val = 1 << (30-output_bits - 1);
257 for (j = 0; j < lumFilterSize; j++)
258 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
260 output_pixel(&aDest[i], val);
/**
 * Generator for bit-depth/endianness-specific planar output functions
 * (e.g. yuv2yuvX9BE_c). Each generated wrapper reinterprets the int16_t**
 * source arrays as int32_t** (the >8-bit pipeline stores 32-bit
 * intermediates) and forwards to yuv2yuvX16_c_template.
 * NOTE(review): the yuv2NBPS(...) instantiations follow on source lines
 * not visible in this view.
 */
266 #define yuv2NBPS(bits, BE_LE, is_be) \
267 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
268 const int16_t **_lumSrc, int lumFilterSize, \
269 const int16_t *chrFilter, const int16_t **_chrUSrc, \
270 const int16_t **_chrVSrc, \
271 int chrFilterSize, const int16_t **_alpSrc, \
272 uint8_t *_dest[4], int dstW, int chrDstW) \
274 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
275 **chrUSrc = (const int32_t **) _chrUSrc, \
276 **chrVSrc = (const int32_t **) _chrVSrc, \
277 **alpSrc = (const int32_t **) _alpSrc; \
278 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
279 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
280 alpSrc, (uint16_t **) _dest, \
281 dstW, chrDstW, is_be, bits); \
/**
 * Vertical filter writing 8-bit planar YUV (plus optional alpha).
 *
 * Each output sample accumulates lumSrc/chrSrc rows weighted by the
 * fixed-point filters, seeded with a per-column dither value shifted
 * into the accumulator's fractional headroom (<< 12), then shifted
 * down by 19 and clipped to 8 bits.
 */
290 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
291 const int16_t **lumSrc, int lumFilterSize,
292 const int16_t *chrFilter, const int16_t **chrUSrc,
293 const int16_t **chrVSrc,
294 int chrFilterSize, const int16_t **alpSrc,
295 uint8_t *dest[4], int dstW, int chrDstW)
297 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
298 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
300 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
302 //FIXME Optimize (just quickly written not optimized..)
303 for (i=0; i<dstW; i++) {
304 int val = lumDither[i & 7] << 12;
306 for (j=0; j<lumFilterSize; j++)
307 val += lumSrc[j][i] * lumFilter[j];
309 yDest[i]= av_clip_uint8(val>>19);
/* Chroma: V uses a phase-shifted dither index ((i + 3) & 7) so U and V
 * do not pick identical dither values. */
313 for (i=0; i<chrDstW; i++) {
314 int u = chrDither[i & 7] << 12;
315 int v = chrDither[(i + 3) & 7] << 12;
317 for (j=0; j<chrFilterSize; j++) {
318 u += chrUSrc[j][i] * chrFilter[j];
319 v += chrVSrc[j][i] * chrFilter[j];
322 uDest[i]= av_clip_uint8(u>>19);
323 vDest[i]= av_clip_uint8(v>>19);
/* Optional alpha plane, filtered with the luma coefficients. */
326 if (CONFIG_SWSCALE_ALPHA && aDest)
327 for (i=0; i<dstW; i++) {
328 int val = lumDither[i & 7] << 12;
330 for (j=0; j<lumFilterSize; j++)
331 val += alpSrc[j][i] * lumFilter[j];
333 aDest[i]= av_clip_uint8(val>>19);
/**
 * 1:1 vertical path (single source row, no filtering) to 8-bit planar
 * YUV: add the per-column dither value, drop the 7 fractional bits of
 * the intermediate format, clip to 8 bits.
 */
337 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
338 const int16_t *chrUSrc, const int16_t *chrVSrc,
339 const int16_t *alpSrc,
340 uint8_t *dest[4], int dstW, int chrDstW)
342 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
343 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
345 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
347 for (i=0; i<dstW; i++) {
348 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
349 yDest[i]= av_clip_uint8(val);
/* V dither index is phase-shifted by 3, as in yuv2yuvX_c. */
353 for (i=0; i<chrDstW; i++) {
354 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
355 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
356 uDest[i]= av_clip_uint8(u);
357 vDest[i]= av_clip_uint8(v);
360 if (CONFIG_SWSCALE_ALPHA && aDest)
361 for (i=0; i<dstW; i++) {
362 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
363 aDest[i]= av_clip_uint8(val);
/**
 * Vertical filter writing NV12/NV21: planar 8-bit luma plus one
 * interleaved chroma plane. Luma is handled as in yuv2yuvX_c; the
 * chroma pairs are stored U,V for NV12 and V,U otherwise (NV21).
 */
367 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
368 const int16_t **lumSrc, int lumFilterSize,
369 const int16_t *chrFilter, const int16_t **chrUSrc,
370 const int16_t **chrVSrc, int chrFilterSize,
371 const int16_t **alpSrc, uint8_t *dest[4],
372 int dstW, int chrDstW)
374 uint8_t *yDest = dest[0], *uDest = dest[1];
375 enum PixelFormat dstFormat = c->dstFormat;
376 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
378 //FIXME Optimize (just quickly written not optimized..)
380 for (i=0; i<dstW; i++) {
381 int val = lumDither[i & 7] << 12;
383 for (j=0; j<lumFilterSize; j++)
384 val += lumSrc[j][i] * lumFilter[j];
386 yDest[i]= av_clip_uint8(val>>19);
/* NV12: interleave as U,V ... */
392 if (dstFormat == PIX_FMT_NV12)
393 for (i=0; i<chrDstW; i++) {
394 int u = chrDither[i & 7] << 12;
395 int v = chrDither[(i + 3) & 7] << 12;
397 for (j=0; j<chrFilterSize; j++) {
398 u += chrUSrc[j][i] * chrFilter[j];
399 v += chrVSrc[j][i] * chrFilter[j];
402 uDest[2*i]= av_clip_uint8(u>>19);
403 uDest[2*i+1]= av_clip_uint8(v>>19);
/* ... otherwise (NV21): identical computation, swapped V,U order. */
406 for (i=0; i<chrDstW; i++) {
407 int u = chrDither[i & 7] << 12;
408 int v = chrDither[(i + 3) & 7] << 12;
410 for (j=0; j<chrFilterSize; j++) {
411 u += chrUSrc[j][i] * chrFilter[j];
412 v += chrVSrc[j][i] * chrFilter[j];
415 uDest[2*i]= av_clip_uint8(v>>19);
416 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Endian-dispatching 16-bit store used by the yuv2gray16_* templates.
 * NOTE(review): only the big-endian branch header is visible here; the
 * macro body continues on elided source lines. */
420 #define output_pixel(pos, val) \
421 if (target == PIX_FMT_GRAY16BE) { \
/**
 * Vertical filter writing 16-bit grayscale (GRAY16LE/BE). Processes
 * pixels in pairs from the 32-bit intermediate luma rows; chroma and
 * alpha inputs are accepted for signature compatibility but unused.
 */
427 static av_always_inline void
428 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
429 const int32_t **lumSrc, int lumFilterSize,
430 const int16_t *chrFilter, const int32_t **chrUSrc,
431 const int32_t **chrVSrc, int chrFilterSize,
432 const int32_t **alpSrc, uint16_t *dest, int dstW,
433 int y, enum PixelFormat target)
437 for (i = 0; i < (dstW >> 1); i++) {
442 for (j = 0; j < lumFilterSize; j++) {
443 Y1 += lumSrc[j][i * 2] * lumFilter[j];
444 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* Cheap overflow test: clip only when either value exceeds 16 bits. */
448 if ((Y1 | Y2) & 0x10000) {
449 Y1 = av_clip_uint16(Y1);
450 Y2 = av_clip_uint16(Y2);
452 output_pixel(&dest[i * 2 + 0], Y1);
453 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Two-row bilinear blend to 16-bit grayscale: each output pixel is the
 * 12-bit fixed-point mix of buf[0] and buf[1] (weight yalpha), shifted
 * down by 15. Chroma/alpha parameters are unused.
 */
457 static av_always_inline void
458 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
459 const int32_t *ubuf[2], const int32_t *vbuf[2],
460 const int32_t *abuf[2], uint16_t *dest, int dstW,
461 int yalpha, int uvalpha, int y,
462 enum PixelFormat target)
/* Complementary weight: yalpha1 + yalpha == 4095 (12-bit fixed point). */
464 int yalpha1 = 4095 - yalpha;
466 const int32_t *buf0 = buf[0], *buf1 = buf[1];
468 for (i = 0; i < (dstW >> 1); i++) {
469 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
470 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
472 output_pixel(&dest[i * 2 + 0], Y1);
473 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Single-row (unscaled vertical) path to 16-bit grayscale: the 32-bit
 * intermediate is simply doubled (<< 1) to reach output scale before
 * the endian-aware store. Chroma/alpha parameters are unused.
 */
477 static av_always_inline void
478 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
479 const int32_t *ubuf[2], const int32_t *vbuf[2],
480 const int32_t *abuf0, uint16_t *dest, int dstW,
481 int uvalpha, int y, enum PixelFormat target)
485 for (i = 0; i < (dstW >> 1); i++) {
486 int Y1 = buf0[i * 2 ] << 1;
487 int Y2 = buf0[i * 2 + 1] << 1;
489 output_pixel(&dest[i * 2 + 0], Y1);
490 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Generator producing the three public output-writer entry points
 * (_X_c: full vertical filter, _2_c: two-row blend, _1_c: single row)
 * for a >8-bit packed format. Each wrapper casts the generic int16_t
 * source pointers to the int32_t intermediates used at high bit depth
 * and forwards to the matching *_c_template with the concrete PixelFormat.
 */
496 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
497 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
498 const int16_t **_lumSrc, int lumFilterSize, \
499 const int16_t *chrFilter, const int16_t **_chrUSrc, \
500 const int16_t **_chrVSrc, int chrFilterSize, \
501 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
504 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
505 **chrUSrc = (const int32_t **) _chrUSrc, \
506 **chrVSrc = (const int32_t **) _chrVSrc, \
507 **alpSrc = (const int32_t **) _alpSrc; \
508 uint16_t *dest = (uint16_t *) _dest; \
509 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
510 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
511 alpSrc, dest, dstW, y, fmt); \
514 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
515 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
516 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
517 int yalpha, int uvalpha, int y) \
519 const int32_t **buf = (const int32_t **) _buf, \
520 **ubuf = (const int32_t **) _ubuf, \
521 **vbuf = (const int32_t **) _vbuf, \
522 **abuf = (const int32_t **) _abuf; \
523 uint16_t *dest = (uint16_t *) _dest; \
524 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
525 dest, dstW, yalpha, uvalpha, y, fmt); \
528 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
529 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
530 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
531 int uvalpha, int y) \
533 const int32_t *buf0 = (const int32_t *) _buf0, \
534 **ubuf = (const int32_t **) _ubuf, \
535 **vbuf = (const int32_t **) _vbuf, \
536 *abuf0 = (const int32_t *) _abuf0; \
537 uint16_t *dest = (uint16_t *) _dest; \
538 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
539 dstW, uvalpha, y, fmt); \
/* Instantiate little- and big-endian 16-bit grayscale writers. */
542 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
543 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* Store one packed byte of 8 monochrome pixels; for MONOBLACK the
 * accumulator is stored as-is (the non-MONOBLACK branch is on elided
 * source lines — presumably it inverts for MONOWHITE; verify). */
545 #define output_pixel(pos, acc) \
546 if (target == PIX_FMT_MONOBLACK) { \
/**
 * Vertical filter writing 1-bit monochrome. Luma pairs are filtered,
 * clipped to 8 bits, dithered with dither_8x8_220, then thresholded via
 * the gray lookup table g (gU/gV at neutral chroma 128); each call to
 * output_pixel emits 8 pixels packed MSB-first into one byte.
 */
552 static av_always_inline void
553 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
554 const int16_t **lumSrc, int lumFilterSize,
555 const int16_t *chrFilter, const int16_t **chrUSrc,
556 const int16_t **chrVSrc, int chrFilterSize,
557 const int16_t **alpSrc, uint8_t *dest, int dstW,
558 int y, enum PixelFormat target)
560 const uint8_t * const d128=dither_8x8_220[y&7];
561 uint8_t *g = c->table_gU[128] + c->table_gV[128];
565 for (i = 0; i < dstW - 1; i += 2) {
570 for (j = 0; j < lumFilterSize; j++) {
571 Y1 += lumSrc[j][i] * lumFilter[j];
572 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* Clip only on overflow past 8 bits. */
576 if ((Y1 | Y2) & 0x100) {
577 Y1 = av_clip_uint8(Y1);
578 Y2 = av_clip_uint8(Y2);
/* "acc += acc + bit" shifts the accumulator left and appends one bit. */
580 acc += acc + g[Y1 + d128[(i + 0) & 7]];
581 acc += acc + g[Y2 + d128[(i + 1) & 7]];
583 output_pixel(*dest++, acc);
/**
 * Two-row bilinear blend to 1-bit monochrome: 8 pixels are blended
 * (12-bit weights, >> 19 to 8 bits), dithered and thresholded through
 * g, then packed into one output byte per iteration.
 */
588 static av_always_inline void
589 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
590 const int16_t *ubuf[2], const int16_t *vbuf[2],
591 const int16_t *abuf[2], uint8_t *dest, int dstW,
592 int yalpha, int uvalpha, int y,
593 enum PixelFormat target)
595 const int16_t *buf0 = buf[0], *buf1 = buf[1];
596 const uint8_t * const d128 = dither_8x8_220[y & 7];
597 uint8_t *g = c->table_gU[128] + c->table_gV[128];
598 int yalpha1 = 4095 - yalpha;
/* Unrolled over 8 pixels = 1 packed byte. */
601 for (i = 0; i < dstW - 7; i += 8) {
602 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
603 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
604 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
605 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
606 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
607 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
608 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
609 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
610 output_pixel(*dest++, acc);
/**
 * Single-row path to 1-bit monochrome: drop the 7 fractional bits of
 * each luma sample, dither, threshold through g, pack 8 pixels per byte.
 */
614 static av_always_inline void
615 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
616 const int16_t *ubuf[2], const int16_t *vbuf[2],
617 const int16_t *abuf0, uint8_t *dest, int dstW,
618 int uvalpha, int y, enum PixelFormat target)
620 const uint8_t * const d128 = dither_8x8_220[y & 7];
621 uint8_t *g = c->table_gU[128] + c->table_gV[128];
624 for (i = 0; i < dstW - 7; i += 8) {
625 int acc = g[(buf0[i ] >> 7) + d128[0]];
626 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
627 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
628 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
629 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
630 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
631 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
632 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
633 output_pixel(*dest++, acc);
/**
 * Generator for the three entry points (_X_c / _2_c / _1_c) of an
 * 8-bit-intermediate packed output format: thin wrappers that bind the
 * concrete PixelFormat and forward to the matching *_c_template.
 */
639 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
640 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
641 const int16_t **lumSrc, int lumFilterSize, \
642 const int16_t *chrFilter, const int16_t **chrUSrc, \
643 const int16_t **chrVSrc, int chrFilterSize, \
644 const int16_t **alpSrc, uint8_t *dest, int dstW, \
647 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
648 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
649 alpSrc, dest, dstW, y, fmt); \
652 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
653 const int16_t *ubuf[2], const int16_t *vbuf[2], \
654 const int16_t *abuf[2], uint8_t *dest, int dstW, \
655 int yalpha, int uvalpha, int y) \
657 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
658 dest, dstW, yalpha, uvalpha, y, fmt); \
661 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
662 const int16_t *ubuf[2], const int16_t *vbuf[2], \
663 const int16_t *abuf0, uint8_t *dest, int dstW, \
664 int uvalpha, int y) \
666 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
667 abuf0, dest, dstW, uvalpha, \
/* Instantiate the monochrome writers for both 1-bit polarities. */
671 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
672 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store one 4:2:2 macropixel (two luma, one U, one V). YUYV places luma
 * at even byte offsets; the other branch (UYVY, offsets swapped) has
 * its U/V stores on elided source lines. */
674 #define output_pixels(pos, Y1, U, Y2, V) \
675 if (target == PIX_FMT_YUYV422) { \
676 dest[pos + 0] = Y1; \
678 dest[pos + 2] = Y2; \
682 dest[pos + 1] = Y1; \
684 dest[pos + 3] = Y2; \
/**
 * Vertical filter writing packed 4:2:2 (YUYV/UYVY): two luma samples
 * and one chroma pair per iteration, filtered, clipped to 8 bits on
 * overflow, and stored via output_pixels.
 */
687 static av_always_inline void
688 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
689 const int16_t **lumSrc, int lumFilterSize,
690 const int16_t *chrFilter, const int16_t **chrUSrc,
691 const int16_t **chrVSrc, int chrFilterSize,
692 const int16_t **alpSrc, uint8_t *dest, int dstW,
693 int y, enum PixelFormat target)
697 for (i = 0; i < (dstW >> 1); i++) {
704 for (j = 0; j < lumFilterSize; j++) {
705 Y1 += lumSrc[j][i * 2] * lumFilter[j];
706 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
708 for (j = 0; j < chrFilterSize; j++) {
709 U += chrUSrc[j][i] * chrFilter[j];
710 V += chrVSrc[j][i] * chrFilter[j];
/* Clip all four components only when any of them overflowed 8 bits. */
716 if ((Y1 | Y2 | U | V) & 0x100) {
717 Y1 = av_clip_uint8(Y1);
718 Y2 = av_clip_uint8(Y2);
719 U = av_clip_uint8(U);
720 V = av_clip_uint8(V);
722 output_pixels(4*i, Y1, U, Y2, V);
/**
 * Two-row bilinear blend to packed 4:2:2: luma and chroma are each
 * mixed with 12-bit weights (yalpha/uvalpha) and shifted down by 19
 * to 8 bits before the packed store.
 */
726 static av_always_inline void
727 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
728 const int16_t *ubuf[2], const int16_t *vbuf[2],
729 const int16_t *abuf[2], uint8_t *dest, int dstW,
730 int yalpha, int uvalpha, int y,
731 enum PixelFormat target)
733 const int16_t *buf0 = buf[0], *buf1 = buf[1],
734 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
735 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
736 int yalpha1 = 4095 - yalpha;
737 int uvalpha1 = 4095 - uvalpha;
740 for (i = 0; i < (dstW >> 1); i++) {
741 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
742 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
743 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
744 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
746 output_pixels(i * 4, Y1, U, Y2, V);
/**
 * Single-luma-row path to packed 4:2:2. With uvalpha < 2048 one chroma
 * row is used as-is; otherwise the two chroma rows are averaged.
 */
750 static av_always_inline void
751 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
752 const int16_t *ubuf[2], const int16_t *vbuf[2],
753 const int16_t *abuf0, uint8_t *dest, int dstW,
754 int uvalpha, int y, enum PixelFormat target)
756 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
757 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
760 if (uvalpha < 2048) {
761 for (i = 0; i < (dstW >> 1); i++) {
762 int Y1 = buf0[i * 2] >> 7;
763 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): this branch reads ubuf1/vbuf1, whereas the analogous
 * branch of yuv2rgb48_1_c_template reads ubuf0/vbuf0. Since
 * uvalpha < 2048 weights the first row, ubuf0/vbuf0 looks intended —
 * verify against upstream history before changing. */
764 int U = ubuf1[i] >> 7;
765 int V = vbuf1[i] >> 7;
767 output_pixels(i * 4, Y1, U, Y2, V);
/* Averaging branch: (row0 + row1) / 2, folded into the >> 8. */
770 for (i = 0; i < (dstW >> 1); i++) {
771 int Y1 = buf0[i * 2] >> 7;
772 int Y2 = buf0[i * 2 + 1] >> 7;
773 int U = (ubuf0[i] + ubuf1[i]) >> 8;
774 int V = (vbuf0[i] + vbuf1[i]) >> 8;
776 output_pixels(i * 4, Y1, U, Y2, V);
/* Instantiate the two packed-4:2:2 writers. */
783 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
784 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Channel-order helpers: for RGB48 the first stored component is red,
 * for BGR48 it is blue. output_pixel stores 16-bit components with the
 * endianness of the target (macro body continues on elided lines). */
786 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
787 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
788 #define output_pixel(pos, val) \
789 if (isBE(target)) { \
/**
 * Vertical filter writing 48-bit packed RGB/BGR (16 bits/component).
 * Two pixels per iteration: filter luma/chroma from the 32-bit
 * intermediates, apply the context's fixed-point YUV->RGB coefficients,
 * clip the 30-bit results and shift down by 14 to 16 bits.
 */
795 static av_always_inline void
796 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
797 const int32_t **lumSrc, int lumFilterSize,
798 const int16_t *chrFilter, const int32_t **chrUSrc,
799 const int32_t **chrVSrc, int chrFilterSize,
800 const int32_t **alpSrc, uint16_t *dest, int dstW,
801 int y, enum PixelFormat target)
805 for (i = 0; i < (dstW >> 1); i++) {
/* Chroma accumulators are pre-biased by -128 at the working scale. */
809 int U = -128 << 23; // 19
813 for (j = 0; j < lumFilterSize; j++) {
814 Y1 += lumSrc[j][i * 2] * lumFilter[j];
815 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
817 for (j = 0; j < chrFilterSize; j++) {
818 U += chrUSrc[j][i] * chrFilter[j];
819 V += chrVSrc[j][i] * chrFilter[j];
822 // 8bit: 12+15=27; 16-bit: 12+19=31
828 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
829 Y1 -= c->yuv2rgb_y_offset;
830 Y2 -= c->yuv2rgb_y_offset;
831 Y1 *= c->yuv2rgb_y_coeff;
832 Y2 *= c->yuv2rgb_y_coeff;
835 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
/* R/G/B hold only the chroma contribution; Y is added at store time. */
837 R = V * c->yuv2rgb_v2r_coeff;
838 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
839 B = U * c->yuv2rgb_u2b_coeff;
841 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
842 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
843 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
844 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
845 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
846 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
847 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Two-row bilinear blend to 48-bit packed RGB/BGR: blend with 12-bit
 * weights (>> 14 keeps 16-bit precision), bias chroma by -128, then
 * apply the same fixed-point YUV->RGB conversion and clipped stores as
 * yuv2rgb48_X_c_template.
 */
852 static av_always_inline void
853 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
854 const int32_t *ubuf[2], const int32_t *vbuf[2],
855 const int32_t *abuf[2], uint16_t *dest, int dstW,
856 int yalpha, int uvalpha, int y,
857 enum PixelFormat target)
859 const int32_t *buf0 = buf[0], *buf1 = buf[1],
860 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
861 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
862 int yalpha1 = 4095 - yalpha;
863 int uvalpha1 = 4095 - uvalpha;
866 for (i = 0; i < (dstW >> 1); i++) {
867 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
868 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
869 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
870 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
873 Y1 -= c->yuv2rgb_y_offset;
874 Y2 -= c->yuv2rgb_y_offset;
875 Y1 *= c->yuv2rgb_y_coeff;
876 Y2 *= c->yuv2rgb_y_coeff;
880 R = V * c->yuv2rgb_v2r_coeff;
881 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
882 B = U * c->yuv2rgb_u2b_coeff;
884 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
885 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
886 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
887 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
888 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
889 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Single-luma-row path to 48-bit packed RGB/BGR. uvalpha < 2048 uses
 * the first chroma row directly (>> 2 to working scale); otherwise the
 * two chroma rows are averaged (>> 3). Conversion and stores match
 * yuv2rgb48_X_c_template.
 */
894 static av_always_inline void
895 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
896 const int32_t *ubuf[2], const int32_t *vbuf[2],
897 const int32_t *abuf0, uint16_t *dest, int dstW,
898 int uvalpha, int y, enum PixelFormat target)
900 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
901 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
904 if (uvalpha < 2048) {
905 for (i = 0; i < (dstW >> 1); i++) {
906 int Y1 = (buf0[i * 2] ) >> 2;
907 int Y2 = (buf0[i * 2 + 1]) >> 2;
908 int U = (ubuf0[i] + (-128 << 11)) >> 2;
909 int V = (vbuf0[i] + (-128 << 11)) >> 2;
912 Y1 -= c->yuv2rgb_y_offset;
913 Y2 -= c->yuv2rgb_y_offset;
914 Y1 *= c->yuv2rgb_y_coeff;
915 Y2 *= c->yuv2rgb_y_coeff;
919 R = V * c->yuv2rgb_v2r_coeff;
920 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
921 B = U * c->yuv2rgb_u2b_coeff;
923 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
924 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
925 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
926 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
927 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
928 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Chroma-averaging branch: note the bias is added before >> 3. */
932 for (i = 0; i < (dstW >> 1); i++) {
933 int Y1 = (buf0[i * 2] ) >> 2;
934 int Y2 = (buf0[i * 2 + 1]) >> 2;
935 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
936 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
939 Y1 -= c->yuv2rgb_y_offset;
940 Y2 -= c->yuv2rgb_y_offset;
941 Y1 *= c->yuv2rgb_y_coeff;
942 Y2 *= c->yuv2rgb_y_coeff;
946 R = V * c->yuv2rgb_v2r_coeff;
947 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
948 B = U * c->yuv2rgb_u2b_coeff;
950 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
951 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
952 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
953 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
954 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
955 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate all four 48-bit writer triplets. */
965 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
966 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
967 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
968 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/**
 * Store two RGB pixels (indices i*2 and i*2+1) for any packed RGB-family
 * target, using the context's precomputed per-component lookup tables
 * _r/_g/_b (each table maps a luma value to its channel bits, already
 * positioned so the three lookups can simply be added).
 *
 * target selects the branch at compile time (the function is
 * always_inline with a constant target), so each instantiation keeps
 * only one path. A1/A2 are 8-bit alpha values, used only by the
 * 32-bit-with-alpha path; y is the output line, used for dithering.
 */
970 static av_always_inline void
971 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
972 int U, int V, int A1, int A2,
973 const void *_r, const void *_g, const void *_b, int y,
974 enum PixelFormat target, int hasAlpha)
976 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
977 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
/* 32-bit formats: one uint32 per pixel, alpha merged in at bit 0 or 24
 * depending on whether alpha is the first or last component. */
978 uint32_t *dest = (uint32_t *) _dest;
979 const uint32_t *r = (const uint32_t *) _r;
980 const uint32_t *g = (const uint32_t *) _g;
981 const uint32_t *b = (const uint32_t *) _b;
984 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
986 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
987 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
/* NOTE(review): this second 32-bit variant appears to be an alternative
 * selected by a preprocessor conditional on elided source lines. */
990 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
992 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
993 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
995 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
996 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
999 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
/* 24-bit formats: three bytes per pixel, component order swapped for BGR. */
1000 uint8_t *dest = (uint8_t *) _dest;
1001 const uint8_t *r = (const uint8_t *) _r;
1002 const uint8_t *g = (const uint8_t *) _g;
1003 const uint8_t *b = (const uint8_t *) _b;
1005 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1006 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1007 dest[i * 6 + 0] = r_b[Y1];
1008 dest[i * 6 + 1] = g[Y1];
1009 dest[i * 6 + 2] = b_r[Y1];
1010 dest[i * 6 + 3] = r_b[Y2];
1011 dest[i * 6 + 4] = g[Y2];
1012 dest[i * 6 + 5] = b_r[Y2];
1015 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1016 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1017 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
/* 16-bit formats: per-channel ordered dither added to the table index.
 * Blue uses the row-inverted matrix ((y&1)^1 / (y&3)^3) so its dither
 * pattern is offset from red's. */
1018 uint16_t *dest = (uint16_t *) _dest;
1019 const uint16_t *r = (const uint16_t *) _r;
1020 const uint16_t *g = (const uint16_t *) _g;
1021 const uint16_t *b = (const uint16_t *) _b;
1022 int dr1, dg1, db1, dr2, dg2, db2;
1024 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1025 dr1 = dither_2x2_8[ y & 1 ][0];
1026 dg1 = dither_2x2_4[ y & 1 ][0];
1027 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1028 dr2 = dither_2x2_8[ y & 1 ][1];
1029 dg2 = dither_2x2_4[ y & 1 ][1];
1030 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1031 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1032 dr1 = dither_2x2_8[ y & 1 ][0];
1033 dg1 = dither_2x2_8[ y & 1 ][1];
1034 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1035 dr2 = dither_2x2_8[ y & 1 ][1];
1036 dg2 = dither_2x2_8[ y & 1 ][0];
1037 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1039 dr1 = dither_4x4_16[ y & 3 ][0];
1040 dg1 = dither_4x4_16[ y & 3 ][1];
1041 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1042 dr2 = dither_4x4_16[ y & 3 ][1];
1043 dg2 = dither_4x4_16[ y & 3 ][0];
1044 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1047 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1048 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1049 } else /* 8/4-bit */ {
1050 uint8_t *dest = (uint8_t *) _dest;
1051 const uint8_t *r = (const uint8_t *) _r;
1052 const uint8_t *g = (const uint8_t *) _g;
1053 const uint8_t *b = (const uint8_t *) _b;
1054 int dr1, dg1, db1, dr2, dg2, db2;
1056 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
/* 8-bit (3-3-2 style): red/green share the 32-level dither, blue the
 * finer 73-level one. */
1057 const uint8_t * const d64 = dither_8x8_73[y & 7];
1058 const uint8_t * const d32 = dither_8x8_32[y & 7];
1059 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1060 db1 = d64[(i * 2 + 0) & 7];
1061 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1062 db2 = d64[(i * 2 + 1) & 7];
/* 4-bit: red/blue share the 220-level dither, green the 73-level one. */
1064 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1065 const uint8_t * const d128 = dither_8x8_220[y & 7];
1066 dr1 = db1 = d128[(i * 2 + 0) & 7];
1067 dg1 = d64[(i * 2 + 0) & 7];
1068 dr2 = db2 = d128[(i * 2 + 1) & 7];
1069 dg2 = d64[(i * 2 + 1) & 7];
/* Non-byte-aligned RGB4/BGR4 packs two pixels into one byte. */
1072 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1073 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1074 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1076 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1077 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Vertically filter one output line (multi-tap FIR over lumFilterSize luma
 * and chrFilterSize chroma input lines, 14-bit coefficients) and convert it
 * to a packed RGB format, emitting two horizontal pixels per iteration via
 * yuv2rgb_write() using the per-context component lookup tables.
 * NOTE(review): several statement lines (accumulator init, braces) appear
 * lost in extraction; visible code kept byte-identical.
 */
1082 static av_always_inline void
1083 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1084 const int16_t **lumSrc, int lumFilterSize,
1085 const int16_t *chrFilter, const int16_t **chrUSrc,
1086 const int16_t **chrVSrc, int chrFilterSize,
1087 const int16_t **alpSrc, uint8_t *dest, int dstW,
1088 int y, enum PixelFormat target, int hasAlpha)
1092 for (i = 0; i < (dstW >> 1); i++) {
1098 int av_unused A1, A2;
1099 const void *r, *g, *b;
1101 for (j = 0; j < lumFilterSize; j++) {
1102 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1103 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1105 for (j = 0; j < chrFilterSize; j++) {
1106 U += chrUSrc[j][i] * chrFilter[j];
1107 V += chrVSrc[j][i] * chrFilter[j];
/* cheap out-of-range test: only clip when some component overflowed 8 bits */
1113 if ((Y1 | Y2 | U | V) & 0x100) {
1114 Y1 = av_clip_uint8(Y1);
1115 Y2 = av_clip_uint8(Y2);
1116 U = av_clip_uint8(U);
1117 V = av_clip_uint8(V);
1122 for (j = 0; j < lumFilterSize; j++) {
1123 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1124 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1128 if ((A1 | A2) & 0x100) {
1129 A1 = av_clip_uint8(A1);
1130 A2 = av_clip_uint8(A2);
1134 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1136 g = (c->table_gU[U] + c->table_gV[V]);
1139 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1140 r, g, b, y, target, hasAlpha);
/*
 * Two-line vertical blend variant: interpolates between buf[0]/buf[1]
 * (and chroma/alpha pairs) with 12-bit weights yalpha/uvalpha, then
 * converts to packed RGB two pixels at a time.
 */
1144 static av_always_inline void
1145 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1146 const int16_t *ubuf[2], const int16_t *vbuf[2],
1147 const int16_t *abuf[2], uint8_t *dest, int dstW,
1148 int yalpha, int uvalpha, int y,
1149 enum PixelFormat target, int hasAlpha)
1151 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1152 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1153 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1154 *abuf0 = hasAlpha ? abuf[0] : NULL,
1155 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights: blend = (a*w1 + b*w)/4096, w1 = 4095 - w */
1156 int yalpha1 = 4095 - yalpha;
1157 int uvalpha1 = 4095 - uvalpha;
1160 for (i = 0; i < (dstW >> 1); i++) {
1161 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1162 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1163 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1164 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1166 const void *r = c->table_rV[V],
1167 *g = (c->table_gU[U] + c->table_gV[V]),
1168 *b = c->table_bU[U];
1171 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1172 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1175 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1176 r, g, b, y, target, hasAlpha);
/*
 * Single-input-line variant (no vertical filtering). When uvalpha < 2048
 * only the second chroma buffer is used (>>7); otherwise the two chroma
 * lines are averaged (>>8).
 */
1180 static av_always_inline void
1181 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1182 const int16_t *ubuf[2], const int16_t *vbuf[2],
1183 const int16_t *abuf0, uint8_t *dest, int dstW,
1184 int uvalpha, int y, enum PixelFormat target,
1187 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1188 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1191 if (uvalpha < 2048) {
1192 for (i = 0; i < (dstW >> 1); i++) {
1193 int Y1 = buf0[i * 2] >> 7;
1194 int Y2 = buf0[i * 2 + 1] >> 7;
1195 int U = ubuf1[i] >> 7;
1196 int V = vbuf1[i] >> 7;
1198 const void *r = c->table_rV[V],
1199 *g = (c->table_gU[U] + c->table_gV[V]),
1200 *b = c->table_bU[U];
1203 A1 = abuf0[i * 2 ] >> 7;
1204 A2 = abuf0[i * 2 + 1] >> 7;
1207 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1208 r, g, b, y, target, hasAlpha);
/* chroma-averaging path for intermediate uvalpha */
1211 for (i = 0; i < (dstW >> 1); i++) {
1212 int Y1 = buf0[i * 2] >> 7;
1213 int Y2 = buf0[i * 2 + 1] >> 7;
1214 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1215 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1217 const void *r = c->table_rV[V],
1218 *g = (c->table_gU[U] + c->table_gV[V]),
1219 *b = c->table_bU[U];
1222 A1 = abuf0[i * 2 ] >> 7;
1223 A2 = abuf0[i * 2 + 1] >> 7;
1226 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1227 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX emits the _X (multi-tap) entry point
 * for a given output format; YUV2RGBWRAPPER additionally emits the _2
 * (two-line blend) and _1 (single-line) entry points. Each wrapper simply
 * forwards to the corresponding *_c_template with fmt/hasAlpha fixed, so
 * the compiler can specialize the templates per format.
 */
1232 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1233 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1234 const int16_t **lumSrc, int lumFilterSize, \
1235 const int16_t *chrFilter, const int16_t **chrUSrc, \
1236 const int16_t **chrVSrc, int chrFilterSize, \
1237 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1240 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1241 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1242 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1244 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1245 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1246 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1247 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1248 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1249 int yalpha, int uvalpha, int y) \
1251 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1252 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1255 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1256 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1257 const int16_t *abuf0, uint8_t *dest, int dstW, \
1258 int uvalpha, int y) \
1260 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1261 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiate the packed-RGB output wrappers. The 32-bit formats come in
 * three flavors: runtime alpha check (c->alpPixBuf), forced alpha (a32*),
 * and no alpha (x32*); the latter two are CONFIG_SWSCALE_ALPHA variants. */
1265 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1266 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1268 #if CONFIG_SWSCALE_ALPHA
1269 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1270 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1272 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1273 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1275 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1276 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1277 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1278 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1279 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1280 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1281 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1282 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * Full-chroma-resolution vertical filter + RGB conversion: one output pixel
 * per iteration (no 4:2:2 pairing), computing R/G/B directly from the
 * context's fixed-point yuv2rgb coefficients instead of lookup tables.
 * step is the output pixel stride in bytes (3 for 24-bit, else 4).
 */
1284 static av_always_inline void
1285 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1286 const int16_t **lumSrc, int lumFilterSize,
1287 const int16_t *chrFilter, const int16_t **chrUSrc,
1288 const int16_t **chrVSrc, int chrFilterSize,
1289 const int16_t **alpSrc, uint8_t *dest,
1290 int dstW, int y, enum PixelFormat target, int hasAlpha)
1293 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1295 for (i = 0; i < dstW; i++) {
1303 for (j = 0; j < lumFilterSize; j++) {
1304 Y += lumSrc[j][i] * lumFilter[j];
1306 for (j = 0; j < chrFilterSize; j++) {
1307 U += chrUSrc[j][i] * chrFilter[j];
1308 V += chrVSrc[j][i] * chrFilter[j];
1315 for (j = 0; j < lumFilterSize; j++) {
1316 A += alpSrc[j][i] * lumFilter[j];
1320 A = av_clip_uint8(A);
1322 Y -= c->yuv2rgb_y_offset;
1323 Y *= c->yuv2rgb_y_coeff;
1325 R = Y + V*c->yuv2rgb_v2r_coeff;
1326 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1327 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip to 30-bit unsigned only when an overflow bit is set */
1328 if ((R | G | B) & 0xC0000000) {
1329 R = av_clip_uintp2(R, 30);
1330 G = av_clip_uintp2(G, 30);
1331 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on ARGB-vs-RGBA ordering of the target */
1336 dest[0] = hasAlpha ? A : 255;
1350 dest[3] = hasAlpha ? A : 255;
1353 dest[0] = hasAlpha ? A : 255;
1368 dest[3] = hasAlpha ? A : 255;
/* Full-chroma (SWS_FULL_CHR_H_INT) output wrappers: X-filter only. */
1376 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1377 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1378 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1379 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1381 #if CONFIG_SWSCALE_ALPHA
1382 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1383 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1384 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1385 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1387 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1388 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1389 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1390 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1392 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1393 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1395 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1396 int width, int height,
1400 uint8_t *ptr = plane + stride*y;
1401 for (i=0; i<height; i++) {
1402 memset(ptr, val, width);
/* Helpers for the rgb48 readers below: input_pixel loads one 16-bit
 * component honoring the format's endianness; r/b swap the first/last
 * component for BGR48 so the shared templates can use one formula. */
1407 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1409 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1410 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * Convert one line of 48-bit RGB/BGR (16 bits per component) to 16-bit
 * luma using the fixed-point BT.601 coefficients RY/GY/BY.
 */
1412 static av_always_inline void
1413 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1414 enum PixelFormat origin)
1417 for (i = 0; i < width; i++) {
1418 unsigned int r_b = input_pixel(&src[i*3+0]);
1419 unsigned int g = input_pixel(&src[i*3+1]);
1420 unsigned int b_r = input_pixel(&src[i*3+2]);
/* 0x2001 rounding constant: +0.5 plus the 16<<8 luma offset */
1422 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Convert one line of 48-bit RGB/BGR to 16-bit chroma (U and V planes).
 * src2 is unused here; callers pass the same line twice.
 */
1426 static av_always_inline void
1427 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1428 const uint16_t *src1, const uint16_t *src2,
1429 int width, enum PixelFormat origin)
1433 for (i = 0; i < width; i++) {
1434 int r_b = input_pixel(&src1[i*3+0]);
1435 int g = input_pixel(&src1[i*3+1]);
1436 int b_r = input_pixel(&src1[i*3+2]);
/* 0x10001 rounding: +0.5 plus the 128<<8 chroma bias */
1438 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1439 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Same as rgb48ToUV_c_template but horizontally averages each pair of
 * input pixels (for chroma subsampling), rounding to nearest.
 */
1443 static av_always_inline void
1444 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1445 const uint16_t *src1, const uint16_t *src2,
1446 int width, enum PixelFormat origin)
1450 for (i = 0; i < width; i++) {
1451 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1452 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1453 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1455 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1456 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generator for the public rgb48/bgr48 reader entry points (ToY, ToUV,
 * ToUV_half): each casts the byte pointers to uint16_t and forwards to the
 * shared templates with the pixel format baked in.
 */
1464 #define rgb48funcs(pattern, BE_LE, origin) \
1465 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1466 int width, uint32_t *unused) \
1468 const uint16_t *src = (const uint16_t *) _src; \
1469 uint16_t *dst = (uint16_t *) _dst; \
1470 rgb48ToY_c_template(dst, src, width, origin); \
1473 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1474 const uint8_t *_src1, const uint8_t *_src2, \
1475 int width, uint32_t *unused) \
1477 const uint16_t *src1 = (const uint16_t *) _src1, \
1478 *src2 = (const uint16_t *) _src2; \
1479 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1480 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1483 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1484 const uint8_t *_src1, const uint8_t *_src2, \
1485 int width, uint32_t *unused) \
1487 const uint16_t *src1 = (const uint16_t *) _src1, \
1488 *src2 = (const uint16_t *) _src2; \
1489 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1490 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* Instantiate readers for all four 48-bit packed formats. */
1493 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1494 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1495 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1496 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Load one packed pixel for the 16/32-bit templates: native aligned 32-bit
 * read for the 4-byte-per-pixel formats, endian-aware 16-bit read otherwise. */
1498 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1499 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1500 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic packed RGB (15/16/32 bpp) to 8-bit luma. The mask/shift
 * parameters describe where each component sits in the pixel; rsh/gsh/bsh
 * pre-scale the coefficients and S is the final normalization shift.
 */
1502 static av_always_inline void
1503 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1504 int width, enum PixelFormat origin,
1505 int shr, int shg, int shb, int shp,
1506 int maskr, int maskg, int maskb,
1507 int rsh, int gsh, int bsh, int S)
1509 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1510 rnd = 33 << (S - 1);
1513 for (i = 0; i < width; i++) {
1514 int px = input_pixel(i) >> shp;
1515 int b = (px & maskb) >> shb;
1516 int g = (px & maskg) >> shg;
1517 int r = (px & maskr) >> shr;
1519 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/*
 * Generic packed RGB (15/16/32 bpp) to 8-bit chroma, same mask/shift
 * parameterization as rgb16_32ToY_c_template.
 */
1523 static av_always_inline void
1524 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1525 const uint8_t *src, int width,
1526 enum PixelFormat origin,
1527 int shr, int shg, int shb, int shp,
1528 int maskr, int maskg, int maskb,
1529 int rsh, int gsh, int bsh, int S)
1531 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1532 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1533 rnd = 257 << (S - 1);
1536 for (i = 0; i < width; i++) {
1537 int px = input_pixel(i) >> shp;
1538 int b = (px & maskb) >> shb;
1539 int g = (px & maskg) >> shg;
1540 int r = (px & maskr) >> shr;
1542 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1543 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/*
 * Horizontally-averaging variant: sums two adjacent pixels componentwise
 * using mask arithmetic (g accumulated via maskgx, r+b via the remainder),
 * then emits chroma at half horizontal resolution (final shift S + 1).
 */
1547 static av_always_inline void
1548 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1549 const uint8_t *src, int width,
1550 enum PixelFormat origin,
1551 int shr, int shg, int shb, int shp,
1552 int maskr, int maskg, int maskb,
1553 int rsh, int gsh, int bsh, int S)
1555 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1556 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1557 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen masks by one bit so the summed (9/6-bit) components still fit */
1560 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1561 for (i = 0; i < width; i++) {
1562 int px0 = input_pixel(2 * i + 0) >> shp;
1563 int px1 = input_pixel(2 * i + 1) >> shp;
1564 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1565 int rb = px0 + px1 - g;
1567 b = (rb & maskb) >> shb;
/* 565-style layouts need the summed green realigned before masking */
1568 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1569 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1572 g = (g & maskg) >> shg;
1574 r = (rb & maskr) >> shr;
1576 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1577 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Generator for the packed 15/16/32-bit reader entry points: emits ToY,
 * ToUV and ToUV_half wrappers that forward to the shared templates with
 * all mask/shift parameters baked in as compile-time constants.
 */
1583 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1584 maskg, maskb, rsh, gsh, bsh, S) \
1585 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1586 int width, uint32_t *unused) \
1588 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1589 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1592 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1593 const uint8_t *src, const uint8_t *dummy, \
1594 int width, uint32_t *unused) \
1596 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1597 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1600 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1601 const uint8_t *src, const uint8_t *dummy, \
1602 int width, uint32_t *unused) \
1604 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1605 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Reader instantiations for 32-bit (with/without leading padding byte, shp)
 * and 15/16-bit packed formats in both endiannesses. */
1608 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1609 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1610 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1611 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1612 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1613 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1614 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1615 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1616 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1617 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1618 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1619 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/**
 * Extract the alpha channel from packed ABGR/ARGB (alpha is byte 0 of each
 * 4-byte pixel).
 * NOTE(review): loop body was lost in extraction and reconstructed as
 * dst[i] = src[4*i] — confirm against upstream swscale.c.
 */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i];
    }
}
/**
 * Extract the alpha channel from packed RGBA/BGRA (alpha is byte 3 of each
 * 4-byte pixel).
 * NOTE(review): loop body was lost in extraction and reconstructed as
 * dst[i] = src[4*i+3] — confirm against upstream swscale.c.
 */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i+3];
    }
}
/**
 * PAL8 to luma: look each palette index up in pal[] (entries laid out as
 * V<<16 | U<<8 | Y, per palToUV below) and keep the low byte as Y.
 * NOTE(review): the 'int d = src[i];' declaration was lost in extraction
 * and restored.
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/**
 * PAL8 to chroma: palette entries hold V in bits 16-23 and U in bits 8-15.
 * src1 and src2 must alias (asserted) — the palette path has no separate
 * second field.
 * NOTE(review): the dstU/dstV store lines were lost in extraction and
 * restored as p>>8 / p>>16 (truncated to uint8_t) — confirm upstream.
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/**
 * 1 bpp monowhite (0 = white) to 8-bit luma: each source byte expands to
 * 8 pixels, MSB first; bits are inverted so a 0 bit becomes 255.
 * NOTE(review): the 'int d = ~src[i];' line was lost in extraction and
 * restored (the inversion is what distinguishes this from monoblack2Y_c).
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/**
 * 1 bpp monoblack (1 = white) to 8-bit luma: each source byte expands to
 * 8 pixels, MSB first; a set bit becomes 255.
 * NOTE(review): the 'int d = src[i];' line was lost in extraction and
 * restored (no inversion, unlike monowhite2Y_c).
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1683 //FIXME yuy2* can read up to 7 samples too much
/**
 * Extract luma from packed YUY2 (Y0 U Y1 V): Y is every even byte.
 * NOTE(review): trailing parameter and loop body lost in extraction,
 * restored as dst[i] = src[2*i] — confirm against upstream swscale.c.
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/**
 * Extract chroma from packed YUY2 (Y0 U Y1 V): U at byte 1, V at byte 3 of
 * every 4-byte pair. src1 and src2 must alias (asserted).
 * NOTE(review): structure (braces, 'int i;') restored; all logic lines
 * were visible in the garbled source.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/* Byte-swap one 16-bit-per-sample plane (LE<->BE grayscale/luma input). */
1704 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1707 const uint16_t *src = (const uint16_t *) _src;
1708 uint16_t *dst = (uint16_t *) _dst;
1709 for (i=0; i<width; i++) {
1710 dst[i] = av_bswap16(src[i]);
/* Byte-swap two 16-bit-per-sample chroma planes in one pass. */
1714 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1715 const uint8_t *_src2, int width, uint32_t *unused)
1718 const uint16_t *src1 = (const uint16_t *) _src1,
1719 *src2 = (const uint16_t *) _src2;
1720 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1721 for (i=0; i<width; i++) {
1722 dstU[i] = av_bswap16(src1[i]);
1723 dstV[i] = av_bswap16(src2[i]);
1727 /* This is almost identical to the previous, end exists only because
1728 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Extract luma from packed UYVY (U Y0 V Y1): Y is every odd byte.
 * Kept separate from yuy2ToY_c so the chroma reader keeps aligned access
 * (see the comment preceding this function in the file).
 * NOTE(review): trailing parameter and loop body lost in extraction,
 * restored as dst[i] = src[2*i+1] — confirm against upstream swscale.c.
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i+1];
}
/**
 * Extract chroma from packed UYVY (U Y0 V Y1): U at byte 0, V at byte 2 of
 * every 4-byte pair. src1 and src2 must alias (asserted).
 * NOTE(review): structure (braces, 'int i;') restored; all logic lines
 * were visible in the garbled source.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
/* De-interleave a semi-planar chroma plane: even bytes to dst1, odd bytes
 * to dst2. Shared by the NV12/NV21 wrappers below, which just swap the
 * destination order. */
1748 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1749 const uint8_t *src, int width)
1752 for (i = 0; i < width; i++) {
1753 dst1[i] = src[2*i+0];
1754 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — U first. */
1758 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1759 const uint8_t *src1, const uint8_t *src2,
1760 int width, uint32_t *unused)
1762 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — destinations swapped vs. NV12. */
1765 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1766 const uint8_t *src1, const uint8_t *src2,
1767 int width, uint32_t *unused)
1769 nvXXtoUV_c(dstV, dstU, src1, width);
1772 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 to 8-bit luma via fixed-point BT.601 coefficients.
 * NOTE(review): the component-load lines (b/g/r from src[3*i..]) appear
 * lost in extraction; visible code kept byte-identical. */
1774 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1775 int width, uint32_t *unused)
1778 for (i=0; i<width; i++) {
1783 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 to 8-bit chroma; src1 and src2 must alias (asserted). The
 * 257<<(SHIFT-1) rounding constant folds in the +128 chroma bias. */
1787 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1788 const uint8_t *src2, int width, uint32_t *unused)
1791 for (i=0; i<width; i++) {
1792 int b= src1[3*i + 0];
1793 int g= src1[3*i + 1];
1794 int r= src1[3*i + 2];
1796 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1797 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1799 assert(src1 == src2);
/* BGR24 to chroma at half horizontal resolution: sums adjacent pixel
 * pairs, hence the extra +1 in the final shift. src1 == src2 (asserted). */
1802 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1803 const uint8_t *src2, int width, uint32_t *unused)
1806 for (i=0; i<width; i++) {
1807 int b= src1[6*i + 0] + src1[6*i + 3];
1808 int g= src1[6*i + 1] + src1[6*i + 4];
1809 int r= src1[6*i + 2] + src1[6*i + 5];
1811 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1812 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1814 assert(src1 == src2);
/* RGB24 to 8-bit luma (component order reversed vs. bgr24ToY_c).
 * NOTE(review): trailing parameter and component-load lines appear lost
 * in extraction; visible code kept byte-identical. */
1817 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1821 for (i=0; i<width; i++) {
1826 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 to 8-bit chroma (component order reversed vs. bgr24ToUV_c). */
1830 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1831 const uint8_t *src2, int width, uint32_t *unused)
1835 for (i=0; i<width; i++) {
1836 int r= src1[3*i + 0];
1837 int g= src1[3*i + 1];
1838 int b= src1[3*i + 2];
1840 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1841 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 to chroma at half horizontal resolution (adjacent pixels summed). */
1845 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1846 const uint8_t *src2, int width, uint32_t *unused)
1850 for (i=0; i<width; i++) {
1851 int r= src1[6*i + 0] + src1[6*i + 3];
1852 int g= src1[6*i + 1] + src1[6*i + 4];
1853 int b= src1[6*i + 2] + src1[6*i + 5];
1855 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1856 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/*
 * Horizontal FIR scaling for >8-bit input: 16-bit samples filtered with
 * 14-bit coefficients into a 32-bit destination, normalized by a shift
 * derived from the source bit depth and clamped to 19 bits.
 */
1860 static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1861 const int16_t *filter,
1862 const int16_t *filterPos, int filterSize)
1865 int32_t *dst = (int32_t *) _dst;
1866 const uint16_t *src = (const uint16_t *) _src;
1867 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1868 int sh = (bits <= 7) ? 11 : (bits - 4);
1870 for (i = 0; i < dstW; i++) {
1872 int srcPos = filterPos[i];
1875 for (j = 0; j < filterSize; j++) {
1876 val += src[srcPos + j] * filter[filterSize * i + j];
1878 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1879 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1883 // bilinear / bicubic scaling
/*
 * Generic horizontal FIR scaling for 8-bit input: each output sample is a
 * filterSize-tap weighted sum, >>7 normalized and clamped to 15 bits
 * (bicubic filters can overshoot the nominal range).
 */
1884 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1885 const int16_t *filter, const int16_t *filterPos,
1889 for (i=0; i<dstW; i++) {
1891 int srcPos= filterPos[i];
1893 for (j=0; j<filterSize; j++) {
1894 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1896 //filter += hFilterSize;
1897 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1902 //FIXME all pal and rgb srcFormats could do this convertion as well
1903 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma (16-240) to full range (0-255), fixed point;
 * input is clamped so the multiply cannot overflow 16 bits of headroom. */
1904 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1907 for (i = 0; i < width; i++) {
1908 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1909 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/**
 * Compress full-range (JPEG) chroma to limited range (16-240), in place,
 * on 15-bit fixed-point samples: out = (in*1799 + 4081085) >> 11
 * (i.e. scale by 224/255 and add the 16<<7 offset; //1469 is the constant's
 * sub-LSB remainder at zero input).
 * NOTE(review): only declarations/braces were lost in extraction; all
 * arithmetic lines were visible and are unchanged.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/* Expand limited-range luma (16-235) to full range, fixed point. */
1920 static void lumRangeToJpeg_c(int16_t *dst, int width)
1923 for (i = 0; i < width; i++)
1924 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/**
 * Compress full-range (JPEG) luma to limited range (16-235), in place, on
 * 15-bit fixed-point samples: out = (in*14071 + 33561947) >> 14
 * (scale by 219/255, add the 16<<7 offset — zero input maps to 2048).
 * NOTE(review): only declarations/braces were lost in extraction; the
 * arithmetic line was visible and is unchanged.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/* 19-bit (int32 storage) variant of chrRangeToJpeg_c; the int16_t pointer
 * parameters actually carry int32_t data, matching the wider scaling path. */
1933 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1936 int32_t *dstU = (int32_t *) _dstU;
1937 int32_t *dstV = (int32_t *) _dstV;
1938 for (i = 0; i < width; i++) {
1939 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1940 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/**
 * 19-bit (int32 storage) variant of chrRangeFromJpeg_c: the int16_t
 * pointer parameters actually carry int32_t samples (constants are the
 * 15-bit ones shifted left by 4).
 * NOTE(review): only declarations/braces were lost in extraction; the
 * arithmetic lines were visible and are unchanged.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/* 19-bit (int32 storage) variant of lumRangeToJpeg_c. */
1953 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1956 int32_t *dst = (int32_t *) _dst;
1957 for (i = 0; i < width; i++)
1958 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/**
 * 19-bit (int32 storage) variant of lumRangeFromJpeg_c; the int16_t
 * pointer parameter actually carries int32_t samples.
 * NOTE(review): only declarations/braces were lost in extraction; the
 * arithmetic line was visible and is unchanged.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
}
/*
 * Fast bilinear horizontal luma scaler: 16.16 fixed-point source position,
 * 7-bit interpolation weight; output samples are 15-bit.
 * NOTE(review): the per-iteration 'xpos += xInc' advance appears lost in
 * extraction; visible code kept byte-identical.
 */
1968 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1969 const uint8_t *src, int srcW, int xInc)
1972 unsigned int xpos=0;
1973 for (i=0;i<dstWidth;i++) {
1974 register unsigned int xx=xpos>>16;
1975 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1976 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/**
 * Widen 8-bit samples to 16 bits in place/reverse order by duplicating
 * each byte into both halves (x -> x*257, i.e. full 16-bit range).
 * Iterates backwards so dst may alias src.
 * NOTE(review): only declarations/braces were lost in extraction; the
 * store line was visible and is unchanged.
 */
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    int i;
    uint8_t *dst = (uint8_t *) _dst;
    for (i = len - 1; i >= 0; i--) {
        dst[i * 2] = dst[i * 2 + 1] = src[i];
    }
}
/**
 * Narrow 19-bit samples (int32 storage) to 15-bit int16 by an arithmetic
 * right shift of 4. Forward iteration; callers pass aliasing dst/src
 * (int32 reads stay ahead of int16 writes).
 * NOTE(review): only declarations/braces were lost in extraction; the
 * store line was visible and is unchanged.
 */
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        dst[i] = src[i] >> 4;
    }
}
1998 // *** horizontal scale Y line to temp buffer
/*
 * Horizontal scaling driver for one luma (or alpha) line: optional input
 * format conversion to YV12, optional 8->16-bit widening, FIR or fast
 * bilinear horizontal scale, optional range conversion, and optional
 * 19->15-bit narrowing for 16-bit scaling output.
 */
1999 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2000 const uint8_t *src, int srcW, int xInc,
2001 const int16_t *hLumFilter,
2002 const int16_t *hLumFilterPos, int hLumFilterSize,
2003 uint8_t *formatConvBuffer,
2004 uint32_t *pal, int isAlpha)
2006 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2007 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* input pixel format conversion into the scratch buffer, if needed */
2010 toYV12(formatConvBuffer, src, srcW, pal);
2011 src= formatConvBuffer;
2014 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2015 c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
2016 src = formatConvBuffer;
2019 if (!c->hyscale_fast) {
2020 c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2021 } else { // fast bilinear upscale / crap downscale
2022 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2026 convertRange(dst, dstWidth);
2028 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2029 c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/*
 * Fast bilinear horizontal chroma scaler: scales the U and V planes in one
 * pass with a 7-bit weight (xalpha^127 == 127 - xalpha here).
 * NOTE(review): the per-iteration 'xpos += xInc' advance appears lost in
 * extraction; visible code kept byte-identical.
 */
2033 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2034 int dstWidth, const uint8_t *src1,
2035 const uint8_t *src2, int srcW, int xInc)
2038 unsigned int xpos=0;
2039 for (i=0;i<dstWidth;i++) {
2040 register unsigned int xx=xpos>>16;
2041 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2042 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2043 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Horizontal scaling driver for one chroma line pair (U and V): mirrors
 * hyscale — optional input conversion (into two halves of the scratch
 * buffer), optional 8->16-bit widening, FIR or fast bilinear scale,
 * optional range conversion, optional 19->15-bit narrowing.
 */
2048 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2049 const uint8_t *src1, const uint8_t *src2,
2050 int srcW, int xInc, const int16_t *hChrFilter,
2051 const int16_t *hChrFilterPos, int hChrFilterSize,
2052 uint8_t *formatConvBuffer, uint32_t *pal)
/* second half of the scratch buffer holds the converted V plane */
2055 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
2056 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2057 src1= formatConvBuffer;
2061 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2062 uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
2063 c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2064 c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2065 src1 = formatConvBuffer;
2069 if (!c->hcscale_fast) {
2070 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2071 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2072 } else { // fast bilinear upscale / crap downscale
2073 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2076 if (c->chrConvertRange)
2077 c->chrConvertRange(dst1, dst2, dstWidth);
2079 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2080 c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2081 c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
2085 static av_always_inline void
2086 find_c_packed_planar_out_funcs(SwsContext *c,
2087 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2088 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2089 yuv2packedX_fn *yuv2packedX)
2091 enum PixelFormat dstFormat = c->dstFormat;
2093 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2094 *yuv2yuvX = yuv2nv12X_c;
2095 } else if (is16BPS(dstFormat)) {
2096 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2097 } else if (is9_OR_10BPS(dstFormat)) {
2098 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2099 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2101 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2104 *yuv2yuv1 = yuv2yuv1_c;
2105 *yuv2yuvX = yuv2yuvX_c;
2107 if(c->flags & SWS_FULL_CHR_H_INT) {
2108 switch (dstFormat) {
2111 *yuv2packedX = yuv2rgba32_full_X_c;
2113 #if CONFIG_SWSCALE_ALPHA
2115 *yuv2packedX = yuv2rgba32_full_X_c;
2117 #endif /* CONFIG_SWSCALE_ALPHA */
2119 *yuv2packedX = yuv2rgbx32_full_X_c;
2121 #endif /* !CONFIG_SMALL */
2125 *yuv2packedX = yuv2argb32_full_X_c;
2127 #if CONFIG_SWSCALE_ALPHA
2129 *yuv2packedX = yuv2argb32_full_X_c;
2131 #endif /* CONFIG_SWSCALE_ALPHA */
2133 *yuv2packedX = yuv2xrgb32_full_X_c;
2135 #endif /* !CONFIG_SMALL */
2139 *yuv2packedX = yuv2bgra32_full_X_c;
2141 #if CONFIG_SWSCALE_ALPHA
2143 *yuv2packedX = yuv2bgra32_full_X_c;
2145 #endif /* CONFIG_SWSCALE_ALPHA */
2147 *yuv2packedX = yuv2bgrx32_full_X_c;
2149 #endif /* !CONFIG_SMALL */
2153 *yuv2packedX = yuv2abgr32_full_X_c;
2155 #if CONFIG_SWSCALE_ALPHA
2157 *yuv2packedX = yuv2abgr32_full_X_c;
2159 #endif /* CONFIG_SWSCALE_ALPHA */
2161 *yuv2packedX = yuv2xbgr32_full_X_c;
2163 #endif /* !CONFIG_SMALL */
2166 *yuv2packedX = yuv2rgb24_full_X_c;
2169 *yuv2packedX = yuv2bgr24_full_X_c;
2173 switch (dstFormat) {
2174 case PIX_FMT_GRAY16BE:
2175 *yuv2packed1 = yuv2gray16BE_1_c;
2176 *yuv2packed2 = yuv2gray16BE_2_c;
2177 *yuv2packedX = yuv2gray16BE_X_c;
2179 case PIX_FMT_GRAY16LE:
2180 *yuv2packed1 = yuv2gray16LE_1_c;
2181 *yuv2packed2 = yuv2gray16LE_2_c;
2182 *yuv2packedX = yuv2gray16LE_X_c;
2184 case PIX_FMT_MONOWHITE:
2185 *yuv2packed1 = yuv2monowhite_1_c;
2186 *yuv2packed2 = yuv2monowhite_2_c;
2187 *yuv2packedX = yuv2monowhite_X_c;
2189 case PIX_FMT_MONOBLACK:
2190 *yuv2packed1 = yuv2monoblack_1_c;
2191 *yuv2packed2 = yuv2monoblack_2_c;
2192 *yuv2packedX = yuv2monoblack_X_c;
2194 case PIX_FMT_YUYV422:
2195 *yuv2packed1 = yuv2yuyv422_1_c;
2196 *yuv2packed2 = yuv2yuyv422_2_c;
2197 *yuv2packedX = yuv2yuyv422_X_c;
2199 case PIX_FMT_UYVY422:
2200 *yuv2packed1 = yuv2uyvy422_1_c;
2201 *yuv2packed2 = yuv2uyvy422_2_c;
2202 *yuv2packedX = yuv2uyvy422_X_c;
2204 case PIX_FMT_RGB48LE:
2205 *yuv2packed1 = yuv2rgb48le_1_c;
2206 *yuv2packed2 = yuv2rgb48le_2_c;
2207 *yuv2packedX = yuv2rgb48le_X_c;
2209 case PIX_FMT_RGB48BE:
2210 *yuv2packed1 = yuv2rgb48be_1_c;
2211 *yuv2packed2 = yuv2rgb48be_2_c;
2212 *yuv2packedX = yuv2rgb48be_X_c;
2214 case PIX_FMT_BGR48LE:
2215 *yuv2packed1 = yuv2bgr48le_1_c;
2216 *yuv2packed2 = yuv2bgr48le_2_c;
2217 *yuv2packedX = yuv2bgr48le_X_c;
2219 case PIX_FMT_BGR48BE:
2220 *yuv2packed1 = yuv2bgr48be_1_c;
2221 *yuv2packed2 = yuv2bgr48be_2_c;
2222 *yuv2packedX = yuv2bgr48be_X_c;
2227 *yuv2packed1 = yuv2rgb32_1_c;
2228 *yuv2packed2 = yuv2rgb32_2_c;
2229 *yuv2packedX = yuv2rgb32_X_c;
2231 #if CONFIG_SWSCALE_ALPHA
2233 *yuv2packed1 = yuv2rgba32_1_c;
2234 *yuv2packed2 = yuv2rgba32_2_c;
2235 *yuv2packedX = yuv2rgba32_X_c;
2237 #endif /* CONFIG_SWSCALE_ALPHA */
2239 *yuv2packed1 = yuv2rgbx32_1_c;
2240 *yuv2packed2 = yuv2rgbx32_2_c;
2241 *yuv2packedX = yuv2rgbx32_X_c;
2243 #endif /* !CONFIG_SMALL */
2245 case PIX_FMT_RGB32_1:
2246 case PIX_FMT_BGR32_1:
2248 *yuv2packed1 = yuv2rgb32_1_1_c;
2249 *yuv2packed2 = yuv2rgb32_1_2_c;
2250 *yuv2packedX = yuv2rgb32_1_X_c;
2252 #if CONFIG_SWSCALE_ALPHA
2254 *yuv2packed1 = yuv2rgba32_1_1_c;
2255 *yuv2packed2 = yuv2rgba32_1_2_c;
2256 *yuv2packedX = yuv2rgba32_1_X_c;
2258 #endif /* CONFIG_SWSCALE_ALPHA */
2260 *yuv2packed1 = yuv2rgbx32_1_1_c;
2261 *yuv2packed2 = yuv2rgbx32_1_2_c;
2262 *yuv2packedX = yuv2rgbx32_1_X_c;
2264 #endif /* !CONFIG_SMALL */
2267 *yuv2packed1 = yuv2rgb24_1_c;
2268 *yuv2packed2 = yuv2rgb24_2_c;
2269 *yuv2packedX = yuv2rgb24_X_c;
2272 *yuv2packed1 = yuv2bgr24_1_c;
2273 *yuv2packed2 = yuv2bgr24_2_c;
2274 *yuv2packedX = yuv2bgr24_X_c;
2276 case PIX_FMT_RGB565LE:
2277 case PIX_FMT_RGB565BE:
2278 case PIX_FMT_BGR565LE:
2279 case PIX_FMT_BGR565BE:
2280 *yuv2packed1 = yuv2rgb16_1_c;
2281 *yuv2packed2 = yuv2rgb16_2_c;
2282 *yuv2packedX = yuv2rgb16_X_c;
2284 case PIX_FMT_RGB555LE:
2285 case PIX_FMT_RGB555BE:
2286 case PIX_FMT_BGR555LE:
2287 case PIX_FMT_BGR555BE:
2288 *yuv2packed1 = yuv2rgb15_1_c;
2289 *yuv2packed2 = yuv2rgb15_2_c;
2290 *yuv2packedX = yuv2rgb15_X_c;
2292 case PIX_FMT_RGB444LE:
2293 case PIX_FMT_RGB444BE:
2294 case PIX_FMT_BGR444LE:
2295 case PIX_FMT_BGR444BE:
2296 *yuv2packed1 = yuv2rgb12_1_c;
2297 *yuv2packed2 = yuv2rgb12_2_c;
2298 *yuv2packedX = yuv2rgb12_X_c;
2302 *yuv2packed1 = yuv2rgb8_1_c;
2303 *yuv2packed2 = yuv2rgb8_2_c;
2304 *yuv2packedX = yuv2rgb8_X_c;
2308 *yuv2packed1 = yuv2rgb4_1_c;
2309 *yuv2packed2 = yuv2rgb4_2_c;
2310 *yuv2packedX = yuv2rgb4_X_c;
2312 case PIX_FMT_RGB4_BYTE:
2313 case PIX_FMT_BGR4_BYTE:
2314 *yuv2packed1 = yuv2rgb4b_1_c;
2315 *yuv2packed2 = yuv2rgb4b_2_c;
2316 *yuv2packedX = yuv2rgb4b_X_c;
2322 #define DEBUG_SWSCALE_BUFFERS 0
2323 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Generic C scaling core: consumes one horizontal slice of the source
 * picture (srcSliceH lines starting at line srcSliceY), horizontally
 * scales it into the luma/chroma ring buffers, then vertically scales
 * the buffered lines into the destination planes.
 *
 * The caller may feed the picture slice by slice; state that must survive
 * between calls (ring-buffer indices, last buffered input lines) is kept
 * in the context and written back before returning.
 *
 * Returns the number of destination lines output by this call
 * (dstY - lastDstY).
 */
2325 static int swScale(SwsContext *c, const uint8_t* src[],
2326 int srcStride[], int srcSliceY,
2327 int srcSliceH, uint8_t* dst[], int dstStride[])
2329 /* load a few things into local vars to make the code more readable? and faster */
2330 const int srcW= c->srcW;
2331 const int dstW= c->dstW;
2332 const int dstH= c->dstH;
2333 const int chrDstW= c->chrDstW;
2334 const int chrSrcW= c->chrSrcW;
2335 const int lumXInc= c->lumXInc;
2336 const int chrXInc= c->chrXInc;
2337 const enum PixelFormat dstFormat= c->dstFormat;
2338 const int flags= c->flags;
2339 int16_t *vLumFilterPos= c->vLumFilterPos;
2340 int16_t *vChrFilterPos= c->vChrFilterPos;
2341 int16_t *hLumFilterPos= c->hLumFilterPos;
2342 int16_t *hChrFilterPos= c->hChrFilterPos;
2343 int16_t *vLumFilter= c->vLumFilter;
2344 int16_t *vChrFilter= c->vChrFilter;
2345 int16_t *hLumFilter= c->hLumFilter;
2346 int16_t *hChrFilter= c->hChrFilter;
2347 int32_t *lumMmxFilter= c->lumMmxFilter;
2348 int32_t *chrMmxFilter= c->chrMmxFilter;
2349 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2350 const int vLumFilterSize= c->vLumFilterSize;
2351 const int vChrFilterSize= c->vChrFilterSize;
2352 const int hLumFilterSize= c->hLumFilterSize;
2353 const int hChrFilterSize= c->hChrFilterSize;
// Ring buffers holding horizontally scaled lines awaiting vertical scaling.
2354 int16_t **lumPixBuf= c->lumPixBuf;
2355 int16_t **chrUPixBuf= c->chrUPixBuf;
2356 int16_t **chrVPixBuf= c->chrVPixBuf;
2357 int16_t **alpPixBuf= c->alpPixBuf;
2358 const int vLumBufSize= c->vLumBufSize;
2359 const int vChrBufSize= c->vChrBufSize;
2360 uint8_t *formatConvBuffer= c->formatConvBuffer;
// Slice position/height in chroma lines; the height uses a round-up
// (ceiling) division so partial chroma lines are not dropped.
2361 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2362 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2364 uint32_t *pal=c->pal_yuv;
2365 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2366 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2367 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2368 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2369 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
// Dither only for >8bpc sources being reduced in depth.
2370 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2372 /* vars which will change and which we need to store back in the context */
2374 int lumBufIndex= c->lumBufIndex;
2375 int chrBufIndex= c->chrBufIndex;
2376 int lastInLumBuf= c->lastInLumBuf;
2377 int lastInChrBuf= c->lastInChrBuf;
2379 if (isPacked(c->srcFormat)) {
// Packed input: mirror plane 0 into the other slots so the generic
// per-plane addressing below works unchanged.
2387 srcStride[3]= srcStride[0];
// vChrDrop skips chroma lines by widening the chroma stride.
2389 srcStride[1]<<= c->vChrDrop;
2390 srcStride[2]<<= c->vChrDrop;
2392 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2393 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2394 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2395 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2396 srcSliceY, srcSliceH, dstY, dstH);
2397 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2398 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
// Warn once (process-wide) if any destination stride breaks 8-byte alignment.
2400 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2401 static int warnedAlready=0; //FIXME move this into the context perhaps
2402 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2403 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2404 " ->cannot do aligned memory accesses anymore\n");
2409 /* Note the user might start scaling the picture in the middle so this
2410 will not get executed. This is not really intended but works
2411 currently, so people might do it. */
2412 if (srcSliceY ==0) {
2420 if (!should_dither) {
// 8-bit path: use the constant half-value table instead of dithering.
2421 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
// Main output loop: emit one destination line per iteration, as long as
// enough source lines have been buffered.
2425 for (;dstY < dstH; dstY++) {
2426 const int chrDstY= dstY>>c->chrDstVSubSample;
2427 uint8_t *dest[4] = {
2428 dst[0] + dstStride[0] * dstY,
2429 dst[1] + dstStride[1] * chrDstY,
2430 dst[2] + dstStride[2] * chrDstY,
2431 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2434 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
// firstLumSrcY2 looks ahead to the last luma line of the current chroma
// group so a whole chroma row's worth of input is available at once.
2435 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2436 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2437 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2438 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2439 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2442 //handle holes (FAST_BILINEAR & weird filters)
2443 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2444 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2445 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2446 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2448 DEBUG_BUFFERS("dstY: %d\n", dstY);
2449 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2450 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2451 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2452 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2454 // Do we have enough lines in this slice to output the dstY line
2455 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2457 if (!enough_lines) {
// Not enough input yet: just buffer everything this slice can provide.
2458 lastLumSrcY = srcSliceY + srcSliceH - 1;
2459 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2460 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2461 lastLumSrcY, lastChrSrcY);
2464 //Do horizontal scaling
2465 while(lastInLumBuf < lastLumSrcY) {
2466 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
// src[3]/srcStride[3] carry the alpha plane (see packed setup above).
2467 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2469 assert(lumBufIndex < 2*vLumBufSize);
2470 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2471 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2472 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2473 hLumFilter, hLumFilterPos, hLumFilterSize,
2476 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2477 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2478 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2482 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2483 lumBufIndex, lastInLumBuf);
2485 while(lastInChrBuf < lastChrSrcY) {
2486 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2487 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2489 assert(chrBufIndex < 2*vChrBufSize);
2490 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2491 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2492 //FIXME replace parameters through context struct (some at least)
2494 if (c->needs_hcscale)
2495 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2496 chrDstW, src1, src2, chrSrcW, chrXInc,
2497 hChrFilter, hChrFilterPos, hChrFilterSize,
2498 formatConvBuffer, pal);
2500 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2501 chrBufIndex, lastInChrBuf);
2503 //wrap buf index around to stay inside the ring buffer
2504 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2505 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2507 break; //we can't output a dstY line so let's try with the next slice
2510 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2512 if (should_dither) {
// Vary the dither pattern per output row.
2513 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2514 c->lumDither8 = dither_8x8_128[dstY & 7];
2516 if (dstY >= dstH-2) {
2517 // hmm looks like we can't use MMX here without overwriting this array's tail
2518 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2519 &yuv2packed1, &yuv2packed2,
// Translate the absolute first source line into a ring-buffer pointer;
// + vLumBufSize keeps the offset non-negative before wrapping.
2524 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2525 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2526 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2527 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2528 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2529 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2530 if ((dstY&chrSkipMask) || isGray(dstFormat))
2531 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2532 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2533 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2534 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2535 dest, dstW, chrDstW);
2536 } else { //General YV12
2537 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2538 lumSrcPtr, vLumFilterSize,
2539 vChrFilter + chrDstY * vChrFilterSize,
2540 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2541 alpSrcPtr, dest, dstW, chrDstW);
2544 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2545 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
// Packed output: chrDstY == dstY here (no vertical chroma subsampling
// on packed formats), so dstY indexes the chroma filter directly.
2546 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2547 int chrAlpha = vChrFilter[2 * dstY + 1];
2548 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2549 alpPixBuf ? *alpSrcPtr : NULL,
2550 dest[0], dstW, chrAlpha, dstY);
2551 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2552 int lumAlpha = vLumFilter[2 * dstY + 1];
2553 int chrAlpha = vChrFilter[2 * dstY + 1];
// Duplicate the 16-bit coefficient into both halves of a 32-bit word
// for the (MMX-style) packed filter tables.
2555 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2557 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2558 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2559 alpPixBuf ? alpSrcPtr : NULL,
2560 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2561 } else { //general RGB
2562 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2563 lumSrcPtr, vLumFilterSize,
2564 vChrFilter + dstY * vChrFilterSize,
2565 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2566 alpSrcPtr, dest[0], dstW, dstY);
// YUVA output from an alpha-less source: fill the alpha plane opaque.
2572 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2573 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
// Flush non-temporal stores issued by the MMX2 output paths.
2576 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2577 __asm__ volatile("sfence":::"memory");
2581 /* store changed local vars back in the context */
2583 c->lumBufIndex= lumBufIndex;
2584 c->chrBufIndex= chrBufIndex;
2585 c->lastInLumBuf= lastInLumBuf;
2586 c->lastInChrBuf= lastInChrBuf;
2588 return dstY - lastDstY;
/**
 * One-time init of the C code paths: selects the output functions, the
 * per-format input unpackers (chrToYV12 / lumToYV12 / alpToYV12), the
 * horizontal scalers and the range-conversion helpers, all based on the
 * source/destination formats and flags stored in the context.
 */
2591 static av_cold void sws_init_swScale_c(SwsContext *c)
2593 enum PixelFormat srcFormat = c->srcFormat;
// Pick the C output (vertical-scale/pack) functions for the dst format.
2595 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2596 &c->yuv2packed1, &c->yuv2packed2,
// Chroma input unpacker: converts the source's chroma into planar U/V.
// NULL means the source chroma is already in the native planar layout.
2599 c->chrToYV12 = NULL;
2601 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2602 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2603 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2604 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2608 case PIX_FMT_BGR4_BYTE:
2609 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
// High-bit-depth planar formats in the non-native byte order only need
// a byte swap on U/V (bswap16UV_c handles both directions).
2611 case PIX_FMT_YUV444P9LE:
2612 case PIX_FMT_YUV420P9LE:
2613 case PIX_FMT_YUV422P10LE:
2614 case PIX_FMT_YUV444P10LE:
2615 case PIX_FMT_YUV420P10LE:
2616 case PIX_FMT_YUV420P16LE:
2617 case PIX_FMT_YUV422P16LE:
2618 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2620 case PIX_FMT_YUV444P9BE:
2621 case PIX_FMT_YUV420P9BE:
2622 case PIX_FMT_YUV444P10BE:
2623 case PIX_FMT_YUV422P10BE:
2624 case PIX_FMT_YUV420P10BE:
2625 case PIX_FMT_YUV420P16BE:
2626 case PIX_FMT_YUV422P16BE:
2627 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
// RGB-family sources: when chroma is horizontally subsampled, use the
// *_half_c variants which average two source pixels per chroma sample.
2630 if (c->chrSrcHSubSample) {
2632 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2633 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2634 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2635 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2636 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2637 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2638 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2639 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2640 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2641 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2642 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2643 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2644 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2645 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2646 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2647 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2648 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2649 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
// No horizontal subsampling: one chroma sample per source pixel.
2653 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2654 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2655 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2656 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2657 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2658 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2659 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2660 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2661 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2662 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2663 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2664 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2665 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2666 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2667 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2668 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2669 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2670 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
// Luma / alpha input unpackers, same scheme as chroma above.
2674 c->lumToYV12 = NULL;
2675 c->alpToYV12 = NULL;
2676 switch (srcFormat) {
2678 case PIX_FMT_YUV444P9LE:
2679 case PIX_FMT_YUV420P9LE:
2680 case PIX_FMT_YUV444P10LE:
2681 case PIX_FMT_YUV422P10LE:
2682 case PIX_FMT_YUV420P10LE:
2683 case PIX_FMT_YUV420P16LE:
2684 case PIX_FMT_YUV422P16LE:
2685 case PIX_FMT_YUV444P16LE:
2686 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2688 case PIX_FMT_YUV444P9BE:
2689 case PIX_FMT_YUV420P9BE:
2690 case PIX_FMT_YUV444P10BE:
2691 case PIX_FMT_YUV422P10BE:
2692 case PIX_FMT_YUV420P10BE:
2693 case PIX_FMT_YUV420P16BE:
2694 case PIX_FMT_YUV422P16BE:
2695 case PIX_FMT_YUV444P16BE:
2696 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
// Y400A (gray+alpha) interleaves Y like YUY2's luma, so yuy2ToY_c
// (every even byte) extracts its luma too.
2698 case PIX_FMT_YUYV422 :
2699 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2700 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2701 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2702 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2703 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2704 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2705 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2706 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2707 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2708 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2709 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2710 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2714 case PIX_FMT_BGR4_BYTE:
2715 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2716 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2717 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2718 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2719 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2720 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2721 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2722 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2723 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2724 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2725 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2728 switch (srcFormat) {
2730 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2732 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
// Y400A alpha sits in every odd byte, matching UYVY's luma position,
// so uyvyToY_c doubles as the alpha extractor here.
2733 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
// Horizontal scalers and range converters: 8-bit pipeline vs. the
// high-bit-depth (16-bit intermediate) pipeline.
2737 if (c->scalingBpp == 8) {
2738 c->hScale = hScale_c;
2739 if (c->flags & SWS_FAST_BILINEAR) {
2740 c->hyscale_fast = hyscale_fast_c;
2741 c->hcscale_fast = hcscale_fast_c;
// JPEG (full) <-> MPEG (limited) range conversion; skipped for RGB
// destinations where yuv2rgb handles the range itself.
2744 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2746 c->lumConvertRange = lumRangeFromJpeg_c;
2747 c->chrConvertRange = chrRangeFromJpeg_c;
2749 c->lumConvertRange = lumRangeToJpeg_c;
2750 c->chrConvertRange = chrRangeToJpeg_c;
2754 c->hScale = hScale16_c;
2755 c->scale19To15Fw = scale19To15Fw_c;
2756 c->scale8To16Rv = scale8To16Rv_c;
2758 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2760 c->lumConvertRange = lumRangeFromJpeg16_c;
2761 c->chrConvertRange = chrRangeFromJpeg16_c;
2763 c->lumConvertRange = lumRangeToJpeg16_c;
2764 c->chrConvertRange = chrRangeToJpeg16_c;
// Chroma scaling is only needed when either side actually has chroma.
2769 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2770 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2771 c->needs_hcscale = 1;
2774 SwsFunc ff_getSwsFunc(SwsContext *c)
2776 sws_init_swScale_c(c);
2779 ff_sws_init_swScale_mmx(c);
2781 ff_sws_init_swScale_altivec(c);