2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most have been tested, but the full list was not recorded ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients, fixed-point with RGB2YUV_SHIFT
 * fractional bits.  The 0.299/0.587/0.114 (Y) and 0.500/0.419/... (U/V)
 * factors are the BT.601 matrix; 219/255 scales luma and 224/255 scales
 * chroma to limited ("studio swing") range; +0.5 rounds to nearest. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* 2x2 ordered-dither matrix (replicated across 8 columns); used below in
 * yuv2rgb_write() for the green channel of RGB565/BGR565 output. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither matrix with amplitude 8; used below in yuv2rgb_write()
 * for red/blue of RGB565 and for all channels of RGB555/BGR555 output. */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither matrix (two copies per row); used below in
 * yuv2rgb_write() for RGB444/BGR444 output.  Non-static: shared with the
 * assembly/other translation units. */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix, amplitude 32; used below in yuv2rgb_write()
 * for the red/green channels of 8-bit RGB8/BGR8 output. */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither matrix, amplitude ~73; used below in yuv2rgb_write()
 * for the blue channel of RGB8/BGR8 and the green channel of 4-bit output. */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither matrix, amplitude ~220; used below for monochrome
 * output (d128 in the yuv2mono_* templates) and for red/blue of 4/8-bit
 * RGB output in yuv2rgb_write(). */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative dither_8x8_220 table with values pre-warped for gamma 1.5.
 * NOTE(review): this reuses the identifier defined just above; in the
 * upstream file the alternative tables are disabled by preprocessor guards
 * (#if 0) whose lines are not visible in this dump — confirm the guards
 * before treating this file as compilable. */
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
/* Alternative dither_8x8_220 table pre-warped for gamma 2.0.
 * NOTE(review): duplicate identifier — presumably guarded by #if 0 upstream;
 * the guard lines are missing from this dump, verify before compiling. */
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
/* Alternative dither_8x8_220 table pre-warped for gamma 2.5.
 * NOTE(review): duplicate identifier — presumably guarded by #if 0 upstream;
 * the guard lines are missing from this dump, verify before compiling. */
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 dither matrix, amplitude 128.  Not referenced in the visible part of
 * this file; presumably used by chroma/luma dither setup elsewhere — verify
 * against the rest of libswscale. */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight packed bytes of 64, exported (ff_ prefix, non-static) — presumably a
 * constant operand for SIMD code in other translation units; confirm users. */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* Vertical-filter output stage for 16-bit-per-component planar YUV(A).
 * Convolves lumFilterSize (resp. chrFilterSize) source rows of 32-bit
 * intermediates with the given FIR coefficients, adds a rounding bias
 * (1 << (30-output_bits-1)), shifts by `shift`, clips to uint16 and stores
 * each sample big- or little-endian as selected by `big_endian`.
 * Products are pre-shifted by 1 to keep the 32-bit accumulator in range.
 * dest[0..3] = Y, U, V, A planes; alpha only when CONFIG_SWSCALE_ALPHA. */
198 static av_always_inline void
199 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
200 int lumFilterSize, const int16_t *chrFilter,
201 const int32_t **chrUSrc, const int32_t **chrVSrc,
202 int chrFilterSize, const int32_t **alpSrc,
203 uint16_t *dest[4], int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
209 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
210 int shift = 15 + 16 - output_bits - 1;
212 #define output_pixel(pos, val) \
214 AV_WB16(pos, av_clip_uint16(val >> shift)); \
216 AV_WL16(pos, av_clip_uint16(val >> shift)); \
218 for (i = 0; i < dstW; i++) {
219 int val = 1 << (30-output_bits - 1);
222 for (j = 0; j < lumFilterSize; j++)
223 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
225 output_pixel(&yDest[i], val);
229 for (i = 0; i < chrDstW; i++) {
230 int u = 1 << (30-output_bits - 1);
231 int v = 1 << (30-output_bits - 1);
234 for (j = 0; j < chrFilterSize; j++) {
235 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
236 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
239 output_pixel(&uDest[i], u);
240 output_pixel(&vDest[i], v);
244 if (CONFIG_SWSCALE_ALPHA && aDest) {
245 for (i = 0; i < dstW; i++) {
246 int val = 1 << (30-output_bits - 1);
249 for (j = 0; j < lumFilterSize; j++)
250 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
252 output_pixel(&aDest[i], val);
/* Vertical-filter output stage for 9/10-bit planar YUV(A).  Same structure
 * as the 16-bit template above, but sources are 16-bit intermediates, the
 * full products fit in 32 bits (no pre-shift), and results are clipped to
 * `output_bits` via av_clip_uintp2() before the BE/LE 16-bit store. */
258 static av_always_inline void
259 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
260 int lumFilterSize, const int16_t *chrFilter,
261 const int16_t **chrUSrc, const int16_t **chrVSrc,
262 int chrFilterSize, const int16_t **alpSrc,
263 uint16_t *dest[4], int dstW, int chrDstW,
264 int big_endian, int output_bits)
266 //FIXME Optimize (just quickly written not optimized..)
268 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
269 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
270 int shift = 11 + 16 - output_bits;
272 #define output_pixel(pos, val) \
274 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
276 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
278 for (i = 0; i < dstW; i++) {
279 int val = 1 << (26-output_bits);
282 for (j = 0; j < lumFilterSize; j++)
283 val += lumSrc[j][i] * lumFilter[j];
285 output_pixel(&yDest[i], val);
289 for (i = 0; i < chrDstW; i++) {
290 int u = 1 << (26-output_bits);
291 int v = 1 << (26-output_bits);
294 for (j = 0; j < chrFilterSize; j++) {
295 u += chrUSrc[j][i] * chrFilter[j];
296 v += chrVSrc[j][i] * chrFilter[j];
299 output_pixel(&uDest[i], u);
300 output_pixel(&vDest[i], v);
304 if (CONFIG_SWSCALE_ALPHA && aDest) {
305 for (i = 0; i < dstW; i++) {
306 int val = 1 << (26-output_bits);
309 for (j = 0; j < lumFilterSize; j++)
310 val += alpSrc[j][i] * lumFilter[j];
312 output_pixel(&aDest[i], val);
/* Generates a concrete SwsContext-signature output function
 * (yuv2yuvX<bits><BE|LE>_c) that casts the generic int16_t** source
 * pointers to the template's element type and forwards to the matching
 * planar template with endianness/bit-depth baked in.  Instantiated below
 * for 9/10-bit (int16_t sources) and 16-bit (int32_t sources), BE and LE. */
318 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
319 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
320 const int16_t **_lumSrc, int lumFilterSize, \
321 const int16_t *chrFilter, const int16_t **_chrUSrc, \
322 const int16_t **_chrVSrc, \
323 int chrFilterSize, const int16_t **_alpSrc, \
324 uint8_t *_dest[4], int dstW, int chrDstW) \
326 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
327 **chrUSrc = (const typeX_t **) _chrUSrc, \
328 **chrVSrc = (const typeX_t **) _chrVSrc, \
329 **alpSrc = (const typeX_t **) _alpSrc; \
330 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
331 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
332 alpSrc, (uint16_t **) _dest, \
333 dstW, chrDstW, is_be, bits); \
335 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
336 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
337 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
338 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
339 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
340 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
/* Vertical-filter output stage for 8-bit planar YUV(A).  Seeds each
 * accumulator with the per-column dither value from c->lumDither8 /
 * c->chrDither8 (shifted to the accumulator's fixed point), convolves the
 * source rows, and stores av_clip_uint8(acc >> 19).  The chroma V dither is
 * offset by 3 columns relative to U to decorrelate the two planes. */
342 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
343 const int16_t **lumSrc, int lumFilterSize,
344 const int16_t *chrFilter, const int16_t **chrUSrc,
345 const int16_t **chrVSrc,
346 int chrFilterSize, const int16_t **alpSrc,
347 uint8_t *dest[4], int dstW, int chrDstW)
349 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
350 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
352 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
354 //FIXME Optimize (just quickly written not optimized..)
355 for (i=0; i<dstW; i++) {
356 int val = lumDither[i & 7] << 12;
358 for (j=0; j<lumFilterSize; j++)
359 val += lumSrc[j][i] * lumFilter[j];
361 yDest[i]= av_clip_uint8(val>>19);
365 for (i=0; i<chrDstW; i++) {
366 int u = chrDither[i & 7] << 12;
367 int v = chrDither[(i + 3) & 7] << 12;
369 for (j=0; j<chrFilterSize; j++) {
370 u += chrUSrc[j][i] * chrFilter[j];
371 v += chrVSrc[j][i] * chrFilter[j];
374 uDest[i]= av_clip_uint8(u>>19);
375 vDest[i]= av_clip_uint8(v>>19);
378 if (CONFIG_SWSCALE_ALPHA && aDest)
379 for (i=0; i<dstW; i++) {
380 int val = lumDither[i & 7] << 12;
382 for (j=0; j<lumFilterSize; j++)
383 val += alpSrc[j][i] * lumFilter[j];
385 aDest[i]= av_clip_uint8(val>>19);
/* Unfiltered (1-tap) planar output: add the 8-entry dither pattern
 * (indexed with `offset` so successive calls can rotate it), scale the
 * 15-bit intermediate down by 7 bits and clip to uint8. */
389 static void yuv2yuv1_c(const int16_t *src, uint8_t *dest, int dstW,
390 const uint8_t *dither, int offset)
393 for (i=0; i<dstW; i++) {
394 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
395 dest[i]= av_clip_uint8(val);
/* Vertical-filter output for NV12/NV21: a full-resolution Y plane plus one
 * interleaved chroma plane.  The two chroma loops are identical except for
 * byte order — U,V pairs when dstFormat == PIX_FMT_NV12, V,U otherwise
 * (NV21).  Dither seeding matches yuv2yuvX_c above. */
399 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
400 const int16_t **lumSrc, int lumFilterSize,
401 const int16_t *chrFilter, const int16_t **chrUSrc,
402 const int16_t **chrVSrc, int chrFilterSize,
403 const int16_t **alpSrc, uint8_t *dest[4],
404 int dstW, int chrDstW)
406 uint8_t *yDest = dest[0], *uDest = dest[1];
407 enum PixelFormat dstFormat = c->dstFormat;
408 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
410 //FIXME Optimize (just quickly written not optimized..)
412 for (i=0; i<dstW; i++) {
413 int val = lumDither[i & 7] << 12;
415 for (j=0; j<lumFilterSize; j++)
416 val += lumSrc[j][i] * lumFilter[j];
418 yDest[i]= av_clip_uint8(val>>19);
424 if (dstFormat == PIX_FMT_NV12)
425 for (i=0; i<chrDstW; i++) {
426 int u = chrDither[i & 7] << 12;
427 int v = chrDither[(i + 3) & 7] << 12;
429 for (j=0; j<chrFilterSize; j++) {
430 u += chrUSrc[j][i] * chrFilter[j];
431 v += chrVSrc[j][i] * chrFilter[j];
434 uDest[2*i]= av_clip_uint8(u>>19);
435 uDest[2*i+1]= av_clip_uint8(v>>19);
438 for (i=0; i<chrDstW; i++) {
439 int u = chrDither[i & 7] << 12;
440 int v = chrDither[(i + 3) & 7] << 12;
442 for (j=0; j<chrFilterSize; j++) {
443 u += chrUSrc[j][i] * chrFilter[j];
444 v += chrVSrc[j][i] * chrFilter[j];
447 uDest[2*i]= av_clip_uint8(v>>19);
448 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel(): endian-dispatched 16-bit store for the gray16 templates
 * below (BE branch visible; LE branch is in the omitted continuation).
 *
 * yuv2gray16_X_c_template: full vertical filter to 16-bit grayscale.
 * Two luma samples per iteration; the `& 0x10000` test detects overflow
 * past 16 bits on either sample before paying for the clip. */
452 #define output_pixel(pos, val) \
453 if (target == PIX_FMT_GRAY16BE) { \
459 static av_always_inline void
460 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
461 const int32_t **lumSrc, int lumFilterSize,
462 const int16_t *chrFilter, const int32_t **chrUSrc,
463 const int32_t **chrVSrc, int chrFilterSize,
464 const int32_t **alpSrc, uint16_t *dest, int dstW,
465 int y, enum PixelFormat target)
469 for (i = 0; i < (dstW >> 1); i++) {
474 for (j = 0; j < lumFilterSize; j++) {
475 Y1 += lumSrc[j][i * 2] * lumFilter[j];
476 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
480 if ((Y1 | Y2) & 0x10000) {
481 Y1 = av_clip_uint16(Y1);
482 Y2 = av_clip_uint16(Y2);
484 output_pixel(&dest[i * 2 + 0], Y1);
485 output_pixel(&dest[i * 2 + 1], Y2);
/* Two-row (bilinear) vertical blend to 16-bit grayscale: interpolates
 * between buf[0] and buf[1] with the 12-bit weight yalpha (yalpha1 is its
 * complement against 4095), then shifts back down by 15. */
489 static av_always_inline void
490 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
491 const int32_t *ubuf[2], const int32_t *vbuf[2],
492 const int32_t *abuf[2], uint16_t *dest, int dstW,
493 int yalpha, int uvalpha, int y,
494 enum PixelFormat target)
496 int yalpha1 = 4095 - yalpha;
498 const int32_t *buf0 = buf[0], *buf1 = buf[1];
500 for (i = 0; i < (dstW >> 1); i++) {
501 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
502 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
504 output_pixel(&dest[i * 2 + 0], Y1);
505 output_pixel(&dest[i * 2 + 1], Y2);
/* Single-row ("unscaled" vertical) output to 16-bit grayscale: just
 * rescales the intermediate by << 1 and stores via output_pixel(). */
509 static av_always_inline void
510 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
511 const int32_t *ubuf[2], const int32_t *vbuf[2],
512 const int32_t *abuf0, uint16_t *dest, int dstW,
513 int uvalpha, int y, enum PixelFormat target)
517 for (i = 0; i < (dstW >> 1); i++) {
518 int Y1 = buf0[i * 2 ] << 1;
519 int Y2 = buf0[i * 2 + 1] << 1;
521 output_pixel(&dest[i * 2 + 0], Y1);
522 output_pixel(&dest[i * 2 + 1], Y2);
/* Generates the three public-signature entry points (_X_c full filter,
 * _2_c two-row blend, _1_c single row) for a 16-bit packed-output template
 * family: each wrapper casts the generic int16_t buffers to the int32_t
 * intermediates the templates expect and forwards with `fmt` baked in.
 * Instantiated here for GRAY16 LE/BE, and further below for RGB48/BGR48. */
528 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
529 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
530 const int16_t **_lumSrc, int lumFilterSize, \
531 const int16_t *chrFilter, const int16_t **_chrUSrc, \
532 const int16_t **_chrVSrc, int chrFilterSize, \
533 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
536 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
537 **chrUSrc = (const int32_t **) _chrUSrc, \
538 **chrVSrc = (const int32_t **) _chrVSrc, \
539 **alpSrc = (const int32_t **) _alpSrc; \
540 uint16_t *dest = (uint16_t *) _dest; \
541 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
542 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
543 alpSrc, dest, dstW, y, fmt); \
546 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
547 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
548 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
549 int yalpha, int uvalpha, int y) \
551 const int32_t **buf = (const int32_t **) _buf, \
552 **ubuf = (const int32_t **) _ubuf, \
553 **vbuf = (const int32_t **) _vbuf, \
554 **abuf = (const int32_t **) _abuf; \
555 uint16_t *dest = (uint16_t *) _dest; \
556 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
557 dest, dstW, yalpha, uvalpha, y, fmt); \
560 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
561 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
562 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
563 int uvalpha, int y) \
565 const int32_t *buf0 = (const int32_t *) _buf0, \
566 **ubuf = (const int32_t **) _ubuf, \
567 **vbuf = (const int32_t **) _vbuf, \
568 *abuf0 = (const int32_t *) _abuf0; \
569 uint16_t *dest = (uint16_t *) _dest; \
570 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
571 dstW, uvalpha, y, fmt); \
574 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
575 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel(): stores one accumulated byte of 8 packed 1-bit pixels
 * (MONOBLACK branch visible; the MONOWHITE inversion is in the omitted
 * continuation).
 *
 * yuv2mono_X_c_template: full vertical filter to 1-bit monochrome.
 * Each luma sample is dithered with dither_8x8_220 and thresholded through
 * the shared green lookup table g (table_gU[128] + table_gV[128], i.e.
 * neutral chroma); bits are shifted into `acc` MSB-first via acc += acc. */
577 #define output_pixel(pos, acc) \
578 if (target == PIX_FMT_MONOBLACK) { \
584 static av_always_inline void
585 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
586 const int16_t **lumSrc, int lumFilterSize,
587 const int16_t *chrFilter, const int16_t **chrUSrc,
588 const int16_t **chrVSrc, int chrFilterSize,
589 const int16_t **alpSrc, uint8_t *dest, int dstW,
590 int y, enum PixelFormat target)
592 const uint8_t * const d128=dither_8x8_220[y&7];
593 uint8_t *g = c->table_gU[128] + c->table_gV[128];
597 for (i = 0; i < dstW - 1; i += 2) {
602 for (j = 0; j < lumFilterSize; j++) {
603 Y1 += lumSrc[j][i] * lumFilter[j];
604 Y2 += lumSrc[j][i+1] * lumFilter[j];
608 if ((Y1 | Y2) & 0x100) {
609 Y1 = av_clip_uint8(Y1);
610 Y2 = av_clip_uint8(Y2);
612 acc += acc + g[Y1 + d128[(i + 0) & 7]];
613 acc += acc + g[Y2 + d128[(i + 1) & 7]];
615 output_pixel(*dest++, acc);
/* Two-row blend to 1-bit monochrome: bilinearly mixes buf0/buf1 with the
 * 12-bit yalpha weight, dithers, and packs 8 pixels per output byte,
 * MSB first, one full byte per loop iteration. */
620 static av_always_inline void
621 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
622 const int16_t *ubuf[2], const int16_t *vbuf[2],
623 const int16_t *abuf[2], uint8_t *dest, int dstW,
624 int yalpha, int uvalpha, int y,
625 enum PixelFormat target)
627 const int16_t *buf0 = buf[0], *buf1 = buf[1];
628 const uint8_t * const d128 = dither_8x8_220[y & 7];
629 uint8_t *g = c->table_gU[128] + c->table_gV[128];
630 int yalpha1 = 4095 - yalpha;
633 for (i = 0; i < dstW - 7; i += 8) {
634 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
635 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
636 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
637 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
638 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
639 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
640 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
641 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
642 output_pixel(*dest++, acc);
/* Single-row output to 1-bit monochrome: scales each 15-bit luma sample
 * down by 7 bits, dithers, and packs 8 pixels per byte, MSB first. */
646 static av_always_inline void
647 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
648 const int16_t *ubuf[2], const int16_t *vbuf[2],
649 const int16_t *abuf0, uint8_t *dest, int dstW,
650 int uvalpha, int y, enum PixelFormat target)
652 const uint8_t * const d128 = dither_8x8_220[y & 7];
653 uint8_t *g = c->table_gU[128] + c->table_gV[128];
656 for (i = 0; i < dstW - 7; i += 8) {
657 int acc = g[(buf0[i ] >> 7) + d128[0]];
658 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
659 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
660 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
661 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
662 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
663 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
664 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
665 output_pixel(*dest++, acc);
/* Same idea as YUV2PACKED16WRAPPER above, for 8-bit-intermediate packed
 * outputs: generates the _X_c / _2_c / _1_c entry points that forward to
 * the corresponding template with `fmt` baked in (no pointer casts
 * needed).  Instantiated for the monochrome pair here and for the 4:2:2
 * packed formats further below. */
671 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
672 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
673 const int16_t **lumSrc, int lumFilterSize, \
674 const int16_t *chrFilter, const int16_t **chrUSrc, \
675 const int16_t **chrVSrc, int chrFilterSize, \
676 const int16_t **alpSrc, uint8_t *dest, int dstW, \
679 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
680 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
681 alpSrc, dest, dstW, y, fmt); \
684 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
685 const int16_t *ubuf[2], const int16_t *vbuf[2], \
686 const int16_t *abuf[2], uint8_t *dest, int dstW, \
687 int yalpha, int uvalpha, int y) \
689 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
690 dest, dstW, yalpha, uvalpha, y, fmt); \
693 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
694 const int16_t *ubuf[2], const int16_t *vbuf[2], \
695 const int16_t *abuf0, uint8_t *dest, int dstW, \
696 int uvalpha, int y) \
698 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
699 abuf0, dest, dstW, uvalpha, \
703 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
704 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels(): writes one 4-byte macropixel — Y1 U Y2 V byte order for
 * YUYV422, U Y1 V Y2 for UYVY (the else-branch offsets are visible at
 * pos+1/pos+3).
 *
 * yuv2422_X_c_template: full vertical filter to packed 4:2:2.  Processes
 * two luma and one chroma pair per iteration; the `& 0x100` test catches
 * overflow past 8 bits on any component before clipping. */
706 #define output_pixels(pos, Y1, U, Y2, V) \
707 if (target == PIX_FMT_YUYV422) { \
708 dest[pos + 0] = Y1; \
710 dest[pos + 2] = Y2; \
714 dest[pos + 1] = Y1; \
716 dest[pos + 3] = Y2; \
719 static av_always_inline void
720 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
721 const int16_t **lumSrc, int lumFilterSize,
722 const int16_t *chrFilter, const int16_t **chrUSrc,
723 const int16_t **chrVSrc, int chrFilterSize,
724 const int16_t **alpSrc, uint8_t *dest, int dstW,
725 int y, enum PixelFormat target)
729 for (i = 0; i < (dstW >> 1); i++) {
736 for (j = 0; j < lumFilterSize; j++) {
737 Y1 += lumSrc[j][i * 2] * lumFilter[j];
738 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
740 for (j = 0; j < chrFilterSize; j++) {
741 U += chrUSrc[j][i] * chrFilter[j];
742 V += chrVSrc[j][i] * chrFilter[j];
748 if ((Y1 | Y2 | U | V) & 0x100) {
749 Y1 = av_clip_uint8(Y1);
750 Y2 = av_clip_uint8(Y2);
751 U = av_clip_uint8(U);
752 V = av_clip_uint8(V);
754 output_pixels(4*i, Y1, U, Y2, V);
/* Two-row blend to packed 4:2:2: luma interpolated with yalpha, chroma with
 * the independent uvalpha weight, both shifted back down by 19. */
758 static av_always_inline void
759 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
760 const int16_t *ubuf[2], const int16_t *vbuf[2],
761 const int16_t *abuf[2], uint8_t *dest, int dstW,
762 int yalpha, int uvalpha, int y,
763 enum PixelFormat target)
765 const int16_t *buf0 = buf[0], *buf1 = buf[1],
766 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
767 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
768 int yalpha1 = 4095 - yalpha;
769 int uvalpha1 = 4095 - uvalpha;
772 for (i = 0; i < (dstW >> 1); i++) {
773 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
774 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
775 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
776 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
778 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-row output to packed 4:2:2.  uvalpha < 2048 means the chroma rows
 * are not blended (take one row as-is, >> 7); otherwise average the two
 * chroma rows (sum >> 8).  Wrapper instantiations for YUYV/UYVY follow. */
782 static av_always_inline void
783 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
784 const int16_t *ubuf[2], const int16_t *vbuf[2],
785 const int16_t *abuf0, uint8_t *dest, int dstW,
786 int uvalpha, int y, enum PixelFormat target)
788 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
789 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
792 if (uvalpha < 2048) {
793 for (i = 0; i < (dstW >> 1); i++) {
794 int Y1 = buf0[i * 2] >> 7;
795 int Y2 = buf0[i * 2 + 1] >> 7;
796 int U = ubuf1[i] >> 7;
797 int V = vbuf1[i] >> 7;
799 output_pixels(i * 4, Y1, U, Y2, V);
802 for (i = 0; i < (dstW >> 1); i++) {
803 int Y1 = buf0[i * 2] >> 7;
804 int Y2 = buf0[i * 2 + 1] >> 7;
805 int U = (ubuf0[i] + ubuf1[i]) >> 8;
806 int V = (vbuf0[i] + vbuf1[i]) >> 8;
808 output_pixels(i * 4, Y1, U, Y2, V);
815 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
816 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B/B_R swap the red/blue terms so one template serves both RGB48 and
 * BGR48; output_pixel() stores each 16-bit component in the target's
 * endianness (BE branch visible, LE in the omitted continuation).
 *
 * yuv2rgb48_X_c_template: full vertical filter to 48-bit RGB/BGR.
 * Accumulates 32-bit intermediates, applies the context's fixed-point
 * YUV->RGB matrix (yuv2rgb_*_coeff / y_offset), then clips each 30-bit
 * result to [0, 2^30) and shifts down 14 to the final 16-bit component.
 * The -128 bias on U/V recenters chroma before the matrix multiply. */
818 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
819 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
820 #define output_pixel(pos, val) \
821 if (isBE(target)) { \
827 static av_always_inline void
828 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
829 const int32_t **lumSrc, int lumFilterSize,
830 const int16_t *chrFilter, const int32_t **chrUSrc,
831 const int32_t **chrVSrc, int chrFilterSize,
832 const int32_t **alpSrc, uint16_t *dest, int dstW,
833 int y, enum PixelFormat target)
837 for (i = 0; i < (dstW >> 1); i++) {
841 int U = -128 << 23; // 19
845 for (j = 0; j < lumFilterSize; j++) {
846 Y1 += lumSrc[j][i * 2] * lumFilter[j];
847 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
849 for (j = 0; j < chrFilterSize; j++) {
850 U += chrUSrc[j][i] * chrFilter[j];
851 V += chrVSrc[j][i] * chrFilter[j];
854 // 8bit: 12+15=27; 16-bit: 12+19=31
860 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
861 Y1 -= c->yuv2rgb_y_offset;
862 Y2 -= c->yuv2rgb_y_offset;
863 Y1 *= c->yuv2rgb_y_coeff;
864 Y2 *= c->yuv2rgb_y_coeff;
867 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
873 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
874 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
875 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
876 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
877 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
878 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
879 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-row blend to 48-bit RGB/BGR: bilinear interpolation (yalpha for luma,
 * uvalpha for chroma, >> 14), then the same fixed-point YUV->RGB matrix and
 * clip/shift as the full-filter template above. */
884 static av_always_inline void
885 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
886 const int32_t *ubuf[2], const int32_t *vbuf[2],
887 const int32_t *abuf[2], uint16_t *dest, int dstW,
888 int yalpha, int uvalpha, int y,
889 enum PixelFormat target)
891 const int32_t *buf0 = buf[0], *buf1 = buf[1],
892 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
893 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
894 int yalpha1 = 4095 - yalpha;
895 int uvalpha1 = 4095 - uvalpha;
898 for (i = 0; i < (dstW >> 1); i++) {
899 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
900 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
901 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
902 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
905 Y1 -= c->yuv2rgb_y_offset;
906 Y2 -= c->yuv2rgb_y_offset;
907 Y1 *= c->yuv2rgb_y_coeff;
908 Y2 *= c->yuv2rgb_y_coeff;
912 R = V * c->yuv2rgb_v2r_coeff;
913 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
914 B = U * c->yuv2rgb_u2b_coeff;
916 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
917 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
918 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
919 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
920 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
921 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Single-row output to 48-bit RGB/BGR.  uvalpha < 2048: take one chroma row
 * (>> 2 after the -128<<11 recenter); otherwise average the two rows
 * (>> 3).  Both paths then run the shared fixed-point YUV->RGB matrix and
 * clip/shift.  The YUV2PACKED16WRAPPER instantiations at the end generate
 * the public RGB48/BGR48 BE/LE entry points. */
926 static av_always_inline void
927 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
928 const int32_t *ubuf[2], const int32_t *vbuf[2],
929 const int32_t *abuf0, uint16_t *dest, int dstW,
930 int uvalpha, int y, enum PixelFormat target)
932 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
933 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
936 if (uvalpha < 2048) {
937 for (i = 0; i < (dstW >> 1); i++) {
938 int Y1 = (buf0[i * 2] ) >> 2;
939 int Y2 = (buf0[i * 2 + 1]) >> 2;
940 int U = (ubuf0[i] + (-128 << 11)) >> 2;
941 int V = (vbuf0[i] + (-128 << 11)) >> 2;
944 Y1 -= c->yuv2rgb_y_offset;
945 Y2 -= c->yuv2rgb_y_offset;
946 Y1 *= c->yuv2rgb_y_coeff;
947 Y2 *= c->yuv2rgb_y_coeff;
951 R = V * c->yuv2rgb_v2r_coeff;
952 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
953 B = U * c->yuv2rgb_u2b_coeff;
955 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
956 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
957 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
958 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
959 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
960 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
964 for (i = 0; i < (dstW >> 1); i++) {
965 int Y1 = (buf0[i * 2] ) >> 2;
966 int Y2 = (buf0[i * 2 + 1]) >> 2;
967 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
968 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
971 Y1 -= c->yuv2rgb_y_offset;
972 Y2 -= c->yuv2rgb_y_offset;
973 Y1 *= c->yuv2rgb_y_coeff;
974 Y2 *= c->yuv2rgb_y_coeff;
978 R = V * c->yuv2rgb_v2r_coeff;
979 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
980 B = U * c->yuv2rgb_u2b_coeff;
982 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
983 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
984 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
985 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
986 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
987 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
997 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
998 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
999 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1000 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Writes one pair of pixels (indices i*2 and i*2+1) for table-driven
 * YUV->RGB output.  _r/_g/_b are per-component lookup tables built
 * elsewhere in the context; the compile-time `target` selects the layout:
 *   - 32-bit RGBA/BGRA/ARGB/ABGR: sum the three table entries, optionally
 *     OR in alpha at bit 0 or 24 depending on _1 vs normal variants;
 *   - 24-bit RGB24/BGR24: three bytes per pixel via r_b/b_r channel swap;
 *   - 16/15/12-bit (565/555/444): table lookups pre-offset by per-channel
 *     ordered-dither values from the 2x2/4x4 matrices defined above;
 *   - 8/4-bit: 8x8 dither matrices; the two 4-bit pixels of a pair are
 *     packed into one byte for RGB4/BGR4. */
1002 static av_always_inline void
1003 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1004 int U, int V, int A1, int A2,
1005 const void *_r, const void *_g, const void *_b, int y,
1006 enum PixelFormat target, int hasAlpha)
1008 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1009 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1010 uint32_t *dest = (uint32_t *) _dest;
1011 const uint32_t *r = (const uint32_t *) _r;
1012 const uint32_t *g = (const uint32_t *) _g;
1013 const uint32_t *b = (const uint32_t *) _b;
1016 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1018 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1019 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1022 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1024 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1025 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1027 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1028 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1031 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1032 uint8_t *dest = (uint8_t *) _dest;
1033 const uint8_t *r = (const uint8_t *) _r;
1034 const uint8_t *g = (const uint8_t *) _g;
1035 const uint8_t *b = (const uint8_t *) _b;
// r_b/b_r swap red and blue so one code path serves both byte orders.
1037 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1038 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1039 dest[i * 6 + 0] = r_b[Y1];
1040 dest[i * 6 + 1] = g[Y1];
1041 dest[i * 6 + 2] = b_r[Y1];
1042 dest[i * 6 + 3] = r_b[Y2];
1043 dest[i * 6 + 4] = g[Y2];
1044 dest[i * 6 + 5] = b_r[Y2];
1047 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1048 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1049 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1050 uint16_t *dest = (uint16_t *) _dest;
1051 const uint16_t *r = (const uint16_t *) _r;
1052 const uint16_t *g = (const uint16_t *) _g;
1053 const uint16_t *b = (const uint16_t *) _b;
1054 int dr1, dg1, db1, dr2, dg2, db2;
// Per-channel dither offsets; blue uses the complementary matrix row.
1056 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1057 dr1 = dither_2x2_8[ y & 1 ][0];
1058 dg1 = dither_2x2_4[ y & 1 ][0];
1059 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1060 dr2 = dither_2x2_8[ y & 1 ][1];
1061 dg2 = dither_2x2_4[ y & 1 ][1];
1062 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1063 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1064 dr1 = dither_2x2_8[ y & 1 ][0];
1065 dg1 = dither_2x2_8[ y & 1 ][1];
1066 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1067 dr2 = dither_2x2_8[ y & 1 ][1];
1068 dg2 = dither_2x2_8[ y & 1 ][0];
1069 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1071 dr1 = dither_4x4_16[ y & 3 ][0];
1072 dg1 = dither_4x4_16[ y & 3 ][1];
1073 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1074 dr2 = dither_4x4_16[ y & 3 ][1];
1075 dg2 = dither_4x4_16[ y & 3 ][0];
1076 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1079 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1080 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1081 } else /* 8/4-bit */ {
1082 uint8_t *dest = (uint8_t *) _dest;
1083 const uint8_t *r = (const uint8_t *) _r;
1084 const uint8_t *g = (const uint8_t *) _g;
1085 const uint8_t *b = (const uint8_t *) _b;
1086 int dr1, dg1, db1, dr2, dg2, db2;
1088 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1089 const uint8_t * const d64 = dither_8x8_73[y & 7];
1090 const uint8_t * const d32 = dither_8x8_32[y & 7];
1091 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1092 db1 = d64[(i * 2 + 0) & 7];
1093 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1094 db2 = d64[(i * 2 + 1) & 7];
1096 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1097 const uint8_t * const d128 = dither_8x8_220[y & 7];
1098 dr1 = db1 = d128[(i * 2 + 0) & 7];
1099 dg1 = d64[(i * 2 + 0) & 7];
1100 dr2 = db2 = d128[(i * 2 + 1) & 7];
1101 dg2 = d64[(i * 2 + 1) & 7];
1104 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
// Two 4-bit pixels packed into one byte, second pixel in the high nibble.
1105 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1106 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1108 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1109 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Multi-tap vertical scale + packed-RGB output for one line.
 * Accumulates lumFilterSize luma taps and chrFilterSize chroma taps,
 * converts each pair of luma samples (sharing one U/V sample) to RGB via
 * the per-context lookup tables, and stores through yuv2rgb_write().
 * NOTE(review): several statements (accumulator init, closing braces) are
 * elided from this chunk; comments describe only the visible code. */
1114 static av_always_inline void
1115 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1116 const int16_t **lumSrc, int lumFilterSize,
1117 const int16_t *chrFilter, const int16_t **chrUSrc,
1118 const int16_t **chrVSrc, int chrFilterSize,
1119 const int16_t **alpSrc, uint8_t *dest, int dstW,
1120 int y, enum PixelFormat target, int hasAlpha)
/* two output pixels per iteration: Y1/Y2 share one U/V (horizontal 2x chroma subsampling) */
1124 for (i = 0; i < (dstW >> 1); i++) {
1130 int av_unused A1, A2;
1131 const void *r, *g, *b;
/* vertical luma filtering: one tap per source line */
1133 for (j = 0; j < lumFilterSize; j++) {
1134 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1135 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* vertical chroma filtering */
1137 for (j = 0; j < chrFilterSize; j++) {
1138 U += chrUSrc[j][i] * chrFilter[j];
1139 V += chrVSrc[j][i] * chrFilter[j];
/* cheap overflow test: only clip when some value escaped the 8-bit range */
1145 if ((Y1 | Y2 | U | V) & 0x100) {
1146 Y1 = av_clip_uint8(Y1);
1147 Y2 = av_clip_uint8(Y2);
1148 U = av_clip_uint8(U);
1149 V = av_clip_uint8(V);
/* optional alpha plane gets the same luma filter (guard is on an elided line) */
1154 for (j = 0; j < lumFilterSize; j++) {
1155 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1156 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1160 if ((A1 | A2) & 0x100) {
1161 A1 = av_clip_uint8(A1);
1162 A2 = av_clip_uint8(A2);
1166 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* green needs both U and V contributions; r/b lookups are on elided lines */
1168 g = (c->table_gU[U] + c->table_gV[V]);
1171 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1172 r, g, b, y, target, hasAlpha);
/* Two-line (bilinear) vertical blend + packed-RGB output.
 * Blends buf[0]/buf[1] with 12-bit weights (yalpha/uvalpha in 0..4095),
 * then converts to RGB through the context lookup tables.
 * The >> 19 collapses 15-bit samples * 12-bit weights back to 8 bits. */
1176 static av_always_inline void
1177 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1178 const int16_t *ubuf[2], const int16_t *vbuf[2],
1179 const int16_t *abuf[2], uint8_t *dest, int dstW,
1180 int yalpha, int uvalpha, int y,
1181 enum PixelFormat target, int hasAlpha)
1183 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1184 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1185 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1186 *abuf0 = hasAlpha ? abuf[0] : NULL,
1187 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights so buf0*w0 + buf1*w1 sums to full scale (4095) */
1188 int yalpha1 = 4095 - yalpha;
1189 int uvalpha1 = 4095 - uvalpha;
1192 for (i = 0; i < (dstW >> 1); i++) {
1193 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1194 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1195 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1196 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1198 const void *r = c->table_rV[V],
1199 *g = (c->table_gU[U] + c->table_gV[V]),
1200 *b = c->table_bU[U];
/* alpha blended with the same luma weights (hasAlpha guard is elided) */
1203 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1204 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1207 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1208 r, g, b, y, target, hasAlpha);
/* Single-line (unscaled vertical) packed-RGB output.
 * uvalpha < 2048: take one chroma line as-is; otherwise average the two
 * chroma lines ((a+b)>>8 combines the average with the >>7 descale).
 * NOTE(review): the unblended branch reads ubuf1/vbuf1 while the averaging
 * branch uses both ubuf0 and ubuf1 — upstream later changed the unblended
 * branch to ubuf0/vbuf0 (chroma line selection bug); verify against the
 * project history before relying on this variant. */
1212 static av_always_inline void
1213 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1214 const int16_t *ubuf[2], const int16_t *vbuf[2],
1215 const int16_t *abuf0, uint8_t *dest, int dstW,
1216 int uvalpha, int y, enum PixelFormat target,
1219 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1220 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1223 if (uvalpha < 2048) {
1224 for (i = 0; i < (dstW >> 1); i++) {
/* >> 7 descales the 15-bit intermediate to 8 bits */
1225 int Y1 = buf0[i * 2] >> 7;
1226 int Y2 = buf0[i * 2 + 1] >> 7;
1227 int U = ubuf1[i] >> 7;
1228 int V = vbuf1[i] >> 7;
1230 const void *r = c->table_rV[V],
1231 *g = (c->table_gU[U] + c->table_gV[V]),
1232 *b = c->table_bU[U];
1235 A1 = abuf0[i * 2 ] >> 7;
1236 A2 = abuf0[i * 2 + 1] >> 7;
1239 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1240 r, g, b, y, target, hasAlpha);
/* else branch: average the two chroma lines */
1243 for (i = 0; i < (dstW >> 1); i++) {
1244 int Y1 = buf0[i * 2] >> 7;
1245 int Y2 = buf0[i * 2 + 1] >> 7;
1246 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1247 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1249 const void *r = c->table_rV[V],
1250 *g = (c->table_gU[U] + c->table_gV[V]),
1251 *b = c->table_bU[U];
1254 A1 = abuf0[i * 2 ] >> 7;
1255 A2 = abuf0[i * 2 + 1] >> 7;
1258 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1259 r, g, b, y, target, hasAlpha);
/* YUV2RGBWRAPPERX instantiates only the _X_c (multi-tap) entry point for a
 * given output format; YUV2RGBWRAPPER additionally instantiates the _2_c
 * (two-line blend) and _1_c (single-line) entry points.  Each wrapper just
 * forwards to the matching *_c_template with `fmt` and `hasAlpha` baked in
 * as compile-time constants so the template specializes per format.
 * (No comments inside the macros: they are backslash-continued.) */
1264 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1265 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1266 const int16_t **lumSrc, int lumFilterSize, \
1267 const int16_t *chrFilter, const int16_t **chrUSrc, \
1268 const int16_t **chrVSrc, int chrFilterSize, \
1269 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1272 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1273 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1274 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1276 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1277 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1278 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1279 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1280 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1281 int yalpha, int uvalpha, int y) \
1283 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1284 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1287 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1288 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1289 const int16_t *abuf0, uint8_t *dest, int dstW, \
1290 int uvalpha, int y) \
1292 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1293 dstW, uvalpha, y, fmt, hasAlpha); \
1297 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1298 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
/* with alpha support compiled in, provide explicit alpha (a32*) and
 * no-alpha (x32*) specializations in addition to the runtime-checked ones */
1300 #if CONFIG_SWSCALE_ALPHA
1301 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1302 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1304 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1305 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
/* 24-bit and dithered low-depth RGB outputs never carry alpha */
1307 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1308 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1309 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1310 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1311 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1312 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1313 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1314 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-horizontal-chroma variant: one chroma sample per output pixel
 * (no 2x subsampling), converting arithmetically with the per-context
 * coefficients instead of lookup tables.  `step` is the bytes-per-pixel
 * advance of `dest` (3 for 24-bit, 4 for 32-bit formats).
 * NOTE(review): accumulator setup, dest writes for R/G/B components and
 * the per-format store switch are largely elided from this chunk. */
1316 static av_always_inline void
1317 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1318 const int16_t **lumSrc, int lumFilterSize,
1319 const int16_t *chrFilter, const int16_t **chrUSrc,
1320 const int16_t **chrVSrc, int chrFilterSize,
1321 const int16_t **alpSrc, uint8_t *dest,
1322 int dstW, int y, enum PixelFormat target, int hasAlpha)
1325 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1327 for (i = 0; i < dstW; i++) {
/* vertical luma filtering */
1335 for (j = 0; j < lumFilterSize; j++) {
1336 Y += lumSrc[j][i] * lumFilter[j];
/* vertical chroma filtering — per pixel, not per pixel pair */
1338 for (j = 0; j < chrFilterSize; j++) {
1339 U += chrUSrc[j][i] * chrFilter[j];
1340 V += chrVSrc[j][i] * chrFilter[j];
1347 for (j = 0; j < lumFilterSize; j++) {
1348 A += alpSrc[j][i] * lumFilter[j];
1352 A = av_clip_uint8(A);
/* arithmetic YUV->RGB using the context's fixed-point coefficients */
1354 Y -= c->yuv2rgb_y_offset;
1355 Y *= c->yuv2rgb_y_coeff;
1357 R = Y + V*c->yuv2rgb_v2r_coeff;
1358 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1359 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when a value left the 30-bit intermediate range */
1360 if ((R | G | B) & 0xC0000000) {
1361 R = av_clip_uintp2(R, 30);
1362 G = av_clip_uintp2(G, 30);
1363 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on the target layout (ARGB vs RGBA etc.);
 * opaque (255) when no alpha plane is present */
1368 dest[0] = hasAlpha ? A : 255;
1382 dest[3] = hasAlpha ? A : 255;
1385 dest[0] = hasAlpha ? A : 255;
1400 dest[3] = hasAlpha ? A : 255;
1408 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1409 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1410 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1411 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
/* explicit alpha / no-alpha specializations when alpha support is built */
1413 #if CONFIG_SWSCALE_ALPHA
1414 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1415 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1416 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1417 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1419 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1420 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1421 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1422 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
/* 24-bit packed targets have no alpha channel */
1424 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1425 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill `height` rows of a plane with the constant byte `val`, starting at
 * row `y`; `stride` may exceed `width` (padding is left untouched).
 * NOTE(review): the per-row `ptr += stride` advance is on an elided line. */
1427 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1428 int width, int height,
1432 uint8_t *ptr = plane + stride*y;
1433 for (i=0; i<height; i++) {
1434 memset(ptr, val, width);
/* RGB48/BGR48 (16 bits per component, 3 components) -> planar YUV.
 * input_pixel reads one 16-bit component honoring the format's endianness;
 * the r/b helper macros swap the first/last component for BGR48 layouts so
 * the shared coefficient expressions below stay in R,G,B order. */
1439 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1441 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1442 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* one 16-bit luma sample per pixel, BT.601-style fixed-point weights
 * (RY/GY/BY) with rounding offset folded into the constant */
1444 static av_always_inline void
1445 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1446 enum PixelFormat origin)
1449 for (i = 0; i < width; i++) {
1450 unsigned int r_b = input_pixel(&src[i*3+0]);
1451 unsigned int g = input_pixel(&src[i*3+1]);
1452 unsigned int b_r = input_pixel(&src[i*3+2]);
1454 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* full-resolution chroma: one U/V sample per input pixel */
1458 static av_always_inline void
1459 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1460 const uint16_t *src1, const uint16_t *src2,
1461 int width, enum PixelFormat origin)
1465 for (i = 0; i < width; i++) {
1466 int r_b = input_pixel(&src1[i*3+0]);
1467 int g = input_pixel(&src1[i*3+1]);
1468 int b_r = input_pixel(&src1[i*3+2]);
1470 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1471 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* horizontally halved chroma: average each horizontal pixel pair
 * (rounded, +1 >> 1) before applying the U/V coefficients */
1475 static av_always_inline void
1476 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1477 const uint16_t *src1, const uint16_t *src2,
1478 int width, enum PixelFormat origin)
1482 for (i = 0; i < width; i++) {
1483 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1484 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1485 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1487 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1488 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Instantiate the three uint8_t*-signature entry points (ToY, ToUV,
 * ToUV_half) for one 48-bit format; each casts the byte pointers to
 * uint16_t* and forwards to the matching template above with `origin`
 * baked in.  (No comments inside the macro: backslash-continued body.) */
1496 #define rgb48funcs(pattern, BE_LE, origin) \
1497 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1498 int width, uint32_t *unused) \
1500 const uint16_t *src = (const uint16_t *) _src; \
1501 uint16_t *dst = (uint16_t *) _dst; \
1502 rgb48ToY_c_template(dst, src, width, origin); \
1505 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1506 const uint8_t *_src1, const uint8_t *_src2, \
1507 int width, uint32_t *unused) \
1509 const uint16_t *src1 = (const uint16_t *) _src1, \
1510 *src2 = (const uint16_t *) _src2; \
1511 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1512 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1515 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1516 const uint8_t *_src1, const uint8_t *_src2, \
1517 int width, uint32_t *unused) \
1519 const uint16_t *src1 = (const uint16_t *) _src1, \
1520 *src2 = (const uint16_t *) _src2; \
1521 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1522 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1525 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1526 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1527 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1528 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* 16/32-bit packed RGB -> planar YUV templates.  input_pixel loads one
 * source pixel: native 32-bit load for the 4-byte-per-pixel formats,
 * endian-aware 16-bit load otherwise.  The shr/shg/shb/maskr/... template
 * parameters describe where each component sits in the pixel word, and
 * rsh/gsh/bsh/S pre-scale the coefficients so all layouts share one
 * fixed-point expression. */
1530 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1531 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1532 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* luma: extract R/G/B, weight, round (rnd) and descale by S */
1534 static av_always_inline void
1535 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1536 int width, enum PixelFormat origin,
1537 int shr, int shg, int shb, int shp,
1538 int maskr, int maskg, int maskb,
1539 int rsh, int gsh, int bsh, int S)
1541 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1542 rnd = 33 << (S - 1);
1545 for (i = 0; i < width; i++) {
1546 int px = input_pixel(i) >> shp;
1547 int b = (px & maskb) >> shb;
1548 int g = (px & maskg) >> shg;
1549 int r = (px & maskr) >> shr;
1551 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* chroma, full horizontal resolution */
1555 static av_always_inline void
1556 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1557 const uint8_t *src, int width,
1558 enum PixelFormat origin,
1559 int shr, int shg, int shb, int shp,
1560 int maskr, int maskg, int maskb,
1561 int rsh, int gsh, int bsh, int S)
1563 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1564 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1565 rnd = 257 << (S - 1);
1568 for (i = 0; i < width; i++) {
1569 int px = input_pixel(i) >> shp;
1570 int b = (px & maskb) >> shb;
1571 int g = (px & maskg) >> shg;
1572 int r = (px & maskr) >> shr;
1574 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1575 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* chroma, horizontally halved: sum two neighboring pixels component-wise
 * without unpacking — green is isolated via maskgx, red+blue stay summed
 * in `rb`; the widened masks (<< 1) accommodate the doubled component
 * range, and the final descale uses S + 1 to divide the sum by two */
1579 static av_always_inline void
1580 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1581 const uint8_t *src, int width,
1582 enum PixelFormat origin,
1583 int shr, int shg, int shb, int shp,
1584 int maskr, int maskg, int maskb,
1585 int rsh, int gsh, int bsh, int S)
1587 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1588 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1589 rnd = 257 << S, maskgx = ~(maskr | maskb);
1592 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1593 for (i = 0; i < width; i++) {
1594 int px0 = input_pixel(2 * i + 0) >> shp;
1595 int px1 = input_pixel(2 * i + 1) >> shp;
1596 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1597 int rb = px0 + px1 - g;
1599 b = (rb & maskb) >> shb;
/* 565-style formats need different green handling (extra green bit);
 * the alternate branch body is on elided lines */
1600 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1601 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1604 g = (g & maskg) >> shg;
1606 r = (rb & maskr) >> shr;
1608 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1609 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Instantiate ToY/ToUV/ToUV_half entry points for one 16/32-bit RGB
 * layout by forwarding to the templates above with the layout's shift,
 * mask and coefficient-scale parameters baked in as constants.
 * (No comments inside the macro: backslash-continued body.) */
1615 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1616 maskg, maskb, rsh, gsh, bsh, S) \
1617 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1618 int width, uint32_t *unused) \
1620 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1621 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1624 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1625 const uint8_t *src, const uint8_t *dummy, \
1626 int width, uint32_t *unused) \
1628 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1629 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1632 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1633 const uint8_t *src, const uint8_t *dummy, \
1634 int width, uint32_t *unused) \
1636 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1637 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1640 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1641 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1642 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1643 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1644 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1645 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1646 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1647 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1648 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1649 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1650 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1651 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from ABGR input (per-pixel body is elided;
 * presumably dst[i] = src[4*i] — confirm against upstream) */
1653 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1656 for (i=0; i<width; i++) {
/* Extract the alpha plane from RGBA input (per-pixel body elided) */
1661 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1664 for (i=0; i<width; i++) {
/* PAL8 -> luma: look each index up in the palette; low byte of the
 * palette entry is the Y value (palette entries are pre-converted) */
1669 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1672 for (i=0; i<width; i++) {
1675 dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma: U/V extraction from the palette word happens on elided
 * lines; both source pointers must alias the same plane */
1679 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1680 const uint8_t *src1, const uint8_t *src2,
1681 int width, uint32_t *pal)
1684 assert(src1 == src2);
1685 for (i=0; i<width; i++) {
1686 int p= pal[src1[i]];
/* 1 bpp -> 8 bpp luma: expand each bit of the source byte to 0/255,
 * MSB first.  For monowhite a bit value of 0 is white — the source byte
 * inversion presumably happens on an elided line (d = ~src[i]); verify. */
1693 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1694 int width, uint32_t *unused)
1697 for (i=0; i<width/8; i++) {
1700 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* monoblack: same bit expansion, without the inversion */
1704 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1705 int width, uint32_t *unused)
1708 for (i=0; i<width/8; i++) {
1711 dst[8*i+j]= ((d>>(7-j))&1)*255;
1715 //FIXME yuy2* can read up to 7 samples too much
/* YUYV: luma bytes at even offsets (extraction line elided) */
1717 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1721 for (i=0; i<width; i++)
/* YUYV: U at byte 1, V at byte 3 of each 4-byte (2-pixel) group */
1725 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1726 const uint8_t *src2, int width, uint32_t *unused)
1729 for (i=0; i<width; i++) {
1730 dstU[i]= src1[4*i + 1];
1731 dstV[i]= src1[4*i + 3];
1733 assert(src1 == src2);
/* Byte-swap a 16-bit luma plane (LE<->BE input normalization) */
1736 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1739 const uint16_t *src = (const uint16_t *) _src;
1740 uint16_t *dst = (uint16_t *) _dst;
1741 for (i=0; i<width; i++) {
1742 dst[i] = av_bswap16(src[i]);
/* Byte-swap two 16-bit chroma planes in one pass */
1746 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1747 const uint8_t *_src2, int width, uint32_t *unused)
1750 const uint16_t *src1 = (const uint16_t *) _src1,
1751 *src2 = (const uint16_t *) _src2;
1752 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1753 for (i=0; i<width; i++) {
1754 dstU[i] = av_bswap16(src1[i]);
1755 dstV[i] = av_bswap16(src2[i]);
1759 /* This is almost identical to the previous, end exists only because
1760 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY: luma bytes at odd offsets (extraction line elided) */
1761 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1765 for (i=0; i<width; i++)
/* UYVY: U at byte 0, V at byte 2 of each 4-byte (2-pixel) group */
1769 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1770 const uint8_t *src2, int width, uint32_t *unused)
1773 for (i=0; i<width; i++) {
1774 dstU[i]= src1[4*i + 0];
1775 dstV[i]= src1[4*i + 2];
1777 assert(src1 == src2);
/* De-interleave a semi-planar (NV12/NV21-style) chroma plane:
 * even bytes -> dst1, odd bytes -> dst2 */
1780 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1781 const uint8_t *src, int width)
1784 for (i = 0; i < width; i++) {
1785 dst1[i] = src[2*i+0];
1786 dst2[i] = src[2*i+1];
/* NV12: interleaved order is U,V */
1790 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1791 const uint8_t *src1, const uint8_t *src2,
1792 int width, uint32_t *unused)
1794 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved order is V,U — swap destinations */
1797 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1798 const uint8_t *src1, const uint8_t *src2,
1799 int width, uint32_t *unused)
1801 nvXXtoUV_c(dstV, dstU, src1, width);
/* (re-)define the endian-aware 16-bit loader for the following section */
1804 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> luma: component loads are on elided lines (b,g,r from
 * consecutive bytes); fixed-point BT.601-style weighting with rounding */
1806 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1807 int width, uint32_t *unused)
1810 for (i=0; i<width; i++) {
1815 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> chroma, full horizontal resolution */
1819 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1820 const uint8_t *src2, int width, uint32_t *unused)
1823 for (i=0; i<width; i++) {
1824 int b= src1[3*i + 0];
1825 int g= src1[3*i + 1];
1826 int r= src1[3*i + 2];
1828 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1829 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1831 assert(src1 == src2);
/* BGR24 -> chroma, horizontally halved: sum each pixel pair, divide by
 * two via the extra +1 in the final shift */
1834 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1835 const uint8_t *src2, int width, uint32_t *unused)
1838 for (i=0; i<width; i++) {
1839 int b= src1[6*i + 0] + src1[6*i + 3];
1840 int g= src1[6*i + 1] + src1[6*i + 4];
1841 int r= src1[6*i + 2] + src1[6*i + 5];
1843 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1844 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1846 assert(src1 == src2);
/* RGB24 variants: identical math, opposite component byte order */
1849 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1853 for (i=0; i<width; i++) {
1858 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1862 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1863 const uint8_t *src2, int width, uint32_t *unused)
1867 for (i=0; i<width; i++) {
1868 int r= src1[3*i + 0];
1869 int g= src1[3*i + 1];
1870 int b= src1[3*i + 2];
1872 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1873 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1877 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1878 const uint8_t *src2, int width, uint32_t *unused)
1882 for (i=0; i<width; i++) {
1883 int r= src1[6*i + 0] + src1[6*i + 3];
1884 int g= src1[6*i + 1] + src1[6*i + 4];
1885 int b= src1[6*i + 2] + src1[6*i + 5];
1887 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1888 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal FIR scaler, 16-bit input -> 19-bit intermediate (int32 dst).
 * filterPos[i] is the leftmost source sample for output i; the inner loop
 * applies `filterSize` 14-bit taps.  `sh` derivation from bits is on an
 * elided line. */
1892 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1893 const int16_t *filter,
1894 const int16_t *filterPos, int filterSize)
1897 int32_t *dst = (int32_t *) _dst;
1898 const uint16_t *src = (const uint16_t *) _src;
1899 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1902 for (i = 0; i < dstW; i++) {
1904 int srcPos = filterPos[i];
1907 for (j = 0; j < filterSize; j++) {
1908 val += src[srcPos + j] * filter[filterSize * i + j];
1910 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1911 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Same, but producing the 15-bit intermediate (int16 dst) */
1915 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1916 const int16_t *filter,
1917 const int16_t *filterPos, int filterSize)
1920 const uint16_t *src = (const uint16_t *) _src;
1921 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1923 for (i = 0; i < dstW; i++) {
1925 int srcPos = filterPos[i];
1928 for (j = 0; j < filterSize; j++) {
1929 val += src[srcPos + j] * filter[filterSize * i + j];
1931 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1932 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1936 // bilinear / bicubic scaling
/* 8-bit input -> 15-bit intermediate; FFMIN guards filter overshoot */
1937 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1938 const int16_t *filter, const int16_t *filterPos,
1942 for (i=0; i<dstW; i++) {
1944 int srcPos= filterPos[i];
1946 for (j=0; j<filterSize; j++) {
1947 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1949 //filter += hFilterSize;
1950 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 8-bit input -> 19-bit intermediate (int32 dst) */
1955 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1956 const int16_t *filter, const int16_t *filterPos,
1960 int32_t *dst = (int32_t *) _dst;
1961 for (i=0; i<dstW; i++) {
1963 int srcPos= filterPos[i];
1965 for (j=0; j<filterSize; j++) {
1966 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1968 //filter += hFilterSize;
1969 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1974 //FIXME all pal and rgb srcFormats could do this convertion as well
1975 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range (MPEG) <-> full-range (JPEG) remapping, operating on the
 * 15-bit intermediate samples; the FFMIN clamps avoid fixed-point
 * overflow for out-of-range inputs */
1976 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1979 for (i = 0; i < width; i++) {
1980 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1981 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1984 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1987 for (i = 0; i < width; i++) {
1988 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1989 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1992 static void lumRangeToJpeg_c(int16_t *dst, int width)
1995 for (i = 0; i < width; i++)
1996 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1998 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2001 for (i = 0; i < width; i++)
2002 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 16-bit variants for the 19-bit intermediate (int32 samples); the <<4
 * on the constants accounts for the 4 extra bits of precision */
2005 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2008 int32_t *dstU = (int32_t *) _dstU;
2009 int32_t *dstV = (int32_t *) _dstV;
2010 for (i = 0; i < width; i++) {
2011 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2012 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2015 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2018 int32_t *dstU = (int32_t *) _dstU;
2019 int32_t *dstV = (int32_t *) _dstV;
2020 for (i = 0; i < width; i++) {
2021 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2022 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2025 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2028 int32_t *dst = (int32_t *) _dst;
2029 for (i = 0; i < width; i++)
2030 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2032 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2035 int32_t *dst = (int32_t *) _dst;
2036 for (i = 0; i < width; i++)
2037 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scale: 16.16 fixed-point source position
 * (xpos), linear interpolation between the two neighboring samples with a
 * 7-bit weight; output is the 15-bit intermediate.  The xpos += xInc
 * advance is on an elided line. */
2040 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2041 const uint8_t *src, int srcW, int xInc)
2044 unsigned int xpos=0;
2045 for (i=0;i<dstWidth;i++) {
2046 register unsigned int xx=xpos>>16;
2047 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2048 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2053 // *** horizontal scale Y line to temp buffer
/* Luma (or alpha, if isAlpha) path: optional input-format conversion into
 * formatConvBuffer, then either the generic FIR scaler or the fast
 * bilinear path, then optional MPEG<->JPEG range conversion (luma only —
 * convertRange is NULL for the alpha plane). */
2054 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2055 const uint8_t *src, int srcW, int xInc,
2056 const int16_t *hLumFilter,
2057 const int16_t *hLumFilterPos, int hLumFilterSize,
2058 uint8_t *formatConvBuffer,
2059 uint32_t *pal, int isAlpha)
2061 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2062 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2065 toYV12(formatConvBuffer, src, srcW, pal);
2066 src= formatConvBuffer;
2069 if (!c->hyscale_fast) {
2070 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2071 } else { // fast bilinear upscale / crap downscale
2072 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2076 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale — both planes share one position
 * walk; (xalpha^127) is the complementary weight (127 - xalpha for the
 * 7-bit range) */
2079 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2080 int dstWidth, const uint8_t *src1,
2081 const uint8_t *src2, int srcW, int xInc)
2084 unsigned int xpos=0;
2085 for (i=0;i<dstWidth;i++) {
2086 register unsigned int xx=xpos>>16;
2087 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2088 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2089 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Chroma path: optional format conversion (buf2 carved out of
 * formatConvBuffer, 16-byte aligned past the first plane's space), then
 * FIR or fast bilinear scaling of both planes, then range conversion */
2094 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2095 const uint8_t *src1, const uint8_t *src2,
2096 int srcW, int xInc, const int16_t *hChrFilter,
2097 const int16_t *hChrFilterPos, int hChrFilterSize,
2098 uint8_t *formatConvBuffer, uint32_t *pal)
2101 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2102 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2103 src1= formatConvBuffer;
2107 if (!c->hcscale_fast) {
2108 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2109 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2110 } else { // fast bilinear upscale / crap downscale
2111 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2114 if (c->chrConvertRange)
2115 c->chrConvertRange(dst1, dst2, dstWidth);
2118 static av_always_inline void
2119 find_c_packed_planar_out_funcs(SwsContext *c,
2120 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2121 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2122 yuv2packedX_fn *yuv2packedX)
2124 enum PixelFormat dstFormat = c->dstFormat;
2126 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2127 *yuv2yuvX = yuv2nv12X_c;
2128 } else if (is16BPS(dstFormat)) {
2129 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2130 } else if (is9_OR_10BPS(dstFormat)) {
2131 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2132 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2134 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2137 *yuv2yuv1 = yuv2yuv1_c;
2138 *yuv2yuvX = yuv2yuvX_c;
2140 if(c->flags & SWS_FULL_CHR_H_INT) {
2141 switch (dstFormat) {
2144 *yuv2packedX = yuv2rgba32_full_X_c;
2146 #if CONFIG_SWSCALE_ALPHA
2148 *yuv2packedX = yuv2rgba32_full_X_c;
2150 #endif /* CONFIG_SWSCALE_ALPHA */
2152 *yuv2packedX = yuv2rgbx32_full_X_c;
2154 #endif /* !CONFIG_SMALL */
2158 *yuv2packedX = yuv2argb32_full_X_c;
2160 #if CONFIG_SWSCALE_ALPHA
2162 *yuv2packedX = yuv2argb32_full_X_c;
2164 #endif /* CONFIG_SWSCALE_ALPHA */
2166 *yuv2packedX = yuv2xrgb32_full_X_c;
2168 #endif /* !CONFIG_SMALL */
2172 *yuv2packedX = yuv2bgra32_full_X_c;
2174 #if CONFIG_SWSCALE_ALPHA
2176 *yuv2packedX = yuv2bgra32_full_X_c;
2178 #endif /* CONFIG_SWSCALE_ALPHA */
2180 *yuv2packedX = yuv2bgrx32_full_X_c;
2182 #endif /* !CONFIG_SMALL */
2186 *yuv2packedX = yuv2abgr32_full_X_c;
2188 #if CONFIG_SWSCALE_ALPHA
2190 *yuv2packedX = yuv2abgr32_full_X_c;
2192 #endif /* CONFIG_SWSCALE_ALPHA */
2194 *yuv2packedX = yuv2xbgr32_full_X_c;
2196 #endif /* !CONFIG_SMALL */
2199 *yuv2packedX = yuv2rgb24_full_X_c;
2202 *yuv2packedX = yuv2bgr24_full_X_c;
2206 switch (dstFormat) {
2207 case PIX_FMT_GRAY16BE:
2208 *yuv2packed1 = yuv2gray16BE_1_c;
2209 *yuv2packed2 = yuv2gray16BE_2_c;
2210 *yuv2packedX = yuv2gray16BE_X_c;
2212 case PIX_FMT_GRAY16LE:
2213 *yuv2packed1 = yuv2gray16LE_1_c;
2214 *yuv2packed2 = yuv2gray16LE_2_c;
2215 *yuv2packedX = yuv2gray16LE_X_c;
2217 case PIX_FMT_MONOWHITE:
2218 *yuv2packed1 = yuv2monowhite_1_c;
2219 *yuv2packed2 = yuv2monowhite_2_c;
2220 *yuv2packedX = yuv2monowhite_X_c;
2222 case PIX_FMT_MONOBLACK:
2223 *yuv2packed1 = yuv2monoblack_1_c;
2224 *yuv2packed2 = yuv2monoblack_2_c;
2225 *yuv2packedX = yuv2monoblack_X_c;
2227 case PIX_FMT_YUYV422:
2228 *yuv2packed1 = yuv2yuyv422_1_c;
2229 *yuv2packed2 = yuv2yuyv422_2_c;
2230 *yuv2packedX = yuv2yuyv422_X_c;
2232 case PIX_FMT_UYVY422:
2233 *yuv2packed1 = yuv2uyvy422_1_c;
2234 *yuv2packed2 = yuv2uyvy422_2_c;
2235 *yuv2packedX = yuv2uyvy422_X_c;
2237 case PIX_FMT_RGB48LE:
2238 *yuv2packed1 = yuv2rgb48le_1_c;
2239 *yuv2packed2 = yuv2rgb48le_2_c;
2240 *yuv2packedX = yuv2rgb48le_X_c;
2242 case PIX_FMT_RGB48BE:
2243 *yuv2packed1 = yuv2rgb48be_1_c;
2244 *yuv2packed2 = yuv2rgb48be_2_c;
2245 *yuv2packedX = yuv2rgb48be_X_c;
2247 case PIX_FMT_BGR48LE:
2248 *yuv2packed1 = yuv2bgr48le_1_c;
2249 *yuv2packed2 = yuv2bgr48le_2_c;
2250 *yuv2packedX = yuv2bgr48le_X_c;
2252 case PIX_FMT_BGR48BE:
2253 *yuv2packed1 = yuv2bgr48be_1_c;
2254 *yuv2packed2 = yuv2bgr48be_2_c;
2255 *yuv2packedX = yuv2bgr48be_X_c;
2260 *yuv2packed1 = yuv2rgb32_1_c;
2261 *yuv2packed2 = yuv2rgb32_2_c;
2262 *yuv2packedX = yuv2rgb32_X_c;
2264 #if CONFIG_SWSCALE_ALPHA
2266 *yuv2packed1 = yuv2rgba32_1_c;
2267 *yuv2packed2 = yuv2rgba32_2_c;
2268 *yuv2packedX = yuv2rgba32_X_c;
2270 #endif /* CONFIG_SWSCALE_ALPHA */
2272 *yuv2packed1 = yuv2rgbx32_1_c;
2273 *yuv2packed2 = yuv2rgbx32_2_c;
2274 *yuv2packedX = yuv2rgbx32_X_c;
2276 #endif /* !CONFIG_SMALL */
2278 case PIX_FMT_RGB32_1:
2279 case PIX_FMT_BGR32_1:
2281 *yuv2packed1 = yuv2rgb32_1_1_c;
2282 *yuv2packed2 = yuv2rgb32_1_2_c;
2283 *yuv2packedX = yuv2rgb32_1_X_c;
2285 #if CONFIG_SWSCALE_ALPHA
2287 *yuv2packed1 = yuv2rgba32_1_1_c;
2288 *yuv2packed2 = yuv2rgba32_1_2_c;
2289 *yuv2packedX = yuv2rgba32_1_X_c;
2291 #endif /* CONFIG_SWSCALE_ALPHA */
2293 *yuv2packed1 = yuv2rgbx32_1_1_c;
2294 *yuv2packed2 = yuv2rgbx32_1_2_c;
2295 *yuv2packedX = yuv2rgbx32_1_X_c;
2297 #endif /* !CONFIG_SMALL */
2300 *yuv2packed1 = yuv2rgb24_1_c;
2301 *yuv2packed2 = yuv2rgb24_2_c;
2302 *yuv2packedX = yuv2rgb24_X_c;
2305 *yuv2packed1 = yuv2bgr24_1_c;
2306 *yuv2packed2 = yuv2bgr24_2_c;
2307 *yuv2packedX = yuv2bgr24_X_c;
2309 case PIX_FMT_RGB565LE:
2310 case PIX_FMT_RGB565BE:
2311 case PIX_FMT_BGR565LE:
2312 case PIX_FMT_BGR565BE:
2313 *yuv2packed1 = yuv2rgb16_1_c;
2314 *yuv2packed2 = yuv2rgb16_2_c;
2315 *yuv2packedX = yuv2rgb16_X_c;
2317 case PIX_FMT_RGB555LE:
2318 case PIX_FMT_RGB555BE:
2319 case PIX_FMT_BGR555LE:
2320 case PIX_FMT_BGR555BE:
2321 *yuv2packed1 = yuv2rgb15_1_c;
2322 *yuv2packed2 = yuv2rgb15_2_c;
2323 *yuv2packedX = yuv2rgb15_X_c;
2325 case PIX_FMT_RGB444LE:
2326 case PIX_FMT_RGB444BE:
2327 case PIX_FMT_BGR444LE:
2328 case PIX_FMT_BGR444BE:
2329 *yuv2packed1 = yuv2rgb12_1_c;
2330 *yuv2packed2 = yuv2rgb12_2_c;
2331 *yuv2packedX = yuv2rgb12_X_c;
2335 *yuv2packed1 = yuv2rgb8_1_c;
2336 *yuv2packed2 = yuv2rgb8_2_c;
2337 *yuv2packedX = yuv2rgb8_X_c;
2341 *yuv2packed1 = yuv2rgb4_1_c;
2342 *yuv2packed2 = yuv2rgb4_2_c;
2343 *yuv2packedX = yuv2rgb4_X_c;
2345 case PIX_FMT_RGB4_BYTE:
2346 case PIX_FMT_BGR4_BYTE:
2347 *yuv2packed1 = yuv2rgb4b_1_c;
2348 *yuv2packed2 = yuv2rgb4b_2_c;
2349 *yuv2packedX = yuv2rgb4b_X_c;
2355 #define DEBUG_SWSCALE_BUFFERS 0
2356 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Generic C scaling loop: for each destination line, horizontally scale the
 * needed source lines into the luma/chroma ring buffers (hyscale/hcscale),
 * then vertically filter the buffered lines into the destination with one
 * of the yuv2yuv* (planar) / yuv2packed* (packed) output functions.
 *
 * src/dst: per-plane pointers; srcStride/dstStride: per-plane strides.
 * srcSliceY/srcSliceH describe which horizontal slice of the source picture
 * this call delivers; lines that cannot be completed yet stay buffered in
 * the context until the next slice arrives.
 * Returns the number of destination lines produced by this call.
 */
2358 static int swScale(SwsContext *c, const uint8_t* src[],
2359 int srcStride[], int srcSliceY,
2360 int srcSliceH, uint8_t* dst[], int dstStride[])
2362 /* load a few things into local vars to make the code more readable? and faster */
2363 const int srcW= c->srcW;
2364 const int dstW= c->dstW;
2365 const int dstH= c->dstH;
2366 const int chrDstW= c->chrDstW;
2367 const int chrSrcW= c->chrSrcW;
2368 const int lumXInc= c->lumXInc;
2369 const int chrXInc= c->chrXInc;
2370 const enum PixelFormat dstFormat= c->dstFormat;
2371 const int flags= c->flags;
/* Vertical/horizontal filter coefficient tables and per-line source positions. */
2372 int16_t *vLumFilterPos= c->vLumFilterPos;
2373 int16_t *vChrFilterPos= c->vChrFilterPos;
2374 int16_t *hLumFilterPos= c->hLumFilterPos;
2375 int16_t *hChrFilterPos= c->hChrFilterPos;
2376 int16_t *vLumFilter= c->vLumFilter;
2377 int16_t *vChrFilter= c->vChrFilter;
2378 int16_t *hLumFilter= c->hLumFilter;
2379 int16_t *hChrFilter= c->hChrFilter;
2380 int32_t *lumMmxFilter= c->lumMmxFilter;
2381 int32_t *chrMmxFilter= c->chrMmxFilter;
2382 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2383 const int vLumFilterSize= c->vLumFilterSize;
2384 const int vChrFilterSize= c->vChrFilterSize;
2385 const int hLumFilterSize= c->hLumFilterSize;
2386 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally-scaled lines awaiting vertical filtering. */
2387 int16_t **lumPixBuf= c->lumPixBuf;
2388 int16_t **chrUPixBuf= c->chrUPixBuf;
2389 int16_t **chrVPixBuf= c->chrVPixBuf;
2390 int16_t **alpPixBuf= c->alpPixBuf;
2391 const int vLumBufSize= c->vLumBufSize;
2392 const int vChrBufSize= c->vChrBufSize;
2393 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma-plane view of the incoming slice (vertically subsampled; H rounded up). */
2394 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2395 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2397 uint32_t *pal=c->pal_yuv;
/* Output-function pointers; near the bottom of the frame these are swapped
   back to the C versions (see the dstY >= dstH-2 special case below). */
2398 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2399 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2400 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2401 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2402 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Dither only when the source carries more than 8 significant bits. */
2403 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2405 /* vars which will change and which we need to store back in the context */
2407 int lumBufIndex= c->lumBufIndex;
2408 int chrBufIndex= c->chrBufIndex;
2409 int lastInLumBuf= c->lastInLumBuf;
2410 int lastInChrBuf= c->lastInChrBuf;
/* Packed input: all planes alias plane 0 (note srcStride[3]=srcStride[0] below). */
2412 if (isPacked(c->srcFormat)) {
2420 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma lines by widening the chroma stride. */
2422 srcStride[1]<<= c->vChrDrop;
2423 srcStride[2]<<= c->vChrDrop;
2425 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2426 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2427 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2428 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2429 srcSliceY, srcSliceH, dstY, dstH);
2430 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2431 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Warn once when any destination stride breaks 8-byte alignment. */
2433 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2434 static int warnedAlready=0; //FIXME move this into the context perhaps
2435 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2436 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2437 " ->cannot do aligned memory accesses anymore\n");
2442 /* Note the user might start scaling the picture in the middle so this
2443 will not get executed. This is not really intended but works
2444 currently, so people might do it. */
/* First slice of the picture: reset per-frame buffering state. */
2445 if (srcSliceY ==0) {
/* Without dithering, use a flat +64 rounding bias for both planes. */
2453 if (!should_dither) {
2454 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one destination line per iteration. */
2458 for (;dstY < dstH; dstY++) {
2459 const int chrDstY= dstY>>c->chrDstVSubSample;
2460 uint8_t *dest[4] = {
2461 dst[0] + dstStride[0] * dstY,
2462 dst[1] + dstStride[1] * chrDstY,
2463 dst[2] + dstStride[2] * chrDstY,
2464 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2467 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* firstLumSrcY2: first luma line needed by the LAST output line of this
   chroma group, so luma/chroma buffering stays in lockstep. */
2468 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2469 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2470 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2471 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2472 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2475 //handle holes (FAST_BILINEAR & weird filters)
2476 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2477 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2478 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2479 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2481 DEBUG_BUFFERS("dstY: %d\n", dstY);
2482 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2483 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2484 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2485 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2487 // Do we have enough lines in this slice to output the dstY line
2488 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: clamp to what the slice provides and just buffer. */
2490 if (!enough_lines) {
2491 lastLumSrcY = srcSliceY + srcSliceH - 1;
2492 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2493 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2494 lastLumSrcY, lastChrSrcY);
2497 //Do horizontal scaling
/* Feed luma (and alpha, src[3]) lines into the ring buffer up to lastLumSrcY. */
2498 while(lastInLumBuf < lastLumSrcY) {
2499 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2500 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2502 assert(lumBufIndex < 2*vLumBufSize);
2503 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2504 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2505 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2506 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha reuses the luma scaler on the alpha plane. */
2509 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2510 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2511 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2515 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2516 lumBufIndex, lastInLumBuf);
/* Same for chroma: scale U and V in one hcscale() call per line. */
2518 while(lastInChrBuf < lastChrSrcY) {
2519 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2520 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2522 assert(chrBufIndex < 2*vChrBufSize);
2523 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2524 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2525 //FIXME replace parameters through context struct (some at least)
/* Gray/mono sources skip chroma scaling entirely (see needs_hcscale setup). */
2527 if (c->needs_hcscale)
2528 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2529 chrDstW, src1, src2, chrSrcW, chrXInc,
2530 hChrFilter, hChrFilterPos, hChrFilterSize,
2531 formatConvBuffer, pal);
2533 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2534 chrBufIndex, lastInChrBuf);
2536 //wrap buf index around to stay inside the ring buffer
2537 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2538 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2540 break; //we can't output a dstY line so let's try with the next slice
2543 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Per-line dither rows for >8-bit sources. */
2545 if (should_dither) {
2546 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2547 c->lumDither8 = dither_8x8_128[dstY & 7];
2549 if (dstY >= dstH-2) {
2550 // hmm looks like we can't use MMX here without overwriting this array's tail
2551 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2552 &yuv2packed1, &yuv2packed2,
/* Point into the ring buffers at the window of lines the vertical filter reads.
   The +vLumBufSize/+vChrBufSize offset lands inside the doubled pointer array
   (asserted below), so no per-access modulo is needed. */
2557 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2558 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2559 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2560 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2561 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
/* Only write chroma on the lines a subsampled destination actually has. */
2562 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2563 if ((dstY&chrSkipMask) || isGray(dstFormat))
2564 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2565 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2567 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2568 yuv2yuv1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2571 yuv2yuv1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2572 yuv2yuv1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2575 if (alpBuf && dest[3])
2576 yuv2yuv1(alpBuf, dest[3], dstW, c->lumDither8, 0);
2577 } else { //General YV12
2578 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2579 lumSrcPtr, vLumFilterSize,
2580 vChrFilter + chrDstY * vChrFilterSize,
2581 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2582 alpSrcPtr, dest, dstW, chrDstW);
/* Packed output path: pick 1-tap, 2-tap (bilinear) or general N-tap variant. */
2585 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2586 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2587 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2588 int chrAlpha = vChrFilter[2 * dstY + 1];
2589 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2590 alpPixBuf ? *alpSrcPtr : NULL,
2591 dest[0], dstW, chrAlpha, dstY);
2592 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2593 int lumAlpha = vLumFilter[2 * dstY + 1];
2594 int chrAlpha = vChrFilter[2 * dstY + 1];
/* 0x10001 replicates the 16-bit coefficient into both halves of the
   32-bit MMX filter word. */
2596 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2598 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2599 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2600 alpPixBuf ? alpSrcPtr : NULL,
2601 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2602 } else { //general RGB
2603 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2604 lumSrcPtr, vLumFilterSize,
2605 vChrFilter + dstY * vChrFilterSize,
2606 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2607 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA destination with no alpha in the source: emit fully-opaque alpha. */
2613 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2614 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush non-temporal stores issued by MMX2 output code. */
2617 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2618 __asm__ volatile("sfence":::"memory");
2622 /* store changed local vars back in the context */
2624 c->lumBufIndex= lumBufIndex;
2625 c->chrBufIndex= chrBufIndex;
2626 c->lastInLumBuf= lastInLumBuf;
2627 c->lastInChrBuf= lastInChrBuf;
/* Number of destination lines completed by this call. */
2629 return dstY - lastDstY;
/*
 * One-time init of the plain-C helper function pointers in the context:
 * output functions (find_c_packed_planar_out_funcs), input-to-intermediate
 * converters for chroma (chrToYV12), luma (lumToYV12) and alpha (alpToYV12),
 * the horizontal scalers (hyScale/hcScale), and the range converters.
 * Arch-optimized init (MMX/AltiVec, see ff_getSwsFunc) may override these
 * afterwards.
 */
2632 static av_cold void sws_init_swScale_c(SwsContext *c)
2634 enum PixelFormat srcFormat = c->srcFormat;
2636 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2637 &c->yuv2packed1, &c->yuv2packed2,
/* chrToYV12: converts one line of input chroma to the planar intermediate;
   NULL means the input chroma is already in the expected planar layout. */
2640 c->chrToYV12 = NULL;
2642 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2643 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2644 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2645 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Palettized formats (incl. 4/8-bit RGB treated as palette) look up U/V. */
2649 case PIX_FMT_BGR4_BYTE:
2650 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10/16-bit planar YUV: both the LE and BE groups map to bswap16UV_c;
   presumably an endianness #if (not visible in this extraction) selects the
   non-native group so samples get swapped to host order — verify upstream. */
2652 case PIX_FMT_YUV444P9LE:
2653 case PIX_FMT_YUV422P9LE:
2654 case PIX_FMT_YUV420P9LE:
2655 case PIX_FMT_YUV422P10LE:
2656 case PIX_FMT_YUV444P10LE:
2657 case PIX_FMT_YUV420P10LE:
2658 case PIX_FMT_YUV420P16LE:
2659 case PIX_FMT_YUV422P16LE:
2660 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2662 case PIX_FMT_YUV444P9BE:
2663 case PIX_FMT_YUV422P9BE:
2664 case PIX_FMT_YUV420P9BE:
2665 case PIX_FMT_YUV444P10BE:
2666 case PIX_FMT_YUV422P10BE:
2667 case PIX_FMT_YUV420P10BE:
2668 case PIX_FMT_YUV420P16BE:
2669 case PIX_FMT_YUV422P16BE:
2670 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB sources: when the destination chroma is horizontally subsampled, use
   the *_half converters (one chroma sample per pair of source pixels). */
2673 if (c->chrSrcHSubSample) {
2675 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2676 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2677 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2678 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2679 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2680 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2681 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2682 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2683 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2684 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2685 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2686 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2687 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2688 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2689 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2690 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2691 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2692 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* ...otherwise the 1:1 (full-resolution chroma) converters. Note the
   RGB<->BGR naming crossover: PIX_FMT_RGB32 uses bgr32ToUV_c and vice
   versa, reflecting swscale's internal component-order convention. */
2696 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2697 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2698 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2699 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2700 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2701 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2702 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2703 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2704 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2705 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2706 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2707 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2708 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2709 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2710 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2711 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2712 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2713 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha input converters; NULL means the plane is used as-is. */
2717 c->lumToYV12 = NULL;
2718 c->alpToYV12 = NULL;
2719 switch (srcFormat) {
/* >8-bit planar / 16-bit gray: byteswap to host order (endianness guard
   presumably outside this extraction, as for chroma above). */
2721 case PIX_FMT_YUV444P9LE:
2722 case PIX_FMT_YUV422P9LE:
2723 case PIX_FMT_YUV420P9LE:
2724 case PIX_FMT_YUV444P10LE:
2725 case PIX_FMT_YUV422P10LE:
2726 case PIX_FMT_YUV420P10LE:
2727 case PIX_FMT_YUV420P16LE:
2728 case PIX_FMT_YUV422P16LE:
2729 case PIX_FMT_YUV444P16LE:
2730 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2732 case PIX_FMT_YUV444P9BE:
2733 case PIX_FMT_YUV422P9BE:
2734 case PIX_FMT_YUV420P9BE:
2735 case PIX_FMT_YUV444P10BE:
2736 case PIX_FMT_YUV422P10BE:
2737 case PIX_FMT_YUV420P10BE:
2738 case PIX_FMT_YUV420P16BE:
2739 case PIX_FMT_YUV422P16BE:
2740 case PIX_FMT_YUV444P16BE:
2741 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A (gray+alpha) shares yuy2ToY_c: its luma bytes sit at the same
   even interleaved positions as YUY2 luma. */
2743 case PIX_FMT_YUYV422 :
2744 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2745 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2746 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2747 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2748 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2749 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2750 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2751 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2752 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2753 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2754 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2755 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2759 case PIX_FMT_BGR4_BYTE:
2760 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2761 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2762 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2763 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2764 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2765 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2766 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2767 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2768 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2769 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2770 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for sources that carry an alpha channel. */
2773 switch (srcFormat) {
2775 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2777 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A alpha reuses uyvyToY_c: alpha occupies the odd interleaved
   positions, the same layout as UYVY luma. */
2778 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scalers, chosen by source bit depth (srcBpc) and the
   intermediate precision the destination needs (15 vs 19 bit). */
2782 if (c->srcBpc == 8) {
2783 if (c->dstBpc <= 10) {
2784 c->hyScale = c->hcScale = hScale8To15_c;
/* FAST_BILINEAR gets dedicated fixed-point fast paths. */
2785 if (c->flags & SWS_FAST_BILINEAR) {
2786 c->hyscale_fast = hyscale_fast_c;
2787 c->hcscale_fast = hcscale_fast_c;
2790 c->hyScale = c->hcScale = hScale8To19_c;
2793 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Limited<->full range conversion, only when ranges differ and the
   destination is not RGB (the condition below excludes any-RGB dst). */
2796 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2797 if (c->dstBpc <= 10) {
2799 c->lumConvertRange = lumRangeFromJpeg_c;
2800 c->chrConvertRange = chrRangeFromJpeg_c;
2802 c->lumConvertRange = lumRangeToJpeg_c;
2803 c->chrConvertRange = chrRangeToJpeg_c;
/* 16-bit-intermediate variants of the same range conversions. */
2807 c->lumConvertRange = lumRangeFromJpeg16_c;
2808 c->chrConvertRange = chrRangeFromJpeg16_c;
2810 c->lumConvertRange = lumRangeToJpeg16_c;
2811 c->chrConvertRange = chrRangeToJpeg16_c;
/* Chroma horizontal scaling is skipped entirely for gray/mono content. */
2816 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2817 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2818 c->needs_hcscale = 1;
2821 SwsFunc ff_getSwsFunc(SwsContext *c)
2823 sws_init_swScale_c(c);
2826 ff_sws_init_swScale_mmx(c);
2828 ff_sws_init_swScale_altivec(c);