2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in Q15 fixed point (BT.601 weights
 * 0.299/0.587/0.114).  Luma taps are scaled by 219/255 (Y range 16..235),
 * chroma taps by 224/255 (U/V range 16..240); "+0.5" rounds to nearest. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))  // blue  contribution to Y
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // blue  contribution to V
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // blue  contribution to U
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))  // green contribution to Y
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // green contribution to V
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // green contribution to U
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))  // red   contribution to Y
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // red   contribution to V
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))  // red   contribution to U
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative dither table (gamma ~1.5 corrected).  This re-declares
 * dither_8x8_220, which collides with the active definition above, so it is
 * disabled here; enable by swapping it for the active table if desired. */
#if 0
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#endif
/* Alternative dither table (gamma ~2.0 corrected).  This re-declares
 * dither_8x8_220, which collides with the active definition above, so it is
 * disabled here; enable by swapping it for the active table if desired. */
#if 0
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#endif
/* Alternative dither table (gamma ~2.5 corrected).  This re-declares
 * dither_8x8_220, which collides with the active definition above, so it is
 * disabled here; enable by swapping it for the active table if desired. */
#if 0
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight packed bytes of value 64; exported (ff_ prefix) — presumably a SIMD
 * constant used by the assembly optimizations.  TODO(review): confirm users. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
/* Vertical filter pass producing 16-bit planar YUV(A) from 32-bit
 * intermediates: each output sample is a lumFilterSize/chrFilterSize-tap
 * weighted sum, shifted down to output_bits and stored big- or little-endian
 * per the big_endian flag.
 * NOTE(review): this excerpt has lost several structural lines (opening and
 * closing braces, the i/j declarations, the if(big_endian)/else branch of
 * output_pixel and the trailing #undef) — restore them before compiling. */
static av_always_inline void
yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int32_t **chrUSrc, const int32_t **chrVSrc,
                      int chrFilterSize, const int32_t **alpSrc,
                      uint16_t *dest[4], int dstW, int chrDstW,
                      int big_endian, int output_bits)
//FIXME Optimize (just quickly written not optimized..)
uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
         *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* the extra "- 1" compensates for the ">> 1" applied to every product below
 * (keeps the 32-bit accumulator from overflowing) */
int shift = 15 + 16 - output_bits - 1;

/* store one clipped 16-bit sample at the requested endianness */
#define output_pixel(pos, val) \
        AV_WB16(pos, av_clip_uint16(val >> shift)); \
        AV_WL16(pos, av_clip_uint16(val >> shift)); \
/* luma plane */
for (i = 0; i < dstW; i++) {
    int val = 1 << (30-output_bits - 1);   // rounding bias
    for (j = 0; j < lumFilterSize; j++)
        val += (lumSrc[j][i] * lumFilter[j]) >> 1;
    output_pixel(&yDest[i], val);
/* chroma planes */
for (i = 0; i < chrDstW; i++) {
    int u = 1 << (30-output_bits - 1);
    int v = 1 << (30-output_bits - 1);
    for (j = 0; j < chrFilterSize; j++) {
        u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
        v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
    output_pixel(&uDest[i], u);
    output_pixel(&vDest[i], v);
/* optional alpha plane, filtered with the luma coefficients */
if (CONFIG_SWSCALE_ALPHA && aDest) {
    for (i = 0; i < dstW; i++) {
        int val = 1 << (30-output_bits - 1);
        for (j = 0; j < lumFilterSize; j++)
            val += (alpSrc[j][i] * lumFilter[j]) >> 1;
        output_pixel(&aDest[i], val);
/* Vertical filter pass producing 9/10-bit planar YUV(A) from 16-bit
 * intermediates.  Same structure as yuv2yuvX16_c_template, but clips to
 * output_bits with av_clip_uintp2 instead of a full 16-bit clip.
 * NOTE(review): this excerpt has lost several structural lines (braces, i/j
 * declarations, the if(big_endian)/else branch of output_pixel and the
 * trailing #undef) — restore them before compiling. */
static av_always_inline void
yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int16_t **chrUSrc, const int16_t **chrVSrc,
                      int chrFilterSize, const int16_t **alpSrc,
                      uint16_t *dest[4], int dstW, int chrDstW,
                      int big_endian, int output_bits)
//FIXME Optimize (just quickly written not optimized..)
uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
         *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* the extra "- 1" matches the ">> 1" on each product in the loops below */
int shift = 15 + 16 - output_bits - 1;

/* store one sample clipped to output_bits, at the requested endianness */
#define output_pixel(pos, val) \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* luma plane */
for (i = 0; i < dstW; i++) {
    int val = 1 << (30-output_bits - 1);   // rounding bias
    for (j = 0; j < lumFilterSize; j++)
        val += (lumSrc[j][i] * lumFilter[j]) >> 1;
    output_pixel(&yDest[i], val);
/* chroma planes */
for (i = 0; i < chrDstW; i++) {
    int u = 1 << (30-output_bits - 1);
    int v = 1 << (30-output_bits - 1);
    for (j = 0; j < chrFilterSize; j++) {
        u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
        v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
    output_pixel(&uDest[i], u);
    output_pixel(&vDest[i], v);
/* optional alpha plane, filtered with the luma coefficients */
if (CONFIG_SWSCALE_ALPHA && aDest) {
    for (i = 0; i < dstW; i++) {
        int val = 1 << (30-output_bits - 1);
        for (j = 0; j < lumFilterSize; j++)
            val += (alpSrc[j][i] * lumFilter[j]) >> 1;
        output_pixel(&aDest[i], val);
/* Generates a concrete yuv2yuvX<bits><BE|LE>_c output function: casts the
 * generic int16_t** source arrays to the template's element type (int16_t for
 * 9/10 bit, int32_t for 16 bit) and forwards to the given template with the
 * endianness and bit depth baked in as compile-time constants.
 * NOTE(review): the excerpt is missing the macro's "{ \" and closing "}"
 * lines — restore before compiling. */
#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                              const int16_t **_lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **_chrUSrc, \
                              const int16_t **_chrVSrc, \
                              int chrFilterSize, const int16_t **_alpSrc, \
                              uint8_t *_dest[4], int dstW, int chrDstW) \
    const typeX_t **lumSrc  = (const typeX_t **) _lumSrc, \
                  **chrUSrc = (const typeX_t **) _chrUSrc, \
                  **chrVSrc = (const typeX_t **) _chrVSrc, \
                  **alpSrc  = (const typeX_t **) _alpSrc; \
    yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
                         chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                         alpSrc, (uint16_t **) _dest, \
                         dstW, chrDstW, is_be, bits); \
/* instantiate 9-, 10- and 16-bit variants in both endiannesses */
yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
/* Vertical filter pass producing 8-bit planar YUV(A): per-pixel N-tap
 * weighted sum of the 16-bit intermediates plus an ordered-dither bias,
 * shifted down by 19 and clipped to 8 bits.
 * NOTE(review): this excerpt is missing the body braces and the i/j
 * declarations — restore before compiling. */
static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                       const int16_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int16_t **chrUSrc,
                       const int16_t **chrVSrc,
                       int chrFilterSize, const int16_t **alpSrc,
                       uint8_t *dest[4], int dstW, int chrDstW)
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
        *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

//FIXME Optimize (just quickly written not optimized..)
/* luma plane; dither bias is pre-shifted so it lands below the >>19 */
for (i=0; i<dstW; i++) {
    int val = lumDither[i & 7] << 12;
    for (j=0; j<lumFilterSize; j++)
        val += lumSrc[j][i] * lumFilter[j];

    yDest[i]= av_clip_uint8(val>>19);
/* chroma planes; V uses a phase-shifted dither index to decorrelate from U */
for (i=0; i<chrDstW; i++) {
    int u = chrDither[i & 7] << 12;
    int v = chrDither[(i + 3) & 7] << 12;
    for (j=0; j<chrFilterSize; j++) {
        u += chrUSrc[j][i] * chrFilter[j];
        v += chrVSrc[j][i] * chrFilter[j];

    uDest[i]= av_clip_uint8(u>>19);
    vDest[i]= av_clip_uint8(v>>19);
/* optional alpha plane, filtered with the luma coefficients */
if (CONFIG_SWSCALE_ALPHA && aDest)
    for (i=0; i<dstW; i++) {
        int val = lumDither[i & 7] << 12;
        for (j=0; j<lumFilterSize; j++)
            val += alpSrc[j][i] * lumFilter[j];

        aDest[i]= av_clip_uint8(val>>19);
/* Unfiltered (single-source-line) 8-bit planar output: just add the dither
 * bias, shift the 16-bit intermediate down by 7 and clip to 8 bits.
 * NOTE(review): this excerpt is missing the body braces and the i
 * declaration — restore before compiling. */
static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                       const int16_t *chrUSrc, const int16_t *chrVSrc,
                       const int16_t *alpSrc,
                       uint8_t *dest[4], int dstW, int chrDstW)
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
        *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

/* luma plane */
for (i=0; i<dstW; i++) {
    int val = (lumSrc[i]+  lumDither[i & 7]) >> 7;
    yDest[i]= av_clip_uint8(val);
/* chroma planes; V dither index phase-shifted by 3 as in yuv2yuvX_c */
for (i=0; i<chrDstW; i++) {
    int u = (chrUSrc[i] + chrDither[i & 7])       >> 7;
    int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
    uDest[i]= av_clip_uint8(u);
    vDest[i]= av_clip_uint8(v);
/* optional alpha plane */
if (CONFIG_SWSCALE_ALPHA && aDest)
    for (i=0; i<dstW; i++) {
        int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
        aDest[i]= av_clip_uint8(val);
/* Vertical filter pass for the semi-planar NV12/NV21 formats: planar 8-bit
 * luma plus one interleaved chroma plane (U,V order for NV12, V,U for NV21).
 * NOTE(review): this excerpt is missing the body braces, i/j declarations and
 * the "else" between the NV12 and NV21 chroma loops — restore before
 * compiling. */
static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
                        const int16_t **alpSrc, uint8_t *dest[4],
                        int dstW, int chrDstW)
uint8_t *yDest = dest[0], *uDest = dest[1];
enum PixelFormat dstFormat = c->dstFormat;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

//FIXME Optimize (just quickly written not optimized..)
/* luma plane, same scheme as yuv2yuvX_c */
for (i=0; i<dstW; i++) {
    int val = lumDither[i & 7] << 12;
    for (j=0; j<lumFilterSize; j++)
        val += lumSrc[j][i] * lumFilter[j];

    yDest[i]= av_clip_uint8(val>>19);

/* interleaved chroma: NV12 stores U,V pairs ... */
if (dstFormat == PIX_FMT_NV12)
    for (i=0; i<chrDstW; i++) {
        int u = chrDither[i & 7] << 12;
        int v = chrDither[(i + 3) & 7] << 12;
        for (j=0; j<chrFilterSize; j++) {
            u += chrUSrc[j][i] * chrFilter[j];
            v += chrVSrc[j][i] * chrFilter[j];

        uDest[2*i]= av_clip_uint8(u>>19);
        uDest[2*i+1]= av_clip_uint8(v>>19);
/* ... while NV21 stores V,U pairs (swapped stores below) */
    for (i=0; i<chrDstW; i++) {
        int u = chrDither[i & 7] << 12;
        int v = chrDither[(i + 3) & 7] << 12;
        for (j=0; j<chrFilterSize; j++) {
            u += chrUSrc[j][i] * chrFilter[j];
            v += chrVSrc[j][i] * chrFilter[j];

        uDest[2*i]= av_clip_uint8(v>>19);
        uDest[2*i+1]= av_clip_uint8(u>>19);
/* Store helper for the gray16 writers: big- or little-endian 16-bit store
 * selected by the compile-time target.
 * NOTE(review): the macro's AV_WB16/AV_WL16 branch lines are missing from
 * this excerpt — restore before compiling. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
/* N-tap vertical filter pass to 16-bit grayscale; processes two pixels per
 * iteration.  NOTE(review): body braces, i/j/Y1/Y2 declarations and the
 * accumulator initializers are missing from this excerpt. */
static av_always_inline void
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum PixelFormat target)
for (i = 0; i < (dstW >> 1); i++) {
    for (j = 0; j < lumFilterSize; j++) {
        Y1 += lumSrc[j][i * 2]     * lumFilter[j];
        Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
    /* only pay for the clip when a value actually overflowed 16 bits */
    if ((Y1 | Y2) & 0x10000) {
        Y1 = av_clip_uint16(Y1);
        Y2 = av_clip_uint16(Y2);
    output_pixel(&dest[i * 2 + 0], Y1);
    output_pixel(&dest[i * 2 + 1], Y2);
/* Two-source-line (bilinear in y) blend to 16-bit grayscale: Y is the
 * yalpha-weighted mix of buf[0] and buf[1], two pixels per iteration.
 * NOTE(review): body braces and the i declaration are missing from this
 * excerpt — restore before compiling. */
static av_always_inline void
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum PixelFormat target)
int  yalpha1 = 4095 - yalpha;   // complementary blend weight (12-bit)
const int32_t *buf0 = buf[0], *buf1 = buf[1];

for (i = 0; i < (dstW >> 1); i++) {
    int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
    int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;

    output_pixel(&dest[i * 2 + 0], Y1);
    output_pixel(&dest[i * 2 + 1], Y2);
/* Single-source-line fast path to 16-bit grayscale: no vertical
 * interpolation, just rescale the intermediate (<< 1) and store.
 * NOTE(review): body braces and the i declaration are missing from this
 * excerpt — restore before compiling. */
static av_always_inline void
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
                        int uvalpha, int y, enum PixelFormat target)
for (i = 0; i < (dstW >> 1); i++) {
    int Y1 = buf0[i * 2    ] << 1;
    int Y2 = buf0[i * 2 + 1] << 1;

    output_pixel(&dest[i * 2 + 0], Y1);
    output_pixel(&dest[i * 2 + 1], Y2);
/* Generates the three public entry points (_X_c N-tap filter, _2_c two-line
 * blend, _1_c single-line fast path) for a 16-bit-per-component packed
 * writer: each casts the generic int16_t buffers to int32_t and forwards to
 * the matching *_c_template with the pixel format baked in.
 * NOTE(review): the "{ \" / "}" lines and the "int y) \" parameter line of
 * the _X_c signature are missing from this excerpt — restore before
 * compiling. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
/* 16-bit grayscale writers, both endiannesses */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* Store helper for the 1-bit writers: MONOBLACK stores the 8-pixel
 * accumulator as-is, MONOWHITE inverts it.
 * NOTE(review): the macro's branch bodies are missing from this excerpt —
 * restore before compiling. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
/* N-tap vertical filter pass to 1-bit monochrome: luma is dithered through
 * the gray ramp of the yuv2rgb tables (g) and 8 result bits are packed
 * MSB-first into each output byte.
 * NOTE(review): body braces, i/j/acc/Y1/Y2 declarations and the accumulator
 * initializers are missing from this excerpt. */
static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum PixelFormat target)
const uint8_t * const d128=dither_8x8_220[y&7];
uint8_t *g = c->table_gU[128] + c->table_gV[128];   // gray LUT at neutral chroma

for (i = 0; i < dstW - 1; i += 2) {
    for (j = 0; j < lumFilterSize; j++) {
        Y1 += lumSrc[j][i]   * lumFilter[j];
        Y2 += lumSrc[j][i+1] * lumFilter[j];
    /* clip only when a value overflowed 8 bits */
    if ((Y1 | Y2) & 0x100) {
        Y1 = av_clip_uint8(Y1);
        Y2 = av_clip_uint8(Y2);
    /* shift two more bits into the accumulator (acc = acc*2 + bit) */
    acc += acc + g[Y1 + d128[(i + 0) & 7]];
    acc += acc + g[Y2 + d128[(i + 1) & 7]];
    output_pixel(*dest++, acc);
/* Two-source-line blend to 1-bit monochrome: eight yalpha-blended, dithered
 * luma samples are packed MSB-first into one output byte per iteration.
 * NOTE(review): body braces and the i declaration are missing from this
 * excerpt — restore before compiling. */
static av_always_inline void
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target)
const int16_t *buf0  = buf[0],  *buf1  = buf[1];
const uint8_t * const d128 = dither_8x8_220[y & 7];
uint8_t *g = c->table_gU[128] + c->table_gV[128];   // gray LUT at neutral chroma
int  yalpha1 = 4095 - yalpha;                       // complementary blend weight

for (i = 0; i < dstW - 7; i += 8) {
    int acc =  g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
    acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
    acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
    acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
    acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
    acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
    acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
    acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
    output_pixel(*dest++, acc);
/* Single-source-line fast path to 1-bit monochrome: no vertical
 * interpolation; eight dithered luma samples per output byte, MSB first.
 * NOTE(review): body braces and the i declaration are missing from this
 * excerpt — restore before compiling. */
static av_always_inline void
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf0, uint8_t *dest, int dstW,
                      int uvalpha, int y, enum PixelFormat target)
const uint8_t * const d128 = dither_8x8_220[y & 7];
uint8_t *g = c->table_gU[128] + c->table_gV[128];   // gray LUT at neutral chroma

for (i = 0; i < dstW - 7; i += 8) {
    int acc =  g[(buf0[i    ] >> 7) + d128[0]];
    acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
    acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
    acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
    acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
    acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
    acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
    acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
    output_pixel(*dest++, acc);
/* Generates the three public entry points (_X_c, _2_c, _1_c) for an 8-bit
 * packed writer by forwarding directly to the matching *_c_template with the
 * pixel format baked in (no buffer retyping needed, unlike the 16-bit
 * wrapper).  NOTE(review): the "{ \" / "}" lines, the "int y) \" parameter
 * line and the tail of the _1_c forwarding call are missing from this
 * excerpt — restore before compiling. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **chrUSrc, \
                        const int16_t **chrVSrc, int chrFilterSize, \
                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf0, uint8_t *dest, int dstW, \
                        int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
/* 1-bit monochrome writers (white = 0 vs black = 0 polarity) */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store helper for 4:2:2 packed output: YUYV puts luma at even offsets and
 * chroma at odd, UYVY the reverse.
 * NOTE(review): the U/V stores and the "} else {" of this macro are missing
 * from this excerpt — restore before compiling. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 2] = Y2; \
        dest[pos + 1] = Y1; \
        dest[pos + 3] = Y2; \
/* N-tap vertical filter pass to packed 4:2:2 (YUYV/UYVY): two luma and one
 * chroma pair per iteration.
 * NOTE(review): body braces, i/j declarations and the Y1/Y2/U/V accumulator
 * initializers are missing from this excerpt. */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target)
for (i = 0; i < (dstW >> 1); i++) {
    for (j = 0; j < lumFilterSize; j++) {
        Y1 += lumSrc[j][i * 2]     * lumFilter[j];
        Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
    for (j = 0; j < chrFilterSize; j++) {
        U += chrUSrc[j][i] * chrFilter[j];
        V += chrVSrc[j][i] * chrFilter[j];
    /* clip only when some component overflowed 8 bits */
    if ((Y1 | Y2 | U | V) & 0x100) {
        Y1 = av_clip_uint8(Y1);
        Y2 = av_clip_uint8(Y2);
        U  = av_clip_uint8(U);
        V  = av_clip_uint8(V);
    output_pixels(4*i, Y1, U, Y2, V);
778 static av_always_inline void
779 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
780 const int16_t *ubuf[2], const int16_t *vbuf[2],
781 const int16_t *abuf[2], uint8_t *dest, int dstW,
782 int yalpha, int uvalpha, int y,
783 enum PixelFormat target)
785 const int16_t *buf0 = buf[0], *buf1 = buf[1],
786 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
787 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
788 int yalpha1 = 4095 - yalpha;
789 int uvalpha1 = 4095 - uvalpha;
792 for (i = 0; i < (dstW >> 1); i++) {
793 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
794 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
795 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
796 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
798 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-source-line fast path to packed 4:2:2.  uvalpha < 2048 means the
 * chroma of the nearest line is used as-is; otherwise the two chroma lines
 * are averaged ((a+b)>>8 instead of >>7).
 * NOTE(review): body braces, the i declaration and the "} else {" between
 * the two loops are missing from this excerpt — restore before compiling. */
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target)
const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
              *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];

if (uvalpha < 2048) {
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = buf0[i * 2]     >> 7;
        int Y2 = buf0[i * 2 + 1] >> 7;
        int U  = ubuf1[i] >> 7;
        int V  = vbuf1[i] >> 7;

        output_pixels(i * 4, Y1, U, Y2, V);
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = buf0[i * 2]     >> 7;
        int Y2 = buf0[i * 2 + 1] >> 7;
        int U  = (ubuf0[i] + ubuf1[i]) >> 8;   // average of the two chroma lines
        int V  = (vbuf0[i] + vbuf1[i]) >> 8;

        output_pixels(i * 4, Y1, U, Y2, V);
/* packed 4:2:2 writers for both component orders */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Component-order helpers: RGB48 stores R first, BGR48 stores B first. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
/* 16-bit component store, endianness chosen from the target format.
 * NOTE(review): the AV_WB16/AV_WL16 branch lines of this macro are missing
 * from this excerpt — restore before compiling. */
#define output_pixel(pos, val) \
    if (isBE(target)) { \
/* N-tap vertical filter pass to 48-bit RGB/BGR: full YUV->RGB matrix using
 * the per-context coefficients, two pixels per iteration, results clipped to
 * 30 bits then shifted down to 16 bits per component.
 * NOTE(review): body braces, i/j declarations, the Y1/Y2/V accumulator
 * initializers, some shifts and the dest advance are missing from this
 * excerpt. */
static av_always_inline void
yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
                       const int32_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int32_t **chrUSrc,
                       const int32_t **chrVSrc, int chrFilterSize,
                       const int32_t **alpSrc, uint16_t *dest, int dstW,
                       int y, enum PixelFormat target)
for (i = 0; i < (dstW >> 1); i++) {
    int U = -128 << 23; // 19
    for (j = 0; j < lumFilterSize; j++) {
        Y1 += lumSrc[j][i * 2]     * lumFilter[j];
        Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
    for (j = 0; j < chrFilterSize; j++) {
        U += chrUSrc[j][i] * chrFilter[j];
        V += chrVSrc[j][i] * chrFilter[j];

    // 8bit: 12+15=27; 16-bit: 12+19=31
    // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
    Y1 -= c->yuv2rgb_y_offset;
    Y2 -= c->yuv2rgb_y_offset;
    Y1 *= c->yuv2rgb_y_coeff;
    Y2 *= c->yuv2rgb_y_coeff;
    // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
    R = V * c->yuv2rgb_v2r_coeff;
    G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    B =                            U * c->yuv2rgb_u2b_coeff;

    // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
    output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
    output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
    output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
    output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
    output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
    output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-source-line blend to 48-bit RGB/BGR: yalpha/uvalpha-weighted mix of
 * the two intermediate lines, then the same YUV->RGB matrix and 30-bit-clip
 * store scheme as the _X_c variant.  The (-128 << 23) recentres chroma.
 * NOTE(review): body braces, i/R/G/B declarations and the dest advance are
 * missing from this excerpt — restore before compiling. */
static av_always_inline void
yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
                       const int32_t *ubuf[2], const int32_t *vbuf[2],
                       const int32_t *abuf[2], uint16_t *dest, int dstW,
                       int yalpha, int uvalpha, int y,
                       enum PixelFormat target)
const int32_t *buf0  = buf[0],  *buf1  = buf[1],
              *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
              *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
int  yalpha1 = 4095 -  yalpha;   // complementary luma weight (12-bit)
int uvalpha1 = 4095 - uvalpha;   // complementary chroma weight

for (i = 0; i < (dstW >> 1); i++) {
    int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
    int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
    int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
    int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;

    Y1 -= c->yuv2rgb_y_offset;
    Y2 -= c->yuv2rgb_y_offset;
    Y1 *= c->yuv2rgb_y_coeff;
    Y2 *= c->yuv2rgb_y_coeff;

    R = V * c->yuv2rgb_v2r_coeff;
    G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    B =                            U * c->yuv2rgb_u2b_coeff;

    output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
    output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
    output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
    output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
    output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
    output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
946 static av_always_inline void
947 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
948 const int32_t *ubuf[2], const int32_t *vbuf[2],
949 const int32_t *abuf0, uint16_t *dest, int dstW,
950 int uvalpha, int y, enum PixelFormat target)
952 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
953 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
956 if (uvalpha < 2048) {
957 for (i = 0; i < (dstW >> 1); i++) {
958 int Y1 = (buf0[i * 2] ) >> 2;
959 int Y2 = (buf0[i * 2 + 1]) >> 2;
960 int U = (ubuf0[i] + (-128 << 11)) >> 2;
961 int V = (vbuf0[i] + (-128 << 11)) >> 2;
964 Y1 -= c->yuv2rgb_y_offset;
965 Y2 -= c->yuv2rgb_y_offset;
966 Y1 *= c->yuv2rgb_y_coeff;
967 Y2 *= c->yuv2rgb_y_coeff;
971 R = V * c->yuv2rgb_v2r_coeff;
972 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
973 B = U * c->yuv2rgb_u2b_coeff;
975 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
976 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
977 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
978 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
979 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
980 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
984 for (i = 0; i < (dstW >> 1); i++) {
985 int Y1 = (buf0[i * 2] ) >> 2;
986 int Y2 = (buf0[i * 2 + 1]) >> 2;
987 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
988 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
991 Y1 -= c->yuv2rgb_y_offset;
992 Y2 -= c->yuv2rgb_y_offset;
993 Y1 *= c->yuv2rgb_y_coeff;
994 Y2 *= c->yuv2rgb_y_coeff;
998 R = V * c->yuv2rgb_v2r_coeff;
999 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1000 B = U * c->yuv2rgb_u2b_coeff;
1002 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1003 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1004 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1005 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1006 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1007 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1017 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1018 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1019 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1020 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
1022 static av_always_inline void
1023 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1024 int U, int V, int A1, int A2,
1025 const void *_r, const void *_g, const void *_b, int y,
1026 enum PixelFormat target, int hasAlpha)
1028 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1029 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1030 uint32_t *dest = (uint32_t *) _dest;
1031 const uint32_t *r = (const uint32_t *) _r;
1032 const uint32_t *g = (const uint32_t *) _g;
1033 const uint32_t *b = (const uint32_t *) _b;
1036 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1038 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1039 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1042 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1044 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1045 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1047 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1048 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1051 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1052 uint8_t *dest = (uint8_t *) _dest;
1053 const uint8_t *r = (const uint8_t *) _r;
1054 const uint8_t *g = (const uint8_t *) _g;
1055 const uint8_t *b = (const uint8_t *) _b;
1057 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1058 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1059 dest[i * 6 + 0] = r_b[Y1];
1060 dest[i * 6 + 1] = g[Y1];
1061 dest[i * 6 + 2] = b_r[Y1];
1062 dest[i * 6 + 3] = r_b[Y2];
1063 dest[i * 6 + 4] = g[Y2];
1064 dest[i * 6 + 5] = b_r[Y2];
1067 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1068 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1069 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1070 uint16_t *dest = (uint16_t *) _dest;
1071 const uint16_t *r = (const uint16_t *) _r;
1072 const uint16_t *g = (const uint16_t *) _g;
1073 const uint16_t *b = (const uint16_t *) _b;
1074 int dr1, dg1, db1, dr2, dg2, db2;
1076 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1077 dr1 = dither_2x2_8[ y & 1 ][0];
1078 dg1 = dither_2x2_4[ y & 1 ][0];
1079 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1080 dr2 = dither_2x2_8[ y & 1 ][1];
1081 dg2 = dither_2x2_4[ y & 1 ][1];
1082 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1083 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1084 dr1 = dither_2x2_8[ y & 1 ][0];
1085 dg1 = dither_2x2_8[ y & 1 ][1];
1086 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1087 dr2 = dither_2x2_8[ y & 1 ][1];
1088 dg2 = dither_2x2_8[ y & 1 ][0];
1089 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1091 dr1 = dither_4x4_16[ y & 3 ][0];
1092 dg1 = dither_4x4_16[ y & 3 ][1];
1093 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1094 dr2 = dither_4x4_16[ y & 3 ][1];
1095 dg2 = dither_4x4_16[ y & 3 ][0];
1096 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1099 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1100 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1101 } else /* 8/4-bit */ {
1102 uint8_t *dest = (uint8_t *) _dest;
1103 const uint8_t *r = (const uint8_t *) _r;
1104 const uint8_t *g = (const uint8_t *) _g;
1105 const uint8_t *b = (const uint8_t *) _b;
1106 int dr1, dg1, db1, dr2, dg2, db2;
1108 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1109 const uint8_t * const d64 = dither_8x8_73[y & 7];
1110 const uint8_t * const d32 = dither_8x8_32[y & 7];
1111 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1112 db1 = d64[(i * 2 + 0) & 7];
1113 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1114 db2 = d64[(i * 2 + 1) & 7];
1116 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1117 const uint8_t * const d128 = dither_8x8_220[y & 7];
1118 dr1 = db1 = d128[(i * 2 + 0) & 7];
1119 dg1 = d64[(i * 2 + 0) & 7];
1120 dr2 = db2 = d128[(i * 2 + 1) & 7];
1121 dg2 = d64[(i * 2 + 1) & 7];
1124 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1125 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1126 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1128 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1129 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Vertical N-tap filtering of luma/chroma (and optionally alpha) planes
 * followed by YUV->RGB conversion through the context's lookup tables.
 * Two luma samples (sharing one chroma pair) are produced per iteration
 * and handed to yuv2rgb_write() for the packed format 'target'.
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
    for (i = 0; i < (dstW >> 1); i++) {
        int av_unused A1, A2;
        const void *r, *g, *b;
        /* accumulate the vertical filter taps for a pair of luma samples */
        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2] * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        /* one chroma sample per two luma samples (4:2:2 horizontal layout) */
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        /* cheap range test: clip only when some value left the 8-bit range */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        /* vertical filtering of the alpha plane, same taps as luma */
        for (j = 0; j < lumFilterSize; j++) {
            A1 += alpSrc[j][i * 2    ] * lumFilter[j];
            A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
        if ((A1 | A2) & 0x100) {
            A1 = av_clip_uint8(A1);
            A2 = av_clip_uint8(A2);
        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
        g = (c->table_gU[U] + c->table_gV[V]);
        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/*
 * 2-tap variant of the converter above: each output line is a blend of two
 * input lines with weights (4095 - yalpha)/yalpha for luma and
 * (4095 - uvalpha)/uvalpha for chroma (12-bit blend factors).
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
    const int16_t *buf0 = buf[0], *buf1 = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1 = 4095 - yalpha;    /* complementary luma blend weight */
    int uvalpha1 = 4095 - uvalpha;  /* complementary chroma blend weight */
    for (i = 0; i < (dstW >> 1); i++) {
        /* 12-bit weight on 15-bit samples; >> 19 brings values back to 8 bits */
        int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        const void *r = c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b = c->table_bU[U];
            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/*
 * 1-tap (unfiltered) variant: luma comes straight from buf0 (>> 7 converts
 * the 15-bit intermediate to 8 bits). For chroma, a small uvalpha selects a
 * single chroma line, otherwise the two lines are averaged (>> 8 halves the
 * sum while scaling).
 */
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target,
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    if (uvalpha < 2048) {
        /* chroma position close to this line: use one chroma buffer only */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2] >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U = ubuf1[i] >> 7;
            int V = vbuf1[i] >> 7;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        /* otherwise average the two neighbouring chroma lines */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2] >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U = (ubuf0[i] + ubuf1[i]) >> 8;
            int V = (vbuf0[i] + vbuf1[i]) >> 8;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
/*
 * Boilerplate generators: YUV2RGBWRAPPERX instantiates the N-tap (_X) output
 * function for a fixed pixel format; YUV2RGBWRAPPER additionally generates
 * the 2-tap (_2) and 1-tap (_1) variants. All of them forward to the shared
 * templates above with 'fmt' and 'hasAlpha' baked in as compile-time
 * constants so the per-format branches fold away.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
/* Concrete packed-RGB output functions. The 'a' variants force alpha on,
 * the 'x' variants force it off; the plain 32-bit ones decide at runtime
 * via CONFIG_SWSCALE_ALPHA && c->alpPixBuf. */
YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * Full-chroma-interpolation output: one chroma sample per output pixel
 * (no 4:2:2 pairing), converted arithmetically with the per-context
 * coefficients instead of lookup tables. 'step' is the bytes per output
 * pixel (3 for 24-bit RGB/BGR, 4 for the 32-bit formats).
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
    for (i = 0; i < dstW; i++) {
        /* vertical filtering of the luma and chroma planes */
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        for (j = 0; j < lumFilterSize; j++) {
            A += alpSrc[j][i] * lumFilter[j];
            A = av_clip_uint8(A);
        /* fixed-point YUV -> RGB using the context's BT.601/709 coefficients */
        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        R = Y + V*c->yuv2rgb_v2r_coeff;
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
        B = Y + U*c->yuv2rgb_u2b_coeff;
        /* clip only if some 30-bit intermediate went out of range */
        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);
            /* alpha-first layouts (ARGB/ABGR) */
            dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
            /* alpha-last layouts (RGBA/BGRA) */
            dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
/* Full-chroma-interpolation instantiations for all 32-bit layouts and the
 * 24-bit formats; alpha policy follows the same scheme as above. */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill a width x height rectangle of 'plane', starting at line y, with a
 * constant byte value; 'stride' is the plane's line pitch in bytes. */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
/* Helpers for the 48-bit RGB input readers below: input_pixel() loads one
 * 16-bit component honoring the format's endianness; r/b swap the component
 * order for the BGR48 variants so the shared templates stay format-agnostic. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* 48-bit RGB/BGR -> 16-bit luma; one pixel is three 16-bit components.
 * The rounding constant keeps the BT.601 weighted sum correctly biased. */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);
        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> 16-bit chroma at full horizontal resolution.
 * Only src1 is read in the visible body; src2 appears unused here. */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);
        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> chroma at half horizontal resolution: each output
 * sample is computed from the rounded average of two adjacent pixels. */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the public uint8_t* entry points (Y, UV, UV-half) for one 48-bit
 * format; the casts adapt the generic byte-pointer interface to the 16-bit
 * component templates above. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* input_pixel() for the 16/32-bit packed RGB readers: 32-bit formats are
 * loaded as native-endian aligned 32-bit words, 16-bit formats honor the
 * format's declared endianness. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic 16/32-bit packed RGB -> luma. The component layout is described
 * by shift/mask parameters (shp = pre-shift of the whole pixel, maskr/g/b +
 * shr/shg/shb extract components, rsh/gsh/bsh align the coefficients, S is
 * the final normalization shift). */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
              rnd = 33 << (S - 1);
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> chroma at full horizontal resolution;
 * same shift/mask parametrization as rgb16_32ToY_c_template. */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << (S - 1);
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> chroma at half horizontal resolution:
 * two adjacent pixels are summed before conversion. The widened masks
 * (mask |= mask << 1) make room for the carry of the pairwise sum; the
 * r+b components are recovered as (px0 + px1) minus the green sum. */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << S, maskgx = ~(maskr | maskb);
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;
        b = (rb & maskb) >> shb;
        /* 565-style layouts need the green sum re-masked after the add */
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;
        /* >> (S + 1) folds the /2 of the pixel-pair average into the shift */
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generate the Y / UV / UV-half reader trio for one packed 16/32-bit RGB
 * layout by pinning the template's shift/mask parameters. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Instantiations: masks/shifts describe each layout's component positions;
 * S = RGB2YUV_SHIFT+8 for 8/6-bit components, +7 for the 5-bit 555 cases. */
rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha channel of packed ABGR/ARGB (alpha-first) input. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
/* Extract the alpha channel of packed RGBA/BGRA (alpha-last) input. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
/* PAL8 -> luma: look each index up in 'pal'; the entry's low byte is Y. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
    for (i=0; i<width; i++) {
        dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma via palette lookup. Both source lines must be the same
 * buffer (paletted input has a single plane), hence the assert. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];
/* 1 bpp (white = 0 convention) -> 8-bit luma: each source bit expands to a
 * 0/255 byte, MSB first; 8 output pixels per input byte. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
        dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp (black = 0 convention) -> 8-bit luma; same bit expansion as
 * monowhite2Y_c but with the opposite polarity (polarity handling is in
 * the elided setup of 'd'). */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
        dst[8*i+j]= ((d>>(7-j))&1)*255;
//FIXME yuy2* can read up to 7 samples too much

/* YUYV 4:2:2 -> luma: Y occupies the even bytes of the packed stream. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* YUYV 4:2:2 -> chroma: U at byte 1 and V at byte 3 of each 4-byte group.
 * Packed input has one plane, so both source pointers must match. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    assert(src1 == src2);
/* Byte-swap a line of 16-bit luma samples (endianness conversion for
 * 9/10/16-bit formats); 'width' counts 16-bit samples. */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *in  = (const uint16_t *) _src;
    uint16_t       *out = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        out[n] = (uint16_t)((in[n] >> 8) | (in[n] << 8));
}
/* Byte-swap one line each of 16-bit U and V samples (endianness
 * conversion); 'width' counts 16-bit samples per plane. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU = (uint16_t *) _dstU;
    uint16_t *outV = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        outU[n] = (uint16_t)((inU[n] >> 8) | (inU[n] << 8));
        outV[n] = (uint16_t)((inV[n] >> 8) | (inV[n] << 8));
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY 4:2:2 -> luma: Y occupies the odd bytes of the packed stream. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* UYVY 4:2:2 -> chroma: U at byte 0 and V at byte 2 of each 4-byte group.
 * Packed input has one plane, so both source pointers must match. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    assert(src1 == src2);
/* De-interleave an NV12/NV21-style chroma plane: even bytes go to dst1,
 * odd bytes to dst2. */
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                                        const uint8_t *src, int width)
    for (i = 0; i < width; i++) {
        dst1[i] = src[2*i+0];
        dst2[i] = src[2*i+1];
/* NV12 chroma reader: interleaved plane is U,V,U,V,... */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma reader: same as NV12 but the plane is V,U,V,U,..., so the
 * destination pointers are swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
/* Re-define input_pixel for the following 24-bit readers (endian-aware
 * 16-bit load; unchanged from the rgb48 helper above). */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed BGR24 -> 8-bit luma using the BT.601 RY/GY/BY weights;
 * the 33<<(SHIFT-1) constant provides rounding plus the 16-offset bias. */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> chroma at full horizontal resolution; 257<<(SHIFT-1)
 * provides rounding plus the 128-offset bias. Both line pointers must
 * reference the same packed buffer. */
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
    assert(src1 == src2);
/* Packed BGR24 -> chroma at half horizontal resolution: two adjacent
 * pixels are summed, and the extra >> 1 is folded into the final shift
 * (hence 257<<SHIFT and >> (SHIFT+1)). */
static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
    assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma; identical to bgr24ToY_c except for the
 * component order in memory. */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> chroma at full horizontal resolution (R,G,B byte order). */
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> chroma at half horizontal resolution: pairwise pixel
 * sums with the /2 folded into the final shift, as in bgr24ToUV_half_c. */
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal scaler for >8-bit input producing 19-bit intermediates in an
 * int32 destination. The normalization shift 'sh' is derived from the
 * source bit depth so differently-deep inputs land on the same scale. */
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                       const int16_t *filter,
                       const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = (bits <= 7) ? 11 : (bits - 4);
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];
        /* accumulate the filter taps for this output sample */
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Horizontal scaler for 9/10-bit input feeding the regular 15-bit int16
 * pipeline; 'sh' equals depth_minus1, so deeper inputs are shifted more. */
static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                       const int16_t *filter,
                       const int16_t *filterPos, int filterSize)
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit; clip the result to the 15-bit pipeline maximum
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
// bilinear / bicubic scaling
/* Generic 8-bit horizontal scaler: for each output sample, apply a
 * filterSize-tap FIR starting at filterPos[i]; >> 7 converts the 14-bit
 * filter scale to the 15-bit intermediate range, with clipping because
 * cubic filter taps can overshoot. */
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
1976 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand 15-bit MPEG-range chroma (16..240 << 7) to full JPEG range, in
 * place on both planes. Inputs are clamped to 30775 first so the 32-bit
 * product cannot overflow; the factor 4663/4096 is 255/224 with the
 * -16<<7 offset folded into the constant. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *planes[2] = { dstU, dstV };
    int p, n;

    for (p = 0; p < 2; p++) {
        int16_t *d = planes[p];
        for (n = 0; n < width; n++) {
            int x = d[n] > 30775 ? 30775 : d[n]; /* same clamp as FFMIN */
            d[n] = (x * 4663 - 9289992) >> 12;   /* -264 */
        }
    }
}
/* Compress 15-bit full (JPEG) range chroma to MPEG range (16..240 << 7),
 * in place on both planes; 1799/2048 is 224/255 with the +16<<7 offset
 * folded into the constant. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *planes[2] = { dstU, dstV };
    int p, n;

    for (p = 0; p < 2; p++) {
        int16_t *d = planes[p];
        for (n = 0; n < width; n++)
            d[n] = (d[n] * 1799 + 4081085) >> 11; /* 1469 */
    }
}
/* Expand 15-bit MPEG-range luma (16..235 << 7) to full JPEG range, in
 * place. The clamp to 30189 keeps the 32-bit product from overflowing;
 * 19077/16384 is 255/219 with the -16<<7 offset folded in. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int x = dst[n] > 30189 ? 30189 : dst[n]; /* same clamp as FFMIN */
        dst[n] = (x * 19077 - 39057361) >> 14;
    }
}
/* Compress 15-bit full (JPEG) range luma to MPEG range (16..235 << 7), in
 * place; 14071/16384 is 219/255 with the +16<<7 offset folded in. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n = width;

    while (n-- > 0)
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
}
/* 19-bit (int32 lane) version of chrRangeToJpeg_c for the 16-bit scaling
 * path; the buffers are int32 arrays passed through int16 pointers, and
 * all constants are scaled by 16 accordingly. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *planes[2] = { (int32_t *) _dstU, (int32_t *) _dstV };
    int p, n;

    for (p = 0; p < 2; p++) {
        int32_t *d = planes[p];
        for (n = 0; n < width; n++) {
            int32_t x = d[n] > (30775 << 4) ? 30775 << 4 : d[n];
            d[n] = (x * 4663 - (9289992 << 4)) >> 12; /* -264 */
        }
    }
}
/* 19-bit (int32 lane) version of chrRangeFromJpeg_c; the additive
 * constant is scaled by 16 to match the wider fixed-point format. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *planes[2] = { (int32_t *) _dstU, (int32_t *) _dstV };
    int p, n;

    for (p = 0; p < 2; p++) {
        int32_t *d = planes[p];
        for (n = 0; n < width; n++)
            d[n] = (d[n] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
    }
}
/* 19-bit (int32 lane) version of lumRangeToJpeg_c; multiplier and shift
 * are rescaled (4769 >> 12 instead of 19077 >> 14) to avoid overflowing
 * the wider samples. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *d = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++) {
        int32_t x = d[n] > (30189 << 4) ? 30189 << 4 : d[n];
        d[n] = (x * 4769 - (39057361 << 2)) >> 12;
    }
}
/* 19-bit (int32 lane) version of lumRangeFromJpeg_c; the additive
 * constant is scaled by 16 to match the wider fixed-point format. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *d = (int32_t *) _dst;
    int n = width;

    while (n-- > 0)
        d[n] = (d[n] * 14071 + (33561947 << 4)) >> 14;
}
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point source position
 * 'xpos' advances by xInc per output sample; xalpha is the 7-bit fractional
 * weight between the two neighbouring source pixels, and the result is on
 * the usual 15-bit intermediate scale (src << 7). */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/* Promote 8-bit samples to 16 bits in place-safe fashion: each source byte
 * is written into both bytes of the destination word (x -> x * 257).
 * Iterating from the end makes it safe for 'src' to alias the low half of
 * '_dst', which is how the 8->16 promotion in hyscale()/hcscale() uses it. */
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    uint8_t *out = (uint8_t *) _dst;
    int n = len;

    while (n-- > 0) {
        out[n * 2]     = src[n];
        out[n * 2 + 1] = src[n];
    }
}
/* Narrow 19-bit int32 intermediates down to the 15-bit int16 pipeline by
 * dropping 4 fractional bits. Forward iteration makes the in-place
 * int32 -> int16 narrowing (dst aliasing src) safe: each int16 slot is
 * written only after the wider slot it overlaps has been read. */
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = (int16_t)(src[n] >> 4);
}
// *** horizontal scale Y line to temp buffer
/*
 * Luma/alpha horizontal pipeline for one line:
 *   1. optional input unpacking to YV12 layout (toYV12, via formatConvBuffer);
 *   2. optional 8 -> 16 bit promotion when scaling at 16 bpp;
 *   3. the actual horizontal scale (generic hScale or the fast bilinear path);
 *   4. optional MPEG<->JPEG range conversion (luma only, never for alpha);
 *   5. optional 19 -> 15 bit narrowing for shallow output formats.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    toYV12(formatConvBuffer, src, srcW, pal);
    src= formatConvBuffer;
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
        src = formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
    convertRange(dst, dstWidth);
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: same 16.16 fixed-point stepping
 * as hyscale_fast_c, applied to both chroma planes at once. The weight pair
 * (xalpha ^ 127, xalpha) sums to 127, yielding the src << 7 scale. */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Chroma horizontal pipeline for one line, mirroring hyscale() but for the
 * U and V planes together: optional chrToYV12 unpacking (with a second
 * scratch area 'buf2' inside formatConvBuffer for the V plane), optional
 * 8 -> 16 bit promotion, horizontal scaling of both planes, optional range
 * conversion and optional 19 -> 15 bit narrowing.
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16);
    c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
    src1= formatConvBuffer;
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
        c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
        src1 = formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
        c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/* Select the C output ("vertical scale + store") functions for the current
 * destination pixel format, writing them through the five out-parameters:
 *   yuv2yuv1/yuv2yuvX     - planar output, unscaled / filtered variants
 *   yuv2packed1/2/X       - packed output for 1-tap / 2-tap / N-tap filters
 * NOTE(review): this listing is non-contiguous (original line numbers skip),
 * so several case labels, break statements and closing braces of the
 * switches below are not visible here. */
2158 static av_always_inline void
2159 find_c_packed_planar_out_funcs(SwsContext *c,
2160                                yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2161                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2162                                yuv2packedX_fn *yuv2packedX)
2164     enum PixelFormat dstFormat = c->dstFormat;
     /* Planar writers: pick by chroma layout / sample depth / endianness. */
2166     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2167         *yuv2yuvX = yuv2nv12X_c;
2168     } else if (is16BPS(dstFormat)) {
2169         *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2170     } else if (is9_OR_10BPS(dstFormat)) {
         /* depth_minus1 == 8 means 9-bit components; otherwise 10-bit. */
2171         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2172             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2174             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2177         *yuv2yuv1 = yuv2yuv1_c;
2178         *yuv2yuvX = yuv2yuvX_c;
     /* Full horizontal chroma interpolation: only the N-tap (X) packed
      * writers exist on this path, no 1-/2-tap shortcuts are set. */
2180     if(c->flags & SWS_FULL_CHR_H_INT) {
2181         switch (dstFormat) {
2184             *yuv2packedX = yuv2rgba32_full_X_c;
2186 #if CONFIG_SWSCALE_ALPHA
2188                 *yuv2packedX = yuv2rgba32_full_X_c;
2190 #endif /* CONFIG_SWSCALE_ALPHA */
2192                 *yuv2packedX = yuv2rgbx32_full_X_c;
2194 #endif /* !CONFIG_SMALL */
2198             *yuv2packedX = yuv2argb32_full_X_c;
2200 #if CONFIG_SWSCALE_ALPHA
2202                 *yuv2packedX = yuv2argb32_full_X_c;
2204 #endif /* CONFIG_SWSCALE_ALPHA */
2206                 *yuv2packedX = yuv2xrgb32_full_X_c;
2208 #endif /* !CONFIG_SMALL */
2212             *yuv2packedX = yuv2bgra32_full_X_c;
2214 #if CONFIG_SWSCALE_ALPHA
2216                 *yuv2packedX = yuv2bgra32_full_X_c;
2218 #endif /* CONFIG_SWSCALE_ALPHA */
2220                 *yuv2packedX = yuv2bgrx32_full_X_c;
2222 #endif /* !CONFIG_SMALL */
2226             *yuv2packedX = yuv2abgr32_full_X_c;
2228 #if CONFIG_SWSCALE_ALPHA
2230                 *yuv2packedX = yuv2abgr32_full_X_c;
2232 #endif /* CONFIG_SWSCALE_ALPHA */
2234                 *yuv2packedX = yuv2xbgr32_full_X_c;
2236 #endif /* !CONFIG_SMALL */
2239             *yuv2packedX = yuv2rgb24_full_X_c;
2242             *yuv2packedX = yuv2bgr24_full_X_c;
     /* Normal path: per-format packed writers with 1-tap, 2-tap and N-tap
      * variants so the vertical scaler can pick the cheapest one. */
2246         switch (dstFormat) {
2247         case PIX_FMT_GRAY16BE:
2248             *yuv2packed1 = yuv2gray16BE_1_c;
2249             *yuv2packed2 = yuv2gray16BE_2_c;
2250             *yuv2packedX = yuv2gray16BE_X_c;
2252         case PIX_FMT_GRAY16LE:
2253             *yuv2packed1 = yuv2gray16LE_1_c;
2254             *yuv2packed2 = yuv2gray16LE_2_c;
2255             *yuv2packedX = yuv2gray16LE_X_c;
2257         case PIX_FMT_MONOWHITE:
2258             *yuv2packed1 = yuv2monowhite_1_c;
2259             *yuv2packed2 = yuv2monowhite_2_c;
2260             *yuv2packedX = yuv2monowhite_X_c;
2262         case PIX_FMT_MONOBLACK:
2263             *yuv2packed1 = yuv2monoblack_1_c;
2264             *yuv2packed2 = yuv2monoblack_2_c;
2265             *yuv2packedX = yuv2monoblack_X_c;
2267         case PIX_FMT_YUYV422:
2268             *yuv2packed1 = yuv2yuyv422_1_c;
2269             *yuv2packed2 = yuv2yuyv422_2_c;
2270             *yuv2packedX = yuv2yuyv422_X_c;
2272         case PIX_FMT_UYVY422:
2273             *yuv2packed1 = yuv2uyvy422_1_c;
2274             *yuv2packed2 = yuv2uyvy422_2_c;
2275             *yuv2packedX = yuv2uyvy422_X_c;
2277         case PIX_FMT_RGB48LE:
2278             *yuv2packed1 = yuv2rgb48le_1_c;
2279             *yuv2packed2 = yuv2rgb48le_2_c;
2280             *yuv2packedX = yuv2rgb48le_X_c;
2282         case PIX_FMT_RGB48BE:
2283             *yuv2packed1 = yuv2rgb48be_1_c;
2284             *yuv2packed2 = yuv2rgb48be_2_c;
2285             *yuv2packedX = yuv2rgb48be_X_c;
2287         case PIX_FMT_BGR48LE:
2288             *yuv2packed1 = yuv2bgr48le_1_c;
2289             *yuv2packed2 = yuv2bgr48le_2_c;
2290             *yuv2packedX = yuv2bgr48le_X_c;
2292         case PIX_FMT_BGR48BE:
2293             *yuv2packed1 = yuv2bgr48be_1_c;
2294             *yuv2packed2 = yuv2bgr48be_2_c;
2295             *yuv2packedX = yuv2bgr48be_X_c;
2300             *yuv2packed1 = yuv2rgb32_1_c;
2301             *yuv2packed2 = yuv2rgb32_2_c;
2302             *yuv2packedX = yuv2rgb32_X_c;
2304 #if CONFIG_SWSCALE_ALPHA
2306                 *yuv2packed1 = yuv2rgba32_1_c;
2307                 *yuv2packed2 = yuv2rgba32_2_c;
2308                 *yuv2packedX = yuv2rgba32_X_c;
2310 #endif /* CONFIG_SWSCALE_ALPHA */
2312                 *yuv2packed1 = yuv2rgbx32_1_c;
2313                 *yuv2packed2 = yuv2rgbx32_2_c;
2314                 *yuv2packedX = yuv2rgbx32_X_c;
2316 #endif /* !CONFIG_SMALL */
2318         case PIX_FMT_RGB32_1:
2319         case PIX_FMT_BGR32_1:
2321             *yuv2packed1 = yuv2rgb32_1_1_c;
2322             *yuv2packed2 = yuv2rgb32_1_2_c;
2323             *yuv2packedX = yuv2rgb32_1_X_c;
2325 #if CONFIG_SWSCALE_ALPHA
2327                 *yuv2packed1 = yuv2rgba32_1_1_c;
2328                 *yuv2packed2 = yuv2rgba32_1_2_c;
2329                 *yuv2packedX = yuv2rgba32_1_X_c;
2331 #endif /* CONFIG_SWSCALE_ALPHA */
2333                 *yuv2packed1 = yuv2rgbx32_1_1_c;
2334                 *yuv2packed2 = yuv2rgbx32_1_2_c;
2335                 *yuv2packedX = yuv2rgbx32_1_X_c;
2337 #endif /* !CONFIG_SMALL */
2340             *yuv2packed1 = yuv2rgb24_1_c;
2341             *yuv2packed2 = yuv2rgb24_2_c;
2342             *yuv2packedX = yuv2rgb24_X_c;
2345             *yuv2packed1 = yuv2bgr24_1_c;
2346             *yuv2packed2 = yuv2bgr24_2_c;
2347             *yuv2packedX = yuv2bgr24_X_c;
2349         case PIX_FMT_RGB565LE:
2350         case PIX_FMT_RGB565BE:
2351         case PIX_FMT_BGR565LE:
2352         case PIX_FMT_BGR565BE:
2353             *yuv2packed1 = yuv2rgb16_1_c;
2354             *yuv2packed2 = yuv2rgb16_2_c;
2355             *yuv2packedX = yuv2rgb16_X_c;
2357         case PIX_FMT_RGB555LE:
2358         case PIX_FMT_RGB555BE:
2359         case PIX_FMT_BGR555LE:
2360         case PIX_FMT_BGR555BE:
2361             *yuv2packed1 = yuv2rgb15_1_c;
2362             *yuv2packed2 = yuv2rgb15_2_c;
2363             *yuv2packedX = yuv2rgb15_X_c;
2365         case PIX_FMT_RGB444LE:
2366         case PIX_FMT_RGB444BE:
2367         case PIX_FMT_BGR444LE:
2368         case PIX_FMT_BGR444BE:
2369             *yuv2packed1 = yuv2rgb12_1_c;
2370             *yuv2packed2 = yuv2rgb12_2_c;
2371             *yuv2packedX = yuv2rgb12_X_c;
2375             *yuv2packed1 = yuv2rgb8_1_c;
2376             *yuv2packed2 = yuv2rgb8_2_c;
2377             *yuv2packedX = yuv2rgb8_X_c;
2381             *yuv2packed1 = yuv2rgb4_1_c;
2382             *yuv2packed2 = yuv2rgb4_2_c;
2383             *yuv2packedX = yuv2rgb4_X_c;
2385         case PIX_FMT_RGB4_BYTE:
2386         case PIX_FMT_BGR4_BYTE:
2387             *yuv2packed1 = yuv2rgb4b_1_c;
2388             *yuv2packed2 = yuv2rgb4b_2_c;
2389             *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for the ring-buffer tracing below; 0 disables it. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Trace helper for swScale(): logs through the SwsContext 'c' that is in
 * scope at every call site. Wrapped in do { } while (0) so the macro is a
 * single statement: the original bare "if (...) av_log(...)" form mis-binds
 * a following 'else' (dangling-else) when the macro is used inside an
 * if/else in the caller. */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
/* Main C scaling loop.
 * Horizontally scales the incoming slice (src[], srcStride[], rows
 * [srcSliceY, srcSliceY+srcSliceH)) line by line into the luma/chroma ring
 * buffers, then vertically scales/converts the buffered lines into dst[].
 * Returns the number of output lines written (dstY - lastDstY).
 * NOTE(review): this listing is non-contiguous — the declarations of
 * dstY, lastDstY and enough_lines, and several braces, are among the
 * elided lines. */
2398 static int swScale(SwsContext *c, const uint8_t* src[],
2399                    int srcStride[], int srcSliceY,
2400                    int srcSliceH, uint8_t* dst[], int dstStride[])
2402     /* load a few things into local vars to make the code more readable? and faster */
2403     const int srcW= c->srcW;
2404     const int dstW= c->dstW;
2405     const int dstH= c->dstH;
2406     const int chrDstW= c->chrDstW;
2407     const int chrSrcW= c->chrSrcW;
2408     const int lumXInc= c->lumXInc;
2409     const int chrXInc= c->chrXInc;
2410     const enum PixelFormat dstFormat= c->dstFormat;
2411     const int flags= c->flags;
2412     int16_t *vLumFilterPos= c->vLumFilterPos;
2413     int16_t *vChrFilterPos= c->vChrFilterPos;
2414     int16_t *hLumFilterPos= c->hLumFilterPos;
2415     int16_t *hChrFilterPos= c->hChrFilterPos;
2416     int16_t *vLumFilter= c->vLumFilter;
2417     int16_t *vChrFilter= c->vChrFilter;
2418     int16_t *hLumFilter= c->hLumFilter;
2419     int16_t *hChrFilter= c->hChrFilter;
2420     int32_t *lumMmxFilter= c->lumMmxFilter;
2421     int32_t *chrMmxFilter= c->chrMmxFilter;
2422     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2423     const int vLumFilterSize= c->vLumFilterSize;
2424     const int vChrFilterSize= c->vChrFilterSize;
2425     const int hLumFilterSize= c->hLumFilterSize;
2426     const int hChrFilterSize= c->hChrFilterSize;
2427     int16_t **lumPixBuf= c->lumPixBuf;
2428     int16_t **chrUPixBuf= c->chrUPixBuf;
2429     int16_t **chrVPixBuf= c->chrVPixBuf;
2430     int16_t **alpPixBuf= c->alpPixBuf;
2431     const int vLumBufSize= c->vLumBufSize;
2432     const int vChrBufSize= c->vChrBufSize;
2433     uint8_t *formatConvBuffer= c->formatConvBuffer;
     /* Source slice coordinates in chroma resolution; the -((-x)>>s) form
      * rounds the height up instead of down. */
2434     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2435     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2437     uint32_t *pal=c->pal_yuv;
2438     yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2439     yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2440     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2441     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2442     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2443     int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2445     /* vars which will change and which we need to store back in the context */
2447     int lumBufIndex= c->lumBufIndex;
2448     int chrBufIndex= c->chrBufIndex;
2449     int lastInLumBuf= c->lastInLumBuf;
2450     int lastInChrBuf= c->lastInChrBuf;
2452     if (isPacked(c->srcFormat)) {
2460         srcStride[3]= srcStride[0];
     /* vChrDrop discards chroma lines by widening the chroma strides. */
2462     srcStride[1]<<= c->vChrDrop;
2463     srcStride[2]<<= c->vChrDrop;
2465     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2466                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2467                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2468     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2469                    srcSliceY, srcSliceH, dstY, dstH);
2470     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2471                    vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2473     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2474         static int warnedAlready=0; //FIXME move this into the context perhaps
2475         if (flags & SWS_PRINT_INFO && !warnedAlready) {
2476             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2477                    "         ->cannot do aligned memory accesses anymore\n");
2482     /* Note the user might start scaling the picture in the middle so this
2483        will not get executed. This is not really intended but works
2484        currently, so people might do it. */
2485     if (srcSliceY ==0) {
2493     if (!should_dither) {
2494         c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
     /* Per output line: fill the ring buffers far enough, then emit. */
2498     for (;dstY < dstH; dstY++) {
2499         const int chrDstY= dstY>>c->chrDstVSubSample;
2500         uint8_t *dest[4] = {
2501             dst[0] + dstStride[0] * dstY,
2502             dst[1] + dstStride[1] * chrDstY,
2503             dst[2] + dstStride[2] * chrDstY,
2504             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
         /* First/last source lines the vertical filter needs for this
          * output line (and for the last line of the chroma-aligned group,
          * hence the "| subsample mask" in firstLumSrcY2). */
2507         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2508         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2509         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2510         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2511         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2512         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2515         //handle holes (FAST_BILINEAR & weird filters)
2516         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2517         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2518         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2519         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2521         DEBUG_BUFFERS("dstY: %d\n", dstY);
2522         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2523                          firstLumSrcY,    lastLumSrcY, lastInLumBuf);
2524         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2525                          firstChrSrcY,    lastChrSrcY, lastInChrBuf);
2527         // Do we have enough lines in this slice to output the dstY line
2528         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2530         if (!enough_lines) {
             /* Not enough input: buffer what this slice provides and stop. */
2531             lastLumSrcY = srcSliceY + srcSliceH - 1;
2532             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2533             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2534                                             lastLumSrcY, lastChrSrcY);
2537         //Do horizontal scaling
2538         while(lastInLumBuf < lastLumSrcY) {
2539             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2540             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2542             assert(lumBufIndex < 2*vLumBufSize);
2543             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2544             assert(lastInLumBuf + 1 - srcSliceY >= 0);
2545             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2546                     hLumFilter, hLumFilterPos, hLumFilterSize,
2549             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2550                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2551                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2555             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2556                                lumBufIndex,    lastInLumBuf);
2558         while(lastInChrBuf < lastChrSrcY) {
2559             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2560             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2562             assert(chrBufIndex < 2*vChrBufSize);
2563             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2564             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2565             //FIXME replace parameters through context struct (some at least)
2567             if (c->needs_hcscale)
2568                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2569                           chrDstW, src1, src2, chrSrcW, chrXInc,
2570                           hChrFilter, hChrFilterPos, hChrFilterSize,
2571                           formatConvBuffer, pal);
2573             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2574                                chrBufIndex,    lastInChrBuf);
2576         //wrap buf index around to stay inside the ring buffer
2577         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2578         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2580             break; //we can't output a dstY line so let's try with the next slice
2583         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2585         if (should_dither) {
2586             c->chrDither8 = dither_8x8_128[chrDstY & 7];
2587             c->lumDither8 = dither_8x8_128[dstY & 7];
2589         if (dstY >= dstH-2) {
2590             // hmm looks like we can't use MMX here without overwriting this array's tail
2591             find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2592                                            &yuv2packed1, &yuv2packed2,
         /* Pointers into the ring buffers for the first line the vertical
          * filter reads; "+ bufSize" keeps the index non-negative before
          * the (elided) wrap-around handling. */
2597             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2598             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2599             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2600             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2601             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2602                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2603                 if ((dstY&chrSkipMask) || isGray(dstFormat))
2604                     dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2605                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2606                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2607                     yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2608                              dest, dstW, chrDstW);
2609                 } else { //General YV12
2610                     yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2611                              lumSrcPtr, vLumFilterSize,
2612                              vChrFilter + chrDstY * vChrFilterSize,
2613                              chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2614                              alpSrcPtr, dest, dstW, chrDstW);
2617                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
2618                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2619                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2620                     int chrAlpha = vChrFilter[2 * dstY + 1];
2621                     yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2622                                 alpPixBuf ? *alpSrcPtr : NULL,
2623                                 dest[0], dstW, chrAlpha, dstY);
2624                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2625                     int lumAlpha = vLumFilter[2 * dstY + 1];
2626                     int chrAlpha = vChrFilter[2 * dstY + 1];
                     /* *0x10001 duplicates the 16-bit coefficient into both
                      * halves of the 32-bit MMX filter word. */
2628                     lumMmxFilter[3] = vLumFilter[2 * dstY    ] * 0x10001;
2630                     chrMmxFilter[3] = vChrFilter[2 * chrDstY ] * 0x10001;
2631                     yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2632                                 alpPixBuf ? alpSrcPtr : NULL,
2633                                 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2634                 } else { //general RGB
2635                     yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2636                                 lumSrcPtr, vLumFilterSize,
2637                                 vChrFilter + dstY * vChrFilterSize,
2638                                 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2639                                 alpSrcPtr, dest[0], dstW, dstY);
     /* YUVA output without a source alpha plane: synthesize opaque alpha. */
2645     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2646         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2649     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2650         __asm__ volatile("sfence":::"memory");
2654     /* store changed local vars back in the context */
2656     c->lumBufIndex= lumBufIndex;
2657     c->chrBufIndex= chrBufIndex;
2658     c->lastInLumBuf= lastInLumBuf;
2659     c->lastInChrBuf= lastInChrBuf;
2661     return dstY - lastDstY;
/* One-time initialization of the C code paths in the SwsContext:
 * picks the output writers, the input unpackers (chrToYV12 / lumToYV12 /
 * alpToYV12), the horizontal scaler, range converters, and decides whether
 * horizontal chroma scaling is needed at all.
 * NOTE(review): this listing is non-contiguous (original line numbers
 * skip), so several switch headers, case labels and break statements are
 * not visible here. */
2664 static av_cold void sws_init_swScale_c(SwsContext *c)
2666     enum PixelFormat srcFormat = c->srcFormat;
2668     find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2669                                    &c->yuv2packed1, &c->yuv2packed2,
     /* Input chroma unpacker: convert source chroma to planar YV12-style
      * U/V lines. NULL means the source chroma is already usable as-is. */
2672     c->chrToYV12 = NULL;
2674     case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
2675     case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
2676     case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
2677     case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
2681     case PIX_FMT_BGR4_BYTE:
2682     case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
     /* High-bit-depth planar formats in the non-native byte order get a
      * byteswap pass (same handler for the LE and BE groups; which group
      * needs it is decided by the elided #if around them). */
2684     case PIX_FMT_YUV444P9LE:
2685     case PIX_FMT_YUV420P9LE:
2686     case PIX_FMT_YUV422P10LE:
2687     case PIX_FMT_YUV444P10LE:
2688     case PIX_FMT_YUV420P10LE:
2689     case PIX_FMT_YUV420P16LE:
2690     case PIX_FMT_YUV422P16LE:
2691     case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2693     case PIX_FMT_YUV444P9BE:
2694     case PIX_FMT_YUV420P9BE:
2695     case PIX_FMT_YUV444P10BE:
2696     case PIX_FMT_YUV422P10BE:
2697     case PIX_FMT_YUV420P10BE:
2698     case PIX_FMT_YUV420P16BE:
2699     case PIX_FMT_YUV422P16BE:
2700     case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
     /* RGB sources: when chroma is horizontally subsampled, use the *_half
      * variants that average pairs of source pixels while converting. */
2703     if (c->chrSrcHSubSample) {
2705         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2706         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2707         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2708         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2709         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
2710         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
2711         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
2712         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2713         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2714         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2715         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2716         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
2717         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
2718         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
2719         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2720         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2721         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2722         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2726         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2727         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2728         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2729         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2730         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
2731         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
2732         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
2733         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2734         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2735         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2736         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2737         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
2738         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
2739         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
2740         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2741         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2742         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2743         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
     /* Input luma / alpha unpackers, selected the same way. */
2747     c->lumToYV12 = NULL;
2748     c->alpToYV12 = NULL;
2749     switch (srcFormat) {
2751     case PIX_FMT_YUV444P9LE:
2752     case PIX_FMT_YUV420P9LE:
2753     case PIX_FMT_YUV444P10LE:
2754     case PIX_FMT_YUV422P10LE:
2755     case PIX_FMT_YUV420P10LE:
2756     case PIX_FMT_YUV420P16LE:
2757     case PIX_FMT_YUV422P16LE:
2758     case PIX_FMT_YUV444P16LE:
2759     case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2761     case PIX_FMT_YUV444P9BE:
2762     case PIX_FMT_YUV420P9BE:
2763     case PIX_FMT_YUV444P10BE:
2764     case PIX_FMT_YUV422P10BE:
2765     case PIX_FMT_YUV420P10BE:
2766     case PIX_FMT_YUV420P16BE:
2767     case PIX_FMT_YUV422P16BE:
2768     case PIX_FMT_YUV444P16BE:
2769     case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2771     case PIX_FMT_YUYV422  :
2772     case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
2773     case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c;    break;
2774     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
2775     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2776     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2777     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2778     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2779     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
2780     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2781     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2782     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2783     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2787     case PIX_FMT_BGR4_BYTE:
2788     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2789     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2790     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2791     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
2792     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2793     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
2794     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2795     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2796     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2797     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2798     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2801     switch (srcFormat) {
2803     case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
2805     case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
     /* Y400A alpha is interleaved like YUY2 chroma-less data, so the UYVY
      * luma extractor reads the odd bytes (the alpha samples). */
2806     case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
     /* Horizontal scaler: 8-bit path may use the fast-bilinear shortcut;
      * the 10/16-bit path needs the depth-scaling helpers too. */
2810     if (c->scalingBpp == 8) {
2811     c->hScale       = hScale_c;
2812     if (c->flags & SWS_FAST_BILINEAR) {
2813         c->hyscale_fast = hyscale_fast_c;
2814         c->hcscale_fast = hcscale_fast_c;
2817         c->hScale = c->scalingBpp == 16 ? hScale16_c : hScale10_c;
2818         c->scale19To15Fw = scale19To15Fw_c;
2819         c->scale8To16Rv  = scale8To16Rv_c;
     /* JPEG (full) <-> MPEG (limited) range conversion, when src and dst
      * ranges differ and the destination is not RGB. */
2822     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2823         if (c->scalingBpp <= 10) {
2825                 c->lumConvertRange = lumRangeFromJpeg_c;
2826                 c->chrConvertRange = chrRangeFromJpeg_c;
2828                 c->lumConvertRange = lumRangeToJpeg_c;
2829                 c->chrConvertRange = chrRangeToJpeg_c;
2833                 c->lumConvertRange = lumRangeFromJpeg16_c;
2834                 c->chrConvertRange = chrRangeFromJpeg16_c;
2836                 c->lumConvertRange = lumRangeToJpeg16_c;
2837                 c->chrConvertRange = chrRangeToJpeg16_c;
     /* Gray and 1-bit sources have no chroma worth scaling. */
2842     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2843           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2844         c->needs_hcscale = 1;
2847 SwsFunc ff_getSwsFunc(SwsContext *c)
2849 sws_init_swScale_c(c);
2852 ff_sws_init_swScale_mmx(c);
2854 ff_sws_init_swScale_altivec(c);