2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
// RGB -> YUV conversion coefficients as Q15 fixed point (RGB2YUV_SHIFT = 15).
// Luma weights are scaled by 219/255 and chroma weights by 224/255, i.e.
// limited ("TV") range; the 0.299/0.587/0.114 etc. factors are the classic
// BT.601 matrix. The +0.5 rounds to nearest before truncation to int.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
// 2x2 ordered-dither matrix, 4 levels; used below in yuv2rgb_write for the
// 6-bit green channel of RGB565/BGR565 output.
// NOTE(review): the closing "};" of this initializer is not visible here —
// it appears to have been lost in extraction; confirm against upstream.
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 ordered-dither matrix, 8 levels; used in yuv2rgb_write for the 5-bit
// red/blue channels of 565 output and all channels of 555 output.
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 ordered-dither matrix, 16 levels; used in yuv2rgb_write for the 4-bit
// channels of RGB444/BGR444 output. Non-static: shared with other units.
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 ordered-dither matrix, 32 levels; used in yuv2rgb_write for the
// red/green channels of RGB8/BGR8 output.
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 ordered-dither matrix, ~73 levels; used in yuv2rgb_write for the blue
// channel of RGB8/BGR8 and the green channel of RGB4/BGR4 output.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 ordered-dither matrix, ~220 levels; used in the yuv2mono_* templates
// (1bpp output) and in yuv2rgb_write for RGB4/BGR4 red+blue channels.
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
// NOTE(review): this re-declares dither_8x8_220, which is already defined
// above — a compile-time redefinition as the file stands. In upstream
// swscale.c these gamma-corrected variants are disabled alternates; the
// #if/#else (or comment) guards appear to have been lost in extraction.
// Confirm against upstream and restore the guards.
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
// NOTE(review): another redefinition of dither_8x8_220 — in upstream this
// gamma-2.0 variant is a disabled alternate; guards lost in extraction.
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
// NOTE(review): another redefinition of dither_8x8_220 — in upstream this
// gamma-2.5 variant is a disabled alternate; guards lost in extraction.
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
// 8x8 ordered-dither matrix, 128 levels; not referenced in this chunk —
// presumably used by other output paths (e.g. 1bpp) elsewhere; confirm.
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
// Eight packed bytes of 64, 8-byte aligned. Exported (ff_ prefix) — not used
// in this chunk; presumably a SIMD rounding/bias constant used by the
// assembly output paths — confirm against the x86 code.
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
// Vertical filter + output stage for 16-bit planar YUV (and alpha).
// Applies the lumFilter/chrFilter FIRs to 32-bit intermediate samples and
// stores each result as a 16-bit pixel via AV_WB16/AV_WL16, selected by the
// compile-time 'big_endian' template parameter; the products are pre-shifted
// by 1 to keep the 32-bit accumulator from overflowing.
// NOTE(review): braces, the 'int i, j;' declarations and parts of the
// output_pixel #if/#else appear lost in extraction — confirm upstream.
198 static av_always_inline void
199 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
200 int lumFilterSize, const int16_t *chrFilter,
201 const int32_t **chrUSrc, const int32_t **chrVSrc,
202 int chrFilterSize, const int32_t **alpSrc,
203 uint16_t *dest[4], int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
209 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
// Final right-shift bringing the accumulator down to output_bits; the -1
// compensates for the >>1 applied to every product below.
210 int shift = 15 + 16 - output_bits - 1;
212 #define output_pixel(pos, val) \
214 AV_WB16(pos, av_clip_uint16(val >> shift)); \
216 AV_WL16(pos, av_clip_uint16(val >> shift)); \
// Luma plane: rounding bias, then the FIR accumulation.
218 for (i = 0; i < dstW; i++) {
219 int val = 1 << (30-output_bits - 1);
222 for (j = 0; j < lumFilterSize; j++)
223 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
225 output_pixel(&yDest[i], val);
// Chroma planes (U and V together, chrDstW wide).
229 for (i = 0; i < chrDstW; i++) {
230 int u = 1 << (30-output_bits - 1);
231 int v = 1 << (30-output_bits - 1);
234 for (j = 0; j < chrFilterSize; j++) {
235 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
236 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
239 output_pixel(&uDest[i], u);
240 output_pixel(&vDest[i], v);
// Optional alpha plane, filtered with the luma filter.
244 if (CONFIG_SWSCALE_ALPHA && aDest) {
245 for (i = 0; i < dstW; i++) {
246 int val = 1 << (30-output_bits - 1);
249 for (j = 0; j < lumFilterSize; j++)
250 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
252 output_pixel(&aDest[i], val);
// Vertical filter + output stage for 9/10-bit planar YUV (and alpha).
// Same structure as yuv2yuvX16_c_template but with 16-bit intermediates and
// av_clip_uintp2(..., output_bits) so values are clipped to exactly
// output_bits (9 or 10) before the 16-bit endian-aware store.
// NOTE(review): braces, 'int i, j;' and the output_pixel #if/#else lines
// appear lost in extraction — confirm upstream.
258 static av_always_inline void
259 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
260 int lumFilterSize, const int16_t *chrFilter,
261 const int16_t **chrUSrc, const int16_t **chrVSrc,
262 int chrFilterSize, const int16_t **alpSrc,
263 uint16_t *dest[4], int dstW, int chrDstW,
264 int big_endian, int output_bits)
266 //FIXME Optimize (just quickly written not optimized..)
268 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
269 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
// 11-bit intermediates here (vs 15 in the 16-bit template).
270 int shift = 11 + 16 - output_bits - 1;
272 #define output_pixel(pos, val) \
274 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
276 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
278 for (i = 0; i < dstW; i++) {
279 int val = 1 << (26-output_bits - 1);
282 for (j = 0; j < lumFilterSize; j++)
283 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
285 output_pixel(&yDest[i], val);
289 for (i = 0; i < chrDstW; i++) {
290 int u = 1 << (26-output_bits - 1);
291 int v = 1 << (26-output_bits - 1);
294 for (j = 0; j < chrFilterSize; j++) {
295 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
296 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
299 output_pixel(&uDest[i], u);
300 output_pixel(&vDest[i], v);
304 if (CONFIG_SWSCALE_ALPHA && aDest) {
305 for (i = 0; i < dstW; i++) {
306 int val = 1 << (26-output_bits - 1);
309 for (j = 0; j < lumFilterSize; j++)
310 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
312 output_pixel(&aDest[i], val);
// Generator macro: expands to a concrete yuv2yuvX<bits><BE|LE>_c output
// function matching the generic int16_t** signature, which casts the source
// arrays to the template's element type (int16_t for 9/10-bit, int32_t for
// 16-bit) and forwards to the given template with is_be/bits baked in.
318 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
319 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
320 const int16_t **_lumSrc, int lumFilterSize, \
321 const int16_t *chrFilter, const int16_t **_chrUSrc, \
322 const int16_t **_chrVSrc, \
323 int chrFilterSize, const int16_t **_alpSrc, \
324 uint8_t *_dest[4], int dstW, int chrDstW) \
326 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
327 **chrUSrc = (const typeX_t **) _chrUSrc, \
328 **chrVSrc = (const typeX_t **) _chrVSrc, \
329 **alpSrc = (const typeX_t **) _alpSrc; \
330 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
331 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
332 alpSrc, (uint16_t **) _dest, \
333 dstW, chrDstW, is_be, bits); \
// Instantiate 9/10-bit (16-bit intermediates) and 16-bit (32-bit
// intermediates) variants, each in both endiannesses.
335 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
336 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
337 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
338 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
339 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
340 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
// Vertical filter + output stage for 8-bit planar YUV (and alpha).
// Each output byte is the filtered sum, biased with the per-line dither
// value (lumDither8/chrDither8 from the context, shifted into the fraction
// bits), then >>19 and clipped to [0,255].
// NOTE(review): braces and 'int i, j;' declarations lost in extraction.
342 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
343 const int16_t **lumSrc, int lumFilterSize,
344 const int16_t *chrFilter, const int16_t **chrUSrc,
345 const int16_t **chrVSrc,
346 int chrFilterSize, const int16_t **alpSrc,
347 uint8_t *dest[4], int dstW, int chrDstW)
349 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
350 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
352 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
354 //FIXME Optimize (just quickly written not optimized..)
355 for (i=0; i<dstW; i++) {
356 int val = lumDither[i & 7] << 12;
358 for (j=0; j<lumFilterSize; j++)
359 val += lumSrc[j][i] * lumFilter[j];
361 yDest[i]= av_clip_uint8(val>>19);
365 for (i=0; i<chrDstW; i++) {
366 int u = chrDither[i & 7] << 12;
// V uses the dither pattern offset by 3 so U and V noise decorrelate.
367 int v = chrDither[(i + 3) & 7] << 12;
369 for (j=0; j<chrFilterSize; j++) {
370 u += chrUSrc[j][i] * chrFilter[j];
371 v += chrVSrc[j][i] * chrFilter[j];
374 uDest[i]= av_clip_uint8(u>>19);
375 vDest[i]= av_clip_uint8(v>>19);
378 if (CONFIG_SWSCALE_ALPHA && aDest)
379 for (i=0; i<dstW; i++) {
380 int val = lumDither[i & 7] << 12;
382 for (j=0; j<lumFilterSize; j++)
383 val += alpSrc[j][i] * lumFilter[j];
385 aDest[i]= av_clip_uint8(val>>19);
// Unfiltered (1:1 vertical) output stage for 8-bit planar YUV: takes a
// single source line per plane, adds the dither byte and drops the 7
// fraction bits, clipping to [0,255]. Used when no vertical scaling/
// interpolation is needed.
// NOTE(review): braces and 'int i;' declaration lost in extraction.
389 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
390 const int16_t *chrUSrc, const int16_t *chrVSrc,
391 const int16_t *alpSrc,
392 uint8_t *dest[4], int dstW, int chrDstW)
394 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
395 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
397 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
399 for (i=0; i<dstW; i++) {
400 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
401 yDest[i]= av_clip_uint8(val);
405 for (i=0; i<chrDstW; i++) {
406 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
407 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
408 uDest[i]= av_clip_uint8(u);
409 vDest[i]= av_clip_uint8(v);
412 if (CONFIG_SWSCALE_ALPHA && aDest)
413 for (i=0; i<dstW; i++) {
414 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
415 aDest[i]= av_clip_uint8(val);
// Vertical filter + output stage for NV12/NV21: planar luma plus one
// interleaved chroma plane. NV12 stores U then V per pair; the else branch
// (NV21) swaps them. Same dither + >>19 + clip pipeline as yuv2yuvX_c.
// NOTE(review): braces and 'int i, j;' declarations lost in extraction.
419 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
420 const int16_t **lumSrc, int lumFilterSize,
421 const int16_t *chrFilter, const int16_t **chrUSrc,
422 const int16_t **chrVSrc, int chrFilterSize,
423 const int16_t **alpSrc, uint8_t *dest[4],
424 int dstW, int chrDstW)
426 uint8_t *yDest = dest[0], *uDest = dest[1];
427 enum PixelFormat dstFormat = c->dstFormat;
428 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
430 //FIXME Optimize (just quickly written not optimized..)
432 for (i=0; i<dstW; i++) {
433 int val = lumDither[i & 7] << 12;
435 for (j=0; j<lumFilterSize; j++)
436 val += lumSrc[j][i] * lumFilter[j];
438 yDest[i]= av_clip_uint8(val>>19);
// Chroma: U/V interleaved into the single uDest plane.
444 if (dstFormat == PIX_FMT_NV12)
445 for (i=0; i<chrDstW; i++) {
446 int u = chrDither[i & 7] << 12;
447 int v = chrDither[(i + 3) & 7] << 12;
449 for (j=0; j<chrFilterSize; j++) {
450 u += chrUSrc[j][i] * chrFilter[j];
451 v += chrVSrc[j][i] * chrFilter[j];
454 uDest[2*i]= av_clip_uint8(u>>19);
455 uDest[2*i+1]= av_clip_uint8(v>>19);
// NV21: same computation, V stored first.
458 for (i=0; i<chrDstW; i++) {
459 int u = chrDither[i & 7] << 12;
460 int v = chrDither[(i + 3) & 7] << 12;
462 for (j=0; j<chrFilterSize; j++) {
463 u += chrUSrc[j][i] * chrFilter[j];
464 v += chrVSrc[j][i] * chrFilter[j];
467 uDest[2*i]= av_clip_uint8(v>>19);
468 uDest[2*i+1]= av_clip_uint8(u>>19);
// output_pixel: endian-aware 16-bit store keyed on the gray16 target format.
// NOTE(review): the macro body (AV_WB16/AV_WL16 branches) is truncated here
// — lines lost in extraction; confirm upstream.
472 #define output_pixel(pos, val) \
473 if (target == PIX_FMT_GRAY16BE) { \
// Vertical filter + output for 16-bit grayscale: filters two luma samples
// per iteration and stores them; the 0x10000 test clips only when an
// overflow bit is actually set (cheap fast path).
479 static av_always_inline void
480 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
481 const int32_t **lumSrc, int lumFilterSize,
482 const int16_t *chrFilter, const int32_t **chrUSrc,
483 const int32_t **chrVSrc, int chrFilterSize,
484 const int32_t **alpSrc, uint16_t *dest, int dstW,
485 int y, enum PixelFormat target)
489 for (i = 0; i < (dstW >> 1); i++) {
494 for (j = 0; j < lumFilterSize; j++) {
495 Y1 += lumSrc[j][i * 2] * lumFilter[j];
496 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
500 if ((Y1 | Y2) & 0x10000) {
501 Y1 = av_clip_uint16(Y1);
502 Y2 = av_clip_uint16(Y2);
504 output_pixel(&dest[i * 2 + 0], Y1);
505 output_pixel(&dest[i * 2 + 1], Y2);
// Two-line vertical interpolation for 16-bit grayscale: blends buf[0] and
// buf[1] with 12-bit weights yalpha/yalpha1 (4095 - yalpha), two pixels per
// iteration. Chroma/alpha buffers are unused for gray output.
509 static av_always_inline void
510 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
511 const int32_t *ubuf[2], const int32_t *vbuf[2],
512 const int32_t *abuf[2], uint16_t *dest, int dstW,
513 int yalpha, int uvalpha, int y,
514 enum PixelFormat target)
516 int yalpha1 = 4095 - yalpha;
518 const int32_t *buf0 = buf[0], *buf1 = buf[1];
520 for (i = 0; i < (dstW >> 1); i++) {
521 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
522 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
524 output_pixel(&dest[i * 2 + 0], Y1);
525 output_pixel(&dest[i * 2 + 1], Y2);
// Single-line (no vertical interpolation) output for 16-bit grayscale:
// only rescales the intermediate samples (<< 1) and stores them.
529 static av_always_inline void
530 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
531 const int32_t *ubuf[2], const int32_t *vbuf[2],
532 const int32_t *abuf0, uint16_t *dest, int dstW,
533 int uvalpha, int y, enum PixelFormat target)
537 for (i = 0; i < (dstW >> 1); i++) {
538 int Y1 = buf0[i * 2 ] << 1;
539 int Y2 = buf0[i * 2 + 1] << 1;
541 output_pixel(&dest[i * 2 + 0], Y1);
542 output_pixel(&dest[i * 2 + 1], Y2);
// Generator macro for >8-bit packed outputs: wraps a _X (full filter),
// _2 (two-line blend) and _1 (single line) template into concrete output
// functions with the generic int16_t* signature, casting the sources to
// int32_t (the >8-bit intermediate width) and baking in the target fmt.
548 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
549 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
550 const int16_t **_lumSrc, int lumFilterSize, \
551 const int16_t *chrFilter, const int16_t **_chrUSrc, \
552 const int16_t **_chrVSrc, int chrFilterSize, \
553 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
556 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
557 **chrUSrc = (const int32_t **) _chrUSrc, \
558 **chrVSrc = (const int32_t **) _chrVSrc, \
559 **alpSrc = (const int32_t **) _alpSrc; \
560 uint16_t *dest = (uint16_t *) _dest; \
561 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
562 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
563 alpSrc, dest, dstW, y, fmt); \
566 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
567 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
568 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
569 int yalpha, int uvalpha, int y) \
571 const int32_t **buf = (const int32_t **) _buf, \
572 **ubuf = (const int32_t **) _ubuf, \
573 **vbuf = (const int32_t **) _vbuf, \
574 **abuf = (const int32_t **) _abuf; \
575 uint16_t *dest = (uint16_t *) _dest; \
576 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
577 dest, dstW, yalpha, uvalpha, y, fmt); \
580 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
581 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
582 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
583 int uvalpha, int y) \
585 const int32_t *buf0 = (const int32_t *) _buf0, \
586 **ubuf = (const int32_t **) _ubuf, \
587 **vbuf = (const int32_t **) _vbuf, \
588 *abuf0 = (const int32_t *) _abuf0; \
589 uint16_t *dest = (uint16_t *) _dest; \
590 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
591 dstW, uvalpha, y, fmt); \
// Concrete gray16 output functions, both endiannesses.
594 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
595 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// output_pixel: writes one packed byte of 8 mono pixels; the MONOBLACK
// branch is visible, the MONOWHITE (inverted) alternative is truncated.
// NOTE(review): macro body partially lost in extraction — confirm upstream.
597 #define output_pixel(pos, acc) \
598 if (target == PIX_FMT_MONOBLACK) { \
// Vertical filter + output for 1bpp monochrome: filters luma, clips on the
// overflow fast path, then thresholds via the g lookup table (gamma table
// at neutral chroma, table_gU[128]+table_gV[128]) with dither_8x8_220
// noise, accumulating one bit per pixel into 'acc'.
604 static av_always_inline void
605 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
606 const int16_t **lumSrc, int lumFilterSize,
607 const int16_t *chrFilter, const int16_t **chrUSrc,
608 const int16_t **chrVSrc, int chrFilterSize,
609 const int16_t **alpSrc, uint8_t *dest, int dstW,
610 int y, enum PixelFormat target)
612 const uint8_t * const d128=dither_8x8_220[y&7];
613 uint8_t *g = c->table_gU[128] + c->table_gV[128];
617 for (i = 0; i < dstW - 1; i += 2) {
622 for (j = 0; j < lumFilterSize; j++) {
623 Y1 += lumSrc[j][i] * lumFilter[j];
624 Y2 += lumSrc[j][i+1] * lumFilter[j];
628 if ((Y1 | Y2) & 0x100) {
629 Y1 = av_clip_uint8(Y1);
630 Y2 = av_clip_uint8(Y2);
// Shift the bit accumulator left and append the next dithered bit.
632 acc += acc + g[Y1 + d128[(i + 0) & 7]];
633 acc += acc + g[Y2 + d128[(i + 1) & 7]];
635 output_pixel(*dest++, acc);
// Two-line blended output for 1bpp monochrome: interpolates 8 luma samples
// between buf0/buf1 with 12-bit weights, dithers and packs them into one
// output byte per iteration (fully unrolled).
640 static av_always_inline void
641 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
642 const int16_t *ubuf[2], const int16_t *vbuf[2],
643 const int16_t *abuf[2], uint8_t *dest, int dstW,
644 int yalpha, int uvalpha, int y,
645 enum PixelFormat target)
647 const int16_t *buf0 = buf[0], *buf1 = buf[1];
648 const uint8_t * const d128 = dither_8x8_220[y & 7];
649 uint8_t *g = c->table_gU[128] + c->table_gV[128];
650 int yalpha1 = 4095 - yalpha;
653 for (i = 0; i < dstW - 7; i += 8) {
654 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
655 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
656 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
657 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
658 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
659 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
660 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
661 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
662 output_pixel(*dest++, acc);
// Single-line output for 1bpp monochrome: drops the 7 fraction bits of 8
// luma samples, dithers and packs them into one byte per iteration.
666 static av_always_inline void
667 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
668 const int16_t *ubuf[2], const int16_t *vbuf[2],
669 const int16_t *abuf0, uint8_t *dest, int dstW,
670 int uvalpha, int y, enum PixelFormat target)
672 const uint8_t * const d128 = dither_8x8_220[y & 7];
673 uint8_t *g = c->table_gU[128] + c->table_gV[128];
676 for (i = 0; i < dstW - 7; i += 8) {
677 int acc = g[(buf0[i ] >> 7) + d128[0]];
678 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
679 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
680 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
681 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
682 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
683 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
684 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
685 output_pixel(*dest++, acc);
// Generator macro for 8-bit packed outputs: like YUV2PACKED16WRAPPER but
// without the int32_t casts — sources stay int16_t. Wraps the _X/_2/_1
// templates into concrete functions with the target format baked in.
691 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
692 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
693 const int16_t **lumSrc, int lumFilterSize, \
694 const int16_t *chrFilter, const int16_t **chrUSrc, \
695 const int16_t **chrVSrc, int chrFilterSize, \
696 const int16_t **alpSrc, uint8_t *dest, int dstW, \
699 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
700 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
701 alpSrc, dest, dstW, y, fmt); \
704 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
705 const int16_t *ubuf[2], const int16_t *vbuf[2], \
706 const int16_t *abuf[2], uint8_t *dest, int dstW, \
707 int yalpha, int uvalpha, int y) \
709 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
710 dest, dstW, yalpha, uvalpha, y, fmt); \
713 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
714 const int16_t *ubuf[2], const int16_t *vbuf[2], \
715 const int16_t *abuf0, uint8_t *dest, int dstW, \
716 int uvalpha, int y) \
718 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
719 abuf0, dest, dstW, uvalpha, \
// Concrete monochrome output functions (white = inverted bits).
723 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
724 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// output_pixels: stores one Y1/U/Y2/V quad, byte order chosen by target
// (YUYV vs UYVY). NOTE(review): macro body partially truncated here.
726 #define output_pixels(pos, Y1, U, Y2, V) \
727 if (target == PIX_FMT_YUYV422) { \
728 dest[pos + 0] = Y1; \
730 dest[pos + 2] = Y2; \
734 dest[pos + 1] = Y1; \
736 dest[pos + 3] = Y2; \
// Vertical filter + output for packed 4:2:2 (YUYV/UYVY): filters two luma
// and one chroma pair per iteration, clips on the overflow fast path, and
// emits a 4-byte pixel pair.
739 static av_always_inline void
740 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
741 const int16_t **lumSrc, int lumFilterSize,
742 const int16_t *chrFilter, const int16_t **chrUSrc,
743 const int16_t **chrVSrc, int chrFilterSize,
744 const int16_t **alpSrc, uint8_t *dest, int dstW,
745 int y, enum PixelFormat target)
749 for (i = 0; i < (dstW >> 1); i++) {
756 for (j = 0; j < lumFilterSize; j++) {
757 Y1 += lumSrc[j][i * 2] * lumFilter[j];
758 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
760 for (j = 0; j < chrFilterSize; j++) {
761 U += chrUSrc[j][i] * chrFilter[j];
762 V += chrVSrc[j][i] * chrFilter[j];
768 if ((Y1 | Y2 | U | V) & 0x100) {
769 Y1 = av_clip_uint8(Y1);
770 Y2 = av_clip_uint8(Y2);
771 U = av_clip_uint8(U);
772 V = av_clip_uint8(V);
774 output_pixels(4*i, Y1, U, Y2, V);
// Two-line blended output for packed 4:2:2: interpolates luma and chroma
// between line pairs with independent 12-bit weights (yalpha / uvalpha).
778 static av_always_inline void
779 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
780 const int16_t *ubuf[2], const int16_t *vbuf[2],
781 const int16_t *abuf[2], uint8_t *dest, int dstW,
782 int yalpha, int uvalpha, int y,
783 enum PixelFormat target)
785 const int16_t *buf0 = buf[0], *buf1 = buf[1],
786 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
787 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
788 int yalpha1 = 4095 - yalpha;
789 int uvalpha1 = 4095 - uvalpha;
792 for (i = 0; i < (dstW >> 1); i++) {
793 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
794 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
795 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
796 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
798 output_pixels(i * 4, Y1, U, Y2, V);
// Single-line output for packed 4:2:2. When uvalpha < 2048 the chroma line
// is used directly; otherwise the two chroma lines are averaged (>> 8
// instead of >> 7 halves the sum).
// NOTE(review): the fast path reads ubuf1/vbuf1 while the averaging path
// reads ubuf0+ubuf1 — upstream uses ubuf0 in the fast path; confirm which
// line the first branch should sample.
802 static av_always_inline void
803 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
804 const int16_t *ubuf[2], const int16_t *vbuf[2],
805 const int16_t *abuf0, uint8_t *dest, int dstW,
806 int uvalpha, int y, enum PixelFormat target)
808 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
809 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 if (uvalpha < 2048) {
813 for (i = 0; i < (dstW >> 1); i++) {
814 int Y1 = buf0[i * 2] >> 7;
815 int Y2 = buf0[i * 2 + 1] >> 7;
816 int U = ubuf1[i] >> 7;
817 int V = vbuf1[i] >> 7;
819 output_pixels(i * 4, Y1, U, Y2, V);
822 for (i = 0; i < (dstW >> 1); i++) {
823 int Y1 = buf0[i * 2] >> 7;
824 int Y2 = buf0[i * 2 + 1] >> 7;
825 int U = (ubuf0[i] + ubuf1[i]) >> 8;
826 int V = (vbuf0[i] + vbuf1[i]) >> 8;
828 output_pixels(i * 4, Y1, U, Y2, V);
// Concrete packed 4:2:2 output functions.
835 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
836 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// R_B/B_R select component order: RGB48 stores R first, BGR48 stores B
// first. output_pixel is an endian-aware 16-bit store (body truncated).
838 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
839 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
840 #define output_pixel(pos, val) \
841 if (isBE(target)) { \
// Vertical filter + output for 48-bit RGB/BGR: filters Y/U/V, applies the
// context's fixed-point YUV->RGB matrix (offset, y_coeff, u2*/v2* coeffs),
// clips each component to 30 bits and stores >>14 as 16-bit samples.
// NOTE(review): braces, declarations and some intermediate lines lost in
// extraction (e.g. Y1/Y2/V initialisation) — confirm upstream.
847 static av_always_inline void
848 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
849 const int32_t **lumSrc, int lumFilterSize,
850 const int16_t *chrFilter, const int32_t **chrUSrc,
851 const int32_t **chrVSrc, int chrFilterSize,
852 const int32_t **alpSrc, uint16_t *dest, int dstW,
853 int y, enum PixelFormat target)
857 for (i = 0; i < (dstW >> 1); i++) {
861 int U = -128 << 23; // 19
865 for (j = 0; j < lumFilterSize; j++) {
866 Y1 += lumSrc[j][i * 2] * lumFilter[j];
867 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
869 for (j = 0; j < chrFilterSize; j++) {
870 U += chrUSrc[j][i] * chrFilter[j];
871 V += chrVSrc[j][i] * chrFilter[j];
874 // 8bit: 12+15=27; 16-bit: 12+19=31
880 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
881 Y1 -= c->yuv2rgb_y_offset;
882 Y2 -= c->yuv2rgb_y_offset;
883 Y1 *= c->yuv2rgb_y_coeff;
884 Y2 *= c->yuv2rgb_y_coeff;
887 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
889 R = V * c->yuv2rgb_v2r_coeff;
890 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
891 B = U * c->yuv2rgb_u2b_coeff;
893 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
894 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
895 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
896 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
897 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
898 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
899 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Two-line blended output for 48-bit RGB/BGR: interpolates Y/U/V between
// line pairs (chroma gets the -128 bias folded into the blend), then runs
// the same fixed-point YUV->RGB matrix and 16-bit store as the _X variant.
904 static av_always_inline void
905 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
906 const int32_t *ubuf[2], const int32_t *vbuf[2],
907 const int32_t *abuf[2], uint16_t *dest, int dstW,
908 int yalpha, int uvalpha, int y,
909 enum PixelFormat target)
911 const int32_t *buf0 = buf[0], *buf1 = buf[1],
912 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
913 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
914 int yalpha1 = 4095 - yalpha;
915 int uvalpha1 = 4095 - uvalpha;
918 for (i = 0; i < (dstW >> 1); i++) {
919 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
920 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
921 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
922 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
925 Y1 -= c->yuv2rgb_y_offset;
926 Y2 -= c->yuv2rgb_y_offset;
927 Y1 *= c->yuv2rgb_y_coeff;
928 Y2 *= c->yuv2rgb_y_coeff;
932 R = V * c->yuv2rgb_v2r_coeff;
933 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
934 B = U * c->yuv2rgb_u2b_coeff;
936 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
937 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
938 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
939 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
940 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
941 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Single-line output for 48-bit RGB/BGR. Fast path (uvalpha < 2048) uses
// the chroma line directly (>> 2); otherwise the two chroma lines are
// averaged (>> 3). Both then run the fixed-point YUV->RGB matrix.
946 static av_always_inline void
947 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
948 const int32_t *ubuf[2], const int32_t *vbuf[2],
949 const int32_t *abuf0, uint16_t *dest, int dstW,
950 int uvalpha, int y, enum PixelFormat target)
952 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
953 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
956 if (uvalpha < 2048) {
957 for (i = 0; i < (dstW >> 1); i++) {
958 int Y1 = (buf0[i * 2] ) >> 2;
959 int Y2 = (buf0[i * 2 + 1]) >> 2;
960 int U = (ubuf0[i] + (-128 << 11)) >> 2;
961 int V = (vbuf0[i] + (-128 << 11)) >> 2;
964 Y1 -= c->yuv2rgb_y_offset;
965 Y2 -= c->yuv2rgb_y_offset;
966 Y1 *= c->yuv2rgb_y_coeff;
967 Y2 *= c->yuv2rgb_y_coeff;
971 R = V * c->yuv2rgb_v2r_coeff;
972 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
973 B = U * c->yuv2rgb_u2b_coeff;
975 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
976 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
977 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
978 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
979 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
980 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Averaging path: blend the two chroma lines.
984 for (i = 0; i < (dstW >> 1); i++) {
985 int Y1 = (buf0[i * 2] ) >> 2;
986 int Y2 = (buf0[i * 2 + 1]) >> 2;
987 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
988 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
991 Y1 -= c->yuv2rgb_y_offset;
992 Y2 -= c->yuv2rgb_y_offset;
993 Y1 *= c->yuv2rgb_y_coeff;
994 Y2 *= c->yuv2rgb_y_coeff;
998 R = V * c->yuv2rgb_v2r_coeff;
999 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1000 B = U * c->yuv2rgb_u2b_coeff;
1002 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1003 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1004 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1005 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1006 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1007 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Concrete 48-bit output functions (RGB/BGR x BE/LE).
1017 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1018 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1019 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1020 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
// Writes one pair of output pixels for all byte-oriented RGB targets.
// Y1/Y2/U/V index precomputed per-component lookup tables (_r/_g/_b) whose
// entries already contain the component shifted into its bit position, so a
// pixel is assembled by simple addition. The branch taken is selected by
// the compile-time 'target', so each instantiation keeps only one path:
//   - 32bpp (ARGB/RGBA/ABGR/BGRA): uint32_t stores, optional alpha A1/A2
//     shifted to bit 24 or 0 depending on the _1 variants;
//   - 24bpp (RGB24/BGR24): three bytes per pixel, r/b swapped via r_b/b_r;
//   - 16/15/12bpp (565/555/444): uint16_t stores with 2x2 or 4x4 ordered
//     dither added to the table index per channel;
//   - 8/4bpp: byte stores with 8x8 dither; RGB4/BGR4 packs two pixels in
//     one byte (second pixel << 4).
// NOTE(review): several lines (braces, #if CONFIG_SMALL / hasAlpha guards)
// appear lost in extraction — confirm upstream before editing logic.
1022 static av_always_inline void
1023 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1024 int U, int V, int A1, int A2,
1025 const void *_r, const void *_g, const void *_b, int y,
1026 enum PixelFormat target, int hasAlpha)
1028 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1029 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1030 uint32_t *dest = (uint32_t *) _dest;
1031 const uint32_t *r = (const uint32_t *) _r;
1032 const uint32_t *g = (const uint32_t *) _g;
1033 const uint32_t *b = (const uint32_t *) _b;
1036 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1038 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1039 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1042 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1044 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1045 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1047 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1048 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1051 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1052 uint8_t *dest = (uint8_t *) _dest;
1053 const uint8_t *r = (const uint8_t *) _r;
1054 const uint8_t *g = (const uint8_t *) _g;
1055 const uint8_t *b = (const uint8_t *) _b;
1057 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1058 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1059 dest[i * 6 + 0] = r_b[Y1];
1060 dest[i * 6 + 1] = g[Y1];
1061 dest[i * 6 + 2] = b_r[Y1];
1062 dest[i * 6 + 3] = r_b[Y2];
1063 dest[i * 6 + 4] = g[Y2];
1064 dest[i * 6 + 5] = b_r[Y2];
1067 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1068 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1069 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1070 uint16_t *dest = (uint16_t *) _dest;
1071 const uint16_t *r = (const uint16_t *) _r;
1072 const uint16_t *g = (const uint16_t *) _g;
1073 const uint16_t *b = (const uint16_t *) _b;
1074 int dr1, dg1, db1, dr2, dg2, db2;
1076 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1077 dr1 = dither_2x2_8[ y & 1 ][0];
1078 dg1 = dither_2x2_4[ y & 1 ][0];
1079 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1080 dr2 = dither_2x2_8[ y & 1 ][1];
1081 dg2 = dither_2x2_4[ y & 1 ][1];
1082 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1083 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1084 dr1 = dither_2x2_8[ y & 1 ][0];
1085 dg1 = dither_2x2_8[ y & 1 ][1];
1086 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1087 dr2 = dither_2x2_8[ y & 1 ][1];
1088 dg2 = dither_2x2_8[ y & 1 ][0];
1089 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1091 dr1 = dither_4x4_16[ y & 3 ][0];
1092 dg1 = dither_4x4_16[ y & 3 ][1];
1093 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1094 dr2 = dither_4x4_16[ y & 3 ][1];
1095 dg2 = dither_4x4_16[ y & 3 ][0];
1096 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1099 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1100 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1101 } else /* 8/4-bit */ {
1102 uint8_t *dest = (uint8_t *) _dest;
1103 const uint8_t *r = (const uint8_t *) _r;
1104 const uint8_t *g = (const uint8_t *) _g;
1105 const uint8_t *b = (const uint8_t *) _b;
1106 int dr1, dg1, db1, dr2, dg2, db2;
1108 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1109 const uint8_t * const d64 = dither_8x8_73[y & 7];
1110 const uint8_t * const d32 = dither_8x8_32[y & 7];
1111 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1112 db1 = d64[(i * 2 + 0) & 7];
1113 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1114 db2 = d64[(i * 2 + 1) & 7];
1116 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1117 const uint8_t * const d128 = dither_8x8_220[y & 7];
1118 dr1 = db1 = d128[(i * 2 + 0) & 7];
1119 dg1 = d64[(i * 2 + 0) & 7];
1120 dr2 = db2 = d128[(i * 2 + 1) & 7];
1121 dg2 = d64[(i * 2 + 1) & 7];
1124 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1125 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1126 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1128 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1129 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Generic N-tap vertical scaler with packed-RGB output: accumulates luma,
 * chroma and (optionally) alpha planes through their FIR filters, clips the
 * results to 8 bits, then emits two horizontally adjacent pixels per
 * iteration via yuv2rgb_write() using the per-component lookup tables. */
1134 static av_always_inline void
1135 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1136 const int16_t **lumSrc, int lumFilterSize,
1137 const int16_t *chrFilter, const int16_t **chrUSrc,
1138 const int16_t **chrVSrc, int chrFilterSize,
1139 const int16_t **alpSrc, uint8_t *dest, int dstW,
1140 int y, enum PixelFormat target, int hasAlpha)
1144 for (i = 0; i < (dstW >> 1); i++) { /* two output pixels per iteration */
1150 int av_unused A1, A2;
1151 const void *r, *g, *b;
1153 for (j = 0; j < lumFilterSize; j++) {
1154 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1155 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1157 for (j = 0; j < chrFilterSize; j++) {
1158 U += chrUSrc[j][i] * chrFilter[j];
1159 V += chrVSrc[j][i] * chrFilter[j];
/* Cheap range check: clip per component only when some value actually
 * escaped the 0..255 range (any 0x100 bit set). */
1165 if ((Y1 | Y2 | U | V) & 0x100) {
1166 Y1 = av_clip_uint8(Y1);
1167 Y2 = av_clip_uint8(Y2);
1168 U = av_clip_uint8(U);
1169 V = av_clip_uint8(V);
/* Alpha goes through the same luma filter when present. */
1174 for (j = 0; j < lumFilterSize; j++) {
1175 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1176 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1180 if ((A1 | A2) & 0x100) {
1181 A1 = av_clip_uint8(A1);
1182 A2 = av_clip_uint8(A2);
1186 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* Green needs contributions from both U and V tables. */
1188 g = (c->table_gU[U] + c->table_gV[V]);
1191 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1192 r, g, b, y, target, hasAlpha);
/* Two-line bilinear vertical scaling variant: blends a pair of luma lines
 * with yalpha, a pair of chroma (and optionally alpha) lines with uvalpha,
 * then writes packed RGB pixel pairs via yuv2rgb_write(). */
1196 static av_always_inline void
1197 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1198 const int16_t *ubuf[2], const int16_t *vbuf[2],
1199 const int16_t *abuf[2], uint8_t *dest, int dstW,
1200 int yalpha, int uvalpha, int y,
1201 enum PixelFormat target, int hasAlpha)
1203 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1204 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1205 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1206 *abuf0 = hasAlpha ? abuf[0] : NULL,
1207 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* 12-bit complementary blend weights: the two weights sum to 4095. */
1208 int yalpha1 = 4095 - yalpha;
1209 int uvalpha1 = 4095 - uvalpha;
1212 for (i = 0; i < (dstW >> 1); i++) {
1213 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1214 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1215 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1216 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1218 const void *r = c->table_rV[V],
1219 *g = (c->table_gU[U] + c->table_gV[V]),
1220 *b = c->table_bU[U];
1223 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1224 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1227 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1228 r, g, b, y, target, hasAlpha);
/* Single-line (unscaled vertical) variant.  When uvalpha < 2048 the chroma
 * comes from one line only; otherwise the two chroma lines are averaged.
 * Luma/alpha samples are presumably 15-bit here, hence the >> 7 to 8 bits. */
1232 static av_always_inline void
1233 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1234 const int16_t *ubuf[2], const int16_t *vbuf[2],
1235 const int16_t *abuf0, uint8_t *dest, int dstW,
1236 int uvalpha, int y, enum PixelFormat target,
1239 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1240 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1243 if (uvalpha < 2048) {
1244 for (i = 0; i < (dstW >> 1); i++) {
1245 int Y1 = buf0[i * 2] >> 7;
1246 int Y2 = buf0[i * 2 + 1] >> 7;
1247 int U = ubuf1[i] >> 7;
1248 int V = vbuf1[i] >> 7;
1250 const void *r = c->table_rV[V],
1251 *g = (c->table_gU[U] + c->table_gV[V]),
1252 *b = c->table_bU[U];
1255 A1 = abuf0[i * 2 ] >> 7;
1256 A2 = abuf0[i * 2 + 1] >> 7;
1259 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1260 r, g, b, y, target, hasAlpha);
/* uvalpha >= 2048: average the two chroma input lines (>> 8 = >> 7 + /2). */
1263 for (i = 0; i < (dstW >> 1); i++) {
1264 int Y1 = buf0[i * 2] >> 7;
1265 int Y2 = buf0[i * 2 + 1] >> 7;
1266 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1267 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1269 const void *r = c->table_rV[V],
1270 *g = (c->table_gU[U] + c->table_gV[V]),
1271 *b = c->table_bU[U];
1274 A1 = abuf0[i * 2 ] >> 7;
1275 A2 = abuf0[i * 2 + 1] >> 7;
1278 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1279 r, g, b, y, target, hasAlpha);
/* Wrapper generators: YUV2RGBWRAPPERX emits the N-tap (_X_c) entry point
 * for one output format; YUV2RGBWRAPPER additionally emits the 2-line
 * (_2_c) and 1-line (_1_c) entry points.  Each simply forwards to the
 * corresponding *_template with the PixelFormat and hasAlpha baked in as
 * compile-time constants so the templates specialize.  The instantiations
 * below cover RGB32 variants (with/without alpha), RGB/BGR24 and the
 * dithered 16/15/12/8/4-bit formats.  No comments are placed between the
 * macro lines: they are backslash-continued. */
1284 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1285 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1286 const int16_t **lumSrc, int lumFilterSize, \
1287 const int16_t *chrFilter, const int16_t **chrUSrc, \
1288 const int16_t **chrVSrc, int chrFilterSize, \
1289 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1292 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1293 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1294 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1296 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1297 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1298 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1299 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1300 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1301 int yalpha, int uvalpha, int y) \
1303 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1304 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1307 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1308 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1309 const int16_t *abuf0, uint8_t *dest, int dstW, \
1310 int uvalpha, int y) \
1312 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1313 dstW, uvalpha, y, fmt, hasAlpha); \
1317 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1318 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1320 #if CONFIG_SWSCALE_ALPHA
1321 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1322 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1324 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1325 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1327 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1328 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1329 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1330 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1331 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1332 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1333 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1334 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-chroma-interpolation variant: one output pixel per iteration, with
 * RGB computed directly from the matrix coefficients in SwsContext rather
 * than through lookup tables.  step selects 3 bytes/pixel for 24-bit
 * formats, 4 for the 32-bit formats. */
1336 static av_always_inline void
1337 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1338 const int16_t **lumSrc, int lumFilterSize,
1339 const int16_t *chrFilter, const int16_t **chrUSrc,
1340 const int16_t **chrVSrc, int chrFilterSize,
1341 const int16_t **alpSrc, uint8_t *dest,
1342 int dstW, int y, enum PixelFormat target, int hasAlpha)
1345 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1347 for (i = 0; i < dstW; i++) {
1355 for (j = 0; j < lumFilterSize; j++) {
1356 Y += lumSrc[j][i] * lumFilter[j];
1358 for (j = 0; j < chrFilterSize; j++) {
1359 U += chrUSrc[j][i] * chrFilter[j];
1360 V += chrVSrc[j][i] * chrFilter[j];
1367 for (j = 0; j < lumFilterSize; j++) {
1368 A += alpSrc[j][i] * lumFilter[j];
1372 A = av_clip_uint8(A);
/* Matrix conversion using the context's fixed-point coefficients. */
1374 Y -= c->yuv2rgb_y_offset;
1375 Y *= c->yuv2rgb_y_coeff;
1377 R = Y + V*c->yuv2rgb_v2r_coeff;
1378 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1379 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Clip to 30 bits only when an overflow/underflow bit is actually set. */
1380 if ((R | G | B) & 0xC0000000) {
1381 R = av_clip_uintp2(R, 30);
1382 G = av_clip_uintp2(G, 30);
1383 B = av_clip_uintp2(B, 30);
/* Alpha byte position depends on the target layout (first vs. fourth). */
1388 dest[0] = hasAlpha ? A : 255;
1402 dest[3] = hasAlpha ? A : 255;
1405 dest[0] = hasAlpha ? A : 255;
1420 dest[3] = hasAlpha ? A : 255;
/* Full-chroma (per-pixel chroma) instantiations: only the N-tap _X_c entry
 * point exists for these formats.  The first group selects alpha support at
 * runtime via c->alpPixBuf; the #if block provides dedicated alpha and
 * no-alpha specializations. */
1428 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1429 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1430 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1431 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1433 #if CONFIG_SWSCALE_ALPHA
1434 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1435 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1436 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1437 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1439 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1440 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1441 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1442 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1444 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1445 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill `height` rows of `width` bytes of a plane with a constant value,
 * starting at row y (used e.g. to blank unused chroma/alpha planes). */
1447 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1448 int width, int height,
1452 uint8_t *ptr = plane + stride*y;
1453 for (i=0; i<height; i++) {
1454 memset(ptr, val, width);
/* 48-bit (16 bits/component) RGB/BGR input converters.  input_pixel reads
 * one 16-bit component honoring the format's endianness; the r/b macros
 * swap the component order for the BGR variants so the same template body
 * serves both RGB48 and BGR48. */
1459 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1461 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1462 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Per-pixel luma: Y = (RY*r + GY*g + BY*b + rounding) >> RGB2YUV_SHIFT. */
1464 static av_always_inline void
1465 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1466 enum PixelFormat origin)
1469 for (i = 0; i < width; i++) {
1470 unsigned int r_b = input_pixel(&src[i*3+0]);
1471 unsigned int g = input_pixel(&src[i*3+1]);
1472 unsigned int b_r = input_pixel(&src[i*3+2]);
1474 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Full-resolution chroma: one U/V sample per input pixel (no subsampling). */
1478 static av_always_inline void
1479 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1480 const uint16_t *src1, const uint16_t *src2,
1481 int width, enum PixelFormat origin)
1485 for (i = 0; i < width; i++) {
1486 int r_b = input_pixel(&src1[i*3+0]);
1487 int g = input_pixel(&src1[i*3+1]);
1488 int b_r = input_pixel(&src1[i*3+2]);
1490 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1491 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Horizontally subsampled chroma: averages each pair of adjacent pixels
 * (rounded) before the RGB->UV matrix. */
1495 static av_always_inline void
1496 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1497 const uint16_t *src1, const uint16_t *src2,
1498 int width, enum PixelFormat origin)
1502 for (i = 0; i < width; i++) {
1503 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1504 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1505 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1507 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1508 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generator for the public rgb48/bgr48 input-converter entry points: each
 * instantiation produces ToY, ToUV and ToUV_half wrappers that cast the
 * byte pointers to uint16_t and dispatch to the templates above with the
 * origin format as a compile-time constant.  (No comments between the
 * macro lines — they are backslash-continued.) */
1516 #define rgb48funcs(pattern, BE_LE, origin) \
1517 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1518 int width, uint32_t *unused) \
1520 const uint16_t *src = (const uint16_t *) _src; \
1521 uint16_t *dst = (uint16_t *) _dst; \
1522 rgb48ToY_c_template(dst, src, width, origin); \
1525 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1526 const uint8_t *_src1, const uint8_t *_src2, \
1527 int width, uint32_t *unused) \
1529 const uint16_t *src1 = (const uint16_t *) _src1, \
1530 *src2 = (const uint16_t *) _src2; \
1531 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1532 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1535 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1536 const uint8_t *_src1, const uint8_t *_src2, \
1537 int width, uint32_t *unused) \
1539 const uint16_t *src1 = (const uint16_t *) _src1, \
1540 *src2 = (const uint16_t *) _src2; \
1541 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1542 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1545 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1546 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1547 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1548 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* 16/15/12-bit and 32-bit packed RGB input converters.  input_pixel reads
 * a native 32-bit word for the 4-byte formats, or a 16-bit word honoring
 * endianness otherwise.  The sh*/mask*/S parameters describe the target
 * layout; compile-time constant per instantiation. */
1550 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1551 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1552 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Luma: unpack r/g/b with the per-format shifts/masks, then apply the
 * RGB->Y matrix with pre-shifted coefficients and rounding constant. */
1554 static av_always_inline void
1555 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1556 int width, enum PixelFormat origin,
1557 int shr, int shg, int shb, int shp,
1558 int maskr, int maskg, int maskb,
1559 int rsh, int gsh, int bsh, int S)
1561 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1562 rnd = 33 << (S - 1);
1565 for (i = 0; i < width; i++) {
1566 int px = input_pixel(i) >> shp;
1567 int b = (px & maskb) >> shb;
1568 int g = (px & maskg) >> shg;
1569 int r = (px & maskr) >> shr;
1571 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Full-resolution chroma: same unpacking, RGB->U/V matrix. */
1575 static av_always_inline void
1576 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1577 const uint8_t *src, int width,
1578 enum PixelFormat origin,
1579 int shr, int shg, int shb, int shp,
1580 int maskr, int maskg, int maskb,
1581 int rsh, int gsh, int bsh, int S)
1583 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1584 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1585 rnd = 257 << (S - 1);
1588 for (i = 0; i < width; i++) {
1589 int px = input_pixel(i) >> shp;
1590 int b = (px & maskb) >> shb;
1591 int g = (px & maskg) >> shg;
1592 int r = (px & maskr) >> shr;
1594 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1595 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Horizontally subsampled chroma: sums two adjacent pixels while they are
 * still packed — the widened masks (mask |= mask << 1) catch the carry bit
 * of each component so the pairwise sum can be extracted in one pass. */
1599 static av_always_inline void
1600 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1601 const uint8_t *src, int width,
1602 enum PixelFormat origin,
1603 int shr, int shg, int shb, int shp,
1604 int maskr, int maskg, int maskb,
1605 int rsh, int gsh, int bsh, int S)
1607 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1608 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1609 rnd = 257 << S, maskgx = ~(maskr | maskb);
1612 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1613 for (i = 0; i < width; i++) {
1614 int px0 = input_pixel(2 * i + 0) >> shp;
1615 int px1 = input_pixel(2 * i + 1) >> shp;
1616 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1617 int rb = px0 + px1 - g;
1619 b = (rb & maskb) >> shb;
/* 565-style formats need special green handling — its field is adjacent
 * to both neighbors, so the carry can spill into them. */
1620 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1621 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1624 g = (g & maskg) >> shg;
1626 r = (rb & maskr) >> shr;
1628 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1629 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generator for the rgb16/32 input-converter entry points (ToY, ToUV,
 * ToUV_half per format).  The instantiation table below encodes, per
 * pixel format: component shifts, an overall pixel shift (shp, used by
 * the *32_1 formats whose components live in the high bytes), component
 * masks, coefficient pre-shifts, and the final scale S.
 * (No comments between the macro lines — they are backslash-continued.) */
1635 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1636 maskg, maskb, rsh, gsh, bsh, S) \
1637 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1638 int width, uint32_t *unused) \
1640 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1641 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1644 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1645 const uint8_t *src, const uint8_t *dummy, \
1646 int width, uint32_t *unused) \
1648 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1649 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1652 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1653 const uint8_t *src, const uint8_t *dummy, \
1654 int width, uint32_t *unused) \
1656 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1657 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1660 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1661 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1662 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1663 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1664 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1665 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1666 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1667 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1668 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1669 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1670 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1671 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha byte from ABGR-ordered 32-bit pixels into a plane. */
1673 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1676 for (i=0; i<width; i++) {
/* Extract the alpha byte from RGBA-ordered 32-bit pixels into a plane. */
1681 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1684 for (i=0; i<width; i++) {
/* PAL8 input: luma is the low byte of the palette entry for each index. */
1689 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1692 for (i=0; i<width; i++) {
1695 dst[i]= pal[d] & 0xFF;
/* PAL8 input: U/V come from the same palette entry (src1 must equal src2 —
 * palette data carries no separate chroma line). */
1699 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1700 const uint8_t *src1, const uint8_t *src2,
1701 int width, uint32_t *pal)
1704 assert(src1 == src2);
1705 for (i=0; i<width; i++) {
1706 int p= pal[src1[i]];
/* 1 bpp input, white = 0: expand each of the 8 bits of a byte to 0/255.
 * NOTE(review): only multiples of 8 pixels are handled in this loop. */
1713 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1714 int width, uint32_t *unused)
1717 for (i=0; i<width/8; i++) {
1720 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp input, black = 0: same bit expansion (inversion presumably happens
 * in the elided byte load — confirm against full source). */
1724 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1725 int width, uint32_t *unused)
1728 for (i=0; i<width/8; i++) {
1731 dst[8*i+j]= ((d>>(7-j))&1)*255;
1735 //FIXME yuy2* can read up to 7 samples too much
/* YUYV 4:2:2 input: luma sits at even byte positions. */
1737 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1741 for (i=0; i<width; i++)
/* YUYV 4:2:2 input: U at byte 1, V at byte 3 of each 4-byte pair.
 * src1 and src2 must be the same line (interleaved chroma). */
1745 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1746 const uint8_t *src2, int width, uint32_t *unused)
1749 for (i=0; i<width; i++) {
1750 dstU[i]= src1[4*i + 1];
1751 dstV[i]= src1[4*i + 3];
1753 assert(src1 == src2);
/* Byte-swap a plane of 16-bit luma samples (endianness conversion). */
1756 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1759 const uint16_t *src = (const uint16_t *) _src;
1760 uint16_t *dst = (uint16_t *) _dst;
1761 for (i=0; i<width; i++) {
1762 dst[i] = av_bswap16(src[i]);
/* Byte-swap two planes of 16-bit chroma samples. */
1766 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1767 const uint8_t *_src2, int width, uint32_t *unused)
1770 const uint16_t *src1 = (const uint16_t *) _src1,
1771 *src2 = (const uint16_t *) _src2;
1772 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1773 for (i=0; i<width; i++) {
1774 dstU[i] = av_bswap16(src1[i]);
1775 dstV[i] = av_bswap16(src2[i]);
1779 /* This is almost identical to the previous, and exists only because
1780 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1781 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1785 for (i=0; i<width; i++)
/* UYVY 4:2:2 input: U at byte 0, V at byte 2 of each 4-byte pair. */
1789 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1790 const uint8_t *src2, int width, uint32_t *unused)
1793 for (i=0; i<width; i++) {
1794 dstU[i]= src1[4*i + 0];
1795 dstV[i]= src1[4*i + 2];
1797 assert(src1 == src2);
/* De-interleave an NV12/NV21-style packed chroma line into two planes;
 * the wrappers below pick the dst order for each variant. */
1800 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1801 const uint8_t *src, int width)
1804 for (i = 0; i < width; i++) {
1805 dst1[i] = src[2*i+0];
1806 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — U first. */
1810 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1811 const uint8_t *src1, const uint8_t *src2,
1812 int width, uint32_t *unused)
1814 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — destinations swapped. */
1817 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1818 const uint8_t *src1, const uint8_t *src2,
1819 int width, uint32_t *unused)
1821 nvXXtoUV_c(dstV, dstU, src1, width);
1824 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 input: per-pixel RGB->Y with RGB2YUV_SHIFT fixed-point matrix. */
1826 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1827 int width, uint32_t *unused)
1830 for (i=0; i<width; i++) {
1835 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 input: full-resolution chroma (one U/V per pixel). */
1839 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1840 const uint8_t *src2, int width, uint32_t *unused)
1843 for (i=0; i<width; i++) {
1844 int b= src1[3*i + 0];
1845 int g= src1[3*i + 1];
1846 int r= src1[3*i + 2];
1848 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1849 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1851 assert(src1 == src2);
/* BGR24 input: horizontally subsampled chroma — sums adjacent pixel pairs,
 * hence the doubled rounding constant and the extra >> 1 in the shift. */
1854 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1855 const uint8_t *src2, int width, uint32_t *unused)
1858 for (i=0; i<width; i++) {
1859 int b= src1[6*i + 0] + src1[6*i + 3];
1860 int g= src1[6*i + 1] + src1[6*i + 4];
1861 int r= src1[6*i + 2] + src1[6*i + 5];
1863 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1864 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1866 assert(src1 == src2);
/* RGB24 variants: identical math, opposite component order in memory. */
1869 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1873 for (i=0; i<width; i++) {
1878 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1882 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1883 const uint8_t *src2, int width, uint32_t *unused)
1887 for (i=0; i<width; i++) {
1888 int r= src1[3*i + 0];
1889 int g= src1[3*i + 1];
1890 int b= src1[3*i + 2];
1892 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1893 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1897 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1898 const uint8_t *src2, int width, uint32_t *unused)
1902 for (i=0; i<width; i++) {
1903 int r= src1[6*i + 0] + src1[6*i + 3];
1904 int g= src1[6*i + 1] + src1[6*i + 4];
1905 int b= src1[6*i + 2] + src1[6*i + 5];
1907 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1908 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal FIR scalers.  Each applies filterSize taps per output sample;
 * filterPos[i] is the first source index for output i.  The To15/To19
 * suffix is the intermediate precision the result is shifted down to.
 * 16-bit input, 19-bit output (dst is really int32_t). */
1912 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1913 const int16_t *filter,
1914 const int16_t *filterPos, int filterSize)
1917 int32_t *dst = (int32_t *) _dst;
1918 const uint16_t *src = (const uint16_t *) _src;
/* The shift (sh, derived from the source bit depth in elided lines) keeps
 * the output at 19 bits regardless of input depth. */
1919 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1922 for (i = 0; i < dstW; i++) {
1924 int srcPos = filterPos[i];
1927 for (j = 0; j < filterSize; j++) {
1928 val += src[srcPos + j] * filter[filterSize * i + j];
1930 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1931 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* 16-bit input, 15-bit output; sh comes straight from the source depth. */
1935 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1936 const int16_t *filter,
1937 const int16_t *filterPos, int filterSize)
1940 const uint16_t *src = (const uint16_t *) _src;
1941 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1943 for (i = 0; i < dstW; i++) {
1945 int srcPos = filterPos[i];
1948 for (j = 0; j < filterSize; j++) {
1949 val += src[srcPos + j] * filter[filterSize * i + j];
1951 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1952 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1956 // bilinear / bicubic scaling
/* 8-bit input, 15-bit output — the common path. */
1957 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1958 const int16_t *filter, const int16_t *filterPos,
1962 for (i=0; i<dstW; i++) {
1964 int srcPos= filterPos[i];
1966 for (j=0; j<filterSize; j++) {
1967 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1969 //filter += hFilterSize;
1970 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 8-bit input, 19-bit output (dst is really int32_t). */
1975 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1976 const int16_t *filter, const int16_t *filterPos,
1980 int32_t *dst = (int32_t *) _dst;
1981 for (i=0; i<dstW; i++) {
1983 int srcPos= filterPos[i];
1985 for (j=0; j<filterSize; j++) {
1986 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1988 //filter += hFilterSize;
1989 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1994 //FIXME all pal and rgb srcFormats could do this conversion as well
1995 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range (MPEG) <-> full-range (JPEG) conversion on intermediate
 * samples.  The plain versions operate on 15-bit int16_t data; the *16
 * versions on 19-bit data stored in int32_t (constants shifted by 4/2 to
 * match the extra precision).  FFMIN guards against overflow before the
 * fixed-point multiply. */
1996 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1999 for (i = 0; i < width; i++) {
2000 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2001 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
2004 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2007 for (i = 0; i < width; i++) {
2008 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2009 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
2012 static void lumRangeToJpeg_c(int16_t *dst, int width)
2015 for (i = 0; i < width; i++)
2016 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
2018 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2021 for (i = 0; i < width; i++)
2022 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 19-bit (int32_t) variants of the above. */
2025 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2028 int32_t *dstU = (int32_t *) _dstU;
2029 int32_t *dstV = (int32_t *) _dstV;
2030 for (i = 0; i < width; i++) {
2031 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2032 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2035 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2038 int32_t *dstU = (int32_t *) _dstU;
2039 int32_t *dstV = (int32_t *) _dstV;
2040 for (i = 0; i < width; i++) {
2041 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2042 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2045 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2048 int32_t *dst = (int32_t *) _dst;
2049 for (i = 0; i < width; i++)
2050 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2052 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2055 int32_t *dst = (int32_t *) _dst;
2056 for (i = 0; i < width; i++)
2057 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point source position
 * (xpos/xInc); xalpha is the 7-bit fractional weight between src[xx] and
 * src[xx+1]; output is 15-bit. */
2060 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2061 const uint8_t *src, int srcW, int xInc)
2064 unsigned int xpos=0;
2065 for (i=0;i<dstWidth;i++) {
2066 register unsigned int xx=xpos>>16;
2067 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2068 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2073 // *** horizontal scale Y line to temp buffer
/* Luma/alpha pipeline for one line: optional toYV12 input conversion into
 * formatConvBuffer, then FIR or fast-bilinear horizontal scale, then
 * optional range conversion (skipped for alpha). */
2074 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2075 const uint8_t *src, int srcW, int xInc,
2076 const int16_t *hLumFilter,
2077 const int16_t *hLumFilterPos, int hLumFilterSize,
2078 uint8_t *formatConvBuffer,
2079 uint32_t *pal, int isAlpha)
2081 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2082 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2085 toYV12(formatConvBuffer, src, srcW, pal);
2086 src= formatConvBuffer;
2089 if (!c->hyscale_fast) {
2090 c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2091 } else { // fast bilinear upscale / crap downscale
2092 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2096 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler — same scheme as hyscale_fast_c
 * but processes the U and V planes together.  NOTE(review): this variant
 * weights with (xalpha^127) rather than the shift+delta form above. */
2099 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2100 int dstWidth, const uint8_t *src1,
2101 const uint8_t *src2, int srcW, int xInc)
2104 unsigned int xpos=0;
2105 for (i=0;i<dstWidth;i++) {
2106 register unsigned int xx=xpos>>16;
2107 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2108 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2109 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Chroma pipeline for one line: optional chrToYV12 conversion (second
 * plane goes into buf2, placed after the first in formatConvBuffer with
 * 16-byte alignment), then horizontal scale and optional range conversion. */
2114 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2115 const uint8_t *src1, const uint8_t *src2,
2116 int srcW, int xInc, const int16_t *hChrFilter,
2117 const int16_t *hChrFilterPos, int hChrFilterSize,
2118 uint8_t *formatConvBuffer, uint32_t *pal)
2121 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2122 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2123 src1= formatConvBuffer;
2127 if (!c->hcscale_fast) {
2128 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2129 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2130 } else { // fast bilinear upscale / crap downscale
2131 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2134 if (c->chrConvertRange)
2135 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Select the C ("_c") output/writer functions for the destination pixel
 * format: planar writers (*yuv2yuv1 for the unscaled 1-tap case,
 * *yuv2yuvX for the general multi-tap case) and packed writers
 * (*yuv2packed1/2/X), all chosen from c->dstFormat and c->flags.
 * NOTE(review): this listing appears elided (several case labels,
 * breaks, else branches and braces are missing between the numbered
 * lines); comments below describe only what is visible — confirm
 * against the complete upstream file.
 */
2138 static av_always_inline void
2139 find_c_packed_planar_out_funcs(SwsContext *c,
2140 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2141 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2142 yuv2packedX_fn *yuv2packedX)
2144 enum PixelFormat dstFormat = c->dstFormat;
/* Planar outputs: chosen by bit depth and endianness of the target. */
2146 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2147 *yuv2yuvX = yuv2nv12X_c;
2148 } else if (is16BPS(dstFormat)) {
2149 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2150 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means 9-bit components; the other branch
 * (else line elided here) handles the 10-bit case. */
2151 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2152 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2154 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
/* 8-bit planar default: both the 1-tap and the N-tap writers exist. */
2157 *yuv2yuv1 = yuv2yuv1_c;
2158 *yuv2yuvX = yuv2yuvX_c;
/* Packed outputs with full horizontal chroma interpolation
 * (SWS_FULL_CHR_H_INT): only the general X variants are provided.
 * The case labels and #if CONFIG_SMALL guards pairing with the
 * visible #endif lines are elided in this listing. */
2160 if(c->flags & SWS_FULL_CHR_H_INT) {
2161 switch (dstFormat) {
2164 *yuv2packedX = yuv2rgba32_full_X_c;
2166 #if CONFIG_SWSCALE_ALPHA
2168 *yuv2packedX = yuv2rgba32_full_X_c;
2170 #endif /* CONFIG_SWSCALE_ALPHA */
2172 *yuv2packedX = yuv2rgbx32_full_X_c;
2174 #endif /* !CONFIG_SMALL */
2178 *yuv2packedX = yuv2argb32_full_X_c;
2180 #if CONFIG_SWSCALE_ALPHA
2182 *yuv2packedX = yuv2argb32_full_X_c;
2184 #endif /* CONFIG_SWSCALE_ALPHA */
2186 *yuv2packedX = yuv2xrgb32_full_X_c;
2188 #endif /* !CONFIG_SMALL */
2192 *yuv2packedX = yuv2bgra32_full_X_c;
2194 #if CONFIG_SWSCALE_ALPHA
2196 *yuv2packedX = yuv2bgra32_full_X_c;
2198 #endif /* CONFIG_SWSCALE_ALPHA */
2200 *yuv2packedX = yuv2bgrx32_full_X_c;
2202 #endif /* !CONFIG_SMALL */
2206 *yuv2packedX = yuv2abgr32_full_X_c;
2208 #if CONFIG_SWSCALE_ALPHA
2210 *yuv2packedX = yuv2abgr32_full_X_c;
2212 #endif /* CONFIG_SWSCALE_ALPHA */
2214 *yuv2packedX = yuv2xbgr32_full_X_c;
2216 #endif /* !CONFIG_SMALL */
2219 *yuv2packedX = yuv2rgb24_full_X_c;
2222 *yuv2packedX = yuv2bgr24_full_X_c;
/* Non-full-chroma packed outputs: each format gets the 1-tap (_1),
 * bilinear 2-tap (_2) and general (_X) writer where available. */
2226 switch (dstFormat) {
2227 case PIX_FMT_GRAY16BE:
2228 *yuv2packed1 = yuv2gray16BE_1_c;
2229 *yuv2packed2 = yuv2gray16BE_2_c;
2230 *yuv2packedX = yuv2gray16BE_X_c;
2232 case PIX_FMT_GRAY16LE:
2233 *yuv2packed1 = yuv2gray16LE_1_c;
2234 *yuv2packed2 = yuv2gray16LE_2_c;
2235 *yuv2packedX = yuv2gray16LE_X_c;
2237 case PIX_FMT_MONOWHITE:
2238 *yuv2packed1 = yuv2monowhite_1_c;
2239 *yuv2packed2 = yuv2monowhite_2_c;
2240 *yuv2packedX = yuv2monowhite_X_c;
2242 case PIX_FMT_MONOBLACK:
2243 *yuv2packed1 = yuv2monoblack_1_c;
2244 *yuv2packed2 = yuv2monoblack_2_c;
2245 *yuv2packedX = yuv2monoblack_X_c;
2247 case PIX_FMT_YUYV422:
2248 *yuv2packed1 = yuv2yuyv422_1_c;
2249 *yuv2packed2 = yuv2yuyv422_2_c;
2250 *yuv2packedX = yuv2yuyv422_X_c;
2252 case PIX_FMT_UYVY422:
2253 *yuv2packed1 = yuv2uyvy422_1_c;
2254 *yuv2packed2 = yuv2uyvy422_2_c;
2255 *yuv2packedX = yuv2uyvy422_X_c;
2257 case PIX_FMT_RGB48LE:
2258 *yuv2packed1 = yuv2rgb48le_1_c;
2259 *yuv2packed2 = yuv2rgb48le_2_c;
2260 *yuv2packedX = yuv2rgb48le_X_c;
2262 case PIX_FMT_RGB48BE:
2263 *yuv2packed1 = yuv2rgb48be_1_c;
2264 *yuv2packed2 = yuv2rgb48be_2_c;
2265 *yuv2packedX = yuv2rgb48be_X_c;
2267 case PIX_FMT_BGR48LE:
2268 *yuv2packed1 = yuv2bgr48le_1_c;
2269 *yuv2packed2 = yuv2bgr48le_2_c;
2270 *yuv2packedX = yuv2bgr48le_X_c;
2272 case PIX_FMT_BGR48BE:
2273 *yuv2packed1 = yuv2bgr48be_1_c;
2274 *yuv2packed2 = yuv2bgr48be_2_c;
2275 *yuv2packedX = yuv2bgr48be_X_c;
/* 32-bit RGB/BGR: the RGBA/RGBX split presumably depends on whether
 * the source carries alpha (guards elided) — TODO confirm upstream. */
2280 *yuv2packed1 = yuv2rgb32_1_c;
2281 *yuv2packed2 = yuv2rgb32_2_c;
2282 *yuv2packedX = yuv2rgb32_X_c;
2284 #if CONFIG_SWSCALE_ALPHA
2286 *yuv2packed1 = yuv2rgba32_1_c;
2287 *yuv2packed2 = yuv2rgba32_2_c;
2288 *yuv2packedX = yuv2rgba32_X_c;
2290 #endif /* CONFIG_SWSCALE_ALPHA */
2292 *yuv2packed1 = yuv2rgbx32_1_c;
2293 *yuv2packed2 = yuv2rgbx32_2_c;
2294 *yuv2packedX = yuv2rgbx32_X_c;
2296 #endif /* !CONFIG_SMALL */
2298 case PIX_FMT_RGB32_1:
2299 case PIX_FMT_BGR32_1:
2301 *yuv2packed1 = yuv2rgb32_1_1_c;
2302 *yuv2packed2 = yuv2rgb32_1_2_c;
2303 *yuv2packedX = yuv2rgb32_1_X_c;
2305 #if CONFIG_SWSCALE_ALPHA
2307 *yuv2packed1 = yuv2rgba32_1_1_c;
2308 *yuv2packed2 = yuv2rgba32_1_2_c;
2309 *yuv2packedX = yuv2rgba32_1_X_c;
2311 #endif /* CONFIG_SWSCALE_ALPHA */
2313 *yuv2packed1 = yuv2rgbx32_1_1_c;
2314 *yuv2packed2 = yuv2rgbx32_1_2_c;
2315 *yuv2packedX = yuv2rgbx32_1_X_c;
2317 #endif /* !CONFIG_SMALL */
2320 *yuv2packed1 = yuv2rgb24_1_c;
2321 *yuv2packed2 = yuv2rgb24_2_c;
2322 *yuv2packedX = yuv2rgb24_X_c;
2325 *yuv2packed1 = yuv2bgr24_1_c;
2326 *yuv2packed2 = yuv2bgr24_2_c;
2327 *yuv2packedX = yuv2bgr24_X_c;
/* 16/15/12-bit packed RGB share one writer per depth; RGB-vs-BGR and
 * endianness are presumably resolved inside the writer via context. */
2329 case PIX_FMT_RGB565LE:
2330 case PIX_FMT_RGB565BE:
2331 case PIX_FMT_BGR565LE:
2332 case PIX_FMT_BGR565BE:
2333 *yuv2packed1 = yuv2rgb16_1_c;
2334 *yuv2packed2 = yuv2rgb16_2_c;
2335 *yuv2packedX = yuv2rgb16_X_c;
2337 case PIX_FMT_RGB555LE:
2338 case PIX_FMT_RGB555BE:
2339 case PIX_FMT_BGR555LE:
2340 case PIX_FMT_BGR555BE:
2341 *yuv2packed1 = yuv2rgb15_1_c;
2342 *yuv2packed2 = yuv2rgb15_2_c;
2343 *yuv2packedX = yuv2rgb15_X_c;
2345 case PIX_FMT_RGB444LE:
2346 case PIX_FMT_RGB444BE:
2347 case PIX_FMT_BGR444LE:
2348 case PIX_FMT_BGR444BE:
2349 *yuv2packed1 = yuv2rgb12_1_c;
2350 *yuv2packed2 = yuv2rgb12_2_c;
2351 *yuv2packedX = yuv2rgb12_X_c;
2355 *yuv2packed1 = yuv2rgb8_1_c;
2356 *yuv2packed2 = yuv2rgb8_2_c;
2357 *yuv2packedX = yuv2rgb8_X_c;
2361 *yuv2packed1 = yuv2rgb4_1_c;
2362 *yuv2packed2 = yuv2rgb4_2_c;
2363 *yuv2packedX = yuv2rgb4_X_c;
2365 case PIX_FMT_RGB4_BYTE:
2366 case PIX_FMT_BGR4_BYTE:
2367 *yuv2packed1 = yuv2rgb4b_1_c;
2368 *yuv2packed2 = yuv2rgb4b_2_c;
2369 *yuv2packedX = yuv2rgb4b_X_c;
2375 #define DEBUG_SWSCALE_BUFFERS 0
2376 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main C scaling loop. For each destination line it first horizontally
 * scales the source lines it needs into the luma/chroma ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf), then vertically
 * scales those buffered lines and writes the output via the
 * yuv2yuv*/yuv2packed* function pointers. Slice-based: may be called
 * repeatedly with consecutive srcSliceY/srcSliceH windows and buffers
 * lines it cannot yet output.
 * Returns the number of destination lines written (dstY - lastDstY).
 * NOTE(review): this listing is elided — declarations of dstY,
 * lastDstY, enough_lines and some braces are missing between the
 * numbered lines; comments describe only what is visible.
 */
2378 static int swScale(SwsContext *c, const uint8_t* src[],
2379 int srcStride[], int srcSliceY,
2380 int srcSliceH, uint8_t* dst[], int dstStride[])
2382 /* load a few things into local vars to make the code more readable? and faster */
2383 const int srcW= c->srcW;
2384 const int dstW= c->dstW;
2385 const int dstH= c->dstH;
2386 const int chrDstW= c->chrDstW;
2387 const int chrSrcW= c->chrSrcW;
2388 const int lumXInc= c->lumXInc;
2389 const int chrXInc= c->chrXInc;
2390 const enum PixelFormat dstFormat= c->dstFormat;
2391 const int flags= c->flags;
2392 int16_t *vLumFilterPos= c->vLumFilterPos;
2393 int16_t *vChrFilterPos= c->vChrFilterPos;
2394 int16_t *hLumFilterPos= c->hLumFilterPos;
2395 int16_t *hChrFilterPos= c->hChrFilterPos;
2396 int16_t *vLumFilter= c->vLumFilter;
2397 int16_t *vChrFilter= c->vChrFilter;
2398 int16_t *hLumFilter= c->hLumFilter;
2399 int16_t *hChrFilter= c->hChrFilter;
2400 int32_t *lumMmxFilter= c->lumMmxFilter;
2401 int32_t *chrMmxFilter= c->chrMmxFilter;
2402 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2403 const int vLumFilterSize= c->vLumFilterSize;
2404 const int vChrFilterSize= c->vChrFilterSize;
2405 const int hLumFilterSize= c->hLumFilterSize;
2406 const int hChrFilterSize= c->hChrFilterSize;
2407 int16_t **lumPixBuf= c->lumPixBuf;
2408 int16_t **chrUPixBuf= c->chrUPixBuf;
2409 int16_t **chrVPixBuf= c->chrVPixBuf;
2410 int16_t **alpPixBuf= c->alpPixBuf;
2411 const int vLumBufSize= c->vLumBufSize;
2412 const int vChrBufSize= c->vChrBufSize;
2413 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Slice window expressed in chroma lines; the -((-x)>>s) form rounds
 * the height up instead of down. */
2414 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2415 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2417 uint32_t *pal=c->pal_yuv;
2418 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2419 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2420 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2421 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2422 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* >8-bit sources get per-line dither tables instead of the flat 64s. */
2423 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2425 /* vars which will change and which we need to store back in the context */
2427 int lumBufIndex= c->lumBufIndex;
2428 int chrBufIndex= c->chrBufIndex;
2429 int lastInLumBuf= c->lastInLumBuf;
2430 int lastInChrBuf= c->lastInChrBuf;
/* Packed input: replicate plane-0 stride onto the other planes
 * (the matching src[] assignments appear elided in this listing). */
2432 if (isPacked(c->srcFormat)) {
2440 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma source lines by widening the chroma strides. */
2442 srcStride[1]<<= c->vChrDrop;
2443 srcStride[2]<<= c->vChrDrop;
2445 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2446 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2447 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2448 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2449 srcSliceY, srcSliceH, dstY, dstH);
2450 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2451 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Unaligned destination strides defeat the SIMD paths; warn once. */
2453 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2454 static int warnedAlready=0; //FIXME move this into the context perhaps
2455 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2456 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2457 " ->cannot do aligned memory accesses anymore\n");
2462 /* Note the user might start scaling the picture in the middle so this
2463 will not get executed. This is not really intended but works
2464 currently, so people might do it. */
2465 if (srcSliceY ==0) {
2473 if (!should_dither) {
2474 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main per-output-line loop. */
2478 for (;dstY < dstH; dstY++) {
2479 const int chrDstY= dstY>>c->chrDstVSubSample;
/* Output pointers for this line; alpha only when an alpha buffer
 * exists. */
2480 uint8_t *dest[4] = {
2481 dst[0] + dstStride[0] * dstY,
2482 dst[1] + dstStride[1] * chrDstY,
2483 dst[2] + dstStride[2] * chrDstY,
2484 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* Range of source lines the vertical filters need for this line. */
2487 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2488 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2489 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2490 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2491 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2492 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2495 //handle holes (FAST_BILINEAR & weird filters)
2496 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2497 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2498 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2499 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2501 DEBUG_BUFFERS("dstY: %d\n", dstY);
2502 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2503 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2504 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2505 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2507 // Do we have enough lines in this slice to output the dstY line
2508 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough: buffer every remaining input line and bail out below. */
2510 if (!enough_lines) {
2511 lastLumSrcY = srcSliceY + srcSliceH - 1;
2512 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2513 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2514 lastLumSrcY, lastChrSrcY);
2517 //Do horizontal scaling
2518 while(lastInLumBuf < lastLumSrcY) {
2519 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2520 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2522 assert(lumBufIndex < 2*vLumBufSize);
2523 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2524 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2525 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2526 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha shares the luma scaler and buffer index. */
2529 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2530 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2531 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2535 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2536 lumBufIndex, lastInLumBuf);
2538 while(lastInChrBuf < lastChrSrcY) {
2539 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2540 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2542 assert(chrBufIndex < 2*vChrBufSize);
2543 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2544 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2545 //FIXME replace parameters through context struct (some at least)
2547 if (c->needs_hcscale)
2548 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2549 chrDstW, src1, src2, chrSrcW, chrXInc,
2550 hChrFilter, hChrFilterPos, hChrFilterSize,
2551 formatConvBuffer, pal);
2553 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2554 chrBufIndex, lastInChrBuf);
2556 //wrap buf index around to stay inside the ring buffer
2557 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2558 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2560 break; //we can't output a dstY line so let's try with the next slice
/* Refresh the MMX filter tables for the vertical pass. */
2563 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2565 if (should_dither) {
2566 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2567 c->lumDither8 = dither_8x8_128[dstY & 7];
/* Near the bottom: fall back to the C output functions so SIMD code
 * cannot write past the end of the destination buffers. */
2569 if (dstY >= dstH-2) {
2570 // hmm looks like we can't use MMX here without overwriting this array's tail
2571 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2572 &yuv2packed1, &yuv2packed2,
/* Read pointers into the ring buffers, positioned so that index 0 is
 * the first line the vertical filter needs. */
2577 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2578 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2579 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2580 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2581 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
/* Skip chroma output on lines the subsampled dst doesn't have. */
2582 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2583 if ((dstY&chrSkipMask) || isGray(dstFormat))
2584 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2585 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2586 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2587 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2588 dest, dstW, chrDstW);
2589 } else { //General YV12
2590 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2591 lumSrcPtr, vLumFilterSize,
2592 vChrFilter + chrDstY * vChrFilterSize,
2593 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2594 alpSrcPtr, dest, dstW, chrDstW);
2597 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2598 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2599 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2600 int chrAlpha = vChrFilter[2 * dstY + 1];
2601 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2602 alpPixBuf ? *alpSrcPtr : NULL,
2603 dest[0], dstW, chrAlpha, dstY);
2604 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2605 int lumAlpha = vLumFilter[2 * dstY + 1];
2606 int chrAlpha = vChrFilter[2 * dstY + 1];
/* Duplicate the 16-bit coefficient into both halves of a 32-bit
 * word for the MMX filter tables. */
2608 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2610 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2611 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2612 alpPixBuf ? alpSrcPtr : NULL,
2613 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2614 } else { //general RGB
2615 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2616 lumSrcPtr, vLumFilterSize,
2617 vChrFilter + dstY * vChrFilterSize,
2618 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2619 alpSrcPtr, dest[0], dstW, dstY);
/* Dst wants alpha but src has none: fill the alpha plane opaque. */
2625 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2626 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush non-temporal stores issued by the MMX2 output code. */
2629 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2630 __asm__ volatile("sfence":::"memory");
2634 /* store changed local vars back in the context */
2636 c->lumBufIndex= lumBufIndex;
2637 c->chrBufIndex= chrBufIndex;
2638 c->lastInLumBuf= lastInLumBuf;
2639 c->lastInChrBuf= lastInChrBuf;
2641 return dstY - lastDstY;
/*
 * One-time initialization of the C function pointers in SwsContext:
 * output writers (via find_c_packed_planar_out_funcs), per-source-
 * format input unpackers (chrToYV12 / lumToYV12 / alpToYV12),
 * horizontal scalers (hScale and the fast-bilinear variants), and the
 * luma/chroma range-conversion hooks.
 * NOTE(review): this listing appears elided — several case labels and
 * #if/#else preprocessor guards are missing between the numbered
 * lines; comments describe only what is visible.
 */
2644 static av_cold void sws_init_swScale_c(SwsContext *c)
2646 enum PixelFormat srcFormat = c->srcFormat;
2648 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2649 &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converter: turn the source format's chroma into the
 * internal planar representation. */
2652 c->chrToYV12 = NULL;
2654 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2655 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2656 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2657 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2661 case PIX_FMT_BGR4_BYTE:
2662 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Byte-swap >8-bit planar formats to native order; the two groups
 * below are presumably guarded by a host-endianness #if whose lines
 * are elided here — confirm upstream. */
2664 case PIX_FMT_YUV444P9LE:
2665 case PIX_FMT_YUV420P9LE:
2666 case PIX_FMT_YUV422P10LE:
2667 case PIX_FMT_YUV444P10LE:
2668 case PIX_FMT_YUV420P10LE:
2669 case PIX_FMT_YUV420P16LE:
2670 case PIX_FMT_YUV422P16LE:
2671 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2673 case PIX_FMT_YUV444P9BE:
2674 case PIX_FMT_YUV420P9BE:
2675 case PIX_FMT_YUV444P10BE:
2676 case PIX_FMT_YUV422P10BE:
2677 case PIX_FMT_YUV420P10BE:
2678 case PIX_FMT_YUV420P16BE:
2679 case PIX_FMT_YUV422P16BE:
2680 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB input with horizontally subsampled chroma output: the *_half_
 * readers are used; otherwise the plain readers below. */
2683 if (c->chrSrcHSubSample) {
2685 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2686 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2687 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2688 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2689 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2690 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2691 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2692 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2693 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2694 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2695 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2696 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2697 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2698 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2699 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2700 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2701 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2702 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2706 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2707 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2708 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2709 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2710 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2711 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2712 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2713 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2714 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2715 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2716 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2717 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2718 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2719 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2720 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2721 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2722 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2723 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma (and alpha) input converters. */
2727 c->lumToYV12 = NULL;
2728 c->alpToYV12 = NULL;
2729 switch (srcFormat) {
2731 case PIX_FMT_YUV444P9LE:
2732 case PIX_FMT_YUV420P9LE:
2733 case PIX_FMT_YUV444P10LE:
2734 case PIX_FMT_YUV422P10LE:
2735 case PIX_FMT_YUV420P10LE:
2736 case PIX_FMT_YUV420P16LE:
2737 case PIX_FMT_YUV422P16LE:
2738 case PIX_FMT_YUV444P16LE:
2739 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2741 case PIX_FMT_YUV444P9BE:
2742 case PIX_FMT_YUV420P9BE:
2743 case PIX_FMT_YUV444P10BE:
2744 case PIX_FMT_YUV422P10BE:
2745 case PIX_FMT_YUV420P10BE:
2746 case PIX_FMT_YUV420P16BE:
2747 case PIX_FMT_YUV422P16BE:
2748 case PIX_FMT_YUV444P16BE:
2749 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2751 case PIX_FMT_YUYV422 :
2752 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2753 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2754 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2755 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2756 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2757 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2758 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2759 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2760 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2761 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2762 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2763 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2767 case PIX_FMT_BGR4_BYTE:
2768 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2769 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2770 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2771 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2772 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2773 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2774 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2775 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2776 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2777 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2778 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extractor, only set for formats that carry alpha (guard
 * presumably elided here). */
2781 switch (srcFormat) {
2783 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2785 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2786 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scaler selection: picked by source and destination bit
 * depth; the fast-bilinear path only applies to 8-bit sources with
 * <=10-bit destinations. */
2790 if (c->srcBpc == 8) {
2791 if (c->dstBpc <= 10) {
2792 c->hScale = hScale8To15_c;
2793 if (c->flags & SWS_FAST_BILINEAR) {
2794 c->hyscale_fast = hyscale_fast_c;
2795 c->hcscale_fast = hcscale_fast_c;
2798 c->hScale = hScale8To19_c;
2801 c->hScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range conversion (JPEG/full-range <-> MPEG/limited-range), only for
 * non-RGB destinations; 16-bit intermediates get the *16 variants. */
2804 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2805 if (c->dstBpc <= 10) {
2807 c->lumConvertRange = lumRangeFromJpeg_c;
2808 c->chrConvertRange = chrRangeFromJpeg_c;
2810 c->lumConvertRange = lumRangeToJpeg_c;
2811 c->chrConvertRange = chrRangeToJpeg_c;
2815 c->lumConvertRange = lumRangeFromJpeg16_c;
2816 c->chrConvertRange = chrRangeFromJpeg16_c;
2818 c->lumConvertRange = lumRangeToJpeg16_c;
2819 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray/mono sources have no chroma worth scaling. */
2824 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2825 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2826 c->needs_hcscale = 1;
2829 SwsFunc ff_getSwsFunc(SwsContext *c)
2831 sws_init_swScale_c(c);
2834 ff_sws_init_swScale_mmx(c);
2836 ff_sws_init_swScale_altivec(c);