2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients.
 * Each is the BT.601 weight scaled to studio range (219/255 for luma,
 * 224/255 for chroma) and quantized to RGB2YUV_SHIFT fractional bits,
 * with +0.5 for round-to-nearest. Naming: first letter = source channel
 * (R/G/B), second = destination component (Y/U/V). */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB->YUV coefficient sets selectable by colorspace index:
 * each row is {GY, BY, RY, BU, GU_half?, RU, GV, BV, RV_half?} per the
 * trailing comments — ITU-709, ITU-601/SMPTE-170M (the default, repeated
 * for several index values), FCC and SMPTE-240M.
 * NOTE(review): the closing "};" of this array is not visible in this
 * extract — the chunk has interior lines missing; restore from upstream. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* 2x2 ordered-dither matrix, values 0..3 (used when dropping to ~6-bit
 * channels, e.g. the green channel of RGB565). Rows repeated to 8 bytes
 * so a full aligned 8-byte load covers one dither row. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither matrix, values 0..6 (used for 5-bit channels,
 * e.g. red/blue of RGB565/RGB555). */
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither matrix, values 0..15 (used for 4-bit channels,
 * e.g. RGB444/BGR444 output). Non-static: referenced by arch-specific
 * code elsewhere in the library. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix, values 0..31 (used for 3-bit channels of
 * RGB8/BGR8 output). Non-static for use by arch-specific code. */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither matrix, values 0..72 (used for 2-bit channels,
 * e.g. the blue channel of RGB8, and for RGB4/BGR4 style output). */
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither matrix, values 0..~217 (1-bit / monochrome output).
 * NOTE(review): dither_8x8_220 appears FOUR times in this extract (here and
 * below as gamma-correcting variants). Upstream these alternatives are
 * selected by #if/#elif preprocessor guards that are missing from this
 * extract — as written this would be a redefinition; restore the guards
 * from the upstream file rather than deleting any variant. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative dither_8x8_220 variant biased toward a display gamma of 1.5.
 * NOTE(review): normally disabled by a preprocessor guard not visible in
 * this extract (see note on the first dither_8x8_220 definition). */
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
/* Alternative dither_8x8_220 variant biased toward a display gamma of 2.0.
 * NOTE(review): normally disabled by a preprocessor guard not visible in
 * this extract (see note on the first dither_8x8_220 definition). */
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
/* Alternative dither_8x8_220 variant biased toward a display gamma of 2.5.
 * NOTE(review): normally disabled by a preprocessor guard not visible in
 * this extract (see note on the first dither_8x8_220 definition). */
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* Vertical scaling / multi-tap filtering to planar YUV output with more
 * than 8 bits per sample (9..16), optionally big- or little-endian.
 * Each output sample is the dot product of lumFilterSize (resp.
 * chrFilterSize) 32-bit source rows with 16-bit filter coefficients,
 * shifted down by (15 + 16 - output_bits) and clipped to output_bits.
 * dest[0..3] = Y, U, V and (when compiled with alpha support) A planes.
 * NOTE(review): the function's braces and the #else/#endif halves of the
 * output_pixel macro (LE path) are partially missing from this extract. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int32_t **chrUSrc, const int32_t **chrVSrc,
201 int chrFilterSize, const int32_t **alpSrc,
202 uint16_t *dest[4], int dstW, int chrDstW,
203 int big_endian, int output_bits)
205 //FIXME Optimize (just quickly written not optimized..)
207 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
208 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
209 int shift = 15 + 16 - output_bits;
/* output_pixel: store one clipped sample; big_endian selects AV_WB16 vs
 * AV_WL16, and 16-bit output uses the full uint16 clip instead of uintp2. */
211 #define output_pixel(pos, val) \
213 if (output_bits == 16) { \
214 AV_WB16(pos, av_clip_uint16(val >> shift)); \
216 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
219 if (output_bits == 16) { \
220 AV_WL16(pos, av_clip_uint16(val >> shift)); \
222 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Luma plane: rounding bias of 1 << (30-output_bits) then filter taps. */
225 for (i = 0; i < dstW; i++) {
226 int val = 1 << (30-output_bits);
229 for (j = 0; j < lumFilterSize; j++)
230 val += lumSrc[j][i] * lumFilter[j];
232 output_pixel(&yDest[i], val);
/* Chroma planes (U and V interleaved in one loop over chrDstW). */
236 for (i = 0; i < chrDstW; i++) {
237 int u = 1 << (30-output_bits);
238 int v = 1 << (30-output_bits);
241 for (j = 0; j < chrFilterSize; j++) {
242 u += chrUSrc[j][i] * chrFilter[j];
243 v += chrVSrc[j][i] * chrFilter[j];
246 output_pixel(&uDest[i], u);
247 output_pixel(&vDest[i], v);
/* Alpha plane, only when alpha support is compiled in and requested. */
251 if (CONFIG_SWSCALE_ALPHA && aDest) {
252 for (i = 0; i < dstW; i++) {
253 int val = 1 << (30-output_bits);
256 for (j = 0; j < lumFilterSize; j++)
257 val += alpSrc[j][i] * lumFilter[j];
259 output_pixel(&aDest[i], val);
/* Instantiation macro: generates yuv2yuvX<bits><BE|LE>_c wrappers that
 * reinterpret the generic int16_t** source pointers as int32_t** (the
 * >8-bit pipeline stores 32-bit intermediates) and forward to
 * yuv2yuvX16_c_template with compile-time endianness and bit depth,
 * letting the always-inline template specialize per format. */
265 #define yuv2NBPS(bits, BE_LE, is_be) \
266 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
267 const int16_t **_lumSrc, int lumFilterSize, \
268 const int16_t *chrFilter, const int16_t **_chrUSrc, \
269 const int16_t **_chrVSrc, \
270 int chrFilterSize, const int16_t **_alpSrc, \
271 uint8_t *_dest[4], int dstW, int chrDstW) \
273 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
274 **chrUSrc = (const int32_t **) _chrUSrc, \
275 **chrVSrc = (const int32_t **) _chrVSrc, \
276 **alpSrc = (const int32_t **) _alpSrc; \
277 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
278 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
279 alpSrc, (uint16_t **) _dest, \
280 dstW, chrDstW, is_be, bits); \
/* Vertical multi-tap filtering to 8-bit planar YUV(A): each output byte is
 * the filtered sum of int16_t source rows, shifted down by 19 and clipped
 * to uint8. Handles Y, U/V and (optionally) A planes from dest[0..3].
 * NOTE(review): loop-variable declarations and the accumulator
 * initializations (the "val/u/v = 1 << 18" style lines upstream) are
 * missing from this extract. */
289 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
290 const int16_t **lumSrc, int lumFilterSize,
291 const int16_t *chrFilter, const int16_t **chrUSrc,
292 const int16_t **chrVSrc,
293 int chrFilterSize, const int16_t **alpSrc,
294 uint8_t *dest[4], int dstW, int chrDstW)
296 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
297 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
300 //FIXME Optimize (just quickly written not optimized..)
301 for (i=0; i<dstW; i++) {
304 for (j=0; j<lumFilterSize; j++)
305 val += lumSrc[j][i] * lumFilter[j];
307 yDest[i]= av_clip_uint8(val>>19);
311 for (i=0; i<chrDstW; i++) {
315 for (j=0; j<chrFilterSize; j++) {
316 u += chrUSrc[j][i] * chrFilter[j];
317 v += chrVSrc[j][i] * chrFilter[j];
320 uDest[i]= av_clip_uint8(u>>19);
321 vDest[i]= av_clip_uint8(v>>19);
324 if (CONFIG_SWSCALE_ALPHA && aDest)
325 for (i=0; i<dstW; i++) {
328 for (j=0; j<lumFilterSize; j++)
329 val += alpSrc[j][i] * lumFilter[j];
331 aDest[i]= av_clip_uint8(val>>19);
/* Unfiltered (single-tap, 1:1 vertical) output to 8-bit planar YUV(A):
 * each 15-bit intermediate sample is rounded (+64) and shifted down by 7,
 * then clipped to uint8. Used when no vertical filtering is needed. */
335 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
336 const int16_t *chrUSrc, const int16_t *chrVSrc,
337 const int16_t *alpSrc,
338 uint8_t *dest[4], int dstW, int chrDstW)
340 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
341 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
344 for (i=0; i<dstW; i++) {
345 int val= (lumSrc[i]+64)>>7;
346 yDest[i]= av_clip_uint8(val);
350 for (i=0; i<chrDstW; i++) {
351 int u=(chrUSrc[i]+64)>>7;
352 int v=(chrVSrc[i]+64)>>7;
353 uDest[i]= av_clip_uint8(u);
354 vDest[i]= av_clip_uint8(v);
357 if (CONFIG_SWSCALE_ALPHA && aDest)
358 for (i=0; i<dstW; i++) {
359 int val= (alpSrc[i]+64)>>7;
360 aDest[i]= av_clip_uint8(val);
/* Vertical multi-tap filtering to NV12/NV21 output: planar 8-bit Y in
 * dest[0], plus a single interleaved chroma plane in dest[1]. NV12 stores
 * U at even bytes and V at odd bytes; NV21 (the else branch) swaps them.
 * NOTE(review): loop-variable declarations and accumulator initializations
 * are missing from this extract, as is the else line between the two
 * chroma loops. */
364 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
365 const int16_t **lumSrc, int lumFilterSize,
366 const int16_t *chrFilter, const int16_t **chrUSrc,
367 const int16_t **chrVSrc, int chrFilterSize,
368 const int16_t **alpSrc, uint8_t *dest[4],
369 int dstW, int chrDstW)
371 uint8_t *yDest = dest[0], *uDest = dest[1];
372 enum PixelFormat dstFormat = c->dstFormat;
374 //FIXME Optimize (just quickly written not optimized..)
376 for (i=0; i<dstW; i++) {
379 for (j=0; j<lumFilterSize; j++)
380 val += lumSrc[j][i] * lumFilter[j];
382 yDest[i]= av_clip_uint8(val>>19);
388 if (dstFormat == PIX_FMT_NV12)
389 for (i=0; i<chrDstW; i++) {
393 for (j=0; j<chrFilterSize; j++) {
394 u += chrUSrc[j][i] * chrFilter[j];
395 v += chrVSrc[j][i] * chrFilter[j];
398 uDest[2*i]= av_clip_uint8(u>>19);
399 uDest[2*i+1]= av_clip_uint8(v>>19);
402 for (i=0; i<chrDstW; i++) {
406 for (j=0; j<chrFilterSize; j++) {
407 u += chrUSrc[j][i] * chrFilter[j];
408 v += chrVSrc[j][i] * chrFilter[j];
/* NV21: V first, then U. */
411 uDest[2*i]= av_clip_uint8(v>>19);
412 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel: store a 16-bit gray sample with the endianness implied by
 * the target format (GRAY16BE vs GRAY16LE).
 * NOTE(review): the macro's BE store line, the else/LE branch and the
 * closing of the macro are missing from this extract. */
416 #define output_pixel(pos, val) \
417 if (target == PIX_FMT_GRAY16BE) { \
/* Multi-tap vertical filtering to 16-bit grayscale: two luma samples per
 * iteration; chroma/alpha inputs are ignored. The 0x10000 test cheaply
 * detects overflow past 16 bits on either sample before clipping. */
423 static av_always_inline void
424 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
425 const int32_t **lumSrc, int lumFilterSize,
426 const int16_t *chrFilter, const int32_t **chrUSrc,
427 const int32_t **chrVSrc, int chrFilterSize,
428 const int32_t **alpSrc, uint16_t *dest, int dstW,
429 int y, enum PixelFormat target)
433 for (i = 0; i < (dstW >> 1); i++) {
438 for (j = 0; j < lumFilterSize; j++) {
439 Y1 += lumSrc[j][i * 2] * lumFilter[j];
440 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
444 if ((Y1 | Y2) & 0x10000) {
445 Y1 = av_clip_uint16(Y1);
446 Y2 = av_clip_uint16(Y2);
448 output_pixel(&dest[i * 2 + 0], Y1);
449 output_pixel(&dest[i * 2 + 1], Y2);
/* Two-tap (bilinear between two source rows) vertical blend to 16-bit
 * grayscale: yalpha/yalpha1 are 12-bit complementary weights (sum 4095),
 * result shifted down by 15. Chroma and alpha buffers are unused. */
453 static av_always_inline void
454 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
455 const int32_t *ubuf[2], const int32_t *vbuf[2],
456 const int32_t *abuf[2], uint16_t *dest, int dstW,
457 int yalpha, int uvalpha, int y,
458 enum PixelFormat target)
460 int yalpha1 = 4095 - yalpha;
462 const int32_t *buf0 = buf[0], *buf1 = buf[1];
464 for (i = 0; i < (dstW >> 1); i++) {
465 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
466 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
468 output_pixel(&dest[i * 2 + 0], Y1);
469 output_pixel(&dest[i * 2 + 1], Y2);
/* Single-row (no vertical blend) output to 16-bit grayscale: each source
 * sample is doubled (<< 1) to reach the full output scale, two samples
 * per iteration. Chroma and alpha are unused. */
473 static av_always_inline void
474 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
475 const int32_t *ubuf[2], const int32_t *vbuf[2],
476 const int32_t *abuf0, uint16_t *dest, int dstW,
477 int uvalpha, int y, enum PixelFormat target)
481 for (i = 0; i < (dstW >> 1); i++) {
482 int Y1 = buf0[i * 2 ] << 1;
483 int Y2 = buf0[i * 2 + 1] << 1;
485 output_pixel(&dest[i * 2 + 0], Y1);
486 output_pixel(&dest[i * 2 + 1], Y2);
/* Instantiation macro for >8-bit packed-output writers: generates the
 * three entry points (_X_c multi-tap, _2_c two-tap blend, _1_c single-row)
 * for a given template/format pair. Each wrapper casts the generic
 * int16_t sources to the int32_t intermediates used by the deep-color
 * pipeline, casts dest to uint16_t, and forwards to the always-inline
 * template with the format as a compile-time constant. */
492 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
493 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
494 const int16_t **_lumSrc, int lumFilterSize, \
495 const int16_t *chrFilter, const int16_t **_chrUSrc, \
496 const int16_t **_chrVSrc, int chrFilterSize, \
497 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
500 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
501 **chrUSrc = (const int32_t **) _chrUSrc, \
502 **chrVSrc = (const int32_t **) _chrVSrc, \
503 **alpSrc = (const int32_t **) _alpSrc; \
504 uint16_t *dest = (uint16_t *) _dest; \
505 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
506 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
507 alpSrc, dest, dstW, y, fmt); \
510 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
511 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
512 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
513 int yalpha, int uvalpha, int y) \
515 const int32_t **buf = (const int32_t **) _buf, \
516 **ubuf = (const int32_t **) _ubuf, \
517 **vbuf = (const int32_t **) _vbuf, \
518 **abuf = (const int32_t **) _abuf; \
519 uint16_t *dest = (uint16_t *) _dest; \
520 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
521 dest, dstW, yalpha, uvalpha, y, fmt); \
524 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
525 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
526 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
527 int uvalpha, int y) \
529 const int32_t *buf0 = (const int32_t *) _buf0, \
530 **ubuf = (const int32_t **) _ubuf, \
531 **vbuf = (const int32_t **) _vbuf, \
532 *abuf0 = (const int32_t *) _abuf0, \
533 uint16_t *dest = (uint16_t *) _dest; \
534 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
535 dstW, uvalpha, y, fmt); \
/* gray16 writers for both endiannesses. */
538 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
539 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel: emit one packed byte of 8 monochrome pixels; MONOBLACK
 * and MONOWHITE differ in bit polarity (the else/inversion branch is
 * missing from this extract). */
541 #define output_pixel(pos, acc) \
542 if (target == PIX_FMT_MONOBLACK) { \
/* Multi-tap vertical filtering to 1-bit monochrome: luma is filtered,
 * dithered with a row of dither_8x8_220, and thresholded through the
 * g = table_gU[128]+table_gV[128] lookup (the neutral-chroma green table),
 * packing bits MSB-first via acc += acc + bit. */
548 static av_always_inline void
549 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
550 const int16_t **lumSrc, int lumFilterSize,
551 const int16_t *chrFilter, const int16_t **chrUSrc,
552 const int16_t **chrVSrc, int chrFilterSize,
553 const int16_t **alpSrc, uint8_t *dest, int dstW,
554 int y, enum PixelFormat target)
556 const uint8_t * const d128=dither_8x8_220[y&7];
557 uint8_t *g = c->table_gU[128] + c->table_gV[128];
561 for (i = 0; i < dstW - 1; i += 2) {
566 for (j = 0; j < lumFilterSize; j++) {
567 Y1 += lumSrc[j][i] * lumFilter[j];
568 Y2 += lumSrc[j][i+1] * lumFilter[j];
572 if ((Y1 | Y2) & 0x100) {
573 Y1 = av_clip_uint8(Y1);
574 Y2 = av_clip_uint8(Y2);
576 acc += acc + g[Y1 + d128[(i + 0) & 7]];
577 acc += acc + g[Y2 + d128[(i + 1) & 7]];
/* Upstream flushes acc to dest only every 8 pixels; the guard for that is
 * among the lines missing from this extract. */
579 output_pixel(*dest++, acc);
/* Two-tap vertical blend to 1-bit monochrome: eight pixels per iteration,
 * each blended between buf0/buf1 with 12-bit weights (>> 19 to 8 bits),
 * dithered with dither_8x8_220 and packed MSB-first into one byte. */
584 static av_always_inline void
585 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
586 const int16_t *ubuf[2], const int16_t *vbuf[2],
587 const int16_t *abuf[2], uint8_t *dest, int dstW,
588 int yalpha, int uvalpha, int y,
589 enum PixelFormat target)
591 const int16_t *buf0 = buf[0], *buf1 = buf[1];
592 const uint8_t * const d128 = dither_8x8_220[y & 7];
593 uint8_t *g = c->table_gU[128] + c->table_gV[128];
594 int yalpha1 = 4095 - yalpha;
597 for (i = 0; i < dstW - 7; i += 8) {
598 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
599 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
600 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
601 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
602 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
603 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
604 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
605 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
606 output_pixel(*dest++, acc);
/* Single-row output to 1-bit monochrome: eight pixels per iteration,
 * each source sample shifted from 15-bit to 8-bit (>> 7), dithered and
 * packed MSB-first into one byte. Chroma and alpha are unused. */
610 static av_always_inline void
611 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
612 const int16_t *ubuf[2], const int16_t *vbuf[2],
613 const int16_t *abuf0, uint8_t *dest, int dstW,
614 int uvalpha, int y, enum PixelFormat target)
616 const uint8_t * const d128 = dither_8x8_220[y & 7];
617 uint8_t *g = c->table_gU[128] + c->table_gV[128];
620 for (i = 0; i < dstW - 7; i += 8) {
621 int acc = g[(buf0[i ] >> 7) + d128[0]];
622 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
623 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
624 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
625 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
626 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
627 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
628 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
629 output_pixel(*dest++, acc);
/* Instantiation macro for 8-bit packed-output writers: like
 * YUV2PACKED16WRAPPER but without the int16->int32 pointer reinterpretation
 * — sources stay int16_t and dest stays uint8_t. Generates the _X_c,
 * _2_c and _1_c entry points for one template/format pair. */
635 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
636 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
637 const int16_t **lumSrc, int lumFilterSize, \
638 const int16_t *chrFilter, const int16_t **chrUSrc, \
639 const int16_t **chrVSrc, int chrFilterSize, \
640 const int16_t **alpSrc, uint8_t *dest, int dstW, \
643 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
644 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
645 alpSrc, dest, dstW, y, fmt); \
648 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
649 const int16_t *ubuf[2], const int16_t *vbuf[2], \
650 const int16_t *abuf[2], uint8_t *dest, int dstW, \
651 int yalpha, int uvalpha, int y) \
653 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
654 dest, dstW, yalpha, uvalpha, y, fmt); \
657 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
658 const int16_t *ubuf[2], const int16_t *vbuf[2], \
659 const int16_t *abuf0, uint8_t *dest, int dstW, \
660 int uvalpha, int y) \
662 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
663 abuf0, dest, dstW, uvalpha, \
/* Monochrome writers (white = MONOWHITE polarity, black = MONOBLACK). */
667 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
668 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels: write one 4-byte YUYV or UYVY group; YUYV puts luma at
 * even offsets, UYVY (else branch) puts chroma there. The U/V store lines
 * of each branch are missing from this extract. */
670 #define output_pixels(pos, Y1, U, Y2, V) \
671 if (target == PIX_FMT_YUYV422) { \
672 dest[pos + 0] = Y1; \
674 dest[pos + 2] = Y2; \
678 dest[pos + 1] = Y1; \
680 dest[pos + 3] = Y2; \
/* Multi-tap vertical filtering to packed 4:2:2 (YUYV/UYVY): two luma and
 * one chroma pair per iteration, >> 19 to 8 bits (shift lines missing
 * here), with a cheap combined overflow test before clipping. */
683 static av_always_inline void
684 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
685 const int16_t **lumSrc, int lumFilterSize,
686 const int16_t *chrFilter, const int16_t **chrUSrc,
687 const int16_t **chrVSrc, int chrFilterSize,
688 const int16_t **alpSrc, uint8_t *dest, int dstW,
689 int y, enum PixelFormat target)
693 for (i = 0; i < (dstW >> 1); i++) {
700 for (j = 0; j < lumFilterSize; j++) {
701 Y1 += lumSrc[j][i * 2] * lumFilter[j];
702 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
704 for (j = 0; j < chrFilterSize; j++) {
705 U += chrUSrc[j][i] * chrFilter[j];
706 V += chrVSrc[j][i] * chrFilter[j];
712 if ((Y1 | Y2 | U | V) & 0x100) {
713 Y1 = av_clip_uint8(Y1);
714 Y2 = av_clip_uint8(Y2);
715 U = av_clip_uint8(U);
716 V = av_clip_uint8(V);
718 output_pixels(4*i, Y1, U, Y2, V);
/* Two-tap vertical blend to packed 4:2:2: luma blended with yalpha,
 * chroma with uvalpha (both 12-bit complementary weights), >> 19 to
 * 8-bit values, written as one YUYV/UYVY group per iteration. */
722 static av_always_inline void
723 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
724 const int16_t *ubuf[2], const int16_t *vbuf[2],
725 const int16_t *abuf[2], uint8_t *dest, int dstW,
726 int yalpha, int uvalpha, int y,
727 enum PixelFormat target)
729 const int16_t *buf0 = buf[0], *buf1 = buf[1],
730 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
731 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
732 int yalpha1 = 4095 - yalpha;
733 int uvalpha1 = 4095 - uvalpha;
736 for (i = 0; i < (dstW >> 1); i++) {
737 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
738 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
739 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
740 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
742 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-row output to packed 4:2:2. uvalpha < 2048 means the chroma
 * phase is close enough to one source row to use it directly (>> 7);
 * otherwise the two chroma rows are averaged (sum >> 8). Luma always
 * comes straight from buf0 (>> 7). */
746 static av_always_inline void
747 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
748 const int16_t *ubuf[2], const int16_t *vbuf[2],
749 const int16_t *abuf0, uint8_t *dest, int dstW,
750 int uvalpha, int y, enum PixelFormat target)
752 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
753 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
756 if (uvalpha < 2048) {
757 for (i = 0; i < (dstW >> 1); i++) {
758 int Y1 = buf0[i * 2] >> 7;
759 int Y2 = buf0[i * 2 + 1] >> 7;
760 int U = ubuf1[i] >> 7;
761 int V = vbuf1[i] >> 7;
763 output_pixels(i * 4, Y1, U, Y2, V);
766 for (i = 0; i < (dstW >> 1); i++) {
767 int Y1 = buf0[i * 2] >> 7;
768 int Y2 = buf0[i * 2 + 1] >> 7;
769 int U = (ubuf0[i] + ubuf1[i]) >> 8;
770 int V = (vbuf0[i] + vbuf1[i]) >> 8;
772 output_pixels(i * 4, Y1, U, Y2, V);
/* Packed 4:2:2 writers for both byte orders. */
779 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
780 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B / B_R swap the red/blue channel order depending on whether the
 * target is RGB48 or BGR48; output_pixel stores one 16-bit component with
 * the target's endianness (the LE branch of the macro is missing from
 * this extract). */
782 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
783 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
784 #define output_pixel(pos, val) \
785 if (isBE(target)) { \
/* Multi-tap vertical filtering to 48-bit RGB/BGR: filters the 32-bit
 * deep-color intermediates, converts YUV->RGB with the per-context
 * fixed-point coefficients (yuv2rgb_*_coeff / y_offset), clips each
 * component to 30 bits and shifts down by 14 to 16 bits per channel.
 * The bit-width bookkeeping is traced in the inline comments below. */
791 static av_always_inline void
792 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
793 const int32_t **lumSrc, int lumFilterSize,
794 const int16_t *chrFilter, const int32_t **chrUSrc,
795 const int32_t **chrVSrc, int chrFilterSize,
796 const int32_t **alpSrc, uint16_t *dest, int dstW,
797 int y, enum PixelFormat target)
801 for (i = 0; i < (dstW >> 1); i++) {
805 int U = -128 << 23; // 19
809 for (j = 0; j < lumFilterSize; j++) {
810 Y1 += lumSrc[j][i * 2] * lumFilter[j];
811 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
813 for (j = 0; j < chrFilterSize; j++) {
814 U += chrUSrc[j][i] * chrFilter[j];
815 V += chrVSrc[j][i] * chrFilter[j];
818 // 8bit: 12+15=27; 16-bit: 12+19=31
824 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
825 Y1 -= c->yuv2rgb_y_offset;
826 Y2 -= c->yuv2rgb_y_offset;
827 Y1 *= c->yuv2rgb_y_coeff;
828 Y2 *= c->yuv2rgb_y_coeff;
831 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
833 R = V * c->yuv2rgb_v2r_coeff;
834 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
835 B = U * c->yuv2rgb_u2b_coeff;
837 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
838 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
839 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
840 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
841 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
842 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
843 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-tap vertical blend to 48-bit RGB/BGR: luma/chroma blended with
 * 12-bit weights (>> 14), chroma recentred by -128 << 23 before the
 * shift, then the same fixed-point YUV->RGB conversion, 30-bit clip and
 * >> 14 per-component store as the _X_ variant. */
848 static av_always_inline void
849 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
850 const int32_t *ubuf[2], const int32_t *vbuf[2],
851 const int32_t *abuf[2], uint16_t *dest, int dstW,
852 int yalpha, int uvalpha, int y,
853 enum PixelFormat target)
855 const int32_t *buf0 = buf[0], *buf1 = buf[1],
856 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
857 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
858 int yalpha1 = 4095 - yalpha;
859 int uvalpha1 = 4095 - uvalpha;
862 for (i = 0; i < (dstW >> 1); i++) {
863 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
864 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
865 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
866 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
869 Y1 -= c->yuv2rgb_y_offset;
870 Y2 -= c->yuv2rgb_y_offset;
871 Y1 *= c->yuv2rgb_y_coeff;
872 Y2 *= c->yuv2rgb_y_coeff;
876 R = V * c->yuv2rgb_v2r_coeff;
877 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
878 B = U * c->yuv2rgb_u2b_coeff;
880 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
881 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
882 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
883 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
884 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
885 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Single-row output to 48-bit RGB/BGR. As in the 4:2:2 writer,
 * uvalpha < 2048 takes chroma from one row (>> 2 after recentring by
 * -128 << 11); otherwise the two chroma rows are averaged (>> 3).
 * Conversion and per-component store match the other rgb48 variants. */
890 static av_always_inline void
891 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
892 const int32_t *ubuf[2], const int32_t *vbuf[2],
893 const int32_t *abuf0, uint16_t *dest, int dstW,
894 int uvalpha, int y, enum PixelFormat target)
896 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
897 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
900 if (uvalpha < 2048) {
901 for (i = 0; i < (dstW >> 1); i++) {
902 int Y1 = (buf0[i * 2] ) >> 2;
903 int Y2 = (buf0[i * 2 + 1]) >> 2;
904 int U = (ubuf0[i] + (-128 << 11)) >> 2;
905 int V = (vbuf0[i] + (-128 << 11)) >> 2;
908 Y1 -= c->yuv2rgb_y_offset;
909 Y2 -= c->yuv2rgb_y_offset;
910 Y1 *= c->yuv2rgb_y_coeff;
911 Y2 *= c->yuv2rgb_y_coeff;
915 R = V * c->yuv2rgb_v2r_coeff;
916 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
917 B = U * c->yuv2rgb_u2b_coeff;
919 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
920 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
921 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
922 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
923 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
924 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
928 for (i = 0; i < (dstW >> 1); i++) {
929 int Y1 = (buf0[i * 2] ) >> 2;
930 int Y2 = (buf0[i * 2 + 1]) >> 2;
931 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
932 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
935 Y1 -= c->yuv2rgb_y_offset;
936 Y2 -= c->yuv2rgb_y_offset;
937 Y1 *= c->yuv2rgb_y_coeff;
938 Y2 *= c->yuv2rgb_y_coeff;
942 R = V * c->yuv2rgb_v2r_coeff;
943 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
944 B = U * c->yuv2rgb_u2b_coeff;
946 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
947 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
948 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
949 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
950 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
951 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit RGB/BGR writers for both channel orders and endiannesses. */
961 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
962 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
963 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
964 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Store one pair of pixels (Y1/Y2 with shared U/V, optional alphas A1/A2)
 * into dest in the given packed RGB target format, using the per-context
 * lookup tables _r/_g/_b (each maps a clipped 8-bit component to its
 * pre-shifted contribution, so r[..]+g[..]+b[..] composes a full pixel).
 * Branches on target at compile time (always_inline + constant target):
 *   - 32-bit RGBA/BGRA/ARGB/ABGR: uint32 tables; alpha is OR-ed in at
 *     bit 0 or 24 depending on the *_1 variants;
 *   - 24-bit RGB24/BGR24: byte tables, channel order via r_b/b_r;
 *   - 16/15/12-bit (565/555/444): uint16 tables with 2x2 or 4x4 ordered
 *     dither, separate dither offsets per channel and per pixel;
 *   - 8/4-bit: byte tables with 8x8 dither; RGB4/BGR4 packs both pixels
 *     into one byte (second pixel in the high nibble).
 * NOTE(review): several branch/else lines of this function are missing
 * from this extract (e.g. the non-alpha 32-bit path's else, the 4-bit
 * byte path); consult the upstream file for the complete control flow. */
966 static av_always_inline void
967 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
968 int U, int V, int A1, int A2,
969 const void *_r, const void *_g, const void *_b, int y,
970 enum PixelFormat target, int hasAlpha)
972 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
973 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
974 uint32_t *dest = (uint32_t *) _dest;
975 const uint32_t *r = (const uint32_t *) _r;
976 const uint32_t *g = (const uint32_t *) _g;
977 const uint32_t *b = (const uint32_t *) _b;
/* sh: bit position of alpha — 0 for the *_1 layouts, else 24. */
980 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
982 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
983 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
986 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
988 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
989 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
991 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
992 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
995 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
996 uint8_t *dest = (uint8_t *) _dest;
997 const uint8_t *r = (const uint8_t *) _r;
998 const uint8_t *g = (const uint8_t *) _g;
999 const uint8_t *b = (const uint8_t *) _b;
1001 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1002 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1003 dest[i * 6 + 0] = r_b[Y1];
1004 dest[i * 6 + 1] = g[Y1];
1005 dest[i * 6 + 2] = b_r[Y1];
1006 dest[i * 6 + 3] = r_b[Y2];
1007 dest[i * 6 + 4] = g[Y2];
1008 dest[i * 6 + 5] = b_r[Y2];
1011 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1012 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1013 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1014 uint16_t *dest = (uint16_t *) _dest;
1015 const uint16_t *r = (const uint16_t *) _r;
1016 const uint16_t *g = (const uint16_t *) _g;
1017 const uint16_t *b = (const uint16_t *) _b;
1018 int dr1, dg1, db1, dr2, dg2, db2;
1020 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1021 dr1 = dither_2x2_8[ y & 1 ][0];
1022 dg1 = dither_2x2_4[ y & 1 ][0];
1023 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1024 dr2 = dither_2x2_8[ y & 1 ][1];
1025 dg2 = dither_2x2_4[ y & 1 ][1];
1026 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1027 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1028 dr1 = dither_2x2_8[ y & 1 ][0];
1029 dg1 = dither_2x2_8[ y & 1 ][1];
1030 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1031 dr2 = dither_2x2_8[ y & 1 ][1];
1032 dg2 = dither_2x2_8[ y & 1 ][0];
1033 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1035 dr1 = dither_4x4_16[ y & 3 ][0];
1036 dg1 = dither_4x4_16[ y & 3 ][1];
1037 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1038 dr2 = dither_4x4_16[ y & 3 ][1];
1039 dg2 = dither_4x4_16[ y & 3 ][0];
1040 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1043 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1044 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1045 } else /* 8/4-bit */ {
1046 uint8_t *dest = (uint8_t *) _dest;
1047 const uint8_t *r = (const uint8_t *) _r;
1048 const uint8_t *g = (const uint8_t *) _g;
1049 const uint8_t *b = (const uint8_t *) _b;
1050 int dr1, dg1, db1, dr2, dg2, db2;
1052 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1053 const uint8_t * const d64 = dither_8x8_73[y & 7];
1054 const uint8_t * const d32 = dither_8x8_32[y & 7];
1055 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1056 db1 = d64[(i * 2 + 0) & 7];
1057 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1058 db2 = d64[(i * 2 + 1) & 7];
1060 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1061 const uint8_t * const d128 = dither_8x8_220[y & 7];
1062 dr1 = db1 = d128[(i * 2 + 0) & 7];
1063 dg1 = d64[(i * 2 + 0) & 7];
1064 dr2 = db2 = d128[(i * 2 + 1) & 7];
1065 dg2 = d64[(i * 2 + 1) & 7];
1068 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1069 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1070 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1072 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1073 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Multi-tap vertical filtering to packed RGB: filters two luma samples,
 * one chroma pair and (when hasAlpha) two alpha samples per iteration,
 * clips them to 8 bits via a combined overflow test, looks up the
 * per-component contribution tables from the context (table_rV / table_gU
 * + table_gV / table_bU — only the g lookup is visible here) and hands
 * everything to yuv2rgb_write for format-specific packing.
 * NOTE(review): accumulator initializations, the >> 19 shift lines, the
 * alpha-branch guard and the r/b table lookups are missing from this
 * extract. */
1078 static av_always_inline void
1079 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1080 const int16_t **lumSrc, int lumFilterSize,
1081 const int16_t *chrFilter, const int16_t **chrUSrc,
1082 const int16_t **chrVSrc, int chrFilterSize,
1083 const int16_t **alpSrc, uint8_t *dest, int dstW,
1084 int y, enum PixelFormat target, int hasAlpha)
1088 for (i = 0; i < (dstW >> 1); i++) {
1094 int av_unused A1, A2;
1095 const void *r, *g, *b;
1097 for (j = 0; j < lumFilterSize; j++) {
1098 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1099 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1101 for (j = 0; j < chrFilterSize; j++) {
1102 U += chrUSrc[j][i] * chrFilter[j];
1103 V += chrVSrc[j][i] * chrFilter[j];
1109 if ((Y1 | Y2 | U | V) & 0x100) {
1110 Y1 = av_clip_uint8(Y1);
1111 Y2 = av_clip_uint8(Y2);
1112 U = av_clip_uint8(U);
1113 V = av_clip_uint8(V);
1118 for (j = 0; j < lumFilterSize; j++) {
1119 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1120 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1124 if ((A1 | A2) & 0x100) {
1125 A1 = av_clip_uint8(A1);
1126 A2 = av_clip_uint8(A2);
1130 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1132 g = (c->table_gU[U] + c->table_gV[V]);
1135 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1136 r, g, b, y, target, hasAlpha);
/**
 * Convert one output line of packed RGB from two source lines, blending
 * them bilinearly: yalpha/uvalpha are 12-bit (0..4095) weights for the
 * second line; the complementary weight is applied to the first.
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
    const int16_t *buf0 = buf[0], *buf1 = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = abuf[0], *abuf1 = abuf[1];
    int yalpha1 = 4095 - yalpha;   /* weight for line 0 */
    int uvalpha1 = 4095 - uvalpha;
    for (i = 0; i < (dstW >> 1); i++) {
        /* >> 19 drops the 12-bit blend weight plus 7 fractional sample bits */
        int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        const void *r = c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b = c->table_bU[U];
        A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
        A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/**
 * Convert a single pre-filtered source line to packed RGB (no vertical
 * filtering).  When uvalpha < 2048 only the first chroma line is used
 * (>> 7); otherwise the two chroma lines are averaged (>> 8).
 */
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target,
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    if (uvalpha < 2048) {
        /* nearest chroma line */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2] >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U = ubuf1[i] >> 7;
            int V = vbuf1[i] >> 7;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
            A1 = abuf0[i * 2 ] >> 7;
            A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        /* average of both chroma lines */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2] >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U = (ubuf0[i] + ubuf1[i]) >> 8;
            int V = (vbuf0[i] + vbuf1[i]) >> 8;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
            A1 = abuf0[i * 2 ] >> 7;
            A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
/*
 * Instantiate the "_X_c" (multi-tap vertical filter) entry point for one
 * packed output format by delegating to the matching *_X_c_template with
 * 'fmt'/'hasAlpha' baked in as compile-time constants.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
/*
 * Instantiate all three entry points (_X_c multi-tap, _2_c two-line blend,
 * _1_c single line) for one packed output format.
 */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
/* Concrete output writers for the chroma-subsampled packed-RGB path.
 * The 32-bit variants come in three flavours: runtime alpha decision
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf), forced alpha (a32*), and
 * no-alpha (x32*). */
YUV2RGBWRAPPER(yuv2rgb,, 32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPER(yuv2rgb,, 32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1,   1);
YUV2RGBWRAPPER(yuv2rgb,, a32,   PIX_FMT_RGB32,     1);
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1,   0);
YUV2RGBWRAPPER(yuv2rgb,, x32,   PIX_FMT_RGB32,     0);
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
YUV2RGBWRAPPER(yuv2rgb,, 16,    PIX_FMT_RGB565,    0);
YUV2RGBWRAPPER(yuv2rgb,, 15,    PIX_FMT_RGB555,    0);
YUV2RGBWRAPPER(yuv2rgb,, 12,    PIX_FMT_RGB444,    0);
YUV2RGBWRAPPER(yuv2rgb,, 8,     PIX_FMT_RGB8,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4,     PIX_FMT_RGB4,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4b,    PIX_FMT_RGB4_BYTE, 0);
/**
 * Full-chroma-resolution variant: vertically filter and convert one pixel
 * per iteration (no 4:2:2-style pairing), using the per-context fixed-point
 * coefficients instead of the lookup tables.  'step' is the output pixel
 * stride in bytes (3 for 24-bit formats, otherwise 4).
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
    for (i = 0; i < dstW; i++) {
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        for (j = 0; j < lumFilterSize; j++) {
            A += alpSrc[j][i] * lumFilter[j];
        A = av_clip_uint8(A);
        /* fixed-point YUV -> RGB using the context's conversion coefficients */
        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        R = Y + V*c->yuv2rgb_v2r_coeff;
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
        B = Y + U*c->yuv2rgb_u2b_coeff;
        /* clamp only when some component left the 30-bit working range */
        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);
        dest[0] = hasAlpha ? A : 255;
        dest[3] = hasAlpha ? A : 255;
        dest[0] = hasAlpha ? A : 255;
        dest[3] = hasAlpha ? A : 255;
/* Concrete output writers for the full-chroma packed-RGB path (X variant
 * only).  Same alpha-flavour scheme as the subsampled wrappers above. */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
/**
 * Fill 'height' lines of 'width' bytes of a plane with the constant byte
 * 'val', starting at line 'y' (ptr advances by 'stride' per line).
 */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
/* Read one 16-bit component, honouring the source endianness. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* For BGR48 the first and last components are swapped relative to RGB48,
 * so 'r'/'b' map onto 'r_b'/'b_r' depending on the origin format. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)

/* Convert a line of packed 48-bit RGB/BGR (16 bit per component) to
 * 16-bit luma, using the file's fixed-point BT.601 coefficients. */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);
        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert a line of packed 48-bit RGB/BGR to 16-bit chroma at full
 * horizontal resolution. */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);
        /* 0x10001<<(SHIFT-1): rounding plus the chroma bias */
        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* As rgb48ToUV_c_template, but horizontally downsampled 2:1: each chroma
 * sample is computed from the rounded average of two adjacent pixels. */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Instantiate the byte-pointer wrappers (ToY / ToUV / ToUV_half) for one
 * 48-bit format; they merely cast to uint16_t* and call the templates
 * above with 'origin' as a compile-time constant.  The four invocations
 * below cover RGB48/BGR48 in both endiannesses.
 */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Read one packed pixel: native 32-bit load for the 4-byte-per-pixel
 * formats, otherwise an endianness-aware 16-bit load. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))

/**
 * Generic packed-RGB (15/16/32 bit) to 8-bit luma conversion.  The
 * component layout is described by the shift/mask parameters; rsh/gsh/bsh
 * pre-scale the coefficients so all formats end up at the same fixed-point
 * scale S before the final shift.
 */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
              rnd = 33 << (S - 1);
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic packed-RGB to chroma conversion at full horizontal resolution;
 * parameters as in rgb16_32ToY_c_template. */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << (S - 1); /* rounding + 128 chroma bias */
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic packed-RGB to chroma with 2:1 horizontal downsampling.  Two
 * adjacent pixels are summed: adding the raw pixel words and subtracting
 * the green-masked sum (maskgx covers everything except r/b) yields the
 * summed r and b fields in one word, because the widened masks below give
 * each component one extra carry bit. */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << S, maskgx = ~(maskr | maskb);
    /* widen each mask by one bit so component sums cannot collide */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;
        b = (rb & maskb) >> shb;
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
        g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;
        /* S+1 compensates for summing two pixels instead of averaging */
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Instantiate ToY / ToUV / ToUV_half for one packed-RGB layout; the
 * shift/mask/scale table below bakes the component geometry of each
 * format into the generic templates above.
 */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from ABGR-ordered input.
 * NOTE(review): loop body elided in this extraction — presumably copies
 * one byte per 4-byte pixel; confirm offset against the full source. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {

/* Extract the alpha plane from RGBA-ordered input (alpha at the other
 * byte offset than abgrToA_c). */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
/* Palettized (PAL8) input: luma is the low byte of the palette entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
    for (i=0; i<width; i++) {
        dst[i]= pal[d] & 0xFF;

/* Palettized input: chroma comes from the upper bytes of the palette
 * entry (extraction lines elided here).  Both "planes" must alias. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];
/* Expand 1 bit-per-pixel mono to 8-bit luma, MSB first: each bit becomes
 * 0 or 255.  For MONOWHITE, 0 is white — the inversion happens in the
 * elided load of 'd' (presumably d = ~src[i]; confirm in full source). */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*255;

/* Same expansion for MONOBLACK (1 = white), without the inversion. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*255;
//FIXME yuy2* can read up to 7 samples too many
/* Copy the luma bytes out of packed YUYV (assignment line elided here;
 * in YUYV the luma bytes sit at even offsets). */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    /* Deinterleave chroma from packed YUYV (byte order Y0 U Y1 V). */
    int i;
    for (i = 0; i < width; i++) {
        const uint8_t *px = &src1[4 * i];
        dstU[i] = px[1];
        dstV[i] = px[3];
    }
    /* packed input: both source pointers reference the same line */
    assert(src1 == src2);
}
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    /* Endianness conversion for a line of 16-bit luma samples. */
    const uint16_t *in = (const uint16_t *) _src;
    uint16_t *out = (uint16_t *) _dst;
    int n = width;
    while (n-- > 0)
        *out++ = av_bswap16(*in++);
}
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    /* Endianness conversion for one line of 16-bit U and V samples. */
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU = (uint16_t *) _dstU;
    uint16_t *outV = (uint16_t *) _dstV;
    int n;
    for (n = 0; n < width; n++) {
        outU[n] = av_bswap16(inU[n]);
        outV[n] = av_bswap16(inV[n]);
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* Copy the luma bytes out of packed UYVY (assignment line elided here;
 * in UYVY the luma bytes sit at odd offsets). */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    /* Deinterleave chroma from packed UYVY (byte order U Y0 V Y1). */
    int i;
    for (i = 0; i < width; i++) {
        const uint8_t *px = &src1[4 * i];
        dstU[i] = px[0];
        dstV[i] = px[2];
    }
    /* packed input: both source pointers reference the same line */
    assert(src1 == src2);
}
/* Deinterleave one line of NV12/NV21-style packed chroma into two planes. */
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                                        const uint8_t *src, int width)
    for (i = 0; i < width; i++) {
        dst1[i] = src[2*i+0];
        dst2[i] = src[2*i+1];

/* NV12: interleaved chroma is U,V — U lands in dstU. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);

/* NV21: interleaved chroma is V,U — destinations swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
/* Re-scoped 16-bit loader for the following converters. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* Packed BGR24 -> 8-bit luma (component loads elided in this extraction;
 * B,G,R are read from consecutive bytes). */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1782 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1783 const uint8_t *src2, int width, uint32_t *unused)
1786 for (i=0; i<width; i++) {
1787 int b= src1[3*i + 0];
1788 int g= src1[3*i + 1];
1789 int r= src1[3*i + 2];
1791 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1792 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1794 assert(src1 == src2);
1797 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1798 const uint8_t *src2, int width, uint32_t *unused)
1801 for (i=0; i<width; i++) {
1802 int b= src1[6*i + 0] + src1[6*i + 3];
1803 int g= src1[6*i + 1] + src1[6*i + 4];
1804 int r= src1[6*i + 2] + src1[6*i + 5];
1806 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1807 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1809 assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma (component loads elided in this extraction;
 * R,G,B are read from consecutive bytes). */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> 8-bit chroma at full horizontal resolution. */
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];
        /* 257<<(SHIFT-1): 0.5 rounding plus the 128 chroma bias */
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> chroma with 2:1 horizontal downsampling: adjacent
 * pixels are summed, compensated by the extra +1 in the final shift. */
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/**
 * Horizontal scaling for >8-bit input: despite the int16_t*/uint8_t*
 * signature, the buffers really hold int32_t output and uint16_t input.
 * 'sh' adapts the post-filter shift to the source bit depth so the result
 * fits 19 bits.
 */
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                       const int16_t *filter,
                       const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = (bits <= 7) ? 11 : (bits - 4);
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];
        unsigned int val = 0;
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
// bilinear / bicubic scaling
/* Generic horizontal scaler for 8-bit input: per output pixel, apply a
 * filterSize-tap filter starting at filterPos[i]; >>7 rescales the
 * coefficient precision and the result is clamped to 15 bits. */
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    /* Expand MPEG-range chroma to full JPEG range in place (fixed point,
     * 15-bit samples).  The clamp keeps the multiply from overflowing the
     * signed 15-bit result. */
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775; /* == FFMIN(dstU[i], 30775) */
        int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u * 4663 - 9289992) >> 12; /* -264 */
        dstV[i] = (v * 4663 - 9289992) >> 12;
    }
}
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    /* Compress full-range JPEG chroma back to MPEG range in place
     * (fixed point, 15-bit samples). */
    int16_t *u = dstU, *v = dstV;
    int n;
    for (n = 0; n < width; n++, u++, v++) {
        *u = (*u * 1799 + 4081085) >> 11; /* 1469 */
        *v = (*v * 1799 + 4081085) >> 11;
    }
}
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    /* Expand MPEG-range luma (16..235) to full JPEG range in place; the
     * clamp prevents overflow of the fixed-point multiply. */
    int i;
    for (i = 0; i < width; i++) {
        int y = dst[i] > 30189 ? 30189 : dst[i]; /* == FFMIN(dst[i], 30189) */
        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    /* Compress full-range JPEG luma back to MPEG range in place. */
    int16_t *p = dst;
    int n;
    for (n = 0; n < width; n++, p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    /* 9-16 bpc variant: the buffers actually hold int32_t samples.
     * Expand MPEG-range chroma to full JPEG range in place; the clamp
     * keeps the fixed-point multiply in range. */
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;
    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int32_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);
        dstU[i] = (u * 4663 - (9289992 << 4)) >> 12; /* -264 */
        dstV[i] = (v * 4663 - (9289992 << 4)) >> 12;
    }
}
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    /* 9-16 bpc variant (buffers hold int32_t): compress full-range JPEG
     * chroma back to MPEG range in place. */
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int i;
    for (i = 0; i < width; i++) {
        u[i] = (u[i] * 1799 + (4081085 << 4)) >> 11; /* 1469 */
        v[i] = (v[i] * 1799 + (4081085 << 4)) >> 11;
    }
}
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    /* 9-16 bpc variant (buffer holds int32_t): expand MPEG-range luma to
     * full JPEG range in place, clamping to keep the multiply in range. */
    int32_t *dst = (int32_t *) _dst;
    int i;
    for (i = 0; i < width; i++) {
        int32_t y = dst[i];
        if (y > (30189 << 4))
            y = 30189 << 4;   /* == FFMIN(dst[i], 30189<<4) */
        dst[i] = (y * 4769 - (39057361 << 2)) >> 12;
    }
}
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    /* 9-16 bpc variant (buffer holds int32_t): compress full-range JPEG
     * luma back to MPEG range in place. */
    int32_t *p = (int32_t *) _dst;
    int n;
    for (n = 0; n < width; n++, p++)
        *p = (*p * 14071 + (33561947 << 4)) >> 14;
}
/* Fast bilinear horizontal luma scaler: xpos walks the source in 16.16
 * fixed point; xalpha is the 7-bit fractional weight between the two
 * neighbouring samples, and the result is left at 15-bit scale (<<7). */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    /* Promote 8-bit samples to 16 bit by replicating the byte into both
     * halves (x -> x*257).  The reverse walk allows src and dst to share
     * the same buffer. */
    uint8_t *dst = (uint8_t *) _dst;
    int i;
    for (i = len - 1; i >= 0; i--) {
        uint8_t v = src[i];
        dst[2 * i]     = v;
        dst[2 * i + 1] = v;
    }
}
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    /* Narrow 19-bit intermediates to 15 bit.  The forward walk permits
     * in-place use with dst aliasing the start of src. */
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src[i] >> 4;
}
// *** horizontal scale Y line to temp buffer
/**
 * Drive one line of horizontal luma (or alpha, if isAlpha) scaling:
 *  1. optional input-format conversion into formatConvBuffer,
 *  2. optional 8->16 bit promotion when scaling at 16 bpp,
 *  3. the actual horizontal scale (filtered or fast bilinear),
 *  4. optional luma range conversion,
 *  5. optional 19->15 bit demotion for narrow output formats.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
        src = formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        convertRange(dst, dstWidth);
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: scales the U and V lines
 * together with one shared 16.16 fixed-point position; xalpha^127 is the
 * complementary 7-bit weight. */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Drive one line of horizontal chroma scaling; same pipeline as hyscale()
 * but for the U/V pair: optional format conversion (using the second half
 * of formatConvBuffer for V), optional 8->16 promotion, horizontal scale,
 * chroma range conversion, and optional 19->15 demotion.
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
        c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
        src1 = formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
        c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/**
 * Select the C output functions (planar 1-tap/X-tap and packed
 * 1-line/2-line/X-tap writers) matching c->dstFormat.  The full-chroma
 * (SWS_FULL_CHR_H_INT) path only provides X variants; the default path
 * fills all three packed slots per format.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;
    /* planar outputs, chosen by bit depth / layout */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;
    /* packed outputs at full chroma resolution (X variant only) */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;
        /* packed outputs, chroma-subsampled path */
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_c;
            *yuv2packed2 = yuv2rgba32_2_c;
            *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_c;
            *yuv2packed2 = yuv2rgbx32_2_c;
            *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_1_c;
            *yuv2packed2 = yuv2rgba32_1_2_c;
            *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_1_c;
            *yuv2packed2 = yuv2rgbx32_1_2_c;
            *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for the ring-buffer trace output in swScale();
 * set to 1 to log buffer-index bookkeeping at AV_LOG_DEBUG level. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and cannot misbind in an unbraced if/else at a call site
 * (CERT PRE10-C). Relies on a local SwsContext *c being in scope. */
#define DEBUG_BUFFERS(...)                              \
    do {                                                \
        if (DEBUG_SWSCALE_BUFFERS)                      \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);       \
    } while (0)
/**
 * Core scaling loop: consumes one horizontal slice of the source image
 * (srcSliceH lines starting at srcSliceY), horizontally scales the
 * needed lines into the luma/chroma ring buffers, vertically filters
 * them, and writes the resulting destination lines.
 *
 * @return number of destination lines output for this slice
 *         (dstY - lastDstY); a slice that only buffers input lines
 *         without producing output returns 0.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    /* vertical/horizontal filter coefficients and source positions */
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    /* ring buffers holding horizontally scaled lines awaiting vertical filtering */
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* -((-x) >> s) rounds the subsampled chroma slice height up, not down */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    /* output functions selected at init time; may be re-selected below
       near the bottom of the image (see the dstY >= dstH-2 branch) */
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        /* packed input has a single plane; alias it for all plane slots */
        srcStride[3]= srcStride[0];
    /* vChrDrop skips chroma lines by widening the chroma stride */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    /* unaligned destination strides defeat aligned SIMD stores; warn once */
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    /* main loop: one destination line per iteration */
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        /* look ahead to the last luma line of this chroma group so a full
           group can be emitted together */
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* clamp to what this slice provides; we buffer and wait for
               the next slice to deliver the remaining input lines */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            /* src[3] carries alpha for packed formats (aliased above) */
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            /* needs_hcscale is 0 for gray/mono sources (no chroma to scale) */
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
        break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
            /* point into the ring buffers at the first line each vertical
               filter tap needs; +vLumBufSize/+vChrBufSize keeps the offset
               non-negative after ring wrap-around */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat))
                    dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
                             dest, dstW, chrDstW);
                } else { //General YV12
                    yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter + chrDstY * vChrFilterSize,
                             chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    /* replicate the 16-bit coefficient into both halves of
                       the 32-bit word for the MMX vertical filter */
                    lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);

    /* output has an alpha plane but the source provided none: fill opaque */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    /* make any pending (possibly non-temporal) stores from the MMX2
       output paths globally visible before returning */
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/**
 * One-time initialization of the plain-C scaler entry points in the
 * context: selects the packed/planar output functions, the per-format
 * input readers that convert the source chroma/luma/alpha to the
 * internal planar representation, the horizontal scaler, and the
 * JPEG<->MPEG range-conversion functions.
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,

    /* chroma input reader: source format -> planar U/V */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    /* palette-indexed sources share the palette-lookup reader */
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    /* >8-bit planar YUV in non-native endianness: byteswap on read */
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;

    /* RGB-family chroma readers: the *_half_c variants average two
       horizontally adjacent pixels when chroma is subsampled */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
        /* no horizontal chroma subsampling: one source pixel per sample */
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;

    /* luma and alpha input readers */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
    /* Y400A interleaves luma and alpha, so the YUYV luma reader
       (every second byte) extracts its luma as well */
    case PIX_FMT_YUYV422 :
    case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    /* NOTE(review): reuses the UYVY luma reader (offset every-2nd-byte)
       to pull the interleaved alpha bytes out of Y400A — verify */
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;

    /* horizontal scaler and optional fast bilinear path (8-bit depth) */
    if (c->scalingBpp == 8) {
        c->hScale = hScale_c;
        if (c->flags & SWS_FAST_BILINEAR) {
            c->hyscale_fast = hyscale_fast_c;
            c->hcscale_fast = hcscale_fast_c;

        /* range conversion only applies to YUV output (RGB paths handle
           range inside the yuv2rgb tables) */
        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;
        /* high bit depth path: 16-bit horizontal scaler + rescale helpers */
        c->hScale = hScale16_c;
        c->scale19To15Fw = scale19To15Fw_c;
        c->scale8To16Rv = scale8To16Rv_c;

        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;

    /* gray/mono formats carry no chroma, so skip horizontal chroma scaling */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2761 SwsFunc ff_getSwsFunc(SwsContext *c)
2763 sws_init_swScale_c(c);
2766 ff_sws_init_swScale_mmx(c);
2768 ff_sws_init_swScale_altivec(c);