/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24

  tested special converters (most are tested actually, but I did not write it down ...)

  untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
#include "swscale_internal.h"

#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
/* BT.601 RGB->YUV coefficients, fixed point with RGB2YUV_SHIFT fractional
 * bits.  Limited ("TV") range: luma is scaled by 219/255, chroma by 224/255.
 * Rounded to nearest via the +0.5 before truncation. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB->YUV conversion matrices indexed by SWS_CS_* colorspace id.
 * Row layout: { Gy, By, Ry, Gu, Bu, Ru, Gv, Bv, Rv } (green, blue, red
 * weight for each of Y, U, V). */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
/* TODO
  Special versions: fast Y 1:1 scaling (no interpolation in y direction)

  more intelligent misalignment avoidance for the horizontal scaler
  write special vertical cubic upscale version
  optimize C code (YV12 / minmax)
  add support for packed pixel YUV input & output
  add support for Y8 output
  optimize BGR24 & BGR32
  add BGR4 output support
  write special BGR->BGR scaler
*/
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int32_t **chrUSrc, const int32_t **chrVSrc,
201 int chrFilterSize, const int32_t **alpSrc,
202 uint16_t *dest[4], int dstW, int chrDstW,
203 int big_endian, int output_bits)
205 //FIXME Optimize (just quickly written not optimized..)
207 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
208 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
209 int shift = 15 + 16 - output_bits;
211 #define output_pixel(pos, val) \
213 if (output_bits == 16) { \
214 AV_WB16(pos, av_clip_uint16(val >> shift)); \
216 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
219 if (output_bits == 16) { \
220 AV_WL16(pos, av_clip_uint16(val >> shift)); \
222 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
225 for (i = 0; i < dstW; i++) {
226 int val = 1 << (30-output_bits);
229 for (j = 0; j < lumFilterSize; j++)
230 val += lumSrc[j][i] * lumFilter[j];
232 output_pixel(&yDest[i], val);
236 for (i = 0; i < chrDstW; i++) {
237 int u = 1 << (30-output_bits);
238 int v = 1 << (30-output_bits);
241 for (j = 0; j < chrFilterSize; j++) {
242 u += chrUSrc[j][i] * chrFilter[j];
243 v += chrVSrc[j][i] * chrFilter[j];
246 output_pixel(&uDest[i], u);
247 output_pixel(&vDest[i], v);
251 if (CONFIG_SWSCALE_ALPHA && aDest) {
252 for (i = 0; i < dstW; i++) {
253 int val = 1 << (30-output_bits);
256 for (j = 0; j < lumFilterSize; j++)
257 val += alpSrc[j][i] * lumFilter[j];
259 output_pixel(&aDest[i], val);
265 #define yuv2NBPS(bits, BE_LE, is_be) \
266 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
267 const int16_t **_lumSrc, int lumFilterSize, \
268 const int16_t *chrFilter, const int16_t **_chrUSrc, \
269 const int16_t **_chrVSrc, \
270 int chrFilterSize, const int16_t **_alpSrc, \
271 uint8_t *_dest[4], int dstW, int chrDstW) \
273 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
274 **chrUSrc = (const int32_t **) _chrUSrc, \
275 **chrVSrc = (const int32_t **) _chrVSrc, \
276 **alpSrc = (const int32_t **) _alpSrc; \
277 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
278 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
279 alpSrc, (uint16_t **) _dest, \
280 dstW, chrDstW, is_be, bits); \
289 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
290 const int16_t **lumSrc, int lumFilterSize,
291 const int16_t *chrFilter, const int16_t **chrUSrc,
292 const int16_t **chrVSrc,
293 int chrFilterSize, const int16_t **alpSrc,
294 uint8_t *dest[4], int dstW, int chrDstW)
296 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
297 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
300 //FIXME Optimize (just quickly written not optimized..)
301 for (i=0; i<dstW; i++) {
304 for (j=0; j<lumFilterSize; j++)
305 val += lumSrc[j][i] * lumFilter[j];
307 yDest[i]= av_clip_uint8(val>>19);
311 for (i=0; i<chrDstW; i++) {
315 for (j=0; j<chrFilterSize; j++) {
316 u += chrUSrc[j][i] * chrFilter[j];
317 v += chrVSrc[j][i] * chrFilter[j];
320 uDest[i]= av_clip_uint8(u>>19);
321 vDest[i]= av_clip_uint8(v>>19);
324 if (CONFIG_SWSCALE_ALPHA && aDest)
325 for (i=0; i<dstW; i++) {
328 for (j=0; j<lumFilterSize; j++)
329 val += alpSrc[j][i] * lumFilter[j];
331 aDest[i]= av_clip_uint8(val>>19);
335 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
336 const int16_t *chrUSrc, const int16_t *chrVSrc,
337 const int16_t *alpSrc,
338 uint8_t *dest[4], int dstW, int chrDstW)
340 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
341 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
344 for (i=0; i<dstW; i++) {
345 int val= (lumSrc[i]+64)>>7;
346 yDest[i]= av_clip_uint8(val);
350 for (i=0; i<chrDstW; i++) {
351 int u=(chrUSrc[i]+64)>>7;
352 int v=(chrVSrc[i]+64)>>7;
353 uDest[i]= av_clip_uint8(u);
354 vDest[i]= av_clip_uint8(v);
357 if (CONFIG_SWSCALE_ALPHA && aDest)
358 for (i=0; i<dstW; i++) {
359 int val= (alpSrc[i]+64)>>7;
360 aDest[i]= av_clip_uint8(val);
364 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
365 const int16_t **lumSrc, int lumFilterSize,
366 const int16_t *chrFilter, const int16_t **chrUSrc,
367 const int16_t **chrVSrc, int chrFilterSize,
368 const int16_t **alpSrc, uint8_t *dest[4],
369 int dstW, int chrDstW)
371 uint8_t *yDest = dest[0], *uDest = dest[1];
372 enum PixelFormat dstFormat = c->dstFormat;
374 //FIXME Optimize (just quickly written not optimized..)
376 for (i=0; i<dstW; i++) {
379 for (j=0; j<lumFilterSize; j++)
380 val += lumSrc[j][i] * lumFilter[j];
382 yDest[i]= av_clip_uint8(val>>19);
388 if (dstFormat == PIX_FMT_NV12)
389 for (i=0; i<chrDstW; i++) {
393 for (j=0; j<chrFilterSize; j++) {
394 u += chrUSrc[j][i] * chrFilter[j];
395 v += chrVSrc[j][i] * chrFilter[j];
398 uDest[2*i]= av_clip_uint8(u>>19);
399 uDest[2*i+1]= av_clip_uint8(v>>19);
402 for (i=0; i<chrDstW; i++) {
406 for (j=0; j<chrFilterSize; j++) {
407 u += chrUSrc[j][i] * chrFilter[j];
408 v += chrVSrc[j][i] * chrFilter[j];
411 uDest[2*i]= av_clip_uint8(v>>19);
412 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Store one 16-bit gray sample with the endianness selected by the
 * template's target format. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
423 static av_always_inline void
424 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
425 const int32_t **lumSrc, int lumFilterSize,
426 const int16_t *chrFilter, const int32_t **chrUSrc,
427 const int32_t **chrVSrc, int chrFilterSize,
428 const int32_t **alpSrc, uint16_t *dest, int dstW,
429 int y, enum PixelFormat target)
433 for (i = 0; i < (dstW >> 1); i++) {
438 for (j = 0; j < lumFilterSize; j++) {
439 Y1 += lumSrc[j][i * 2] * lumFilter[j];
440 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
444 if ((Y1 | Y2) & 0x10000) {
445 Y1 = av_clip_uint16(Y1);
446 Y2 = av_clip_uint16(Y2);
448 output_pixel(&dest[i * 2 + 0], Y1);
449 output_pixel(&dest[i * 2 + 1], Y2);
453 static av_always_inline void
454 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
455 const int32_t *ubuf[2], const int32_t *vbuf[2],
456 const int32_t *abuf[2], uint16_t *dest, int dstW,
457 int yalpha, int uvalpha, int y,
458 enum PixelFormat target)
460 int yalpha1 = 4095 - yalpha;
462 const int32_t *buf0 = buf[0], *buf1 = buf[1];
464 for (i = 0; i < (dstW >> 1); i++) {
465 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
466 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
468 output_pixel(&dest[i * 2 + 0], Y1);
469 output_pixel(&dest[i * 2 + 1], Y2);
473 static av_always_inline void
474 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
475 const int32_t *ubuf[2], const int32_t *vbuf[2],
476 const int32_t *abuf0, uint16_t *dest, int dstW,
477 int uvalpha, int y, enum PixelFormat target)
481 for (i = 0; i < (dstW >> 1); i++) {
482 int Y1 = buf0[i * 2 ] << 1;
483 int Y2 = buf0[i * 2 + 1] << 1;
485 output_pixel(&dest[i * 2 + 0], Y1);
486 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X_c: N-tap filter, _2_c: 2-tap
 * blend, _1_c: unfiltered) for a >8-bit packed output template.  The
 * generic int16_t** prototypes are cast to the real int32_t intermediates. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                          dstW, uvalpha, y, fmt); \
}
538 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
539 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* Flush 8 accumulated 1-bit pixels; MONOWHITE is the inverted polarity of
 * MONOBLACK. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
548 static av_always_inline void
549 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
550 const int16_t **lumSrc, int lumFilterSize,
551 const int16_t *chrFilter, const int16_t **chrUSrc,
552 const int16_t **chrVSrc, int chrFilterSize,
553 const int16_t **alpSrc, uint8_t *dest, int dstW,
554 int y, enum PixelFormat target)
556 const uint8_t * const d128=dither_8x8_220[y&7];
557 uint8_t *g = c->table_gU[128] + c->table_gV[128];
561 for (i = 0; i < dstW - 1; i += 2) {
566 for (j = 0; j < lumFilterSize; j++) {
567 Y1 += lumSrc[j][i] * lumFilter[j];
568 Y2 += lumSrc[j][i+1] * lumFilter[j];
572 if ((Y1 | Y2) & 0x100) {
573 Y1 = av_clip_uint8(Y1);
574 Y2 = av_clip_uint8(Y2);
576 acc += acc + g[Y1 + d128[(i + 0) & 7]];
577 acc += acc + g[Y2 + d128[(i + 1) & 7]];
579 output_pixel(*dest++, acc);
584 static av_always_inline void
585 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
586 const int16_t *ubuf[2], const int16_t *vbuf[2],
587 const int16_t *abuf[2], uint8_t *dest, int dstW,
588 int yalpha, int uvalpha, int y,
589 enum PixelFormat target)
591 const int16_t *buf0 = buf[0], *buf1 = buf[1];
592 const uint8_t * const d128 = dither_8x8_220[y & 7];
593 uint8_t *g = c->table_gU[128] + c->table_gV[128];
594 int yalpha1 = 4095 - yalpha;
597 for (i = 0; i < dstW - 7; i += 8) {
598 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
599 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
600 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
601 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
602 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
603 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
604 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
605 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
606 output_pixel(*dest++, acc);
610 static av_always_inline void
611 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
612 const int16_t *ubuf[2], const int16_t *vbuf[2],
613 const int16_t *abuf0, uint8_t *dest, int dstW,
614 int uvalpha, int y, enum PixelFormat target)
616 const uint8_t * const d128 = dither_8x8_220[y & 7];
617 uint8_t *g = c->table_gU[128] + c->table_gV[128];
620 for (i = 0; i < dstW - 7; i += 8) {
621 int acc = g[(buf0[i ] >> 7) + d128[0]];
622 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
623 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
624 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
625 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
626 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
627 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
628 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
629 output_pixel(*dest++, acc);
/* Generate the three public entry points (_X_c, _2_c, _1_c) for an 8-bit
 * packed output template; no pointer casts needed at this depth. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **chrUSrc, \
                        const int16_t **chrVSrc, int chrFilterSize, \
                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
                        int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf0, uint8_t *dest, int dstW, \
                        int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                          abuf0, dest, dstW, uvalpha, \
                          y, fmt); \
}
667 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
668 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store one macropixel (two luma, one chroma pair) in YUYV or UYVY byte
 * order depending on the template's target format. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
683 static av_always_inline void
684 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
685 const int16_t **lumSrc, int lumFilterSize,
686 const int16_t *chrFilter, const int16_t **chrUSrc,
687 const int16_t **chrVSrc, int chrFilterSize,
688 const int16_t **alpSrc, uint8_t *dest, int dstW,
689 int y, enum PixelFormat target)
693 for (i = 0; i < (dstW >> 1); i++) {
700 for (j = 0; j < lumFilterSize; j++) {
701 Y1 += lumSrc[j][i * 2] * lumFilter[j];
702 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
704 for (j = 0; j < chrFilterSize; j++) {
705 U += chrUSrc[j][i] * chrFilter[j];
706 V += chrVSrc[j][i] * chrFilter[j];
712 if ((Y1 | Y2 | U | V) & 0x100) {
713 Y1 = av_clip_uint8(Y1);
714 Y2 = av_clip_uint8(Y2);
715 U = av_clip_uint8(U);
716 V = av_clip_uint8(V);
718 output_pixels(4*i, Y1, U, Y2, V);
722 static av_always_inline void
723 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
724 const int16_t *ubuf[2], const int16_t *vbuf[2],
725 const int16_t *abuf[2], uint8_t *dest, int dstW,
726 int yalpha, int uvalpha, int y,
727 enum PixelFormat target)
729 const int16_t *buf0 = buf[0], *buf1 = buf[1],
730 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
731 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
732 int yalpha1 = 4095 - yalpha;
733 int uvalpha1 = 4095 - uvalpha;
736 for (i = 0; i < (dstW >> 1); i++) {
737 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
738 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
739 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
740 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
742 output_pixels(i * 4, Y1, U, Y2, V);
746 static av_always_inline void
747 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
748 const int16_t *ubuf[2], const int16_t *vbuf[2],
749 const int16_t *abuf0, uint8_t *dest, int dstW,
750 int uvalpha, int y, enum PixelFormat target)
752 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
753 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
756 if (uvalpha < 2048) {
757 for (i = 0; i < (dstW >> 1); i++) {
758 int Y1 = buf0[i * 2] >> 7;
759 int Y2 = buf0[i * 2 + 1] >> 7;
760 int U = ubuf1[i] >> 7;
761 int V = vbuf1[i] >> 7;
763 output_pixels(i * 4, Y1, U, Y2, V);
766 for (i = 0; i < (dstW >> 1); i++) {
767 int Y1 = buf0[i * 2] >> 7;
768 int Y2 = buf0[i * 2 + 1] >> 7;
769 int U = (ubuf0[i] + ubuf1[i]) >> 8;
770 int V = (vbuf0[i] + vbuf1[i]) >> 8;
772 output_pixels(i * 4, Y1, U, Y2, V);
779 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
780 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Component-order and endianness helpers for the 48-bit RGB/BGR writers:
 * R_B/B_R swap red and blue for the BGR variants, output_pixel stores one
 * 16-bit component with the target's byte order. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
791 static av_always_inline void
792 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
793 const int32_t **lumSrc, int lumFilterSize,
794 const int16_t *chrFilter, const int32_t **chrUSrc,
795 const int32_t **chrVSrc, int chrFilterSize,
796 const int32_t **alpSrc, uint16_t *dest, int dstW,
797 int y, enum PixelFormat target)
801 for (i = 0; i < (dstW >> 1); i++) {
805 int U = -128 << 23; // 19
809 for (j = 0; j < lumFilterSize; j++) {
810 Y1 += lumSrc[j][i * 2] * lumFilter[j];
811 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
813 for (j = 0; j < chrFilterSize; j++) {
814 U += chrUSrc[j][i] * chrFilter[j];
815 V += chrVSrc[j][i] * chrFilter[j];
818 // 8bit: 12+15=27; 16-bit: 12+19=31
824 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
825 Y1 -= c->yuv2rgb_y_offset;
826 Y2 -= c->yuv2rgb_y_offset;
827 Y1 *= c->yuv2rgb_y_coeff;
828 Y2 *= c->yuv2rgb_y_coeff;
831 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
833 R = V * c->yuv2rgb_v2r_coeff;
834 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
835 B = U * c->yuv2rgb_u2b_coeff;
837 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
838 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
839 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
840 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
841 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
842 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
843 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
848 static av_always_inline void
849 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
850 const int32_t *ubuf[2], const int32_t *vbuf[2],
851 const int32_t *abuf[2], uint16_t *dest, int dstW,
852 int yalpha, int uvalpha, int y,
853 enum PixelFormat target)
855 const int32_t *buf0 = buf[0], *buf1 = buf[1],
856 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
857 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
858 int yalpha1 = 4095 - yalpha;
859 int uvalpha1 = 4095 - uvalpha;
862 for (i = 0; i < (dstW >> 1); i++) {
863 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
864 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
865 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
866 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
869 Y1 -= c->yuv2rgb_y_offset;
870 Y2 -= c->yuv2rgb_y_offset;
871 Y1 *= c->yuv2rgb_y_coeff;
872 Y2 *= c->yuv2rgb_y_coeff;
876 R = V * c->yuv2rgb_v2r_coeff;
877 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
878 B = U * c->yuv2rgb_u2b_coeff;
880 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
881 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
882 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
883 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
884 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
885 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
890 static av_always_inline void
891 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
892 const int32_t *ubuf[2], const int32_t *vbuf[2],
893 const int32_t *abuf0, uint16_t *dest, int dstW,
894 int uvalpha, int y, enum PixelFormat target)
896 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
897 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
900 if (uvalpha < 2048) {
901 for (i = 0; i < (dstW >> 1); i++) {
902 int Y1 = (buf0[i * 2] ) >> 2;
903 int Y2 = (buf0[i * 2 + 1]) >> 2;
904 int U = (ubuf0[i] + (-128 << 11)) >> 2;
905 int V = (vbuf0[i] + (-128 << 11)) >> 2;
908 Y1 -= c->yuv2rgb_y_offset;
909 Y2 -= c->yuv2rgb_y_offset;
910 Y1 *= c->yuv2rgb_y_coeff;
911 Y2 *= c->yuv2rgb_y_coeff;
915 R = V * c->yuv2rgb_v2r_coeff;
916 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
917 B = U * c->yuv2rgb_u2b_coeff;
919 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
920 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
921 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
922 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
923 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
924 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
928 for (i = 0; i < (dstW >> 1); i++) {
929 int Y1 = (buf0[i * 2] ) >> 2;
930 int Y2 = (buf0[i * 2 + 1]) >> 2;
931 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
932 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
935 Y1 -= c->yuv2rgb_y_offset;
936 Y2 -= c->yuv2rgb_y_offset;
937 Y1 *= c->yuv2rgb_y_coeff;
938 Y2 *= c->yuv2rgb_y_coeff;
942 R = V * c->yuv2rgb_v2r_coeff;
943 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
944 B = U * c->yuv2rgb_u2b_coeff;
946 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
947 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
948 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
949 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
950 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
951 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate the 48-bit RGB/BGR writers for both byte orders. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
966 static av_always_inline void
967 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
968 int U, int V, int A1, int A2,
969 const void *_r, const void *_g, const void *_b, int y,
970 enum PixelFormat target, int hasAlpha)
972 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
973 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
974 uint32_t *dest = (uint32_t *) _dest;
975 const uint32_t *r = (const uint32_t *) _r;
976 const uint32_t *g = (const uint32_t *) _g;
977 const uint32_t *b = (const uint32_t *) _b;
980 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
982 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
983 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
986 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
988 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
989 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
991 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
992 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
995 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
996 uint8_t *dest = (uint8_t *) _dest;
997 const uint8_t *r = (const uint8_t *) _r;
998 const uint8_t *g = (const uint8_t *) _g;
999 const uint8_t *b = (const uint8_t *) _b;
1001 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1002 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1003 dest[i * 6 + 0] = r_b[Y1];
1004 dest[i * 6 + 1] = g[Y1];
1005 dest[i * 6 + 2] = b_r[Y1];
1006 dest[i * 6 + 3] = r_b[Y2];
1007 dest[i * 6 + 4] = g[Y2];
1008 dest[i * 6 + 5] = b_r[Y2];
1011 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1012 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1013 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1014 uint16_t *dest = (uint16_t *) _dest;
1015 const uint16_t *r = (const uint16_t *) _r;
1016 const uint16_t *g = (const uint16_t *) _g;
1017 const uint16_t *b = (const uint16_t *) _b;
1018 int dr1, dg1, db1, dr2, dg2, db2;
1020 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1021 dr1 = dither_2x2_8[ y & 1 ][0];
1022 dg1 = dither_2x2_4[ y & 1 ][0];
1023 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1024 dr2 = dither_2x2_8[ y & 1 ][1];
1025 dg2 = dither_2x2_4[ y & 1 ][1];
1026 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1027 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1028 dr1 = dither_2x2_8[ y & 1 ][0];
1029 dg1 = dither_2x2_8[ y & 1 ][1];
1030 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1031 dr2 = dither_2x2_8[ y & 1 ][1];
1032 dg2 = dither_2x2_8[ y & 1 ][0];
1033 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1035 dr1 = dither_4x4_16[ y & 3 ][0];
1036 dg1 = dither_4x4_16[ y & 3 ][1];
1037 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1038 dr2 = dither_4x4_16[ y & 3 ][1];
1039 dg2 = dither_4x4_16[ y & 3 ][0];
1040 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1043 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1044 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1045 } else /* 8/4-bit */ {
1046 uint8_t *dest = (uint8_t *) _dest;
1047 const uint8_t *r = (const uint8_t *) _r;
1048 const uint8_t *g = (const uint8_t *) _g;
1049 const uint8_t *b = (const uint8_t *) _b;
1050 int dr1, dg1, db1, dr2, dg2, db2;
1052 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1053 const uint8_t * const d64 = dither_8x8_73[y & 7];
1054 const uint8_t * const d32 = dither_8x8_32[y & 7];
1055 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1056 db1 = d64[(i * 2 + 0) & 7];
1057 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1058 db2 = d64[(i * 2 + 1) & 7];
1060 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1061 const uint8_t * const d128 = dither_8x8_220[y & 7];
1062 dr1 = db1 = d128[(i * 2 + 0) & 7];
1063 dg1 = d64[(i * 2 + 0) & 7];
1064 dr2 = db2 = d128[(i * 2 + 1) & 7];
1065 dg2 = d64[(i * 2 + 1) & 7];
1068 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1069 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1070 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1072 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1073 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Multi-tap (X) vertical scaler for packed RGB output: accumulates the
 * lumFilter/chrFilter taps for a pair of luma samples sharing one chroma
 * sample, clips to 8 bits, then writes via yuv2rgb_write() using the
 * context's per-component lookup tables.
 * NOTE(review): several original lines (declarations/braces) are elided
 * in this excerpt. */
1078 static av_always_inline void
1079 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1080 const int16_t **lumSrc, int lumFilterSize,
1081 const int16_t *chrFilter, const int16_t **chrUSrc,
1082 const int16_t **chrVSrc, int chrFilterSize,
1083 const int16_t **alpSrc, uint8_t *dest, int dstW,
1084 int y, enum PixelFormat target, int hasAlpha)
1088 for (i = 0; i < (dstW >> 1); i++) {
1094 int av_unused A1, A2;
1095 const void *r, *g, *b;
1097 for (j = 0; j < lumFilterSize; j++) {
1098 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1099 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1101 for (j = 0; j < chrFilterSize; j++) {
1102 U += chrUSrc[j][i] * chrFilter[j];
1103 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when at least one component overflowed the 8-bit range */
1109 if ((Y1 | Y2 | U | V) & 0x100) {
1110 Y1 = av_clip_uint8(Y1);
1111 Y2 = av_clip_uint8(Y2);
1112 U = av_clip_uint8(U);
1113 V = av_clip_uint8(V);
1118 for (j = 0; j < lumFilterSize; j++) {
1119 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1120 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1124 if ((A1 | A2) & 0x100) {
1125 A1 = av_clip_uint8(A1);
1126 A2 = av_clip_uint8(A2);
1130 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* green needs both U and V contributions; red/blue use a single table */
1132 g = (c->table_gU[U] + c->table_gV[V]);
1135 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1136 r, g, b, y, target, hasAlpha);
1140 static av_always_inline void
1141 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1142 const int16_t *ubuf[2], const int16_t *vbuf[2],
1143 const int16_t *abuf[2], uint8_t *dest, int dstW,
1144 int yalpha, int uvalpha, int y,
1145 enum PixelFormat target, int hasAlpha)
1147 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1148 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1149 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1150 *abuf0 = abuf[0], *abuf1 = abuf[1];
1151 int yalpha1 = 4095 - yalpha;
1152 int uvalpha1 = 4095 - uvalpha;
1155 for (i = 0; i < (dstW >> 1); i++) {
1156 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1157 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1158 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1159 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1161 const void *r = c->table_rV[V],
1162 *g = (c->table_gU[U] + c->table_gV[V]),
1163 *b = c->table_bU[U];
1166 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1167 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1170 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1171 r, g, b, y, target, hasAlpha);
/* Single-line (unscaled vertical) packed-RGB output: luma taken directly
 * from buf0 (>>7 to 8 bits); chroma is either taken from one line
 * (uvalpha < 2048) or averaged between the two chroma lines. */
1175 static av_always_inline void
1176 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1177 const int16_t *ubuf[2], const int16_t *vbuf[2],
1178 const int16_t *abuf0, uint8_t *dest, int dstW,
1179 int uvalpha, int y, enum PixelFormat target,
1182 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1183 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* NOTE(review): this branch reads ubuf1/vbuf1 only; later upstream code
 * uses ubuf0/vbuf0 here — confirm which chroma line is intended. */
1186 if (uvalpha < 2048) {
1187 for (i = 0; i < (dstW >> 1); i++) {
1188 int Y1 = buf0[i * 2] >> 7;
1189 int Y2 = buf0[i * 2 + 1] >> 7;
1190 int U = ubuf1[i] >> 7;
1191 int V = vbuf1[i] >> 7;
1193 const void *r = c->table_rV[V],
1194 *g = (c->table_gU[U] + c->table_gV[V]),
1195 *b = c->table_bU[U];
1198 A1 = abuf0[i * 2 ] >> 7;
1199 A2 = abuf0[i * 2 + 1] >> 7;
1202 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1203 r, g, b, y, target, hasAlpha);
/* else: average the two chroma lines (>>8 == sum of two >>7 values halved) */
1206 for (i = 0; i < (dstW >> 1); i++) {
1207 int Y1 = buf0[i * 2] >> 7;
1208 int Y2 = buf0[i * 2 + 1] >> 7;
1209 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1210 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1212 const void *r = c->table_rV[V],
1213 *g = (c->table_gU[U] + c->table_gV[V]),
1214 *b = c->table_bU[U];
1217 A1 = abuf0[i * 2 ] >> 7;
1218 A2 = abuf0[i * 2 + 1] >> 7;
1221 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1222 r, g, b, y, target, hasAlpha);
/* Wrapper generators: YUV2RGBWRAPPERX emits only the _X_c entry point;
 * YUV2RGBWRAPPER additionally emits the _2_c (two-line) and _1_c
 * (single-line) entry points.  Each wrapper forwards to the matching
 * always-inline template with a compile-time pixel format + hasAlpha,
 * letting the compiler specialize the template per format.
 * (No comments inside: the bodies are backslash-continued.) */
1227 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1228 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1229 const int16_t **lumSrc, int lumFilterSize, \
1230 const int16_t *chrFilter, const int16_t **chrUSrc, \
1231 const int16_t **chrVSrc, int chrFilterSize, \
1232 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1235 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1236 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1237 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1239 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1240 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1241 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1242 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1243 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1244 int yalpha, int uvalpha, int y) \
1246 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1247 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1250 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1251 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1252 const int16_t *abuf0, uint8_t *dest, int dstW, \
1253 int uvalpha, int y) \
1255 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1256 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiate the packed-RGB writers.  The first two decide hasAlpha at
 * runtime from c->alpPixBuf; the a32*/x32* pairs are compile-time
 * specialized variants with/without alpha. */
1260 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1261 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1263 #if CONFIG_SWSCALE_ALPHA
1264 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1265 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1267 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1268 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
/* 24-bit and dithered low-depth formats never carry alpha */
1270 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1271 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1272 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1273 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1274 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1275 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1276 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1277 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-chroma-interpolation (no 2:1 chroma subsampling on output) X
 * scaler: one U/V sample per output pixel.  Converts via the context's
 * integer coefficients instead of lookup tables, then stores 3 or 4
 * bytes per pixel depending on target. */
1279 static av_always_inline void
1280 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1281 const int16_t **lumSrc, int lumFilterSize,
1282 const int16_t *chrFilter, const int16_t **chrUSrc,
1283 const int16_t **chrVSrc, int chrFilterSize,
1284 const int16_t **alpSrc, uint8_t *dest,
1285 int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 3 bytes/pixel for 24-bit targets, 4 otherwise */
1288 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1290 for (i = 0; i < dstW; i++) {
1298 for (j = 0; j < lumFilterSize; j++) {
1299 Y += lumSrc[j][i] * lumFilter[j];
1301 for (j = 0; j < chrFilterSize; j++) {
1302 U += chrUSrc[j][i] * chrFilter[j];
1303 V += chrVSrc[j][i] * chrFilter[j];
1310 for (j = 0; j < lumFilterSize; j++) {
1311 A += alpSrc[j][i] * lumFilter[j];
1315 A = av_clip_uint8(A);
/* integer YUV->RGB using context coefficients */
1317 Y -= c->yuv2rgb_y_offset;
1318 Y *= c->yuv2rgb_y_coeff;
1320 R = Y + V*c->yuv2rgb_v2r_coeff;
1321 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1322 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clamp to 30 bits only when something over/underflowed */
1323 if ((R | G | B) & 0xC0000000) {
1324 R = av_clip_uintp2(R, 30);
1325 G = av_clip_uintp2(G, 30);
1326 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on target layout (leading or trailing) */
1331 dest[0] = hasAlpha ? A : 255;
1345 dest[3] = hasAlpha ? A : 255;
1348 dest[0] = hasAlpha ? A : 255;
1363 dest[3] = hasAlpha ? A : 255;
/* Full-chroma writer instantiations: runtime-alpha variants first, then
 * compile-time alpha/no-alpha specializations, then 24-bit packed. */
1371 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1372 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1373 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1374 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1376 #if CONFIG_SWSCALE_ALPHA
1377 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1378 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1379 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1380 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1382 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1383 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1384 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1385 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1387 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1388 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill 'height' rows of a plane (starting at row y) with a constant
 * byte value, honouring the plane's stride. */
1390 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1391 int width, int height,
1395 uint8_t *ptr = plane + stride*y;
1396 for (i=0; i<height; i++) {
1397 memset(ptr, val, width);
/* 48-bit RGB/BGR -> 16-bit luma.  input_pixel reads a 16-bit component
 * with the format's endianness; the r/b macros swap components for BGR48
 * so the same RY/GY/BY weights apply to both orders. */
1402 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1404 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1405 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1407 static av_always_inline void
1408 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1409 enum PixelFormat origin)
1412 for (i = 0; i < width; i++) {
1413 unsigned int r_b = input_pixel(&src[i*3+0]);
1414 unsigned int g = input_pixel(&src[i*3+1]);
1415 unsigned int b_r = input_pixel(&src[i*3+2]);
1417 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> 16-bit chroma (one U/V per input pixel, no
 * horizontal subsampling).  Relies on the r/b swap macros above. */
1421 static av_always_inline void
1422 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1423 const uint16_t *src1, const uint16_t *src2,
1424 int width, enum PixelFormat origin)
1428 for (i = 0; i < width; i++) {
1429 int r_b = input_pixel(&src1[i*3+0]);
1430 int g = input_pixel(&src1[i*3+1]);
1431 int b_r = input_pixel(&src1[i*3+2]);
1433 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1434 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> chroma with 2:1 horizontal subsampling: averages
 * each horizontal pair of pixels (rounded) before the RGB->UV dot
 * product. */
1438 static av_always_inline void
1439 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1440 const uint16_t *src1, const uint16_t *src2,
1441 int width, enum PixelFormat origin)
1445 for (i = 0; i < width; i++) {
1446 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1447 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1448 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1450 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1451 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the public uint8_t* entry points (ToY / ToUV / ToUV_half) for
 * one 48-bit format; they just cast to uint16_t* and forward to the
 * templates above.  Instantiated below for rgb/bgr x LE/BE.
 * (Backslash-continued body: no interior comments possible.) */
1459 #define rgb48funcs(pattern, BE_LE, origin) \
1460 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1461 int width, uint32_t *unused) \
1463 const uint16_t *src = (const uint16_t *) _src; \
1464 uint16_t *dst = (uint16_t *) _dst; \
1465 rgb48ToY_c_template(dst, src, width, origin); \
1468 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1469 const uint8_t *_src1, const uint8_t *_src2, \
1470 int width, uint32_t *unused) \
1472 const uint16_t *src1 = (const uint16_t *) _src1, \
1473 *src2 = (const uint16_t *) _src2; \
1474 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1475 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1478 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1479 const uint8_t *_src1, const uint8_t *_src2, \
1480 int width, uint32_t *unused) \
1482 const uint16_t *src1 = (const uint16_t *) _src1, \
1483 *src2 = (const uint16_t *) _src2; \
1484 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1485 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1488 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1489 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1490 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1491 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Generic 16/32-bit packed RGB -> 8-bit luma.  input_pixel reads 32-bit
 * native-aligned for the 4-byte alpha formats, else a 16-bit word with
 * the format's endianness.  shr/shg/shb/shp + masks extract components;
 * rsh/gsh/bsh pre-scale the weights so all formats share one S-bit
 * rounding shift. */
1493 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1494 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1495 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1497 static av_always_inline void
1498 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1499 int width, enum PixelFormat origin,
1500 int shr, int shg, int shb, int shp,
1501 int maskr, int maskg, int maskb,
1502 int rsh, int gsh, int bsh, int S)
1504 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1505 rnd = 33 << (S - 1);
1508 for (i = 0; i < width; i++) {
1509 int px = input_pixel(i) >> shp;
1510 int b = (px & maskb) >> shb;
1511 int g = (px & maskg) >> shg;
1512 int r = (px & maskr) >> shr;
1514 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> 8-bit chroma, one U/V per pixel.
 * Same extraction scheme as rgb16_32ToY_c_template; rnd centres the
 * result around 128 (257 << (S-1) == 128.5 << S). */
1518 static av_always_inline void
1519 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1520 const uint8_t *src, int width,
1521 enum PixelFormat origin,
1522 int shr, int shg, int shb, int shp,
1523 int maskr, int maskg, int maskb,
1524 int rsh, int gsh, int bsh, int S)
1526 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1527 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1528 rnd = 257 << (S - 1);
1531 for (i = 0; i < width; i++) {
1532 int px = input_pixel(i) >> shp;
1533 int b = (px & maskb) >> shb;
1534 int g = (px & maskg) >> shg;
1535 int r = (px & maskr) >> shr;
1537 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1538 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> chroma with 2:1 horizontal
 * subsampling.  Sums two adjacent pixels component-wise: green via the
 * complement mask maskgx, red+blue as a combined "rb" sum (masks are
 * widened by one bit to hold the pair sum); final shift is S+1 to fold
 * in the averaging. */
1542 static av_always_inline void
1543 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1544 const uint8_t *src, int width,
1545 enum PixelFormat origin,
1546 int shr, int shg, int shb, int shp,
1547 int maskr, int maskg, int maskb,
1548 int rsh, int gsh, int bsh, int S)
1550 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1551 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1552 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen masks so a two-pixel sum still fits each field */
1555 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1556 for (i = 0; i < width; i++) {
1557 int px0 = input_pixel(2 * i + 0) >> shp;
1558 int px1 = input_pixel(2 * i + 1) >> shp;
1559 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1560 int rb = px0 + px1 - g;
1562 b = (rb & maskb) >> shb;
/* 565-style formats need the green field handled separately */
1563 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1564 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1567 g = (g & maskg) >> shg;
1569 r = (rb & maskr) >> shr;
1571 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1572 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generate ToY/ToUV/ToUV_half entry points for one packed RGB layout by
 * binding the extraction parameters at compile time.  Instantiated for
 * all 15/16/32-bit RGB/BGR layouts below (S encodes the per-format
 * normalization shift).  (Backslash-continued: no interior comments.) */
1578 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1579 maskg, maskb, rsh, gsh, bsh, S) \
1580 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1581 int width, uint32_t *unused) \
1583 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1584 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1587 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1588 const uint8_t *src, const uint8_t *dummy, \
1589 int width, uint32_t *unused) \
1591 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1592 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1595 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1596 const uint8_t *src, const uint8_t *dummy, \
1597 int width, uint32_t *unused) \
1599 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1600 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1603 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1604 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1605 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1606 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1607 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1608 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1609 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1610 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1611 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1612 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1613 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1614 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from packed ABGR input (body elided in this
 * excerpt — presumably dst[i] = src[4*i]; verify against full source). */
1616 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1619 for (i=0; i<width; i++) {
/* Extract the alpha plane from packed RGBA input (body elided in this
 * excerpt — presumably dst[i] = src[4*i+3]; verify against full source). */
1624 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1627 for (i=0; i<width; i++) {
/* PAL8 -> luma: look each palette index up in pal[] and keep the low
 * byte (Y) of the packed palette entry. */
1632 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1635 for (i=0; i<width; i++) {
1638 dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma: palette entries pack U/V in the upper bytes; both
 * source pointers must alias the same palette-index plane. */
1642 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1643 const uint8_t *src1, const uint8_t *src2,
1644 int width, uint32_t *pal)
1647 assert(src1 == src2);
1648 for (i=0; i<width; i++) {
1649 int p= pal[src1[i]];
/* 1 bpp (white = 0) -> 8-bit luma: expand each bit to 0/255.  The
 * elided line presumably inverts src[i] before bit extraction. */
1656 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1657 int width, uint32_t *unused)
1660 for (i=0; i<width/8; i++) {
1663 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp (black = 0) -> 8-bit luma: expand each bit to 0/255, MSB first. */
1667 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1668 int width, uint32_t *unused)
1671 for (i=0; i<width/8; i++) {
1674 dst[8*i+j]= ((d>>(7-j))&1)*255;
1678 //FIXME yuy2* can read up to 7 samples too much
/* YUYV -> luma: Y occupies every even byte. */
1680 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1684 for (i=0; i<width; i++)
/* YUYV -> chroma: U at byte 1, V at byte 3 of each 4-byte macropixel.
 * Both source pointers must alias the same packed line. */
1688 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1689 const uint8_t *src2, int width, uint32_t *unused)
1692 for (i=0; i<width; i++) {
1693 dstU[i]= src1[4*i + 1];
1694 dstV[i]= src1[4*i + 3];
1696 assert(src1 == src2);
/* Byte-swap a 16-bit luma plane (endianness conversion). */
1699 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1702 const uint16_t *src = (const uint16_t *) _src;
1703 uint16_t *dst = (uint16_t *) _dst;
1704 for (i=0; i<width; i++) {
1705 dst[i] = av_bswap16(src[i]);
/* Byte-swap two 16-bit chroma planes (endianness conversion). */
1709 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1710 const uint8_t *_src2, int width, uint32_t *unused)
1713 const uint16_t *src1 = (const uint16_t *) _src1,
1714 *src2 = (const uint16_t *) _src2;
1715 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1716 for (i=0; i<width; i++) {
1717 dstU[i] = av_bswap16(src1[i]);
1718 dstV[i] = av_bswap16(src2[i]);
1722 /* This is almost identical to the previous, and exists only because
1723 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY -> luma: Y occupies every odd byte. */
1724 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1728 for (i=0; i<width; i++)
/* UYVY -> chroma: U at byte 0, V at byte 2 of each 4-byte macropixel.
 * Both source pointers must alias the same packed line. */
1732 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1733 const uint8_t *src2, int width, uint32_t *unused)
1736 for (i=0; i<width; i++) {
1737 dstU[i]= src1[4*i + 0];
1738 dstV[i]= src1[4*i + 2];
1740 assert(src1 == src2);
/* De-interleave an NV12/NV21-style packed chroma line into two planes;
 * callers choose dst1/dst2 order to select U/V vs V/U. */
1743 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1744 const uint8_t *src, int width)
1747 for (i = 0; i < width; i++) {
1748 dst1[i] = src[2*i+0];
1749 dst2[i] = src[2*i+1];
/* NV12 (interleaved UVUV...) -> separate U and V planes. */
1753 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1754 const uint8_t *src1, const uint8_t *src2,
1755 int width, uint32_t *unused)
1757 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 (interleaved VUVU...) -> separate U and V planes (swapped order). */
1760 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1761 const uint8_t *src1, const uint8_t *src2,
1762 int width, uint32_t *unused)
1764 nvXXtoUV_c(dstV, dstU, src1, width);
/* 9/10-bit chroma -> 16-bit: shift up and replicate the top bits into
 * the low bits so full-range scaling is preserved. */
1767 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1769 // FIXME Maybe dither instead.
1770 static av_always_inline void
1771 yuv9_OR_10ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1772 const uint16_t *srcU, const uint16_t *srcV,
1773 int width, enum PixelFormat origin, int depth)
1777 for (i = 0; i < width; i++) {
1778 int upx = input_pixel(&srcU[i]);
1779 int vpx = input_pixel(&srcV[i]);
/* bit replication: e.g. depth 10 -> (x<<6)|(x>>4) */
1780 dstU[i] = (upx << (16 - depth)) | (upx >> (2 * depth - 16));
1781 dstV[i] = (vpx << (16 - depth)) | (vpx >> (2 * depth - 16));
/* 9/10-bit luma -> 16-bit, same bit-replication scheme as the chroma
 * template above. */
1785 static av_always_inline void
1786 yuv9_or_10ToY_c_template(uint16_t *dstY, const uint16_t *srcY,
1787 int width, enum PixelFormat origin, int depth)
1791 for (i = 0; i < width; i++) {
1792 int px = input_pixel(&srcY[i]);
1793 dstY[i] = (px << (16 - depth)) | (px >> (2 * depth - 16));
/* Generate uint8_t* ToUV/ToY entry points for one 9/10-bit planar
 * format (cast + forward to the templates).  Instantiated for
 * YUV420P9/P10 LE/BE.  (Backslash-continued: no interior comments.) */
1799 #define YUV_NBPS(depth, BE_LE, origin) \
1800 static void BE_LE ## depth ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1801 const uint8_t *_srcU, const uint8_t *_srcV, \
1802 int width, uint32_t *unused) \
1804 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1805 const uint16_t *srcU = (const uint16_t *) _srcU, \
1806 *srcV = (const uint16_t *) _srcV; \
1807 yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1809 static void BE_LE ## depth ## ToY_c(uint8_t *_dstY, const uint8_t *_srcY, \
1810 int width, uint32_t *unused) \
1812 uint16_t *dstY = (uint16_t *) _dstY; \
1813 const uint16_t *srcY = (const uint16_t *) _srcY; \
1814 yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1817 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1818 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1819 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1820 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
/* BGR24 -> 8-bit luma (component loads elided in this excerpt). */
1822 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1823 int width, uint32_t *unused)
1826 for (i=0; i<width; i++) {
1831 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> chroma, one U/V per pixel; rnd 257<<(SHIFT-1) centres the
 * result around 128. */
1835 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1836 const uint8_t *src2, int width, uint32_t *unused)
1839 for (i=0; i<width; i++) {
1840 int b= src1[3*i + 0];
1841 int g= src1[3*i + 1];
1842 int r= src1[3*i + 2];
1844 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1845 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1847 assert(src1 == src2);
/* BGR24 -> chroma with 2:1 horizontal subsampling: sums each pixel
 * pair; the extra bit is folded into the final >> (SHIFT+1). */
1850 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1851 const uint8_t *src2, int width, uint32_t *unused)
1854 for (i=0; i<width; i++) {
1855 int b= src1[6*i + 0] + src1[6*i + 3];
1856 int g= src1[6*i + 1] + src1[6*i + 4];
1857 int r= src1[6*i + 2] + src1[6*i + 5];
1859 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1860 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1862 assert(src1 == src2);
/* RGB24 -> 8-bit luma (component loads elided in this excerpt). */
1865 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1869 for (i=0; i<width; i++) {
1874 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 -> chroma, one U/V per pixel (component order r,g,b, the
 * mirror of bgr24ToUV_c). */
1878 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1879 const uint8_t *src2, int width, uint32_t *unused)
1883 for (i=0; i<width; i++) {
1884 int r= src1[3*i + 0];
1885 int g= src1[3*i + 1];
1886 int b= src1[3*i + 2];
1888 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1889 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 -> chroma with 2:1 horizontal subsampling (pair-sum, then
 * >> (SHIFT+1) to average). */
1893 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1894 const uint8_t *src2, int width, uint32_t *unused)
1898 for (i=0; i<width; i++) {
1899 int r= src1[6*i + 0] + src1[6*i + 3];
1900 int g= src1[6*i + 1] + src1[6*i + 4];
1901 int b= src1[6*i + 2] + src1[6*i + 5];
1903 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1904 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal scaler for 16-bit input: applies the per-destination FIR
 * filter and clamps the 19-bit result (see shift comment below). */
1908 static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
1909 const int16_t *filter,
1910 const int16_t *filterPos, int filterSize)
1913 int32_t *dst = (int32_t *) _dst;
1914 const uint16_t *src = (const uint16_t *) _src;
1916 for (i = 0; i < dstW; i++) {
1918 int srcPos = filterPos[i];
1919 unsigned int val = 0;
1921 for (j = 0; j < filterSize; j++) {
1922 val += src[srcPos + j] * filter[filterSize * i + j];
1924 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1925 dst[i] = FFMIN(val >> 11, (1 << 19) - 1);
1929 // bilinear / bicubic scaling
/* Generic horizontal scaler for 8-bit input: FIR filter per output
 * sample, clamped to 15 bits (the filter can overshoot). */
1930 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1931 const int16_t *filter, const int16_t *filterPos,
1935 for (i=0; i<dstW; i++) {
1937 int srcPos= filterPos[i];
1939 for (j=0; j<filterSize; j++) {
1940 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1942 //filter += hFilterSize;
1943 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1948 //FIXME all pal and rgb srcFormats could do this conversion as well
1949 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma to full (JPEG) range in the 15-bit
 * intermediate domain; FFMIN guards the fixed-point multiply against
 * overflow. */
1950 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1953 for (i = 0; i < width; i++) {
1954 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1955 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full (JPEG) range chroma to limited range, 15-bit domain. */
1958 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1961 for (i = 0; i < width; i++) {
1962 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1963 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand limited-range luma to full (JPEG) range, 15-bit domain. */
1966 static void lumRangeToJpeg_c(int16_t *dst, int width)
1969 for (i = 0; i < width; i++)
1970 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress full (JPEG) range luma to limited range, 15-bit domain. */
1972 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1975 for (i = 0; i < width; i++)
1976 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 19-bit-intermediate variant of chrRangeToJpeg_c: buffers actually
 * hold int32_t; constants are the 15-bit ones scaled by 16 (<<4). */
1979 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1982 int32_t *dstU = (int32_t *) _dstU;
1983 int32_t *dstV = (int32_t *) _dstV;
1984 for (i = 0; i < width; i++) {
1985 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1986 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit-intermediate variant of chrRangeFromJpeg_c (int32_t buffers,
 * offsets scaled by 16). */
1989 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1992 int32_t *dstU = (int32_t *) _dstU;
1993 int32_t *dstV = (int32_t *) _dstV;
1994 for (i = 0; i < width; i++) {
1995 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1996 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/* 19-bit-intermediate variant of lumRangeToJpeg_c (int32_t buffer). */
1999 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2002 int32_t *dst = (int32_t *) _dst;
2003 for (i = 0; i < width; i++)
2004 dst[i] = (FFMIN(dst[i],30189<<4)*19077 - (39057361<<4))>>14;
/* 19-bit-intermediate variant of lumRangeFromJpeg_c (int32_t buffer). */
2006 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2009 int32_t *dst = (int32_t *) _dst;
2010 for (i = 0; i < width; i++)
2011 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point position,
 * 7-bit interpolation weight, output in 15-bit intermediate range.
 * NOTE(review): reads src[xx+1] on the last tap — relies on caller
 * padding, as with the other fast scalers. */
2014 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2015 const uint8_t *src, int srcW, int xInc)
2018 unsigned int xpos=0;
2019 for (i=0;i<dstWidth;i++) {
2020 register unsigned int xx=xpos>>16;
2021 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2022 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/* Expand 8-bit samples to 16 bits in place-compatible reverse order by
 * duplicating each byte into both halves (x -> x*257). */
2027 static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
2030 uint8_t *dst = (uint8_t *) _dst;
2031 for (i = len - 1; i >= 0; i--) {
2032 dst[i * 2] = dst[i * 2 + 1] = src[i];
/* Narrow 19-bit intermediate samples (int32_t) down to the 15-bit
 * int16_t domain (forward order, safe for in-place dst==src). */
2036 static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
2039 for (i = 0; i < len; i++) {
2040 dst[i] = src[i] >> 4;
2044 // *** horizontal scale Y line to temp buffer
/* Horizontal luma/alpha pipeline for one line: optional input-format
 * conversion to YV12, optional 8->16 bit promotion, the actual
 * horizontal scale (filtered or fast bilinear), optional range
 * conversion, and optional 19->15 bit narrowing. */
2045 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2046 const uint8_t *src, int srcW, int xInc,
2047 const int16_t *hLumFilter,
2048 const int16_t *hLumFilterPos, int hLumFilterSize,
2049 uint8_t *formatConvBuffer,
2050 uint32_t *pal, int isAlpha)
2052 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
/* range conversion applies to luma only, never to the alpha plane */
2053 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2056 toYV12(formatConvBuffer, src, srcW, pal);
2057 src= formatConvBuffer;
/* promote <=8-bit input to 16 bits when scaling at 16 bpp */
2060 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2061 c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
2062 src = formatConvBuffer;
2065 if (!c->hyscale_fast) {
2066 c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2067 } else { // fast bilinear upscale / crap downscale
2068 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2072 convertRange(dst, dstWidth);
2074 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2075 c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: processes U and V lines in
 * lockstep; (xalpha^127) approximates (128 - xalpha) as the complement
 * weight. */
2079 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2080 int dstWidth, const uint8_t *src1,
2081 const uint8_t *src2, int srcW, int xInc)
2084 unsigned int xpos=0;
2085 for (i=0;i<dstWidth;i++) {
2086 register unsigned int xx=xpos>>16;
2087 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2088 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2089 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontal chroma pipeline for one line: mirrors hyscale but drives
 * the U and V planes together (conversion, optional 8->16 promotion,
 * scale, range conversion, optional 19->15 narrowing). */
2094 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2095 const uint8_t *src1, const uint8_t *src2,
2096 int srcW, int xInc, const int16_t *hChrFilter,
2097 const int16_t *hChrFilterPos, int hChrFilterSize,
2098 uint8_t *formatConvBuffer, uint32_t *pal)
/* second half of the conversion buffer holds the V line */
2101 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
2102 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2103 src1= formatConvBuffer;
2107 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2108 uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
2109 c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2110 c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2111 src1 = formatConvBuffer;
2115 if (!c->hcscale_fast) {
2116 c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2117 c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2118 } else { // fast bilinear upscale / crap downscale
2119 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2122 if (c->chrConvertRange)
2123 c->chrConvertRange(dst1, dst2, dstWidth);
2125 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2126 c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2127 c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/*
 * Select the plain-C output functions (vertical scaling + packing into
 * the destination pixel format) and return them through the five
 * out-pointers.  Planar destinations get yuv2yuv1/yuv2yuvX writers;
 * packed destinations get yuv2packed1/yuv2packed2/yuv2packedX writers.
 */
2131 static av_always_inline void
2132 find_c_packed_planar_out_funcs(SwsContext *c,
2133 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2134 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2135 yuv2packedX_fn *yuv2packedX)
2137 enum PixelFormat dstFormat = c->dstFormat;
/* semi-planar NV12/NV21: dedicated interleaved-chroma writer */
2139 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2140 *yuv2yuvX = yuv2nv12X_c;
2141 } else if (is16BPS(dstFormat)) {
2142 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2143 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means 9 bits per component; otherwise it is 10 */
2144 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2145 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2147 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
/* plain 8-bit planar output: both the 1-tap and the general writer exist */
2150 *yuv2yuv1 = yuv2yuv1_c;
2151 *yuv2yuvX = yuv2yuvX_c;
/* packed RGB with full horizontal chroma interpolation: only the
 * general (*_full_X_) writers are installed in this mode */
2153 if(c->flags & SWS_FULL_CHR_H_INT) {
2154 switch (dstFormat) {
2157 *yuv2packedX = yuv2rgba32_full_X_c;
2159 #if CONFIG_SWSCALE_ALPHA
2161 *yuv2packedX = yuv2rgba32_full_X_c;
2163 #endif /* CONFIG_SWSCALE_ALPHA */
2165 *yuv2packedX = yuv2rgbx32_full_X_c;
2167 #endif /* !CONFIG_SMALL */
2171 *yuv2packedX = yuv2argb32_full_X_c;
2173 #if CONFIG_SWSCALE_ALPHA
2175 *yuv2packedX = yuv2argb32_full_X_c;
2177 #endif /* CONFIG_SWSCALE_ALPHA */
2179 *yuv2packedX = yuv2xrgb32_full_X_c;
2181 #endif /* !CONFIG_SMALL */
2185 *yuv2packedX = yuv2bgra32_full_X_c;
2187 #if CONFIG_SWSCALE_ALPHA
2189 *yuv2packedX = yuv2bgra32_full_X_c;
2191 #endif /* CONFIG_SWSCALE_ALPHA */
2193 *yuv2packedX = yuv2bgrx32_full_X_c;
2195 #endif /* !CONFIG_SMALL */
2199 *yuv2packedX = yuv2abgr32_full_X_c;
2201 #if CONFIG_SWSCALE_ALPHA
2203 *yuv2packedX = yuv2abgr32_full_X_c;
2205 #endif /* CONFIG_SWSCALE_ALPHA */
2207 *yuv2packedX = yuv2xbgr32_full_X_c;
2209 #endif /* !CONFIG_SMALL */
2212 *yuv2packedX = yuv2rgb24_full_X_c;
2215 *yuv2packedX = yuv2bgr24_full_X_c;
/* normal packed output: _1 = single vertical tap (unscaled),
 * _2 = two taps (bilinear), _X = general N-tap vertical filter
 * (see the vLumFilterSize/vChrFilterSize dispatch in swScale()) */
2219 switch (dstFormat) {
2220 case PIX_FMT_GRAY16BE:
2221 *yuv2packed1 = yuv2gray16BE_1_c;
2222 *yuv2packed2 = yuv2gray16BE_2_c;
2223 *yuv2packedX = yuv2gray16BE_X_c;
2225 case PIX_FMT_GRAY16LE:
2226 *yuv2packed1 = yuv2gray16LE_1_c;
2227 *yuv2packed2 = yuv2gray16LE_2_c;
2228 *yuv2packedX = yuv2gray16LE_X_c;
2230 case PIX_FMT_MONOWHITE:
2231 *yuv2packed1 = yuv2monowhite_1_c;
2232 *yuv2packed2 = yuv2monowhite_2_c;
2233 *yuv2packedX = yuv2monowhite_X_c;
2235 case PIX_FMT_MONOBLACK:
2236 *yuv2packed1 = yuv2monoblack_1_c;
2237 *yuv2packed2 = yuv2monoblack_2_c;
2238 *yuv2packedX = yuv2monoblack_X_c;
2240 case PIX_FMT_YUYV422:
2241 *yuv2packed1 = yuv2yuyv422_1_c;
2242 *yuv2packed2 = yuv2yuyv422_2_c;
2243 *yuv2packedX = yuv2yuyv422_X_c;
2245 case PIX_FMT_UYVY422:
2246 *yuv2packed1 = yuv2uyvy422_1_c;
2247 *yuv2packed2 = yuv2uyvy422_2_c;
2248 *yuv2packedX = yuv2uyvy422_X_c;
2250 case PIX_FMT_RGB48LE:
2251 *yuv2packed1 = yuv2rgb48le_1_c;
2252 *yuv2packed2 = yuv2rgb48le_2_c;
2253 *yuv2packedX = yuv2rgb48le_X_c;
2255 case PIX_FMT_RGB48BE:
2256 *yuv2packed1 = yuv2rgb48be_1_c;
2257 *yuv2packed2 = yuv2rgb48be_2_c;
2258 *yuv2packedX = yuv2rgb48be_X_c;
2260 case PIX_FMT_BGR48LE:
2261 *yuv2packed1 = yuv2bgr48le_1_c;
2262 *yuv2packed2 = yuv2bgr48le_2_c;
2263 *yuv2packedX = yuv2bgr48le_X_c;
2265 case PIX_FMT_BGR48BE:
2266 *yuv2packed1 = yuv2bgr48be_1_c;
2267 *yuv2packed2 = yuv2bgr48be_2_c;
2268 *yuv2packedX = yuv2bgr48be_X_c;
2273 *yuv2packed1 = yuv2rgb32_1_c;
2274 *yuv2packed2 = yuv2rgb32_2_c;
2275 *yuv2packedX = yuv2rgb32_X_c;
2277 #if CONFIG_SWSCALE_ALPHA
2279 *yuv2packed1 = yuv2rgba32_1_c;
2280 *yuv2packed2 = yuv2rgba32_2_c;
2281 *yuv2packedX = yuv2rgba32_X_c;
2283 #endif /* CONFIG_SWSCALE_ALPHA */
2285 *yuv2packed1 = yuv2rgbx32_1_c;
2286 *yuv2packed2 = yuv2rgbx32_2_c;
2287 *yuv2packedX = yuv2rgbx32_X_c;
2289 #endif /* !CONFIG_SMALL */
2291 case PIX_FMT_RGB32_1:
2292 case PIX_FMT_BGR32_1:
2294 *yuv2packed1 = yuv2rgb32_1_1_c;
2295 *yuv2packed2 = yuv2rgb32_1_2_c;
2296 *yuv2packedX = yuv2rgb32_1_X_c;
2298 #if CONFIG_SWSCALE_ALPHA
2300 *yuv2packed1 = yuv2rgba32_1_1_c;
2301 *yuv2packed2 = yuv2rgba32_1_2_c;
2302 *yuv2packedX = yuv2rgba32_1_X_c;
2304 #endif /* CONFIG_SWSCALE_ALPHA */
2306 *yuv2packed1 = yuv2rgbx32_1_1_c;
2307 *yuv2packed2 = yuv2rgbx32_1_2_c;
2308 *yuv2packedX = yuv2rgbx32_1_X_c;
2310 #endif /* !CONFIG_SMALL */
2313 *yuv2packed1 = yuv2rgb24_1_c;
2314 *yuv2packed2 = yuv2rgb24_2_c;
2315 *yuv2packedX = yuv2rgb24_X_c;
2318 *yuv2packed1 = yuv2bgr24_1_c;
2319 *yuv2packed2 = yuv2bgr24_2_c;
2320 *yuv2packedX = yuv2bgr24_X_c;
2322 case PIX_FMT_RGB565LE:
2323 case PIX_FMT_RGB565BE:
2324 case PIX_FMT_BGR565LE:
2325 case PIX_FMT_BGR565BE:
2326 *yuv2packed1 = yuv2rgb16_1_c;
2327 *yuv2packed2 = yuv2rgb16_2_c;
2328 *yuv2packedX = yuv2rgb16_X_c;
2330 case PIX_FMT_RGB555LE:
2331 case PIX_FMT_RGB555BE:
2332 case PIX_FMT_BGR555LE:
2333 case PIX_FMT_BGR555BE:
2334 *yuv2packed1 = yuv2rgb15_1_c;
2335 *yuv2packed2 = yuv2rgb15_2_c;
2336 *yuv2packedX = yuv2rgb15_X_c;
2338 case PIX_FMT_RGB444LE:
2339 case PIX_FMT_RGB444BE:
2340 case PIX_FMT_BGR444LE:
2341 case PIX_FMT_BGR444BE:
2342 *yuv2packed1 = yuv2rgb12_1_c;
2343 *yuv2packed2 = yuv2rgb12_2_c;
2344 *yuv2packedX = yuv2rgb12_X_c;
2348 *yuv2packed1 = yuv2rgb8_1_c;
2349 *yuv2packed2 = yuv2rgb8_2_c;
2350 *yuv2packedX = yuv2rgb8_X_c;
2354 *yuv2packed1 = yuv2rgb4_1_c;
2355 *yuv2packed2 = yuv2rgb4_2_c;
2356 *yuv2packedX = yuv2rgb4_X_c;
2358 case PIX_FMT_RGB4_BYTE:
2359 case PIX_FMT_BGR4_BYTE:
2360 *yuv2packed1 = yuv2rgb4b_1_c;
2361 *yuv2packed2 = yuv2rgb4b_2_c;
2362 *yuv2packedX = yuv2rgb4b_X_c;
2368 #define DEBUG_SWSCALE_BUFFERS 0
2369 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Core C scaling loop.  For each destination line it horizontally
 * scales the source lines it needs into the luma/chroma ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf) and then runs the
 * vertical-scale + output writer (yuv2yuv1 / yuv2yuvX for planar,
 * yuv2packed1 / yuv2packed2 / yuv2packedX for packed destinations).
 * Input may arrive slice by slice (srcSliceY/srcSliceH); ring-buffer
 * state is carried across calls in the context.
 * Returns the number of destination lines written (dstY - lastDstY).
 */
2371 static int swScale(SwsContext *c, const uint8_t* src[],
2372 int srcStride[], int srcSliceY,
2373 int srcSliceH, uint8_t* dst[], int dstStride[])
2375 /* load a few things into local vars to make the code more readable? and faster */
2376 const int srcW= c->srcW;
2377 const int dstW= c->dstW;
2378 const int dstH= c->dstH;
2379 const int chrDstW= c->chrDstW;
2380 const int chrSrcW= c->chrSrcW;
2381 const int lumXInc= c->lumXInc;
2382 const int chrXInc= c->chrXInc;
2383 const enum PixelFormat dstFormat= c->dstFormat;
2384 const int flags= c->flags;
2385 int16_t *vLumFilterPos= c->vLumFilterPos;
2386 int16_t *vChrFilterPos= c->vChrFilterPos;
2387 int16_t *hLumFilterPos= c->hLumFilterPos;
2388 int16_t *hChrFilterPos= c->hChrFilterPos;
2389 int16_t *vLumFilter= c->vLumFilter;
2390 int16_t *vChrFilter= c->vChrFilter;
2391 int16_t *hLumFilter= c->hLumFilter;
2392 int16_t *hChrFilter= c->hChrFilter;
2393 int32_t *lumMmxFilter= c->lumMmxFilter;
2394 int32_t *chrMmxFilter= c->chrMmxFilter;
2395 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2396 const int vLumFilterSize= c->vLumFilterSize;
2397 const int vChrFilterSize= c->vChrFilterSize;
2398 const int hLumFilterSize= c->hLumFilterSize;
2399 const int hChrFilterSize= c->hChrFilterSize;
2400 int16_t **lumPixBuf= c->lumPixBuf;
2401 int16_t **chrUPixBuf= c->chrUPixBuf;
2402 int16_t **chrVPixBuf= c->chrVPixBuf;
2403 int16_t **alpPixBuf= c->alpPixBuf;
2404 const int vLumBufSize= c->vLumBufSize;
2405 const int vChrBufSize= c->vChrBufSize;
2406 uint8_t *formatConvBuffer= c->formatConvBuffer;
2407 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
/* -((-x) >> n) is a shift that rounds up, i.e. ceil(x / 2^n) */
2408 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2410 uint32_t *pal=c->pal_yuv;
2411 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2412 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2413 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2414 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2415 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2417 /* vars which will change and which we need to store back in the context */
2419 int lumBufIndex= c->lumBufIndex;
2420 int chrBufIndex= c->chrBufIndex;
2421 int lastInLumBuf= c->lastInLumBuf;
2422 int lastInChrBuf= c->lastInChrBuf;
/* packed input carries all data in plane 0, so the alpha plane (3)
 * reuses plane 0's stride */
2424 if (isPacked(c->srcFormat)) {
2432 srcStride[3]= srcStride[0];
/* vChrDrop discards chroma lines: stepping the stride by 2^vChrDrop
 * skips them during the horizontal pass */
2434 srcStride[1]<<= c->vChrDrop;
2435 srcStride[2]<<= c->vChrDrop;
2437 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2438 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2439 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2440 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2441 srcSliceY, srcSliceH, dstY, dstH);
2442 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2443 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn (once) about unaligned output strides — some fast paths assume
 * 8-byte alignment */
2445 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2446 static int warnedAlready=0; //FIXME move this into the context perhaps
2447 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2448 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2449 " ->cannot do aligned memory accesses anymore\n");
2454 /* Note the user might start scaling the picture in the middle so this
2455 will not get executed. This is not really intended but works
2456 currently, so people might do it. */
2457 if (srcSliceY ==0) {
/* main loop over destination lines; breaks out early if the current
 * slice does not contain enough source lines yet */
2467 for (;dstY < dstH; dstY++) {
2468 const int chrDstY= dstY>>c->chrDstVSubSample;
2469 uint8_t *dest[4] = {
2470 dst[0] + dstStride[0] * dstY,
2471 dst[1] + dstStride[1] * chrDstY,
2472 dst[2] + dstStride[2] * chrDstY,
2473 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2476 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* first luma line needed by the last output line of this chroma row group */
2477 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2478 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2479 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2480 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2481 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2484 //handle holes (FAST_BILINEAR & weird filters)
2485 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2486 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2487 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2488 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2490 DEBUG_BUFFERS("dstY: %d\n", dstY);
2491 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2492 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2493 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2494 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2496 // Do we have enough lines in this slice to output the dstY line
2497 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input yet: clamp to what this slice provides, buffer it,
 * and bail out of the loop after the horizontal pass below */
2499 if (!enough_lines) {
2500 lastLumSrcY = srcSliceY + srcSliceH - 1;
2501 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2502 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2503 lastLumSrcY, lastChrSrcY);
2506 //Do horizontal scaling
2507 while(lastInLumBuf < lastLumSrcY) {
2508 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2509 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2511 assert(lumBufIndex < 2*vLumBufSize);
2512 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2513 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2514 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2515 hLumFilter, hLumFilterPos, hLumFilterSize,
/* alpha is scaled with the luma filter when an alpha buffer exists */
2518 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2519 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2520 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2524 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2525 lumBufIndex, lastInLumBuf);
2527 while(lastInChrBuf < lastChrSrcY) {
2528 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2529 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2531 assert(chrBufIndex < 2*vChrBufSize);
2532 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2533 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2534 //FIXME replace parameters through context struct (some at least)
/* formats without real chroma (gray/mono) skip the chroma pass entirely */
2536 if (c->needs_hcscale)
2537 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2538 chrDstW, src1, src2, chrSrcW, chrXInc,
2539 hChrFilter, hChrFilterPos, hChrFilterSize,
2540 formatConvBuffer, pal);
2542 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2543 chrBufIndex, lastInChrBuf);
2545 //wrap buf index around to stay inside the ring buffer
2546 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2547 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2549 break; //we can't output a dstY line so let's try with the next slice
/* keep the per-line MMX filter/dither state in sync with the current line
 * (NOTE(review): exact contents are defined elsewhere — not visible here) */
2552 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2554 if (dstY >= dstH-2) {
2555 // hmm looks like we can't use MMX here without overwriting this array's tail
2556 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2557 &yuv2packed1, &yuv2packed2,
/* point into the ring buffers at the first line the vertical filter needs;
 * +vLumBufSize/+vChrBufSize keeps the offset non-negative before wrapping */
2562 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2563 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2564 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2565 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2566 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2567 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2568 if ((dstY&chrSkipMask) || isGray(dstFormat))
2569 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2570 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2571 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2572 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2573 dest, dstW, chrDstW);
2574 } else { //General YV12
2575 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2576 lumSrcPtr, vLumFilterSize,
2577 vChrFilter + chrDstY * vChrFilterSize,
2578 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2579 alpSrcPtr, dest, dstW, chrDstW);
2582 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2583 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2584 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2585 int chrAlpha = vChrFilter[2 * dstY + 1];
2586 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2587 alpPixBuf ? *alpSrcPtr : NULL,
2588 dest[0], dstW, chrAlpha, dstY);
2589 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2590 int lumAlpha = vLumFilter[2 * dstY + 1];
2591 int chrAlpha = vChrFilter[2 * dstY + 1];
/* replicate the 16-bit coefficient into both halves of a 32-bit word
 * for the MMX code path */
2593 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2595 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2596 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2597 alpPixBuf ? alpSrcPtr : NULL,
2598 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2599 } else { //general RGB
2600 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2601 lumSrcPtr, vLumFilterSize,
2602 vChrFilter + dstY * vChrFilterSize,
2603 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2604 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA output without source alpha: fill the alpha plane with opaque */
2610 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2611 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* sfence: make the MMX2 output code's (possibly non-temporal) stores
 * globally visible before returning */
2614 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2615 __asm__ volatile("sfence":::"memory");
2619 /* store changed local vars back in the context */
2621 c->lumBufIndex= lumBufIndex;
2622 c->chrBufIndex= chrBufIndex;
2623 c->lastInLumBuf= lastInLumBuf;
2624 c->lastInChrBuf= lastInChrBuf;
/* number of destination lines produced by this call */
2626 return dstY - lastDstY;
/*
 * Install the plain-C function pointers in the context:
 *  - output writers (via find_c_packed_planar_out_funcs),
 *  - per-source-format input unpackers to planar YUV
 *    (chrToYV12 / lumToYV12 / alpToYV12),
 *  - horizontal scalers and range-conversion helpers selected by the
 *    intermediate bit depth (c->scalingBpp),
 *  - the needs_hcscale flag for formats with real chroma.
 */
2629 static av_cold void sws_init_swScale_c(SwsContext *c)
2631 enum PixelFormat srcFormat = c->srcFormat;
2633 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2634 &c->yuv2packed1, &c->yuv2packed2,
/* chroma unpacker: NULL means the source is already planar 8-bit YUV */
2637 c->chrToYV12 = NULL;
2639 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2640 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2641 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2642 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* palettized sources share the palette-lookup unpacker */
2646 case PIX_FMT_BGR4_BYTE:
2647 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2648 case PIX_FMT_YUV444P9LE:
2649 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
2650 case PIX_FMT_YUV422P10LE:
2651 case PIX_FMT_YUV444P10LE:
2652 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
2653 case PIX_FMT_YUV444P9BE:
2654 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
2655 case PIX_FMT_YUV444P10BE:
2656 case PIX_FMT_YUV422P10BE:
2657 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
/* 16-bit planar input in the "wrong" endianness is byte-swapped */
2659 case PIX_FMT_YUV420P16LE:
2660 case PIX_FMT_YUV422P16LE:
2661 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2663 case PIX_FMT_YUV420P16BE:
2664 case PIX_FMT_YUV422P16BE:
2665 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* chroma is horizontally subsampled: use the *_half_ RGB->UV unpackers
 * (presumably they decimate 2:1 while converting — defined elsewhere) */
2668 if (c->chrSrcHSubSample) {
2670 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2671 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2672 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2673 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2674 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2675 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2676 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2677 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2678 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2679 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2680 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2681 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2682 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2683 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2684 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2685 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2686 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2687 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* no horizontal chroma subsampling: full-resolution RGB->UV unpackers */
2691 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2692 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2693 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2694 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2695 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2696 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2697 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2698 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2699 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2700 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2701 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2702 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2703 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2704 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2705 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2706 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2707 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2708 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* luma / alpha unpackers: NULL means the plane can be used as-is */
2712 c->lumToYV12 = NULL;
2713 c->alpToYV12 = NULL;
2714 switch (srcFormat) {
2715 case PIX_FMT_YUV444P9LE:
2716 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2717 case PIX_FMT_YUV444P10LE:
2718 case PIX_FMT_YUV422P10LE:
2719 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2720 case PIX_FMT_YUV444P9BE:
2721 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2722 case PIX_FMT_YUV444P10BE:
2723 case PIX_FMT_YUV422P10BE:
2724 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2726 case PIX_FMT_YUV420P16LE:
2727 case PIX_FMT_YUV422P16LE:
2728 case PIX_FMT_YUV444P16LE:
2729 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2731 case PIX_FMT_YUV420P16BE:
2732 case PIX_FMT_YUV422P16BE:
2733 case PIX_FMT_YUV444P16BE:
2734 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A (gray + alpha) is interleaved like YUYV for the luma pass */
2736 case PIX_FMT_YUYV422 :
2737 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2738 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2739 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2740 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2741 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2742 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2743 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2744 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2745 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2746 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2747 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2748 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2752 case PIX_FMT_BGR4_BYTE:
2753 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2754 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2755 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2756 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2757 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2758 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2759 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2760 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2761 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2762 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2763 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2766 switch (srcFormat) {
2768 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2770 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* interleaved gray+alpha: alpha sits at odd bytes, same layout the
 * UYVY luma extractor reads */
2771 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* 8-bit intermediate path: standard horizontal scaler, optional
 * fast-bilinear shortcuts */
2775 if (c->scalingBpp == 8) {
2776 c->hScale = hScale_c;
2777 if (c->flags & SWS_FAST_BILINEAR) {
2778 c->hyscale_fast = hyscale_fast_c;
2779 c->hcscale_fast = hcscale_fast_c;
/* src/dst range differ and output is not RGB: install range converters
 * (the *Jpeg* names imply full<->limited range conversion) */
2782 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2784 c->lumConvertRange = lumRangeFromJpeg_c;
2785 c->chrConvertRange = chrRangeFromJpeg_c;
2787 c->lumConvertRange = lumRangeToJpeg_c;
2788 c->chrConvertRange = chrRangeToJpeg_c;
/* higher-precision intermediate path: 16-bit scaler and 19<->15 bit
 * precision helpers */
2792 c->hScale = hScale16_c;
2793 c->scale19To15Fw = scale19To15Fw_c;
2794 c->scale8To16Rv = scale8To16Rv_c;
2796 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2798 c->lumConvertRange = lumRangeFromJpeg16_c;
2799 c->chrConvertRange = chrRangeFromJpeg16_c;
2801 c->lumConvertRange = lumRangeToJpeg16_c;
2802 c->chrConvertRange = chrRangeToJpeg16_c;
/* chroma needs a real horizontal pass unless either side has no real
 * chroma (gray or 1-bit monochrome formats) */
2807 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2808 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2809 c->needs_hcscale = 1;
2812 SwsFunc ff_getSwsFunc(SwsContext *c)
2814 sws_init_swScale_c(c);
2817 ff_sws_init_swScale_mmx(c);
2819 ff_sws_init_swScale_altivec(c);