2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients, fixed-point scaled by 2^RGB2YUV_SHIFT.
 * Luma terms use the 219-step limited range, chroma terms the 224-step
 * limited range (ITU-R BT.601 studio swing); +0.5 rounds to nearest. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB->YUV coefficient rows: {GY, BY, RY, GU, BU, RU, GV, BV, RV}.
 * Indexed by the SWS_CS_* colorspace identifiers; duplicate rows cover
 * reserved/unspecified slots with the BT.601 default. */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* 2x2 ordered-dither offsets (values 0..3), replicated across 8 columns. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
/* 2x2 ordered-dither offsets (values 0..7), replicated across 8 columns. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
/* 4x4 ordered-dither matrix (values 0..15), each row repeated twice per 8 columns. */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
/* 8x8 ordered-dither matrix with 32 levels (values 0..31). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
/* 8x8 ordered-dither matrix with 73 levels (values 0..72). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
/* 8x8 ordered-dither matrix with 220 levels.
 * NOTE(review): three alternative same-named tables (gamma-corrected
 * variants) follow below; presumably exactly one is selected by
 * #if/#elif preprocessor guards not visible in this chunk — confirm. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
// tries to correct a gamma of 1.5
/* NOTE(review): alternative dither_8x8_220 variant; appears to be
 * guarded by a preprocessor conditional not visible here — confirm. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
// tries to correct a gamma of 2.0
/* NOTE(review): alternative dither_8x8_220 variant; appears to be
 * guarded by a preprocessor conditional not visible here — confirm. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
// tries to correct a gamma of 2.5
/* NOTE(review): alternative dither_8x8_220 variant; appears to be
 * guarded by a preprocessor conditional not visible here — confirm. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest[4], int dstW, int chrDstW,
203 int big_endian, int output_bits)
205 //FIXME Optimize (just quickly written not optimized..)
207 int shift = 11 + 16 - output_bits;
208 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
209 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
211 #define output_pixel(pos, val) \
213 if (output_bits == 16) { \
214 AV_WB16(pos, av_clip_uint16(val >> shift)); \
216 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
219 if (output_bits == 16) { \
220 AV_WL16(pos, av_clip_uint16(val >> shift)); \
222 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
225 for (i = 0; i < dstW; i++) {
226 int val = 1 << (26-output_bits);
229 for (j = 0; j < lumFilterSize; j++)
230 val += lumSrc[j][i] * lumFilter[j];
232 output_pixel(&yDest[i], val);
236 for (i = 0; i < chrDstW; i++) {
237 int u = 1 << (26-output_bits);
238 int v = 1 << (26-output_bits);
241 for (j = 0; j < chrFilterSize; j++) {
242 u += chrUSrc[j][i] * chrFilter[j];
243 v += chrVSrc[j][i] * chrFilter[j];
246 output_pixel(&uDest[i], u);
247 output_pixel(&vDest[i], v);
251 if (CONFIG_SWSCALE_ALPHA && aDest) {
252 for (i = 0; i < dstW; i++) {
253 int val = 1 << (26-output_bits);
256 for (j = 0; j < lumFilterSize; j++)
257 val += alpSrc[j][i] * lumFilter[j];
259 output_pixel(&aDest[i], val);
/* Generate a named wrapper (yuv2yuvX<bits><BE|LE>_c) around
 * yuv2yuvX16_c_template for a fixed bit depth and endianness. */
#define yuv2NBPS(bits, BE_LE, is_be) \
static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                              const int16_t **lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **chrUSrc, \
                              const int16_t **chrVSrc, \
                              int chrFilterSize, const int16_t **alpSrc, \
                              uint8_t *_dest[4], int dstW, int chrDstW) \
{ \
    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, (uint16_t **) _dest, \
                          dstW, chrDstW, is_be, bits); \
}
285 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
286 const int16_t **lumSrc, int lumFilterSize,
287 const int16_t *chrFilter, const int16_t **chrUSrc,
288 const int16_t **chrVSrc,
289 int chrFilterSize, const int16_t **alpSrc,
290 uint8_t *dest[4], int dstW, int chrDstW)
292 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
293 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
296 //FIXME Optimize (just quickly written not optimized..)
297 for (i=0; i<dstW; i++) {
300 for (j=0; j<lumFilterSize; j++)
301 val += lumSrc[j][i] * lumFilter[j];
303 yDest[i]= av_clip_uint8(val>>19);
307 for (i=0; i<chrDstW; i++) {
311 for (j=0; j<chrFilterSize; j++) {
312 u += chrUSrc[j][i] * chrFilter[j];
313 v += chrVSrc[j][i] * chrFilter[j];
316 uDest[i]= av_clip_uint8(u>>19);
317 vDest[i]= av_clip_uint8(v>>19);
320 if (CONFIG_SWSCALE_ALPHA && aDest)
321 for (i=0; i<dstW; i++) {
324 for (j=0; j<lumFilterSize; j++)
325 val += alpSrc[j][i] * lumFilter[j];
327 aDest[i]= av_clip_uint8(val>>19);
331 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
332 const int16_t *chrUSrc, const int16_t *chrVSrc,
333 const int16_t *alpSrc,
334 uint8_t *dest[4], int dstW, int chrDstW)
336 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
337 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
340 for (i=0; i<dstW; i++) {
341 int val= (lumSrc[i]+64)>>7;
342 yDest[i]= av_clip_uint8(val);
346 for (i=0; i<chrDstW; i++) {
347 int u=(chrUSrc[i]+64)>>7;
348 int v=(chrVSrc[i]+64)>>7;
349 uDest[i]= av_clip_uint8(u);
350 vDest[i]= av_clip_uint8(v);
353 if (CONFIG_SWSCALE_ALPHA && aDest)
354 for (i=0; i<dstW; i++) {
355 int val= (alpSrc[i]+64)>>7;
356 aDest[i]= av_clip_uint8(val);
360 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
361 const int16_t **lumSrc, int lumFilterSize,
362 const int16_t *chrFilter, const int16_t **chrUSrc,
363 const int16_t **chrVSrc, int chrFilterSize,
364 const int16_t **alpSrc, uint8_t *dest[4],
365 int dstW, int chrDstW)
367 uint8_t *yDest = dest[0], *uDest = dest[1];
368 enum PixelFormat dstFormat = c->dstFormat;
370 //FIXME Optimize (just quickly written not optimized..)
372 for (i=0; i<dstW; i++) {
375 for (j=0; j<lumFilterSize; j++)
376 val += lumSrc[j][i] * lumFilter[j];
378 yDest[i]= av_clip_uint8(val>>19);
384 if (dstFormat == PIX_FMT_NV12)
385 for (i=0; i<chrDstW; i++) {
389 for (j=0; j<chrFilterSize; j++) {
390 u += chrUSrc[j][i] * chrFilter[j];
391 v += chrVSrc[j][i] * chrFilter[j];
394 uDest[2*i]= av_clip_uint8(u>>19);
395 uDest[2*i+1]= av_clip_uint8(v>>19);
398 for (i=0; i<chrDstW; i++) {
402 for (j=0; j<chrFilterSize; j++) {
403 u += chrUSrc[j][i] * chrFilter[j];
404 v += chrVSrc[j][i] * chrFilter[j];
407 uDest[2*i]= av_clip_uint8(v>>19);
408 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Store one 16-bit gray sample in the byte order implied by 'target'. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
419 static av_always_inline void
420 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
421 const int16_t **lumSrc, int lumFilterSize,
422 const int16_t *chrFilter, const int16_t **chrUSrc,
423 const int16_t **chrVSrc, int chrFilterSize,
424 const int16_t **alpSrc, uint8_t *dest, int dstW,
425 int y, enum PixelFormat target)
429 for (i = 0; i < (dstW >> 1); i++) {
433 const int i2 = 2 * i;
435 for (j = 0; j < lumFilterSize; j++) {
436 Y1 += lumSrc[j][i2] * lumFilter[j];
437 Y2 += lumSrc[j][i2+1] * lumFilter[j];
441 if ((Y1 | Y2) & 0x10000) {
442 Y1 = av_clip_uint16(Y1);
443 Y2 = av_clip_uint16(Y2);
445 output_pixel(&dest[2 * i2 + 0], Y1);
446 output_pixel(&dest[2 * i2 + 2], Y2);
450 static av_always_inline void
451 yuv2gray16_2_c_template(SwsContext *c, const int16_t *buf[2],
452 const int16_t *ubuf[2], const int16_t *vbuf[2],
453 const int16_t *abuf[2], uint8_t *dest, int dstW,
454 int yalpha, int uvalpha, int y,
455 enum PixelFormat target)
457 int yalpha1 = 4095 - yalpha;
459 const int16_t *buf0 = buf[0], *buf1 = buf[1];
461 for (i = 0; i < (dstW >> 1); i++) {
462 const int i2 = 2 * i;
463 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
464 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
466 output_pixel(&dest[2 * i2 + 0], Y1);
467 output_pixel(&dest[2 * i2 + 2], Y2);
471 static av_always_inline void
472 yuv2gray16_1_c_template(SwsContext *c, const int16_t *buf0,
473 const int16_t *ubuf[2], const int16_t *vbuf[2],
474 const int16_t *abuf0, uint8_t *dest, int dstW,
475 int uvalpha, int y, enum PixelFormat target)
479 for (i = 0; i < (dstW >> 1); i++) {
480 const int i2 = 2 * i;
481 int Y1 = buf0[i2 ] << 1;
482 int Y2 = buf0[i2+1] << 1;
484 output_pixel(&dest[2 * i2 + 0], Y1);
485 output_pixel(&dest[2 * i2 + 2], Y2);
/* Generate the three vertical-scaler entry points (_X_c: N-tap filter,
 * _2_c: bilinear blend, _1_c: single line) for one packed output format,
 * each forwarding to the corresponding *_c_template with 'fmt' baked in. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                          dstW, uvalpha, y, fmt); \
}
/* Instantiate the 16-bit gray output functions for both endiannesses. */
YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* Emit one byte holding 8 accumulated 1-bit pixels; MONOWHITE is the
 * bit-inverse of MONOBLACK. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
532 static av_always_inline void
533 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
534 const int16_t **lumSrc, int lumFilterSize,
535 const int16_t *chrFilter, const int16_t **chrUSrc,
536 const int16_t **chrVSrc, int chrFilterSize,
537 const int16_t **alpSrc, uint8_t *dest, int dstW,
538 int y, enum PixelFormat target)
540 const uint8_t * const d128=dither_8x8_220[y&7];
541 uint8_t *g = c->table_gU[128] + c->table_gV[128];
545 for (i = 0; i < dstW - 1; i += 2) {
550 for (j = 0; j < lumFilterSize; j++) {
551 Y1 += lumSrc[j][i] * lumFilter[j];
552 Y2 += lumSrc[j][i+1] * lumFilter[j];
556 if ((Y1 | Y2) & 0x100) {
557 Y1 = av_clip_uint8(Y1);
558 Y2 = av_clip_uint8(Y2);
560 acc += acc + g[Y1 + d128[(i + 0) & 7]];
561 acc += acc + g[Y2 + d128[(i + 1) & 7]];
563 output_pixel(*dest++, acc);
568 static av_always_inline void
569 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
570 const int16_t *ubuf[2], const int16_t *vbuf[2],
571 const int16_t *abuf[2], uint8_t *dest, int dstW,
572 int yalpha, int uvalpha, int y,
573 enum PixelFormat target)
575 const int16_t *buf0 = buf[0], *buf1 = buf[1];
576 const uint8_t * const d128 = dither_8x8_220[y & 7];
577 uint8_t *g = c->table_gU[128] + c->table_gV[128];
578 int yalpha1 = 4095 - yalpha;
581 for (i = 0; i < dstW - 7; i += 8) {
582 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
583 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
584 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
585 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
586 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
587 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
588 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
589 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
590 output_pixel(*dest++, acc);
594 static av_always_inline void
595 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
596 const int16_t *ubuf[2], const int16_t *vbuf[2],
597 const int16_t *abuf0, uint8_t *dest, int dstW,
598 int uvalpha, int y, enum PixelFormat target)
600 const uint8_t * const d128 = dither_8x8_220[y & 7];
601 uint8_t *g = c->table_gU[128] + c->table_gV[128];
604 for (i = 0; i < dstW - 7; i += 8) {
605 int acc = g[(buf0[i ] >> 7) + d128[0]];
606 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
607 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
608 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
609 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
610 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
611 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
612 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
613 output_pixel(*dest++, acc);
/* Instantiate the 1-bpp output functions for both monochrome polarities. */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store one 2-pixel group of packed 4:2:2: YUYV order for YUYV422,
 * UYVY order otherwise. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
635 static av_always_inline void
636 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
637 const int16_t **lumSrc, int lumFilterSize,
638 const int16_t *chrFilter, const int16_t **chrUSrc,
639 const int16_t **chrVSrc, int chrFilterSize,
640 const int16_t **alpSrc, uint8_t *dest, int dstW,
641 int y, enum PixelFormat target)
645 for (i = 0; i < (dstW >> 1); i++) {
652 for (j = 0; j < lumFilterSize; j++) {
653 Y1 += lumSrc[j][i * 2] * lumFilter[j];
654 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
656 for (j = 0; j < chrFilterSize; j++) {
657 U += chrUSrc[j][i] * chrFilter[j];
658 V += chrVSrc[j][i] * chrFilter[j];
664 if ((Y1 | Y2 | U | V) & 0x100) {
665 Y1 = av_clip_uint8(Y1);
666 Y2 = av_clip_uint8(Y2);
667 U = av_clip_uint8(U);
668 V = av_clip_uint8(V);
670 output_pixels(4*i, Y1, U, Y2, V);
674 static av_always_inline void
675 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
676 const int16_t *ubuf[2], const int16_t *vbuf[2],
677 const int16_t *abuf[2], uint8_t *dest, int dstW,
678 int yalpha, int uvalpha, int y,
679 enum PixelFormat target)
681 const int16_t *buf0 = buf[0], *buf1 = buf[1],
682 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
683 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
684 int yalpha1 = 4095 - yalpha;
685 int uvalpha1 = 4095 - uvalpha;
688 for (i = 0; i < (dstW >> 1); i++) {
689 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
690 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
691 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
692 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
694 output_pixels(i * 4, Y1, U, Y2, V);
698 static av_always_inline void
699 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
700 const int16_t *ubuf[2], const int16_t *vbuf[2],
701 const int16_t *abuf0, uint8_t *dest, int dstW,
702 int uvalpha, int y, enum PixelFormat target)
704 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
705 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
708 if (uvalpha < 2048) {
709 for (i = 0; i < (dstW >> 1); i++) {
710 int Y1 = buf0[i * 2] >> 7;
711 int Y2 = buf0[i * 2 + 1] >> 7;
712 int U = ubuf1[i] >> 7;
713 int V = vbuf1[i] >> 7;
715 output_pixels(i * 4, Y1, U, Y2, V);
718 for (i = 0; i < (dstW >> 1); i++) {
719 int Y1 = buf0[i * 2] >> 7;
720 int Y2 = buf0[i * 2 + 1] >> 7;
721 int U = (ubuf0[i] + ubuf1[i]) >> 8;
722 int V = (vbuf0[i] + vbuf1[i]) >> 8;
724 output_pixels(i * 4, Y1, U, Y2, V);
/* Instantiate the packed 4:2:2 output functions for both byte orders. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Swap the red/blue lookup tables depending on RGB vs BGR 48-bit output. */
#define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
#define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
737 static av_always_inline void
738 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
739 const int16_t **lumSrc, int lumFilterSize,
740 const int16_t *chrFilter, const int16_t **chrUSrc,
741 const int16_t **chrVSrc, int chrFilterSize,
742 const int16_t **alpSrc, uint8_t *dest, int dstW,
743 int y, enum PixelFormat target)
747 for (i = 0; i < (dstW >> 1); i++) {
753 const uint8_t *r, *g, *b;
755 for (j = 0; j < lumFilterSize; j++) {
756 Y1 += lumSrc[j][i * 2] * lumFilter[j];
757 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
759 for (j = 0; j < chrFilterSize; j++) {
760 U += chrUSrc[j][i] * chrFilter[j];
761 V += chrVSrc[j][i] * chrFilter[j];
767 if ((Y1 | Y2 | U | V) & 0x100) {
768 Y1 = av_clip_uint8(Y1);
769 Y2 = av_clip_uint8(Y2);
770 U = av_clip_uint8(U);
771 V = av_clip_uint8(V);
774 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
775 r = (const uint8_t *) c->table_rV[V];
776 g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
777 b = (const uint8_t *) c->table_bU[U];
779 dest[ 0] = dest[ 1] = r_b[Y1];
780 dest[ 2] = dest[ 3] = g[Y1];
781 dest[ 4] = dest[ 5] = b_r[Y1];
782 dest[ 6] = dest[ 7] = r_b[Y2];
783 dest[ 8] = dest[ 9] = g[Y2];
784 dest[10] = dest[11] = b_r[Y2];
789 static av_always_inline void
790 yuv2rgb48_2_c_template(SwsContext *c, const int16_t *buf[2],
791 const int16_t *ubuf[2], const int16_t *vbuf[2],
792 const int16_t *abuf[2], uint8_t *dest, int dstW,
793 int yalpha, int uvalpha, int y,
794 enum PixelFormat target)
796 const int16_t *buf0 = buf[0], *buf1 = buf[1],
797 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
798 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
799 int yalpha1 = 4095 - yalpha;
800 int uvalpha1 = 4095 - uvalpha;
803 for (i = 0; i < (dstW >> 1); i++) {
804 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
805 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
806 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
807 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
808 const uint8_t *r = (const uint8_t *) c->table_rV[V],
809 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
810 *b = (const uint8_t *) c->table_bU[U];
812 dest[ 0] = dest[ 1] = r_b[Y1];
813 dest[ 2] = dest[ 3] = g[Y1];
814 dest[ 4] = dest[ 5] = b_r[Y1];
815 dest[ 6] = dest[ 7] = r_b[Y2];
816 dest[ 8] = dest[ 9] = g[Y2];
817 dest[10] = dest[11] = b_r[Y2];
822 static av_always_inline void
823 yuv2rgb48_1_c_template(SwsContext *c, const int16_t *buf0,
824 const int16_t *ubuf[2], const int16_t *vbuf[2],
825 const int16_t *abuf0, uint8_t *dest, int dstW,
826 int uvalpha, int y, enum PixelFormat target)
828 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
829 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
832 if (uvalpha < 2048) {
833 for (i = 0; i < (dstW >> 1); i++) {
834 int Y1 = buf0[i * 2] >> 7;
835 int Y2 = buf0[i * 2 + 1] >> 7;
836 int U = ubuf1[i] >> 7;
837 int V = vbuf1[i] >> 7;
838 const uint8_t *r = (const uint8_t *) c->table_rV[V],
839 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
840 *b = (const uint8_t *) c->table_bU[U];
842 dest[ 0] = dest[ 1] = r_b[Y1];
843 dest[ 2] = dest[ 3] = g[Y1];
844 dest[ 4] = dest[ 5] = b_r[Y1];
845 dest[ 6] = dest[ 7] = r_b[Y2];
846 dest[ 8] = dest[ 9] = g[Y2];
847 dest[10] = dest[11] = b_r[Y2];
851 for (i = 0; i < (dstW >> 1); i++) {
852 int Y1 = buf0[i * 2] >> 7;
853 int Y2 = buf0[i * 2 + 1] >> 7;
854 int U = (ubuf0[i] + ubuf1[i]) >> 8;
855 int V = (vbuf0[i] + vbuf1[i]) >> 8;
856 const uint8_t *r = (const uint8_t *) c->table_rV[V],
857 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
858 *b = (const uint8_t *) c->table_bU[U];
860 dest[ 0] = dest[ 1] = r_b[Y1];
861 dest[ 2] = dest[ 3] = g[Y1];
862 dest[ 4] = dest[ 5] = b_r[Y1];
863 dest[ 6] = dest[ 7] = r_b[Y2];
864 dest[ 8] = dest[ 9] = g[Y2];
865 dest[10] = dest[11] = b_r[Y2];
/* Instantiate the big-endian 48-bit RGB/BGR output functions; the
 * little-endian variants remain disabled here. */
YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
//YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
//YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
879 static av_always_inline void
880 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
881 int U, int V, int A1, int A2,
882 const void *_r, const void *_g, const void *_b, int y,
883 enum PixelFormat target, int hasAlpha)
885 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
886 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
887 uint32_t *dest = (uint32_t *) _dest;
888 const uint32_t *r = (const uint32_t *) _r;
889 const uint32_t *g = (const uint32_t *) _g;
890 const uint32_t *b = (const uint32_t *) _b;
893 int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
895 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
896 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
899 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
901 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
902 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
904 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
905 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
908 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
909 uint8_t *dest = (uint8_t *) _dest;
910 const uint8_t *r = (const uint8_t *) _r;
911 const uint8_t *g = (const uint8_t *) _g;
912 const uint8_t *b = (const uint8_t *) _b;
914 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
915 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
916 dest[i * 6 + 0] = r_b[Y1];
917 dest[i * 6 + 1] = g[Y1];
918 dest[i * 6 + 2] = b_r[Y1];
919 dest[i * 6 + 3] = r_b[Y2];
920 dest[i * 6 + 4] = g[Y2];
921 dest[i * 6 + 5] = b_r[Y2];
924 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
925 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
926 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
927 uint16_t *dest = (uint16_t *) _dest;
928 const uint16_t *r = (const uint16_t *) _r;
929 const uint16_t *g = (const uint16_t *) _g;
930 const uint16_t *b = (const uint16_t *) _b;
931 int dr1, dg1, db1, dr2, dg2, db2;
933 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
934 dr1 = dither_2x2_8[ y & 1 ][0];
935 dg1 = dither_2x2_4[ y & 1 ][0];
936 db1 = dither_2x2_8[(y & 1) ^ 1][0];
937 dr2 = dither_2x2_8[ y & 1 ][1];
938 dg2 = dither_2x2_4[ y & 1 ][1];
939 db2 = dither_2x2_8[(y & 1) ^ 1][1];
940 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
941 dr1 = dither_2x2_8[ y & 1 ][0];
942 dg1 = dither_2x2_8[ y & 1 ][1];
943 db1 = dither_2x2_8[(y & 1) ^ 1][0];
944 dr2 = dither_2x2_8[ y & 1 ][1];
945 dg2 = dither_2x2_8[ y & 1 ][0];
946 db2 = dither_2x2_8[(y & 1) ^ 1][1];
948 dr1 = dither_4x4_16[ y & 3 ][0];
949 dg1 = dither_4x4_16[ y & 3 ][1];
950 db1 = dither_4x4_16[(y & 3) ^ 3][0];
951 dr2 = dither_4x4_16[ y & 3 ][1];
952 dg2 = dither_4x4_16[ y & 3 ][0];
953 db2 = dither_4x4_16[(y & 3) ^ 3][1];
956 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
957 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
958 } else /* 8/4-bit */ {
959 uint8_t *dest = (uint8_t *) _dest;
960 const uint8_t *r = (const uint8_t *) _r;
961 const uint8_t *g = (const uint8_t *) _g;
962 const uint8_t *b = (const uint8_t *) _b;
963 int dr1, dg1, db1, dr2, dg2, db2;
965 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
966 const uint8_t * const d64 = dither_8x8_73[y & 7];
967 const uint8_t * const d32 = dither_8x8_32[y & 7];
968 dr1 = dg1 = d32[(i * 2 + 0) & 7];
969 db1 = d64[(i * 2 + 0) & 7];
970 dr2 = dg2 = d32[(i * 2 + 1) & 7];
971 db2 = d64[(i * 2 + 1) & 7];
973 const uint8_t * const d64 = dither_8x8_73 [y & 7];
974 const uint8_t * const d128 = dither_8x8_220[y & 7];
975 dr1 = db1 = d128[(i * 2 + 0) & 7];
976 dg1 = d64[(i * 2 + 0) & 7];
977 dr2 = db2 = d128[(i * 2 + 1) & 7];
978 dg2 = d64[(i * 2 + 1) & 7];
981 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
982 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
983 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
985 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
986 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
991 static av_always_inline void
992 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
993 const int16_t **lumSrc, int lumFilterSize,
994 const int16_t *chrFilter, const int16_t **chrUSrc,
995 const int16_t **chrVSrc, int chrFilterSize,
996 const int16_t **alpSrc, uint8_t *dest, int dstW,
997 int y, enum PixelFormat target, int hasAlpha)
1001 for (i = 0; i < (dstW >> 1); i++) {
1007 int av_unused A1, A2;
1008 const void *r, *g, *b;
1010 for (j = 0; j < lumFilterSize; j++) {
1011 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1012 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1014 for (j = 0; j < chrFilterSize; j++) {
1015 U += chrUSrc[j][i] * chrFilter[j];
1016 V += chrVSrc[j][i] * chrFilter[j];
1022 if ((Y1 | Y2 | U | V) & 0x100) {
1023 Y1 = av_clip_uint8(Y1);
1024 Y2 = av_clip_uint8(Y2);
1025 U = av_clip_uint8(U);
1026 V = av_clip_uint8(V);
1031 for (j = 0; j < lumFilterSize; j++) {
1032 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1033 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1037 if ((A1 | A2) & 0x100) {
1038 A1 = av_clip_uint8(A1);
1039 A2 = av_clip_uint8(A2);
1043 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1045 g = (c->table_gU[U] + c->table_gV[V]);
1048 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1049 r, g, b, y, target, hasAlpha);
1053 static av_always_inline void
1054 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1055 const int16_t *ubuf[2], const int16_t *vbuf[2],
1056 const int16_t *abuf[2], uint8_t *dest, int dstW,
1057 int yalpha, int uvalpha, int y,
1058 enum PixelFormat target, int hasAlpha)
1060 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1061 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1062 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1063 *abuf0 = abuf[0], *abuf1 = abuf[1];
1064 int yalpha1 = 4095 - yalpha;
1065 int uvalpha1 = 4095 - uvalpha;
1068 for (i = 0; i < (dstW >> 1); i++) {
1069 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1070 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1071 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1072 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1074 const void *r = c->table_rV[V],
1075 *g = (c->table_gU[U] + c->table_gV[V]),
1076 *b = c->table_bU[U];
1079 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1080 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1083 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1084 r, g, b, y, target, hasAlpha);
/* Convert one output scanline to packed RGB from a single YUV input line
 * (no vertical luma blending).  Samples carry 7 fractional bits, hence the
 * >>7.  Chroma handling splits on uvalpha: below 2048 only ubuf1/vbuf1 are
 * used; otherwise the two chroma lines are averaged ((a+b)>>8 == mean>>7).
 * NOTE(review): listing is elided (hasAlpha guards, `int i;`, braces and
 * the else between the two loops are missing) — check the full source. */
1088 static av_always_inline void
1089 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1090 const int16_t *ubuf[2], const int16_t *vbuf[2],
1091 const int16_t *abuf0, uint8_t *dest, int dstW,
1092 int uvalpha, int y, enum PixelFormat target,
1095 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1096 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* Chroma weight small enough: take the single nearest chroma line. */
1099 if (uvalpha < 2048) {
1100 for (i = 0; i < (dstW >> 1); i++) {
1101 int Y1 = buf0[i * 2] >> 7;
1102 int Y2 = buf0[i * 2 + 1] >> 7;
1103 int U = ubuf1[i] >> 7;
1104 int V = vbuf1[i] >> 7;
1106 const void *r = c->table_rV[V],
1107 *g = (c->table_gU[U] + c->table_gV[V]),
1108 *b = c->table_bU[U];
1111 A1 = abuf0[i * 2 ] >> 7;
1112 A2 = abuf0[i * 2 + 1] >> 7;
1115 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1116 r, g, b, y, target, hasAlpha);
/* Otherwise average the two chroma input lines. */
1119 for (i = 0; i < (dstW >> 1); i++) {
1120 int Y1 = buf0[i * 2] >> 7;
1121 int Y2 = buf0[i * 2 + 1] >> 7;
1122 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1123 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1125 const void *r = c->table_rV[V],
1126 *g = (c->table_gU[U] + c->table_gV[V]),
1127 *b = c->table_bU[U];
1130 A1 = abuf0[i * 2 ] >> 7;
1131 A2 = abuf0[i * 2 + 1] >> 7;
1134 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1135 r, g, b, y, target, hasAlpha);
/* Wrapper generators: YUV2RGBWRAPPERX emits the multi-tap (_X_c) entry
 * point for a given output format/alpha mode; YUV2RGBWRAPPER additionally
 * emits the two-line (_2_c) and single-line (_1_c) entry points.  Each
 * wrapper simply forwards to the matching av_always_inline template with
 * `fmt` and `hasAlpha` as compile-time constants so the template is
 * specialized per format. */
1140 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1141 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1142 const int16_t **lumSrc, int lumFilterSize, \
1143 const int16_t *chrFilter, const int16_t **chrUSrc, \
1144 const int16_t **chrVSrc, int chrFilterSize, \
1145 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1148 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1149 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1150 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1152 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1153 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1154 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1155 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1156 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1157 int yalpha, int uvalpha, int y) \
1159 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1160 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1163 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1164 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1165 const int16_t *abuf0, uint8_t *dest, int dstW, \
1166 int uvalpha, int y) \
1168 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1169 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiate the packed-RGB output wrappers.  For 32-bit formats, the
 * generic variants test alpha availability at run time
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf); when CONFIG_SWSCALE_ALPHA is
 * compiled in, dedicated a32*/x32* variants with hasAlpha fixed to 1/0
 * are also generated (the #if/#else structure is elided in this listing). */
1173 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1174 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1176 #if CONFIG_SWSCALE_ALPHA
1177 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1178 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1180 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1181 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1183 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1184 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1185 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1186 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1187 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1188 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1189 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1190 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-chroma-resolution vertical-filter output: apply the multi-tap
 * vertical filters per pixel (one chroma sample per luma sample, no 2:1
 * subsampling), then convert to RGB arithmetically using the context's
 * yuv2rgb_* coefficients rather than lookup tables, clipping to 30 bits
 * when any component overflows.  `step` is the bytes-per-pixel of the
 * packed output (3 for 24-bit, else 4).
 * NOTE(review): accumulator declarations/initializers, the R/G/B store
 * switch and loop tail are elided in this listing. */
1192 static av_always_inline void
1193 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1194 const int16_t **lumSrc, int lumFilterSize,
1195 const int16_t *chrFilter, const int16_t **chrUSrc,
1196 const int16_t **chrVSrc, int chrFilterSize,
1197 const int16_t **alpSrc, uint8_t *dest,
1198 int dstW, int y, enum PixelFormat target, int hasAlpha)
1201 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1203 for (i = 0; i < dstW; i++) {
/* Accumulate the vertical filter taps for luma and chroma. */
1211 for (j = 0; j < lumFilterSize; j++) {
1212 Y += lumSrc[j][i] * lumFilter[j];
1214 for (j = 0; j < chrFilterSize; j++) {
1215 U += chrUSrc[j][i] * chrFilter[j];
1216 V += chrVSrc[j][i] * chrFilter[j];
/* Alpha uses the luma filter (only reached when hasAlpha — guard elided). */
1223 for (j = 0; j < lumFilterSize; j++) {
1224 A += alpSrc[j][i] * lumFilter[j];
1228 A = av_clip_uint8(A);
/* Arithmetic YUV->RGB with the context's fixed-point coefficients. */
1230 Y -= c->yuv2rgb_y_offset;
1231 Y *= c->yuv2rgb_y_coeff;
1233 R = Y + V*c->yuv2rgb_v2r_coeff;
1234 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1235 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Cheap combined range test: clip only when some component overflowed. */
1236 if ((R | G | B) & 0xC0000000) {
1237 R = av_clip_uintp2(R, 30);
1238 G = av_clip_uintp2(G, 30);
1239 B = av_clip_uintp2(B, 30);
/* Per-format packed stores (alpha first or last); bodies elided here. */
1244 dest[0] = hasAlpha ? A : 255;
1258 dest[3] = hasAlpha ? A : 255;
1261 dest[0] = hasAlpha ? A : 255;
1276 dest[3] = hasAlpha ? A : 255;
/* Instantiate full-chroma (per-pixel chroma) packed-RGB output wrappers:
 * runtime-alpha variants, plus fixed-alpha (a*/x*) variants when
 * CONFIG_SWSCALE_ALPHA is set, and the 24-bit formats without alpha. */
1284 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1285 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1286 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1287 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1289 #if CONFIG_SWSCALE_ALPHA
1290 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1291 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1292 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1293 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1295 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1296 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1297 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1298 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1300 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill a width x height rectangle of a plane (starting at row `y`, elided
 * parameter) with the constant byte `val`, one memset per row so arbitrary
 * strides are handled (per-row ptr advance elided in this listing). */
1303 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1304 int width, int height,
1308 uint8_t *ptr = plane + stride*y;
1309 for (i=0; i<height; i++) {
1310 memset(ptr, val, width);
/* Helper macros for the 48-bit RGB readers: input_pixel reads one 16-bit
 * component honoring the format's endianness; r/b swap the component
 * roles for BGR48 so the same template serves RGB48 and BGR48. */
1315 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1317 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1318 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* RGB48/BGR48 -> 8-bit luma: take the high byte of each 16-bit component
 * and apply the BT.601 RY/GY/BY fixed-point weights with rounding. */
1320 static av_always_inline void
1321 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
1322 enum PixelFormat origin)
1325 for (i = 0; i < width; i++) {
1326 int r_b = input_pixel(&src[i*6+0]) >> 8;
1327 int g = input_pixel(&src[i*6+2]) >> 8;
1328 int b_r = input_pixel(&src[i*6+4]) >> 8;
1330 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* RGB48/BGR48 -> 8-bit chroma at full horizontal resolution: one U and V
 * sample per input pixel, rounded with a +128-centred bias (257<<S-1).
 * Only src1 is read; src2 is presumably the same pointer (the companion
 * 8-bit readers assert src1 == src2 — TODO confirm for this template). */
1334 static av_always_inline void
1335 rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1336 const uint8_t *src1, const uint8_t *src2,
1337 int width, enum PixelFormat origin)
1341 for (i = 0; i < width; i++) {
1342 int r_b = input_pixel(&src1[i*6+0]) >> 8;
1343 int g = input_pixel(&src1[i*6+2]) >> 8;
1344 int b_r = input_pixel(&src1[i*6+4]) >> 8;
1346 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1347 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* RGB48/BGR48 -> chroma at half horizontal resolution: each output sample
 * averages two adjacent input pixels (components summed, then the final
 * shift is S+1 with a doubled rounding constant). */
1351 static av_always_inline void
1352 rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1353 const uint8_t *src1, const uint8_t *src2,
1354 int width, enum PixelFormat origin)
1358 for (i = 0; i < width; i++) {
1359 int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
1360 int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
1361 int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
1363 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1364 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Generate the concrete ToY / ToUV / ToUV_half entry points for each
 * 48-bit format (rgb48/bgr48, LE/BE) by binding `origin` in the templates
 * above; the trailing `unused` parameter matches the common reader
 * signature (palette pointer, unused here). */
1372 #define rgb48funcs(pattern, BE_LE, origin) \
1373 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
1374 int width, uint32_t *unused) \
1376 rgb48ToY_c_template(dst, src, width, origin); \
1379 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1380 const uint8_t *src1, const uint8_t *src2, \
1381 int width, uint32_t *unused) \
1383 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1386 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1387 const uint8_t *src1, const uint8_t *src2, \
1388 int width, uint32_t *unused) \
1390 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1393 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1394 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1395 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1396 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* input_pixel for the 16/32-bit RGB readers: 32-bit formats are read as a
 * native aligned dword, 15/16-bit formats as an endian-correct 16-bit load. */
1398 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1399 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1400 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed RGB (15/16/32 bpp) -> luma.  Components are extracted
 * with per-format shift/mask parameters; the Y weights are pre-shifted by
 * rsh/gsh/bsh so narrow components need no post-scaling, and S is the
 * total fixed-point shift (rnd = 33<<(S-1) gives the +16 luma offset
 * plus round-to-nearest). */
1402 static av_always_inline void
1403 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1404 int width, enum PixelFormat origin,
1405 int shr, int shg, int shb, int shp,
1406 int maskr, int maskg, int maskb,
1407 int rsh, int gsh, int bsh, int S)
1409 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1410 rnd = 33 << (S - 1);
1413 for (i = 0; i < width; i++) {
1414 int px = input_pixel(i) >> shp;
1415 int b = (px & maskb) >> shb;
1416 int g = (px & maskg) >> shg;
1417 int r = (px & maskr) >> shr;
1419 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic packed RGB (15/16/32 bpp) -> chroma at full horizontal
 * resolution; same shift/mask parameterization as the Y template, with the
 * U and V weight sets and a +128-centred rounding bias (257<<(S-1)). */
1423 static av_always_inline void
1424 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1425 const uint8_t *src, int width,
1426 enum PixelFormat origin,
1427 int shr, int shg, int shb, int shp,
1428 int maskr, int maskg, int maskb,
1429 int rsh, int gsh, int bsh, int S)
1431 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1432 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1433 rnd = 257 << (S - 1);
1436 for (i = 0; i < width; i++) {
1437 int px = input_pixel(i) >> shp;
1438 int b = (px & maskb) >> shb;
1439 int g = (px & maskg) >> shg;
1440 int r = (px & maskr) >> shr;
1442 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1443 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic packed RGB -> chroma at half horizontal resolution: two adjacent
 * pixels are summed before weighting.  The green sum is isolated via
 * maskgx = ~(maskr|maskb) and r+b recovered as px0+px1-g; masks are
 * widened (<<1 merge) to hold the doubled component sums.  The 565-format
 * special case (extra green bit) and the following else branch are elided
 * in this listing — verify against the full source before editing. */
1447 static av_always_inline void
1448 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1449 const uint8_t *src, int width,
1450 enum PixelFormat origin,
1451 int shr, int shg, int shb, int shp,
1452 int maskr, int maskg, int maskb,
1453 int rsh, int gsh, int bsh, int S)
1455 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1456 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1457 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* Widen the masks so they cover the carry bit of a two-pixel sum. */
1460 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1461 for (i = 0; i < width; i++) {
1462 int px0 = input_pixel(2 * i + 0) >> shp;
1463 int px1 = input_pixel(2 * i + 1) >> shp;
1464 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1465 int rb = px0 + px1 - g;
1467 b = (rb & maskb) >> shb;
1468 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1469 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1472 g = (g & maskg) >> shg;
1474 r = (rb & maskr) >> shr;
1476 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1477 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generate ToY / ToUV / ToUV_half entry points for each 15/16/32-bit RGB
 * variant by binding the per-format shift/mask/weight parameters into the
 * three templates above.  The instantiations below encode each format's
 * component layout; note S differs: +8 for 6-bit-green and 8-bit formats,
 * +7 for 5-bit (555) formats. */
1483 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1484 maskg, maskb, rsh, gsh, bsh, S) \
1485 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1486 int width, uint32_t *unused) \
1488 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1489 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1492 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1493 const uint8_t *src, const uint8_t *dummy, \
1494 int width, uint32_t *unused) \
1496 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1497 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1500 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1501 const uint8_t *src, const uint8_t *dummy, \
1502 int width, uint32_t *unused) \
1504 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1505 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1508 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1509 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1510 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1511 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1512 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1513 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1514 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1515 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1516 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1517 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1518 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1519 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from packed ABGR input (loop body copying the
 * alpha byte is elided in this listing — TODO confirm against full source). */
1521 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1524 for (i=0; i<width; i++) {
/* Extract the alpha plane from packed RGBA input (loop body elided in this
 * listing — TODO confirm against full source). */
1529 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1532 for (i=0; i<width; i++) {
/* PAL8 -> luma: look each index up in the caller-supplied YUV palette and
 * keep the low byte (Y).  The index load `d` is elided in this listing. */
1537 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1540 for (i=0; i<width; i++) {
1543 dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma: both planes come from the same indexed source (asserted),
 * reading U/V from the palette entry (the byte-extraction stores after the
 * lookup are elided in this listing). */
1547 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1548 const uint8_t *src1, const uint8_t *src2,
1549 int width, uint32_t *pal)
1552 assert(src1 == src2);
1553 for (i=0; i<width; i++) {
1554 int p= pal[src1[i]];
/* 1 bpp (white = 0 per the name) -> 8-bit luma: expand each bit of a byte
 * to 0/255, MSB first.  The byte load `d` (and the inversion implied by
 * the monowhite convention) is elided — TODO confirm in the full source. */
1561 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1562 int width, uint32_t *unused)
1565 for (i=0; i<width/8; i++) {
1568 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp (black = 0) -> 8-bit luma: expand each bit of a byte to 0/255,
 * MSB first (byte load `d` elided in this listing). */
1572 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1573 int width, uint32_t *unused)
1576 for (i=0; i<width/8; i++) {
1579 dst[8*i+j]= ((d>>(7-j))&1)*255;
1583 //FIXME yuy2* can read up to 7 samples too much
/* YUYV -> luma: Y samples sit at even byte offsets (store elided here). */
1585 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1589 for (i=0; i<width; i++)
/* YUYV -> chroma: U at offset 1, V at offset 3 of each 4-byte pair; both
 * source arguments must alias the same packed line (asserted). */
1593 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1594 const uint8_t *src2, int width, uint32_t *unused)
1597 for (i=0; i<width; i++) {
1598 dstU[i]= src1[4*i + 1];
1599 dstV[i]= src1[4*i + 3];
1601 assert(src1 == src2);
/* Little-endian 16-bit planar chroma -> 8 bit: keep the high byte of each
 * sample; here U and V come from separate planes (src1/src2). */
1604 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1605 const uint8_t *src2, int width, uint32_t *unused)
1608 for (i=0; i<width; i++) {
1609 dstU[i]= src1[2*i + 1];
1610 dstV[i]= src2[2*i + 1];
1614 /* This is almost identical to the previous, end exists only because
1615  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY -> luma: Y samples sit at odd byte offsets (store elided here). */
1616 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1620 for (i=0; i<width; i++)
/* UYVY -> chroma: U at offset 0, V at offset 2 of each 4-byte pair; both
 * source arguments must alias the same packed line (asserted). */
1624 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1625 const uint8_t *src2, int width, uint32_t *unused)
1628 for (i=0; i<width; i++) {
1629 dstU[i]= src1[4*i + 0];
1630 dstV[i]= src1[4*i + 2];
1632 assert(src1 == src2);
/* Big-endian 16-bit planar chroma -> 8 bit (loop body elided in this
 * listing; by symmetry with LEToUV_c it presumably keeps src[2*i] — TODO
 * confirm against the full source). */
1635 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1636 const uint8_t *src2, int width, uint32_t *unused)
1639 for (i=0; i<width; i++) {
/* De-interleave an NV12/NV21-style packed chroma line into two planes;
 * callers choose which destination gets the even/odd bytes. */
1645 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1646 const uint8_t *src, int width)
1649 for (i = 0; i < width; i++) {
1650 dst1[i] = src[2*i+0];
1651 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V,U,V..., so even bytes go to dstU. */
1655 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1656 const uint8_t *src1, const uint8_t *src2,
1657 int width, uint32_t *unused)
1659 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U,V,U..., so destinations are swapped. */
1662 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1663 const uint8_t *src1, const uint8_t *src2,
1664 int width, uint32_t *unused)
1666 nvXXtoUV_c(dstV, dstU, src1, width);
/* Endian-correct 16-bit sample load for the 9/10-bit planar readers. */
1669 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1671 // FIXME Maybe dither instead.
/* Reduce 9/10-bit planar chroma to 8 bit by truncating the extra
 * (depth - 8) low bits. */
1672 static av_always_inline void
1673 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1674 const uint8_t *_srcU, const uint8_t *_srcV,
1675 int width, enum PixelFormat origin, int depth)
1678 const uint16_t *srcU = (const uint16_t *) _srcU;
1679 const uint16_t *srcV = (const uint16_t *) _srcV;
1681 for (i = 0; i < width; i++) {
1682 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1683 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
/* Reduce 9/10-bit planar luma to 8 bit by truncating the extra low bits. */
1687 static av_always_inline void
1688 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1689 int width, enum PixelFormat origin, int depth)
1692 const uint16_t *srcY = (const uint16_t*)_srcY;
1694 for (i = 0; i < width; i++)
1695 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
/* Generate {LE,BE}{9,10}To{Y,UV}_c entry points from the two templates
 * above, binding bit depth and endian-specific pixel format. */
1700 #define YUV_NBPS(depth, BE_LE, origin) \
1701 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1702 const uint8_t *srcU, const uint8_t *srcV, \
1703 int width, uint32_t *unused) \
1705 yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1707 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1708 int width, uint32_t *unused) \
1710 yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1713 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1714 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1715 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1716 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
/* BGR24 -> luma with BT.601 weights and +16 offset rounding (the b/g/r
 * component loads are elided in this listing). */
1718 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1719 int width, uint32_t *unused)
1722 for (i=0; i<width; i++) {
1727 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> chroma at full horizontal resolution; src1 must equal src2
 * (asserted) — the separate parameter only matches the reader signature. */
1731 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1732 const uint8_t *src2, int width, uint32_t *unused)
1735 for (i=0; i<width; i++) {
1736 int b= src1[3*i + 0];
1737 int g= src1[3*i + 1];
1738 int r= src1[3*i + 2];
1740 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1741 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1743 assert(src1 == src2);
/* BGR24 -> chroma at half horizontal resolution: sum two adjacent pixels
 * per output sample (final shift is RGB2YUV_SHIFT+1 with doubled bias). */
1746 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1747 const uint8_t *src2, int width, uint32_t *unused)
1750 for (i=0; i<width; i++) {
1751 int b= src1[6*i + 0] + src1[6*i + 3];
1752 int g= src1[6*i + 1] + src1[6*i + 4];
1753 int r= src1[6*i + 2] + src1[6*i + 5];
1755 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1756 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1758 assert(src1 == src2);
/* RGB24 -> luma, mirror of bgr24ToY_c with r/g/b order swapped (component
 * loads elided in this listing). */
1761 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1765 for (i=0; i<width; i++) {
1770 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 -> chroma at full horizontal resolution (mirror of bgr24ToUV_c). */
1774 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1775 const uint8_t *src2, int width, uint32_t *unused)
1779 for (i=0; i<width; i++) {
1780 int r= src1[3*i + 0];
1781 int g= src1[3*i + 1];
1782 int b= src1[3*i + 2];
1784 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1785 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 -> chroma at half horizontal resolution: two adjacent pixels
 * summed per output sample (mirror of bgr24ToUV_half_c). */
1789 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1790 const uint8_t *src2, int width, uint32_t *unused)
1794 for (i=0; i<width; i++) {
1795 int r= src1[6*i + 0] + src1[6*i + 3];
1796 int g= src1[6*i + 1] + src1[6*i + 4];
1797 int b= src1[6*i + 2] + src1[6*i + 5];
1799 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1800 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1804 // bilinear / bicubic scaling
/* Generic horizontal scaler: for each destination sample, apply the
 * filterSize-tap FIR at filterPos[i] to the 8-bit source and store a
 * 15-bit clipped result (the `val` accumulator declaration is elided in
 * this listing). */
1805 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1806 const int16_t *filter, const int16_t *filterPos,
1810 for (i=0; i<dstW; i++) {
1812 int srcPos= filterPos[i];
1814 for (j=0; j<filterSize; j++) {
1815 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1817 //filter += hFilterSize;
1818 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1823 //FIXME all pal and rgb srcFormats could do this convertion as well
1824 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain; FFMIN guards the fixed-point multiply overflow. */
1825 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1828 for (i = 0; i < width; i++) {
1829 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1830 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma to limited range (MPEG), inverse of
 * chrRangeToJpeg_c, in the 15-bit intermediate domain. */
1833 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1836 for (i = 0; i < width; i++) {
1837 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1838 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand limited-range luma to full range (JPEG) in the 15-bit domain;
 * FFMIN guards the fixed-point multiply overflow. */
1841 static void lumRangeToJpeg_c(int16_t *dst, int width)
1844 for (i = 0; i < width; i++)
1845 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress full-range luma to limited range, inverse of lumRangeToJpeg_c. */
1847 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1850 for (i = 0; i < width; i++)
1851 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Fast bilinear horizontal luma scale: 16.16 fixed-point position xpos,
 * 7-bit interpolation weight; output keeps 7 fractional bits.  The
 * `xpos += xInc` step at the loop tail is elided in this listing. */
1854 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1855 const uint8_t *src, int srcW, int xInc)
1858 unsigned int xpos=0;
1859 for (i=0;i<dstWidth;i++) {
1860 register unsigned int xx=xpos>>16;
1861 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1862 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1867 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) line: optionally
 * convert the input format to 8-bit via lumToYV12/alpToYV12 into
 * formatConvBuffer, then run either the generic FIR hScale or the fast
 * bilinear path, and finally apply the luma range conversion (skipped for
 * alpha).  The guards around toYV12/convertRange are elided here. */
1868 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1869 const uint8_t *src, int srcW, int xInc,
1870 const int16_t *hLumFilter,
1871 const int16_t *hLumFilterPos, int hLumFilterSize,
1872 uint8_t *formatConvBuffer,
1873 uint32_t *pal, int isAlpha)
1875 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1876 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1879 toYV12(formatConvBuffer, src, srcW, pal);
1880 src= formatConvBuffer;
1883 if (!c->hyscale_fast) {
1884 c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1885 } else { // fast bilinear upscale / crap downscale
1886 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1890 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale for both planes at once; the
 * (xalpha^127) form is the complementary weight 127-xalpha.  The
 * `xpos += xInc` loop tail is elided in this listing. */
1893 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1894 int dstWidth, const uint8_t *src1,
1895 const uint8_t *src2, int srcW, int xInc)
1898 unsigned int xpos=0;
1899 for (i=0;i<dstWidth;i++) {
1900 register unsigned int xx=xpos>>16;
1901 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1902 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1903 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one chroma line pair: optionally convert the input
 * via chrToYV12 into formatConvBuffer (+16-aligned second half for V),
 * then either generic FIR hScale per plane or the fast bilinear pair
 * scaler, followed by optional chroma range conversion.  The guard around
 * chrToYV12 and the `src2 = buf2` assignment are elided in this listing. */
1908 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1909 const uint8_t *src1, const uint8_t *src2,
1910 int srcW, int xInc, const int16_t *hChrFilter,
1911 const int16_t *hChrFilterPos, int hChrFilterSize,
1912 uint8_t *formatConvBuffer, uint32_t *pal)
1915 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1916 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1917 src1= formatConvBuffer;
1921 if (!c->hcscale_fast) {
1922 c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1923 c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1924 } else { // fast bilinear upscale / crap downscale
1925 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1928 if (c->chrConvertRange)
1929 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions for c->dstFormat: the planar writers
 * (yuv2yuv1/yuv2yuvX, chosen by bit depth and endianness) and the packed
 * writers (yuv2packed1/2/X).  With SWS_FULL_CHR_H_INT set, only the _X
 * full-chroma packed writer is assigned; otherwise all three packed
 * variants are set per format.  #if CONFIG_SMALL collapses the
 * alpha-specialized variants onto the runtime-checked ones (most
 * case labels, breaks and #else lines are elided in this listing —
 * consult the full source before editing the switch structure). */
1932 static av_always_inline void
1933 find_c_packed_planar_out_funcs(SwsContext *c,
1934 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1935 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1936 yuv2packedX_fn *yuv2packedX)
1938 enum PixelFormat dstFormat = c->dstFormat;
/* Planar writer selection by destination bit depth. */
1940 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1941 *yuv2yuvX = yuv2nv12X_c;
1942 } else if (is16BPS(dstFormat)) {
1943 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1944 } else if (is9_OR_10BPS(dstFormat)) {
1945 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
1946 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
1948 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1951 *yuv2yuv1 = yuv2yuv1_c;
1952 *yuv2yuvX = yuv2yuvX_c;
/* Full horizontal chroma interpolation: only the _X writer exists. */
1954 if(c->flags & SWS_FULL_CHR_H_INT) {
1955 switch (dstFormat) {
1958 *yuv2packedX = yuv2rgba32_full_X_c;
1960 #if CONFIG_SWSCALE_ALPHA
1962 *yuv2packedX = yuv2rgba32_full_X_c;
1964 #endif /* CONFIG_SWSCALE_ALPHA */
1966 *yuv2packedX = yuv2rgbx32_full_X_c;
1968 #endif /* !CONFIG_SMALL */
1972 *yuv2packedX = yuv2argb32_full_X_c;
1974 #if CONFIG_SWSCALE_ALPHA
1976 *yuv2packedX = yuv2argb32_full_X_c;
1978 #endif /* CONFIG_SWSCALE_ALPHA */
1980 *yuv2packedX = yuv2xrgb32_full_X_c;
1982 #endif /* !CONFIG_SMALL */
1986 *yuv2packedX = yuv2bgra32_full_X_c;
1988 #if CONFIG_SWSCALE_ALPHA
1990 *yuv2packedX = yuv2bgra32_full_X_c;
1992 #endif /* CONFIG_SWSCALE_ALPHA */
1994 *yuv2packedX = yuv2bgrx32_full_X_c;
1996 #endif /* !CONFIG_SMALL */
2000 *yuv2packedX = yuv2abgr32_full_X_c;
2002 #if CONFIG_SWSCALE_ALPHA
2004 *yuv2packedX = yuv2abgr32_full_X_c;
2006 #endif /* CONFIG_SWSCALE_ALPHA */
2008 *yuv2packedX = yuv2xbgr32_full_X_c;
2010 #endif /* !CONFIG_SMALL */
2013 *yuv2packedX = yuv2rgb24_full_X_c;
2016 *yuv2packedX = yuv2bgr24_full_X_c;
/* Normal (subsampled-chroma) packed writers: _1, _2 and _X variants. */
2020 switch (dstFormat) {
2021 case PIX_FMT_GRAY16BE:
2022 *yuv2packed1 = yuv2gray16BE_1_c;
2023 *yuv2packed2 = yuv2gray16BE_2_c;
2024 *yuv2packedX = yuv2gray16BE_X_c;
2026 case PIX_FMT_GRAY16LE:
2027 *yuv2packed1 = yuv2gray16LE_1_c;
2028 *yuv2packed2 = yuv2gray16LE_2_c;
2029 *yuv2packedX = yuv2gray16LE_X_c;
2031 case PIX_FMT_MONOWHITE:
2032 *yuv2packed1 = yuv2monowhite_1_c;
2033 *yuv2packed2 = yuv2monowhite_2_c;
2034 *yuv2packedX = yuv2monowhite_X_c;
2036 case PIX_FMT_MONOBLACK:
2037 *yuv2packed1 = yuv2monoblack_1_c;
2038 *yuv2packed2 = yuv2monoblack_2_c;
2039 *yuv2packedX = yuv2monoblack_X_c;
2041 case PIX_FMT_YUYV422:
2042 *yuv2packed1 = yuv2yuyv422_1_c;
2043 *yuv2packed2 = yuv2yuyv422_2_c;
2044 *yuv2packedX = yuv2yuyv422_X_c;
2046 case PIX_FMT_UYVY422:
2047 *yuv2packed1 = yuv2uyvy422_1_c;
2048 *yuv2packed2 = yuv2uyvy422_2_c;
2049 *yuv2packedX = yuv2uyvy422_X_c;
2051 case PIX_FMT_RGB48LE:
2052 //*yuv2packed1 = yuv2rgb48le_1_c;
2053 //*yuv2packed2 = yuv2rgb48le_2_c;
2054 //*yuv2packedX = yuv2rgb48le_X_c;
2056 case PIX_FMT_RGB48BE:
2057 *yuv2packed1 = yuv2rgb48be_1_c;
2058 *yuv2packed2 = yuv2rgb48be_2_c;
2059 *yuv2packedX = yuv2rgb48be_X_c;
2061 case PIX_FMT_BGR48LE:
2062 //*yuv2packed1 = yuv2bgr48le_1_c;
2063 //*yuv2packed2 = yuv2bgr48le_2_c;
2064 //*yuv2packedX = yuv2bgr48le_X_c;
2066 case PIX_FMT_BGR48BE:
2067 *yuv2packed1 = yuv2bgr48be_1_c;
2068 *yuv2packed2 = yuv2bgr48be_2_c;
2069 *yuv2packedX = yuv2bgr48be_X_c;
2074 *yuv2packed1 = yuv2rgb32_1_c;
2075 *yuv2packed2 = yuv2rgb32_2_c;
2076 *yuv2packedX = yuv2rgb32_X_c;
2078 #if CONFIG_SWSCALE_ALPHA
2080 *yuv2packed1 = yuv2rgba32_1_c;
2081 *yuv2packed2 = yuv2rgba32_2_c;
2082 *yuv2packedX = yuv2rgba32_X_c;
2084 #endif /* CONFIG_SWSCALE_ALPHA */
2086 *yuv2packed1 = yuv2rgbx32_1_c;
2087 *yuv2packed2 = yuv2rgbx32_2_c;
2088 *yuv2packedX = yuv2rgbx32_X_c;
2090 #endif /* !CONFIG_SMALL */
2092 case PIX_FMT_RGB32_1:
2093 case PIX_FMT_BGR32_1:
2095 *yuv2packed1 = yuv2rgb32_1_1_c;
2096 *yuv2packed2 = yuv2rgb32_1_2_c;
2097 *yuv2packedX = yuv2rgb32_1_X_c;
2099 #if CONFIG_SWSCALE_ALPHA
2101 *yuv2packed1 = yuv2rgba32_1_1_c;
2102 *yuv2packed2 = yuv2rgba32_1_2_c;
2103 *yuv2packedX = yuv2rgba32_1_X_c;
2105 #endif /* CONFIG_SWSCALE_ALPHA */
2107 *yuv2packed1 = yuv2rgbx32_1_1_c;
2108 *yuv2packed2 = yuv2rgbx32_1_2_c;
2109 *yuv2packedX = yuv2rgbx32_1_X_c;
2111 #endif /* !CONFIG_SMALL */
2114 *yuv2packed1 = yuv2rgb24_1_c;
2115 *yuv2packed2 = yuv2rgb24_2_c;
2116 *yuv2packedX = yuv2rgb24_X_c;
2119 *yuv2packed1 = yuv2bgr24_1_c;
2120 *yuv2packed2 = yuv2bgr24_2_c;
2121 *yuv2packedX = yuv2bgr24_X_c;
2123 case PIX_FMT_RGB565LE:
2124 case PIX_FMT_RGB565BE:
2125 case PIX_FMT_BGR565LE:
2126 case PIX_FMT_BGR565BE:
2127 *yuv2packed1 = yuv2rgb16_1_c;
2128 *yuv2packed2 = yuv2rgb16_2_c;
2129 *yuv2packedX = yuv2rgb16_X_c;
2131 case PIX_FMT_RGB555LE:
2132 case PIX_FMT_RGB555BE:
2133 case PIX_FMT_BGR555LE:
2134 case PIX_FMT_BGR555BE:
2135 *yuv2packed1 = yuv2rgb15_1_c;
2136 *yuv2packed2 = yuv2rgb15_2_c;
2137 *yuv2packedX = yuv2rgb15_X_c;
2139 case PIX_FMT_RGB444LE:
2140 case PIX_FMT_RGB444BE:
2141 case PIX_FMT_BGR444LE:
2142 case PIX_FMT_BGR444BE:
2143 *yuv2packed1 = yuv2rgb12_1_c;
2144 *yuv2packed2 = yuv2rgb12_2_c;
2145 *yuv2packedX = yuv2rgb12_X_c;
2149 *yuv2packed1 = yuv2rgb8_1_c;
2150 *yuv2packed2 = yuv2rgb8_2_c;
2151 *yuv2packedX = yuv2rgb8_X_c;
2155 *yuv2packed1 = yuv2rgb4_1_c;
2156 *yuv2packed2 = yuv2rgb4_2_c;
2157 *yuv2packedX = yuv2rgb4_X_c;
2159 case PIX_FMT_RGB4_BYTE:
2160 case PIX_FMT_BGR4_BYTE:
2161 *yuv2packed1 = yuv2rgb4b_1_c;
2162 *yuv2packed2 = yuv2rgb4b_2_c;
2163 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for buffer-management tracing in swScale(); with
 * DEBUG_SWSCALE_BUFFERS == 0 the av_log call is dead code the compiler
 * removes. */
2169 #define DEBUG_SWSCALE_BUFFERS 0
2170 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2172 static int swScale(SwsContext *c, const uint8_t* src[],
2173 int srcStride[], int srcSliceY,
2174 int srcSliceH, uint8_t* dst[], int dstStride[])
2176 /* load a few things into local vars to make the code more readable? and faster */
2177 const int srcW= c->srcW;
2178 const int dstW= c->dstW;
2179 const int dstH= c->dstH;
2180 const int chrDstW= c->chrDstW;
2181 const int chrSrcW= c->chrSrcW;
2182 const int lumXInc= c->lumXInc;
2183 const int chrXInc= c->chrXInc;
2184 const enum PixelFormat dstFormat= c->dstFormat;
2185 const int flags= c->flags;
2186 int16_t *vLumFilterPos= c->vLumFilterPos;
2187 int16_t *vChrFilterPos= c->vChrFilterPos;
2188 int16_t *hLumFilterPos= c->hLumFilterPos;
2189 int16_t *hChrFilterPos= c->hChrFilterPos;
2190 int16_t *vLumFilter= c->vLumFilter;
2191 int16_t *vChrFilter= c->vChrFilter;
2192 int16_t *hLumFilter= c->hLumFilter;
2193 int16_t *hChrFilter= c->hChrFilter;
2194 int32_t *lumMmxFilter= c->lumMmxFilter;
2195 int32_t *chrMmxFilter= c->chrMmxFilter;
2196 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2197 const int vLumFilterSize= c->vLumFilterSize;
2198 const int vChrFilterSize= c->vChrFilterSize;
2199 const int hLumFilterSize= c->hLumFilterSize;
2200 const int hChrFilterSize= c->hChrFilterSize;
2201 int16_t **lumPixBuf= c->lumPixBuf;
2202 int16_t **chrUPixBuf= c->chrUPixBuf;
2203 int16_t **chrVPixBuf= c->chrVPixBuf;
2204 int16_t **alpPixBuf= c->alpPixBuf;
2205 const int vLumBufSize= c->vLumBufSize;
2206 const int vChrBufSize= c->vChrBufSize;
/* NOTE(review): this is the interior of the main swScale() slice-scaling
 * function; its signature, local-variable prologue and a number of elided
 * lines (closing braces, #if guards) lie outside this view, so only
 * comments were added — every code token is untouched. */
2207 uint8_t *formatConvBuffer= c->formatConvBuffer;
2208 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
/* -((-x) >> s) is ceil(x / 2^s): chroma slice height rounded up */
2209 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2211 uint32_t *pal=c->pal_yuv;
/* cache the output-function pointers locally; they may be swapped for the
 * C versions near the bottom of the image (see the dstY >= dstH-2 check) */
2212 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2213 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2214 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2215 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2216 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2218 /* vars which will change and which we need to store back in the context */
2220 int lumBufIndex= c->lumBufIndex;
2221 int chrBufIndex= c->chrBufIndex;
2222 int lastInLumBuf= c->lastInLumBuf;
2223 int lastInChrBuf= c->lastInChrBuf;
/* packed input carries all components in one plane; the elided lines
 * presumably alias src[1..3]/srcStride[1..2] to plane 0 — TODO confirm */
2225 if (isPacked(c->srcFormat)) {
2233 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma lines by widening the effective chroma stride */
2235 srcStride[1]<<= c->vChrDrop;
2236 srcStride[2]<<= c->vChrDrop;
2238 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2239 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2240 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2241 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2242 srcSliceY, srcSliceH, dstY, dstH);
2243 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2244 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn (once per process) when any destination stride is not a multiple
 * of 8 — SIMD paths then cannot use aligned loads/stores */
2246 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2247 static int warnedAlready=0; //FIXME move this into the context perhaps
2248 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2249 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2250 " ->cannot do aligned memory accesses anymore\n");
2255 /* Note the user might start scaling the picture in the middle so this
2256 will not get executed. This is not really intended but works
2257 currently, so people might do it. */
/* first slice of the frame: reset the ring-buffer state (reset code is in
 * the elided lines 2259-2267 — not visible here) */
2258 if (srcSliceY ==0) {
/* main loop: produce one output line per iteration, as long as enough
 * input lines from this slice are available */
2268 for (;dstY < dstH; dstY++) {
2269 const int chrDstY= dstY>>c->chrDstVSubSample;
/* per-plane destination pointers for this output line */
2270 uint8_t *dest[4] = {
2271 dst[0] + dstStride[0] * dstY,
2272 dst[1] + dstStride[1] * chrDstY,
2273 dst[2] + dstStride[2] * chrDstY,
2274 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2277 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* first luma line needed for the LAST output row of this chroma group */
2278 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2279 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2280 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2281 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2282 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2285 //handle holes (FAST_BILINEAR & weird filters)
2286 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2287 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2288 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2289 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2291 DEBUG_BUFFERS("dstY: %d\n", dstY);
2292 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2293 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2294 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2295 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2297 // Do we have enough lines in this slice to output the dstY line
/* right-hand side is again ceil((srcSliceY+srcSliceH)/2^sub) */
2298 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input: clamp to the end of the slice, buffer what we have,
 * then break out and wait for the next slice */
2300 if (!enough_lines) {
2301 lastLumSrcY = srcSliceY + srcSliceH - 1;
2302 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2303 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2304 lastLumSrcY, lastChrSrcY);
2307 //Do horizontal scaling
/* horizontally scale each not-yet-buffered luma (and alpha) input line
 * into the luma ring buffer */
2308 while(lastInLumBuf < lastLumSrcY) {
2309 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2310 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2312 assert(lumBufIndex < 2*vLumBufSize);
2313 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2314 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2315 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2316 hLumFilter, hLumFilterPos, hLumFilterSize,
/* reuse the luma scaler for the alpha plane when present */
2319 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2320 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2321 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2325 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2326 lumBufIndex, lastInLumBuf);
/* same buffering for the chroma planes */
2328 while(lastInChrBuf < lastChrSrcY) {
2329 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2330 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2332 assert(chrBufIndex < 2*vChrBufSize);
2333 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2334 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2335 //FIXME replace parameters through context struct (some at least)
/* needs_hcscale == 0 means the output is gray/mono, so chroma can be
 * skipped entirely (set at init time in sws_init_swScale_c()) */
2337 if (c->needs_hcscale)
2338 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2339 chrDstW, src1, src2, chrSrcW, chrXInc,
2340 hChrFilter, hChrFilterPos, hChrFilterSize,
2341 formatConvBuffer, pal);
2343 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2344 chrBufIndex, lastInChrBuf);
2346 //wrap buf index around to stay inside the ring buffer
2347 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2348 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2350 break; //we can't output a dstY line so let's try with the next slice
2353 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* near the image bottom, switch to the C output functions so SIMD code
 * cannot write past the end of the destination buffers */
2355 if (dstY >= dstH-2) {
2356 // hmm looks like we can't use MMX here without overwriting this array's tail
2357 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2358 &yuv2packed1, &yuv2packed2,
/* pointer arrays into the ring buffers, positioned at the first line the
 * vertical filter needs for this output row (the +vBufSize offset keeps
 * the index non-negative; the buffers are mirrored over 2*vBufSize) */
2363 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2364 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2365 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2366 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* ---- planar YUV / gray output path ---- */
2367 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2368 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* chroma is only written on chroma-sample rows (and never for gray) */
2369 if ((dstY&chrSkipMask) || isGray(dstFormat))
2370 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2371 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2372 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2373 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2374 dest, dstW, chrDstW);
2375 } else { //General YV12
2376 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2377 lumSrcPtr, vLumFilterSize,
2378 vChrFilter + chrDstY * vChrFilterSize,
2379 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2380 alpSrcPtr, dest, dstW, chrDstW);
/* ---- packed (RGB/YUY2/...) output path; the `} else {` that opens it
 * is among the elided lines 2381-2382 ---- */
2383 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2384 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2385 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2386 int chrAlpha = vChrFilter[2 * dstY + 1];
2387 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2388 alpPixBuf ? *alpSrcPtr : NULL,
2389 dest[0], dstW, chrAlpha, dstY);
2390 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2391 int lumAlpha = vLumFilter[2 * dstY + 1];
2392 int chrAlpha = vChrFilter[2 * dstY + 1];
/* *0x10001 duplicates the 16-bit coefficient into both halves of a
 * 32-bit word for the MMX filter tables */
2394 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2396 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2397 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2398 alpPixBuf ? alpSrcPtr : NULL,
2399 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2400 } else { //general RGB
2401 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2402 lumSrcPtr, vLumFilterSize,
2403 vChrFilter + dstY * vChrFilterSize,
2404 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2405 alpSrcPtr, dest[0], dstW, dstY);
/* dst wants an alpha plane but src has none: fill it with opaque (255) */
2411 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2412 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* flush non-temporal stores issued by MMX2 code before returning
 * (the surrounding #if/emms handling is in elided lines) */
2415 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2416 __asm__ volatile("sfence":::"memory");
2420 /* store changed local vars back in the context */
2422 c->lumBufIndex= lumBufIndex;
2423 c->chrBufIndex= chrBufIndex;
2424 c->lastInLumBuf= lastInLumBuf;
2425 c->lastInChrBuf= lastInChrBuf;
/* number of output lines actually written for this slice */
2427 return dstY - lastDstY;
/* Initialize the C code paths of the scaler: pick the per-pixel-format
 * input unpackers (lumToYV12 / chrToYV12 / alpToYV12), the horizontal
 * scaler, the range converters and the packed/planar output functions.
 * NOTE(review): several lines are elided from this view (opening brace,
 * some `switch (srcFormat)` headers and closing braces); code untouched. */
2430 static av_cold void sws_init_swScale_c(SwsContext *c)
2432 enum PixelFormat srcFormat = c->srcFormat;
/* default (plain C) vertical output functions; CPU-specific overrides
 * happen later in ff_getSwsFunc() */
2434 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2435 &c->yuv2packed1, &c->yuv2packed2,
2438 c->hScale = hScale_c;
2440 if (c->flags & SWS_FAST_BILINEAR) {
2441 c->hyscale_fast = hyscale_fast_c;
2442 c->hcscale_fast = hcscale_fast_c;
2445 c->chrToYV12 = NULL;
/* chroma unpackers: convert the source's chroma representation to the
 * planar intermediate (switch header is in an elided line) */
2447 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2448 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2449 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2450 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* palettized formats go through the palette lookup */
2454 case PIX_FMT_BGR4_BYTE:
2455 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10/16-bit planar YUV: only endianness conversion is needed */
2456 case PIX_FMT_YUV444P9BE:
2457 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
2458 case PIX_FMT_YUV444P9LE:
2459 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
2460 case PIX_FMT_YUV444P10BE:
2461 case PIX_FMT_YUV422P10BE:
2462 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
2463 case PIX_FMT_YUV422P10LE:
2464 case PIX_FMT_YUV444P10LE:
2465 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
2466 case PIX_FMT_YUV420P16BE:
2467 case PIX_FMT_YUV422P16BE:
2468 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
2469 case PIX_FMT_YUV420P16LE:
2470 case PIX_FMT_YUV422P16LE:
2471 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB input with horizontal chroma subsampling: use the *_half_c
 * variants, which average two source pixels per chroma sample */
2473 if (c->chrSrcHSubSample) {
2475 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2476 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2477 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2478 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2479 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2480 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2481 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2482 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2483 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2484 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2485 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2486 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2487 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2488 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2489 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2490 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2491 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2492 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* no horizontal subsampling: one chroma sample per source pixel
 * (the `} else {` and switch header are in elided lines) */
2496 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2497 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2498 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2499 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2500 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2501 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2502 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2503 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2504 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2505 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2506 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2507 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2508 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2509 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2510 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2511 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2512 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2513 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* luma (and alpha) unpackers */
2517 c->lumToYV12 = NULL;
2518 c->alpToYV12 = NULL;
2519 switch (srcFormat) {
2520 case PIX_FMT_YUV444P9BE:
2521 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2522 case PIX_FMT_YUV444P9LE:
2523 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2524 case PIX_FMT_YUV444P10BE:
2525 case PIX_FMT_YUV422P10BE:
2526 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2527 case PIX_FMT_YUV444P10LE:
2528 case PIX_FMT_YUV422P10LE:
2529 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
/* YUYV-style readers reused for anything whose luma sits on even bytes
 * (big-endian 16-bit planar, GRAY16BE, Y400A) */
2530 case PIX_FMT_YUYV422 :
2531 case PIX_FMT_YUV420P16BE:
2532 case PIX_FMT_YUV422P16BE:
2533 case PIX_FMT_YUV444P16BE:
2534 case PIX_FMT_Y400A :
2535 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
/* ... and the UYVY reader for luma on odd bytes (LE 16-bit variants) */
2536 case PIX_FMT_UYVY422 :
2537 case PIX_FMT_YUV420P16LE:
2538 case PIX_FMT_YUV422P16LE:
2539 case PIX_FMT_YUV444P16LE:
2540 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2541 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2542 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2543 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2544 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2545 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2546 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2547 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2548 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2549 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2550 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2554 case PIX_FMT_BGR4_BYTE:
2555 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2556 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2557 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
/* note the RGB<->BGR crossover: PIX_FMT_RGB32 uses the bgr32 reader etc.,
 * because the reader names refer to the in-memory byte order */
2558 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2559 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2560 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2561 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2562 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2563 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2564 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2565 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* alpha extraction (guarded by an elided alpha-availability check) */
2568 switch (srcFormat) {
2570 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2572 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2573 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* JPEG (full) <-> MPEG (limited) range conversion for YUV output only;
 * the direction test around line 2578 is elided from this view */
2577 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2579 c->lumConvertRange = lumRangeFromJpeg_c;
2580 c->chrConvertRange = chrRangeFromJpeg_c;
2582 c->lumConvertRange = lumRangeToJpeg_c;
2583 c->chrConvertRange = chrRangeToJpeg_c;
/* chroma scaling is skipped in swScale() unless this flag is set; gray
 * and monochrome formats have no chroma to scale */
2587 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2588 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2589 c->needs_hcscale = 1;
/* Select the swScale implementation for this context: generic C setup
 * first, then CPU-specific overrides (x86 MMX, PPC AltiVec — presumably
 * inside #if HAVE_* guards in the elided lines).  The function's tail
 * (return statement) lies outside this view. */
2592 SwsFunc ff_getSwsFunc(SwsContext *c)
2594 sws_init_swScale_c(c);
2597 ff_sws_init_swScale_mmx(c);
2599 ff_sws_init_swScale_altivec(c);