2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are actually tested, but the results were not written down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
332 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
333 const int16_t *chrUSrc, const int16_t *chrVSrc,
334 const int16_t *alpSrc,
335 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
336 uint8_t *aDest, int dstW, int chrDstW)
339 for (i=0; i<dstW; i++) {
340 int val= (lumSrc[i]+64)>>7;
341 dest[i]= av_clip_uint8(val);
345 for (i=0; i<chrDstW; i++) {
346 int u=(chrUSrc[i]+64)>>7;
347 int v=(chrVSrc[i]+64)>>7;
348 uDest[i]= av_clip_uint8(u);
349 vDest[i]= av_clip_uint8(v);
352 if (CONFIG_SWSCALE_ALPHA && aDest)
353 for (i=0; i<dstW; i++) {
354 int val= (alpSrc[i]+64)>>7;
355 aDest[i]= av_clip_uint8(val);
359 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
360 const int16_t **lumSrc, int lumFilterSize,
361 const int16_t *chrFilter, const int16_t **chrUSrc,
362 const int16_t **chrVSrc, int chrFilterSize,
363 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
364 uint8_t *vDest, uint8_t *aDest,
365 int dstW, int chrDstW)
367 enum PixelFormat dstFormat = c->dstFormat;
369 //FIXME Optimize (just quickly written not optimized..)
371 for (i=0; i<dstW; i++) {
374 for (j=0; j<lumFilterSize; j++)
375 val += lumSrc[j][i] * lumFilter[j];
377 dest[i]= av_clip_uint8(val>>19);
383 if (dstFormat == PIX_FMT_NV12)
384 for (i=0; i<chrDstW; i++) {
388 for (j=0; j<chrFilterSize; j++) {
389 u += chrUSrc[j][i] * chrFilter[j];
390 v += chrVSrc[j][i] * chrFilter[j];
393 uDest[2*i]= av_clip_uint8(u>>19);
394 uDest[2*i+1]= av_clip_uint8(v>>19);
397 for (i=0; i<chrDstW; i++) {
401 for (j=0; j<chrFilterSize; j++) {
402 u += chrUSrc[j][i] * chrFilter[j];
403 v += chrVSrc[j][i] * chrFilter[j];
406 uDest[2*i]= av_clip_uint8(v>>19);
407 uDest[2*i+1]= av_clip_uint8(u>>19);
411 #define output_pixel(pos, val) \
412 if (target == PIX_FMT_GRAY16BE) { \
418 static av_always_inline void
419 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
420 const int16_t **lumSrc, int lumFilterSize,
421 const int16_t *chrFilter, const int16_t **chrUSrc,
422 const int16_t **chrVSrc, int chrFilterSize,
423 const int16_t **alpSrc, uint8_t *dest, int dstW,
424 int y, enum PixelFormat target)
428 for (i = 0; i < (dstW >> 1); i++) {
432 const int i2 = 2 * i;
434 for (j = 0; j < lumFilterSize; j++) {
435 Y1 += lumSrc[j][i2] * lumFilter[j];
436 Y2 += lumSrc[j][i2+1] * lumFilter[j];
440 if ((Y1 | Y2) & 0x10000) {
441 Y1 = av_clip_uint16(Y1);
442 Y2 = av_clip_uint16(Y2);
444 output_pixel(&dest[2 * i2 + 0], Y1);
445 output_pixel(&dest[2 * i2 + 2], Y2);
449 static av_always_inline void
450 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
451 const uint16_t *buf1, const uint16_t *ubuf0,
452 const uint16_t *ubuf1, const uint16_t *vbuf0,
453 const uint16_t *vbuf1, const uint16_t *abuf0,
454 const uint16_t *abuf1, uint8_t *dest, int dstW,
455 int yalpha, int uvalpha, int y,
456 enum PixelFormat target)
458 int yalpha1 = 4095 - yalpha; \
461 for (i = 0; i < (dstW >> 1); i++) {
462 const int i2 = 2 * i;
463 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
464 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
466 output_pixel(&dest[2 * i2 + 0], Y1);
467 output_pixel(&dest[2 * i2 + 2], Y2);
471 static av_always_inline void
472 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
473 const uint16_t *ubuf0, const uint16_t *ubuf1,
474 const uint16_t *vbuf0, const uint16_t *vbuf1,
475 const uint16_t *abuf0, uint8_t *dest, int dstW,
476 int uvalpha, enum PixelFormat dstFormat,
477 int flags, int y, enum PixelFormat target)
481 for (i = 0; i < (dstW >> 1); i++) {
482 const int i2 = 2 * i;
483 int Y1 = buf0[i2 ] << 1;
484 int Y2 = buf0[i2+1] << 1;
486 output_pixel(&dest[2 * i2 + 0], Y1);
487 output_pixel(&dest[2 * i2 + 2], Y2);
493 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
494 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
495 const int16_t **lumSrc, int lumFilterSize, \
496 const int16_t *chrFilter, const int16_t **chrUSrc, \
497 const int16_t **chrVSrc, int chrFilterSize, \
498 const int16_t **alpSrc, uint8_t *dest, int dstW, \
501 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
502 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
503 alpSrc, dest, dstW, y, fmt); \
506 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
507 const uint16_t *buf1, const uint16_t *ubuf0, \
508 const uint16_t *ubuf1, const uint16_t *vbuf0, \
509 const uint16_t *vbuf1, const uint16_t *abuf0, \
510 const uint16_t *abuf1, uint8_t *dest, int dstW, \
511 int yalpha, int uvalpha, int y) \
513 name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
514 vbuf0, vbuf1, abuf0, abuf1, \
515 dest, dstW, yalpha, uvalpha, y, fmt); \
518 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
519 const uint16_t *ubuf0, const uint16_t *ubuf1, \
520 const uint16_t *vbuf0, const uint16_t *vbuf1, \
521 const uint16_t *abuf0, uint8_t *dest, int dstW, \
522 int uvalpha, enum PixelFormat dstFormat, \
525 name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
526 vbuf1, abuf0, dest, dstW, uvalpha, \
527 dstFormat, flags, y, fmt); \
530 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
531 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
533 #define output_pixel(pos, acc) \
534 if (target == PIX_FMT_MONOBLACK) { \
540 static av_always_inline void
541 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
542 const int16_t **lumSrc, int lumFilterSize,
543 const int16_t *chrFilter, const int16_t **chrUSrc,
544 const int16_t **chrVSrc, int chrFilterSize,
545 const int16_t **alpSrc, uint8_t *dest, int dstW,
546 int y, enum PixelFormat target)
548 const uint8_t * const d128=dither_8x8_220[y&7];
549 uint8_t *g = c->table_gU[128] + c->table_gV[128];
553 for (i = 0; i < dstW - 1; i += 2) {
558 for (j = 0; j < lumFilterSize; j++) {
559 Y1 += lumSrc[j][i] * lumFilter[j];
560 Y2 += lumSrc[j][i+1] * lumFilter[j];
564 if ((Y1 | Y2) & 0x100) {
565 Y1 = av_clip_uint8(Y1);
566 Y2 = av_clip_uint8(Y2);
568 acc += acc + g[Y1 + d128[(i + 0) & 7]];
569 acc += acc + g[Y2 + d128[(i + 1) & 7]];
571 output_pixel(*dest++, acc);
576 static av_always_inline void
577 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
578 const uint16_t *buf1, const uint16_t *ubuf0,
579 const uint16_t *ubuf1, const uint16_t *vbuf0,
580 const uint16_t *vbuf1, const uint16_t *abuf0,
581 const uint16_t *abuf1, uint8_t *dest, int dstW,
582 int yalpha, int uvalpha, int y,
583 enum PixelFormat target)
585 const uint8_t * const d128 = dither_8x8_220[y & 7];
586 uint8_t *g = c->table_gU[128] + c->table_gV[128];
587 int yalpha1 = 4095 - yalpha;
590 for (i = 0; i < dstW - 7; i += 8) {
591 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
592 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
593 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
594 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
595 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
596 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
597 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
598 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
599 output_pixel(*dest++, acc);
603 static av_always_inline void
604 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
605 const uint16_t *ubuf0, const uint16_t *ubuf1,
606 const uint16_t *vbuf0, const uint16_t *vbuf1,
607 const uint16_t *abuf0, uint8_t *dest, int dstW,
608 int uvalpha, enum PixelFormat dstFormat,
609 int flags, int y, enum PixelFormat target)
611 const uint8_t * const d128 = dither_8x8_220[y & 7];
612 uint8_t *g = c->table_gU[128] + c->table_gV[128];
615 for (i = 0; i < dstW - 7; i += 8) {
616 int acc = g[(buf0[i ] >> 7) + d128[0]];
617 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
618 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
619 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
620 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
621 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
622 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
623 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
624 output_pixel(*dest++, acc);
630 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
631 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
633 #define output_pixels(pos, Y1, U, Y2, V) \
634 if (target == PIX_FMT_YUYV422) { \
635 dest[pos + 0] = Y1; \
637 dest[pos + 2] = Y2; \
641 dest[pos + 1] = Y1; \
643 dest[pos + 3] = Y2; \
646 static av_always_inline void
647 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
648 const int16_t **lumSrc, int lumFilterSize,
649 const int16_t *chrFilter, const int16_t **chrUSrc,
650 const int16_t **chrVSrc, int chrFilterSize,
651 const int16_t **alpSrc, uint8_t *dest, int dstW,
652 int y, enum PixelFormat target)
656 for (i = 0; i < (dstW >> 1); i++) {
663 for (j = 0; j < lumFilterSize; j++) {
664 Y1 += lumSrc[j][i * 2] * lumFilter[j];
665 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
667 for (j = 0; j < chrFilterSize; j++) {
668 U += chrUSrc[j][i] * chrFilter[j];
669 V += chrVSrc[j][i] * chrFilter[j];
675 if ((Y1 | Y2 | U | V) & 0x100) {
676 Y1 = av_clip_uint8(Y1);
677 Y2 = av_clip_uint8(Y2);
678 U = av_clip_uint8(U);
679 V = av_clip_uint8(V);
681 output_pixels(4*i, Y1, U, Y2, V);
685 static av_always_inline void
686 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
687 const uint16_t *buf1, const uint16_t *ubuf0,
688 const uint16_t *ubuf1, const uint16_t *vbuf0,
689 const uint16_t *vbuf1, const uint16_t *abuf0,
690 const uint16_t *abuf1, uint8_t *dest, int dstW,
691 int yalpha, int uvalpha, int y,
692 enum PixelFormat target)
694 int yalpha1 = 4095 - yalpha;
695 int uvalpha1 = 4095 - uvalpha;
698 for (i = 0; i < (dstW >> 1); i++) {
699 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
700 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
701 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
702 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
704 output_pixels(i * 4, Y1, U, Y2, V);
708 static av_always_inline void
709 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
710 const uint16_t *ubuf0, const uint16_t *ubuf1,
711 const uint16_t *vbuf0, const uint16_t *vbuf1,
712 const uint16_t *abuf0, uint8_t *dest, int dstW,
713 int uvalpha, enum PixelFormat dstFormat,
714 int flags, int y, enum PixelFormat target)
718 if (uvalpha < 2048) {
719 for (i = 0; i < (dstW >> 1); i++) {
720 int Y1 = buf0[i * 2] >> 7;
721 int Y2 = buf0[i * 2 + 1] >> 7;
722 int U = ubuf1[i] >> 7;
723 int V = vbuf1[i] >> 7;
725 output_pixels(i * 4, Y1, U, Y2, V);
728 for (i = 0; i < (dstW >> 1); i++) {
729 int Y1 = buf0[i * 2] >> 7;
730 int Y2 = buf0[i * 2 + 1] >> 7;
731 int U = (ubuf0[i] + ubuf1[i]) >> 8;
732 int V = (vbuf0[i] + vbuf1[i]) >> 8;
734 output_pixels(i * 4, Y1, U, Y2, V);
741 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
742 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
744 #define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
745 #define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
747 static av_always_inline void
748 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
749 const int16_t **lumSrc, int lumFilterSize,
750 const int16_t *chrFilter, const int16_t **chrUSrc,
751 const int16_t **chrVSrc, int chrFilterSize,
752 const int16_t **alpSrc, uint8_t *dest, int dstW,
753 int y, enum PixelFormat target)
757 for (i = 0; i < (dstW >> 1); i++) {
763 const uint8_t *r, *g, *b;
765 for (j = 0; j < lumFilterSize; j++) {
766 Y1 += lumSrc[j][i * 2] * lumFilter[j];
767 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
769 for (j = 0; j < chrFilterSize; j++) {
770 U += chrUSrc[j][i] * chrFilter[j];
771 V += chrVSrc[j][i] * chrFilter[j];
777 if ((Y1 | Y2 | U | V) & 0x100) {
778 Y1 = av_clip_uint8(Y1);
779 Y2 = av_clip_uint8(Y2);
780 U = av_clip_uint8(U);
781 V = av_clip_uint8(V);
784 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
785 r = (const uint8_t *) c->table_rV[V];
786 g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
787 b = (const uint8_t *) c->table_bU[U];
789 dest[ 0] = dest[ 1] = r_b[Y1];
790 dest[ 2] = dest[ 3] = g[Y1];
791 dest[ 4] = dest[ 5] = b_r[Y1];
792 dest[ 6] = dest[ 7] = r_b[Y2];
793 dest[ 8] = dest[ 9] = g[Y2];
794 dest[10] = dest[11] = b_r[Y2];
799 static av_always_inline void
800 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
801 const uint16_t *buf1, const uint16_t *ubuf0,
802 const uint16_t *ubuf1, const uint16_t *vbuf0,
803 const uint16_t *vbuf1, const uint16_t *abuf0,
804 const uint16_t *abuf1, uint8_t *dest, int dstW,
805 int yalpha, int uvalpha, int y,
806 enum PixelFormat target)
808 int yalpha1 = 4095 - yalpha;
809 int uvalpha1 = 4095 - uvalpha;
812 for (i = 0; i < (dstW >> 1); i++) {
813 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
814 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
815 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
816 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
817 const uint8_t *r = (const uint8_t *) c->table_rV[V],
818 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
819 *b = (const uint8_t *) c->table_bU[U];
821 dest[ 0] = dest[ 1] = r_b[Y1];
822 dest[ 2] = dest[ 3] = g[Y1];
823 dest[ 4] = dest[ 5] = b_r[Y1];
824 dest[ 6] = dest[ 7] = r_b[Y2];
825 dest[ 8] = dest[ 9] = g[Y2];
826 dest[10] = dest[11] = b_r[Y2];
831 static av_always_inline void
832 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
833 const uint16_t *ubuf0, const uint16_t *ubuf1,
834 const uint16_t *vbuf0, const uint16_t *vbuf1,
835 const uint16_t *abuf0, uint8_t *dest, int dstW,
836 int uvalpha, enum PixelFormat dstFormat,
837 int flags, int y, enum PixelFormat target)
841 if (uvalpha < 2048) {
842 for (i = 0; i < (dstW >> 1); i++) {
843 int Y1 = buf0[i * 2] >> 7;
844 int Y2 = buf0[i * 2 + 1] >> 7;
845 int U = ubuf1[i] >> 7;
846 int V = vbuf1[i] >> 7;
847 const uint8_t *r = (const uint8_t *) c->table_rV[V],
848 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
849 *b = (const uint8_t *) c->table_bU[U];
851 dest[ 0] = dest[ 1] = r_b[Y1];
852 dest[ 2] = dest[ 3] = g[Y1];
853 dest[ 4] = dest[ 5] = b_r[Y1];
854 dest[ 6] = dest[ 7] = r_b[Y2];
855 dest[ 8] = dest[ 9] = g[Y2];
856 dest[10] = dest[11] = b_r[Y2];
860 for (i = 0; i < (dstW >> 1); i++) {
861 int Y1 = buf0[i * 2] >> 7;
862 int Y2 = buf0[i * 2 + 1] >> 7;
863 int U = (ubuf0[i] + ubuf1[i]) >> 8;
864 int V = (vbuf0[i] + vbuf1[i]) >> 8;
865 const uint8_t *r = (const uint8_t *) c->table_rV[V],
866 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
867 *b = (const uint8_t *) c->table_bU[U];
869 dest[ 0] = dest[ 1] = r_b[Y1];
870 dest[ 2] = dest[ 3] = g[Y1];
871 dest[ 4] = dest[ 5] = b_r[Y1];
872 dest[ 6] = dest[ 7] = r_b[Y2];
873 dest[ 8] = dest[ 9] = g[Y2];
874 dest[10] = dest[11] = b_r[Y2];
883 YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
884 //YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
885 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
886 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
888 static av_always_inline void
889 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
890 int U, int V, int A1, int A2,
891 const void *_r, const void *_g, const void *_b, int y,
892 enum PixelFormat target, int hasAlpha)
894 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
895 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
896 uint32_t *dest = (uint32_t *) _dest;
897 const uint32_t *r = (const uint32_t *) _r;
898 const uint32_t *g = (const uint32_t *) _g;
899 const uint32_t *b = (const uint32_t *) _b;
902 int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
904 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
905 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
908 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
910 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
911 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
913 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
914 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
917 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
918 uint8_t *dest = (uint8_t *) _dest;
919 const uint8_t *r = (const uint8_t *) _r;
920 const uint8_t *g = (const uint8_t *) _g;
921 const uint8_t *b = (const uint8_t *) _b;
923 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
924 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
925 dest[i * 6 + 0] = r_b[Y1];
926 dest[i * 6 + 1] = g[Y1];
927 dest[i * 6 + 2] = b_r[Y1];
928 dest[i * 6 + 3] = r_b[Y2];
929 dest[i * 6 + 4] = g[Y2];
930 dest[i * 6 + 5] = b_r[Y2];
933 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
934 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
935 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
936 uint16_t *dest = (uint16_t *) _dest;
937 const uint16_t *r = (const uint16_t *) _r;
938 const uint16_t *g = (const uint16_t *) _g;
939 const uint16_t *b = (const uint16_t *) _b;
940 int dr1, dg1, db1, dr2, dg2, db2;
942 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
943 dr1 = dither_2x2_8[ y & 1 ][0];
944 dg1 = dither_2x2_4[ y & 1 ][0];
945 db1 = dither_2x2_8[(y & 1) ^ 1][0];
946 dr2 = dither_2x2_8[ y & 1 ][1];
947 dg2 = dither_2x2_4[ y & 1 ][1];
948 db2 = dither_2x2_8[(y & 1) ^ 1][1];
949 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
950 dr1 = dither_2x2_8[ y & 1 ][0];
951 dg1 = dither_2x2_8[ y & 1 ][1];
952 db1 = dither_2x2_8[(y & 1) ^ 1][0];
953 dr2 = dither_2x2_8[ y & 1 ][1];
954 dg2 = dither_2x2_8[ y & 1 ][0];
955 db2 = dither_2x2_8[(y & 1) ^ 1][1];
957 dr1 = dither_4x4_16[ y & 3 ][0];
958 dg1 = dither_4x4_16[ y & 3 ][1];
959 db1 = dither_4x4_16[(y & 3) ^ 3][0];
960 dr2 = dither_4x4_16[ y & 3 ][1];
961 dg2 = dither_4x4_16[ y & 3 ][0];
962 db2 = dither_4x4_16[(y & 3) ^ 3][1];
965 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
966 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
967 } else /* 8/4-bit */ {
968 uint8_t *dest = (uint8_t *) _dest;
969 const uint8_t *r = (const uint8_t *) _r;
970 const uint8_t *g = (const uint8_t *) _g;
971 const uint8_t *b = (const uint8_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
974 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
975 const uint8_t * const d64 = dither_8x8_73[y & 7];
976 const uint8_t * const d32 = dither_8x8_32[y & 7];
977 dr1 = dg1 = d32[(i * 2 + 0) & 7];
978 db1 = d64[(i * 2 + 0) & 7];
979 dr2 = dg2 = d32[(i * 2 + 1) & 7];
980 db2 = d64[(i * 2 + 1) & 7];
982 const uint8_t * const d64 = dither_8x8_73 [y & 7];
983 const uint8_t * const d128 = dither_8x8_220[y & 7];
984 dr1 = db1 = d128[(i * 2 + 0) & 7];
985 dg1 = d64[(i * 2 + 0) & 7];
986 dr2 = db2 = d128[(i * 2 + 1) & 7];
987 dg2 = d64[(i * 2 + 1) & 7];
990 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
991 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
992 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
994 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
995 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1000 static av_always_inline void
1001 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1002 const int16_t **lumSrc, int lumFilterSize,
1003 const int16_t *chrFilter, const int16_t **chrUSrc,
1004 const int16_t **chrVSrc, int chrFilterSize,
1005 const int16_t **alpSrc, uint8_t *dest, int dstW,
1006 int y, enum PixelFormat target, int hasAlpha)
1010 for (i = 0; i < (dstW >> 1); i++) {
1016 int av_unused A1, A2;
1017 const void *r, *g, *b;
1019 for (j = 0; j < lumFilterSize; j++) {
1020 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1021 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1023 for (j = 0; j < chrFilterSize; j++) {
1024 U += chrUSrc[j][i] * chrFilter[j];
1025 V += chrVSrc[j][i] * chrFilter[j];
1031 if ((Y1 | Y2 | U | V) & 0x100) {
1032 Y1 = av_clip_uint8(Y1);
1033 Y2 = av_clip_uint8(Y2);
1034 U = av_clip_uint8(U);
1035 V = av_clip_uint8(V);
1040 for (j = 0; j < lumFilterSize; j++) {
1041 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1042 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1046 if ((A1 | A2) & 0x100) {
1047 A1 = av_clip_uint8(A1);
1048 A2 = av_clip_uint8(A2);
1052 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1054 g = (c->table_gU[U] + c->table_gV[V]);
1057 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1058 r, g, b, y, target, hasAlpha);
1062 static av_always_inline void
1063 yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0,
1064 const uint16_t *buf1, const uint16_t *ubuf0,
1065 const uint16_t *ubuf1, const uint16_t *vbuf0,
1066 const uint16_t *vbuf1, const uint16_t *abuf0,
1067 const uint16_t *abuf1, uint8_t *dest, int dstW,
1068 int yalpha, int uvalpha, int y,
1069 enum PixelFormat target, int hasAlpha)
1071 int yalpha1 = 4095 - yalpha;
1072 int uvalpha1 = 4095 - uvalpha;
1075 for (i = 0; i < (dstW >> 1); i++) {
1076 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1077 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1078 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1079 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1081 const void *r = c->table_rV[V],
1082 *g = (c->table_gU[U] + c->table_gV[V]),
1083 *b = c->table_bU[U];
1086 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1087 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1090 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1091 r, g, b, y, target, hasAlpha);
/*
 * yuv2rgb_1_c_template(): single-source-line output path (no vertical
 * filtering of luma). Input samples are 15-bit; >>7 brings them to 8 bits.
 * When uvalpha < 2048 only ubuf1/vbuf1 are used; otherwise the two chroma
 * lines are averaged ((a+b)>>8 on 15-bit values). NOTE(review): excerpt —
 * braces, i/A1/A2 declarations and hasAlpha guards are elided; code lines
 * kept byte-identical.
 */
1095 static av_always_inline void
1096 yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0,
1097 const uint16_t *ubuf0, const uint16_t *ubuf1,
1098 const uint16_t *vbuf0, const uint16_t *vbuf1,
1099 const uint16_t *abuf0, uint8_t *dest, int dstW,
1100 int uvalpha, enum PixelFormat dstFormat,
1101 int flags, int y, enum PixelFormat target,
/* Branch 1: chroma taken from the second line only. */
1106 if (uvalpha < 2048) {
1107 for (i = 0; i < (dstW >> 1); i++) {
1108 int Y1 = buf0[i * 2] >> 7;
1109 int Y2 = buf0[i * 2 + 1] >> 7;
1110 int U = ubuf1[i] >> 7;
1111 int V = vbuf1[i] >> 7;
1113 const void *r = c->table_rV[V],
1114 *g = (c->table_gU[U] + c->table_gV[V]),
1115 *b = c->table_bU[U];
1118 A1 = abuf0[i * 2 ] >> 7;
1119 A2 = abuf0[i * 2 + 1] >> 7;
1122 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1123 r, g, b, y, target, hasAlpha);
/* Branch 2: chroma averaged from both lines. */
1126 for (i = 0; i < (dstW >> 1); i++) {
1127 int Y1 = buf0[i * 2] >> 7;
1128 int Y2 = buf0[i * 2 + 1] >> 7;
1129 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1130 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1132 const void *r = c->table_rV[V],
1133 *g = (c->table_gU[U] + c->table_gV[V]),
1134 *b = c->table_bU[U];
1137 A1 = abuf0[i * 2 ] >> 7;
1138 A2 = abuf0[i * 2 + 1] >> 7;
1141 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1142 r, g, b, y, target, hasAlpha);
/*
 * YUV2RGBWRAPPER(): generates the three concrete output functions
 * (name##ext##_X_c, _2_c, _1_c) for one pixel format, each delegating to
 * the shared _X/_2/_1 templates with `fmt` and `hasAlpha` bound at the
 * call site so the always-inline templates specialize per format.
 * NOTE(review): excerpt — some macro lines (e.g. `int y)`, braces) are
 * elided; lines kept byte-identical.
 */
1147 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1148 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1149 const int16_t **lumSrc, int lumFilterSize, \
1150 const int16_t *chrFilter, const int16_t **chrUSrc, \
1151 const int16_t **chrVSrc, int chrFilterSize, \
1152 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1155 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1156 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1157 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1160 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
1161 const uint16_t *buf1, const uint16_t *ubuf0, \
1162 const uint16_t *ubuf1, const uint16_t *vbuf0, \
1163 const uint16_t *vbuf1, const uint16_t *abuf0, \
1164 const uint16_t *abuf1, uint8_t *dest, int dstW, \
1165 int yalpha, int uvalpha, int y) \
1167 name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
1168 vbuf0, vbuf1, abuf0, abuf1, \
1169 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1172 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
1173 const uint16_t *ubuf0, const uint16_t *ubuf1, \
1174 const uint16_t *vbuf0, const uint16_t *vbuf1, \
1175 const uint16_t *abuf0, uint8_t *dest, int dstW, \
1176 int uvalpha, enum PixelFormat dstFormat, \
1179 name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
1180 vbuf1, abuf0, dest, dstW, uvalpha, \
1181 dstFormat, flags, y, fmt, hasAlpha); \
/*
 * Instantiations of the wrapper trio for each supported packed-RGB output.
 * The 32-bit variants pick alpha support at runtime (c->alpPixBuf) or,
 * when !CONFIG_SMALL, compile dedicated a32/x32 versions.
 */
1185 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1186 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1188 #if CONFIG_SWSCALE_ALPHA
1189 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1190 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1192 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1193 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1195 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1196 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1197 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1198 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1199 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1200 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1201 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1202 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * YSCALE_YUV_2_RGBX_FULL_C(): per-pixel full-chroma-resolution body.
 * Accumulates the vertical luma/chroma (and optional alpha) filters, then
 * converts with the context's fixed-point coefficients and clips R/G/B to
 * 30-bit non-negative range when any component over/underflows.
 * NOTE(review): excerpt — accumulator initialization (with `rnd`) and
 * some closing lines are elided; lines kept byte-identical.
 */
1204 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
1205 for (i=0; i<dstW; i++) {\
1213 for (j=0; j<lumFilterSize; j++) {\
1214 Y += lumSrc[j][i ] * lumFilter[j];\
1216 for (j=0; j<chrFilterSize; j++) {\
1217 U += chrUSrc[j][i] * chrFilter[j];\
1218 V += chrVSrc[j][i] * chrFilter[j];\
/* Alpha reuses the luma filter taps. */
1225 for (j=0; j<lumFilterSize; j++)\
1226 A += alpSrc[j][i ] * lumFilter[j];\
1229 A = av_clip_uint8(A);\
/* Fixed-point YUV->RGB using the per-context coefficients. */
1231 Y-= c->yuv2rgb_y_offset;\
1232 Y*= c->yuv2rgb_y_coeff;\
1234 R= Y + V*c->yuv2rgb_v2r_coeff;\
1235 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
1236 B= Y + U*c->yuv2rgb_u2b_coeff;\
/* Clip only when some component left the 30-bit range (cheap common case). */
1237 if ((R|G|B)&(0xC0000000)) {\
1238 R = av_clip_uintp2(R, 30); \
1239 G = av_clip_uintp2(G, 30); \
1240 B = av_clip_uintp2(B, 30); \
/*
 * yuv2rgbX_c_full(): full-chroma-resolution vertical-filter output to a
 * packed RGB/BGR destination; `step` is the destination pixel size in
 * bytes (c->dstFormatBpp/8) and `aidx` (declared in elided lines) is the
 * alpha byte position. Each case runs the YSCALE macro and stores pixels;
 * most store code is elided in this excerpt — code lines kept verbatim.
 */
1243 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
1244 const int16_t **lumSrc, int lumFilterSize,
1245 const int16_t *chrFilter, const int16_t **chrUSrc,
1246 const int16_t **chrVSrc, int chrFilterSize,
1247 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1250 int step= c->dstFormatBpp/8;
1253 switch(c->dstFormat) {
/* needAlpha decides whether the accumulated A or opaque 255 is stored. */
1261 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1262 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1263 dest[aidx]= needAlpha ? A : 255;
1270 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1271 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1279 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1296 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1297 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1298 dest[aidx]= needAlpha ? A : 255;
1305 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1306 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1314 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1329 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1330 int width, int height,
1334 uint8_t *ptr = plane + stride*y;
1335 for (i=0; i<height; i++) {
1336 memset(ptr, val, width);
/* Read one 16-bit component with the endianness of `origin`. */
1341 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Map r/b onto r_b/b_r depending on whether the source is BGR48 or RGB48. */
1343 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1344 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * rgb48ToY_c_template(): 48-bit RGB/BGR -> 8-bit luma. Takes the top 8
 * bits of each 16-bit component, then applies the fixed-point RY/GY/BY
 * weights with rounding. NOTE(review): excerpt — braces and `int i`
 * elided; code lines kept verbatim.
 */
1346 static av_always_inline void
1347 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
1348 enum PixelFormat origin)
1351 for (i = 0; i < width; i++) {
1352 int r_b = input_pixel(&src[i*6+0]) >> 8;
1353 int g = input_pixel(&src[i*6+2]) >> 8;
1354 int b_r = input_pixel(&src[i*6+4]) >> 8;
1356 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * rgb48ToUV_c_template(): 48-bit RGB/BGR -> 8-bit chroma (one U and one V
 * sample per input pixel, no subsampling). Uses the top 8 bits of each
 * component and fixed-point RU/GU/BU and RV/GV/BV weights with rounding.
 * NOTE(review): excerpt — braces and `int i` elided; src2 appears unused
 * in the visible lines (presumably asserted equal to src1 elsewhere).
 */
1360 static av_always_inline void
1361 rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1362 const uint8_t *src1, const uint8_t *src2,
1363 int width, enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 int r_b = input_pixel(&src1[i*6+0]) >> 8;
1369 int g = input_pixel(&src1[i*6+2]) >> 8;
1370 int b_r = input_pixel(&src1[i*6+4]) >> 8;
1372 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1373 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * rgb48ToUV_half_c_template(): like rgb48ToUV_c_template but horizontally
 * 2:1 subsampled — each output chroma sample averages two adjacent input
 * pixels (sums, then >>(RGB2YUV_SHIFT+1) with doubled rounding constant).
 * NOTE(review): excerpt — braces and `int i` elided.
 */
1377 static av_always_inline void
1378 rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1379 const uint8_t *src1, const uint8_t *src2,
1380 int width, enum PixelFormat origin)
1384 for (i = 0; i < width; i++) {
/* Sum of the two pixels' top-8-bit components (range 0..510). */
1385 int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
1386 int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
1387 int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
1389 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1390 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/*
 * rgb48funcs(): stamps out the three entry points (ToY, ToUV, ToUV_half)
 * for one 48-bit format, binding the PixelFormat constant so the inline
 * templates specialize per endianness/component order.
 */
1398 #define rgb48funcs(pattern, BE_LE, origin) \
1399 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
1400 int width, uint32_t *unused) \
1402 rgb48ToY_c_template(dst, src, width, origin); \
1405 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1406 const uint8_t *src1, const uint8_t *src2, \
1407 int width, uint32_t *unused) \
1409 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1412 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1413 const uint8_t *src1, const uint8_t *src2, \
1414 int width, uint32_t *unused) \
1416 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1419 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1420 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1421 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1422 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Pixel loader for 16/32-bit packed RGB: native 32-bit read for the
 * 8888 formats, endian-aware 16-bit read otherwise. */
1424 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1425 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1426 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * rgb16_32ToY_c_template(): generic packed RGB (15/16/32 bpp) -> 8-bit
 * luma. The component layout is described by shift/mask parameters; the
 * RY/GY/BY weights are pre-shifted (rsh/gsh/bsh) so the final >>S lands
 * on 8 bits regardless of component width. NOTE(review): excerpt —
 * braces and `int i` elided; code lines kept verbatim.
 */
1428 static av_always_inline void
1429 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1430 int width, enum PixelFormat origin,
1431 int shr, int shg, int shb, int shp,
1432 int maskr, int maskg, int maskb,
1433 int rsh, int gsh, int bsh, int S)
1435 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1436 rnd = 33 << (S - 1);
1439 for (i = 0; i < width; i++) {
1440 int px = input_pixel(i) >> shp;
1441 int b = (px & maskb) >> shb;
1442 int g = (px & maskg) >> shg;
1443 int r = (px & maskr) >> shr;
1445 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/*
 * rgb16_32ToUV_c_template(): generic packed RGB -> 8-bit chroma, one U/V
 * pair per input pixel. Same shift/mask parameterization as the luma
 * template; weights pre-shifted so >>S normalizes to 8 bits.
 * NOTE(review): excerpt — braces and `int i` elided.
 */
1449 static av_always_inline void
1450 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1451 const uint8_t *src, int width,
1452 enum PixelFormat origin,
1453 int shr, int shg, int shb, int shp,
1454 int maskr, int maskg, int maskb,
1455 int rsh, int gsh, int bsh, int S)
1457 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1458 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1459 rnd = 257 << (S - 1);
1462 for (i = 0; i < width; i++) {
1463 int px = input_pixel(i) >> shp;
1464 int b = (px & maskb) >> shb;
1465 int g = (px & maskg) >> shg;
1466 int r = (px & maskr) >> shr;
1468 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1469 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/*
 * rgb16_32ToUV_half_c_template(): packed RGB -> chroma with 2:1
 * horizontal subsampling. Adds two adjacent pixels before masking: the
 * g channel is isolated via maskgx = ~(maskr|maskb) so r+b can be summed
 * in one add (rb), and the masks are widened by one bit to hold the sum.
 * NOTE(review): excerpt — braces, `int i` and part of the 565 special
 * case are elided; code lines kept verbatim.
 */
1473 static av_always_inline void
1474 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1475 const uint8_t *src, int width,
1476 enum PixelFormat origin,
1477 int shr, int shg, int shb, int shp,
1478 int maskr, int maskg, int maskb,
1479 int rsh, int gsh, int bsh, int S)
1481 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1482 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1483 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* Widen the masks: summed components need one extra bit. */
1486 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1487 for (i = 0; i < width; i++) {
1488 int px0 = input_pixel(2 * i + 0) >> shp;
1489 int px1 = input_pixel(2 * i + 1) >> shp;
1490 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1491 int rb = px0 + px1 - g;
1493 b = (rb & maskb) >> shb;
/* 565-style formats need special g handling (body partly elided here). */
1494 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1495 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1498 g = (g & maskg) >> shg;
1500 r = (rb & maskr) >> shr;
1502 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1503 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * rgb16_32_wrapper(): stamps out ToY/ToUV/ToUV_half entry points for one
 * packed-RGB layout, binding all shift/mask parameters as compile-time
 * constants so the templates fold into specialized code.
 */
1509 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1510 maskg, maskb, rsh, gsh, bsh, S) \
1511 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1512 int width, uint32_t *unused) \
1514 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1515 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1518 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1519 const uint8_t *src, const uint8_t *dummy, \
1520 int width, uint32_t *unused) \
1522 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1526 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1527 const uint8_t *src, const uint8_t *dummy, \
1528 int width, uint32_t *unused) \
1530 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1531 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One instantiation per supported 15/16/32-bit layout and endianness. */
1534 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1535 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1536 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1537 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1538 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1539 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1540 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1541 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1542 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1543 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1544 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1545 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/*
 * Alpha extractors for 32-bit formats. NOTE(review): the loop bodies are
 * elided in this excerpt — presumably abgrToA_c reads the alpha byte at
 * offset 0 and rgbaToA_c at offset 3 of each 4-byte pixel; confirm
 * against the full source.
 */
1547 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1550 for (i=0; i<width; i++) {
1555 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1558 for (i=0; i<width; i++) {
/*
 * palToY_c(): PAL8 -> luma; the palette entry's low byte is the Y value
 * (`d` is declared in an elided line, presumably d = src[i]).
 */
1563 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1566 for (i=0; i<width; i++) {
1569 dst[i]= pal[d] & 0xFF;
/*
 * palToUV_c(): PAL8 -> chroma; U/V extraction from `p` is elided here
 * (presumably the next bytes of the packed palette entry — confirm).
 * src1 and src2 must alias the same plane.
 */
1573 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1574 const uint8_t *src1, const uint8_t *src2,
1575 int width, uint32_t *pal)
1578 assert(src1 == src2);
1579 for (i=0; i<width; i++) {
1580 int p= pal[src1[i]];
/*
 * 1 bpp -> 8 bpp expansion, MSB-first, 8 output pixels per input byte;
 * each bit becomes 0 or 255. `d` is loaded in an elided line — for the
 * monowhite variant it is presumably the inverted source byte (white=0),
 * for monoblack the plain byte; confirm against the full source.
 * Note: a trailing partial byte (width not a multiple of 8) is not
 * handled by the visible loop.
 */
1587 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1588 int width, uint32_t *unused)
1591 for (i=0; i<width/8; i++) {
1594 dst[8*i+j]= ((d>>(7-j))&1)*255;
1598 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1599 int width, uint32_t *unused)
1602 for (i=0; i<width/8; i++) {
1605 dst[8*i+j]= ((d>>(7-j))&1)*255;
1609 //FIXME yuy2* can read up to 7 samples too much
/* YUYV: Y at even bytes (extraction line elided — presumably src[2*i]). */
1611 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1615 for (i=0; i<width; i++)
/* YUYV: U at byte 1 and V at byte 3 of each 4-byte pair. */
1619 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1620 const uint8_t *src2, int width, uint32_t *unused)
1623 for (i=0; i<width; i++) {
1624 dstU[i]= src1[4*i + 1];
1625 dstV[i]= src1[4*i + 3];
/* Chroma planes must alias for this packed reader. */
1627 assert(src1 == src2);
/* 16-bit little-endian gray/chroma: take the high byte of each sample. */
1630 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1631 const uint8_t *src2, int width, uint32_t *unused)
1634 for (i=0; i<width; i++) {
1635 dstU[i]= src1[2*i + 1];
1636 dstV[i]= src2[2*i + 1];
1640 /* This is almost identical to the previous, end exists only because
1641 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1642 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1646 for (i=0; i<width; i++)
/* UYVY: U at byte 0 and V at byte 2 of each 4-byte pair. */
1650 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1651 const uint8_t *src2, int width, uint32_t *unused)
1654 for (i=0; i<width; i++) {
1655 dstU[i]= src1[4*i + 0];
1656 dstV[i]= src1[4*i + 2];
1658 assert(src1 == src2);
/* 16-bit big-endian counterpart of LEToUV_c (loop body elided here). */
1661 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1662 const uint8_t *src2, int width, uint32_t *unused)
1665 for (i=0; i<width; i++) {
1671 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1672 const uint8_t *src, int width)
1675 for (i = 0; i < width; i++) {
1676 dst1[i] = src[2*i+0];
1677 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V,U,V... -> U to dstU, V to dstV. */
1681 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1682 const uint8_t *src1, const uint8_t *src2,
1683 int width, uint32_t *unused)
1685 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U,V,U... -> destinations swapped. */
1688 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1689 const uint8_t *src1, const uint8_t *src2,
1690 int width, uint32_t *unused)
1692 nvXXtoUV_c(dstV, dstU, src1, width);
/* Endian-aware 16-bit sample loader for the 9/10-bit planar readers. */
1695 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1697 // FIXME Maybe dither instead.
/*
 * 9/10-bit planar YUV -> 8-bit: truncates by (depth-8) bits per sample.
 * NOTE(review): excerpt — braces and `int i` elided.
 */
1698 static av_always_inline void
1699 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1700 const uint8_t *_srcU, const uint8_t *_srcV,
1701 int width, enum PixelFormat origin, int depth)
1704 const uint16_t *srcU = (const uint16_t *) _srcU;
1705 const uint16_t *srcV = (const uint16_t *) _srcV;
1707 for (i = 0; i < width; i++) {
1708 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1709 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1713 static av_always_inline void
1714 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1715 int width, enum PixelFormat origin, int depth)
1718 const uint16_t *srcY = (const uint16_t*)_srcY;
1720 for (i = 0; i < width; i++)
1721 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
/* YUV_NBPS(): generates the LE/BE 9- and 10-bit entry points. */
1726 #define YUV_NBPS(depth, BE_LE, origin) \
1727 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1728 const uint8_t *srcU, const uint8_t *srcV, \
1729 int width, uint32_t *unused) \
1731 yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1733 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1734 int width, uint32_t *unused) \
1736 yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1739 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1740 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1741 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1742 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
/*
 * BGR24 -> Y (component loads elided here; the UV variant below shows the
 * b,g,r byte order). Fixed-point RY/GY/BY weights with rounding.
 */
1744 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1745 int width, uint32_t *unused)
1748 for (i=0; i<width; i++) {
1753 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> U/V, one chroma pair per pixel (no subsampling). */
1757 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1758 const uint8_t *src2, int width, uint32_t *unused)
1761 for (i=0; i<width; i++) {
1762 int b= src1[3*i + 0];
1763 int g= src1[3*i + 1];
1764 int r= src1[3*i + 2];
1766 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1767 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1769 assert(src1 == src2);
/* BGR24 -> U/V with 2:1 horizontal subsampling: sum two pixels, >>(S+1). */
1772 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1773 const uint8_t *src2, int width, uint32_t *unused)
1776 for (i=0; i<width; i++) {
1777 int b= src1[6*i + 0] + src1[6*i + 3];
1778 int g= src1[6*i + 1] + src1[6*i + 4];
1779 int r= src1[6*i + 2] + src1[6*i + 5];
1781 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1782 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1784 assert(src1 == src2);
/* RGB24 counterparts of the bgr24 readers: identical math, r/g/b byte
 * order reversed in the source. */
1787 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1791 for (i=0; i<width; i++) {
1796 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1800 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1801 const uint8_t *src2, int width, uint32_t *unused)
1805 for (i=0; i<width; i++) {
1806 int r= src1[3*i + 0];
1807 int g= src1[3*i + 1];
1808 int b= src1[3*i + 2];
1810 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1811 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* 2:1 horizontally subsampled variant: sums two adjacent pixels. */
1815 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1816 const uint8_t *src2, int width, uint32_t *unused)
1820 for (i=0; i<width; i++) {
1821 int r= src1[6*i + 0] + src1[6*i + 3];
1822 int g= src1[6*i + 1] + src1[6*i + 4];
1823 int b= src1[6*i + 2] + src1[6*i + 5];
1825 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1826 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1830 // bilinear / bicubic scaling
/**
 * Generic horizontal scaler (bilinear/bicubic/...):
 *   dst[i] = clip( sum_j src[filterPos[i]+j] * filter[i*filterSize+j] >> 7 )
 * Output is renormalized by >>7 and clipped to (1<<15)-1 because the
 * cubic filter coefficients can overshoot.
 */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i, j;

    for (i = 0; i < dstW; i++) {
        const int16_t *coef = filter + (int)filterSize * i;
        const uint8_t *in   = src + filterPos[i];
        int acc = 0;

        for (j = 0; j < filterSize; j++)
            acc += in[j] * coef[j];
        acc >>= 7;
        /* equivalent to FFMIN(acc, (1<<15)-1) */
        dst[i] = acc > (1 << 15) - 1 ? (1 << 15) - 1 : acc;
    }
}
1849 //FIXME all pal and rgb srcFormats could do this convertion as well
1850 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place fixed-point chroma range remap used when the output is
 * JPEG/full range (per the function name): clamp each 15-bit-scaled
 * sample to 30775, then apply x*4663 - 9289992, renormalized by >>12.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i] > 30775 ? 30775 : dstU[i]; /* == FFMIN(dstU[i], 30775) */
        int v = dstV[i] > 30775 ? 30775 : dstV[i];

        dstU[i] = (u * 4663 - 9289992) >> 12; /* -264 */
        dstV[i] = (v * 4663 - 9289992) >> 12; /* -264 */
    }
}
/**
 * In-place fixed-point chroma range remap used when the input is
 * JPEG/full range (per the function name): x*1799 + 4081085, >>11.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i];
        int v = dstV[i];

        dstU[i] = (u * 1799 + 4081085) >> 11; /* 1469 */
        dstV[i] = (v * 1799 + 4081085) >> 11; /* 1469 */
    }
}
/**
 * In-place fixed-point luma range remap toward JPEG/full range (per the
 * name): clamp to 30189, then x*19077 - 39057361, renormalized by >>14.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int y = dst[i] > 30189 ? 30189 : dst[i]; /* == FFMIN(dst[i], 30189) */

        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/**
 * In-place fixed-point luma range remap from JPEG/full range (per the
 * name): x*14071 + 33561947, renormalized by >>14.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *end = dst + width;

    while (dst < end) {
        *dst = (*dst * 14071 + 33561947) >> 14;
        dst++;
    }
}
/*
 * hyscale_fast_c(): fast-bilinear horizontal luma scale to 15-bit output.
 * 16.16 fixed-point position `xpos`; xalpha is the 7-bit fractional
 * weight. NOTE(review): excerpt — the `xpos += xInc` advance and braces
 * are elided; code lines kept verbatim. May read src[srcW] at the right
 * edge (known behavior of this path).
 */
1880 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1881 const uint8_t *src, int srcW, int xInc)
1884 unsigned int xpos=0;
1885 for (i=0;i<dstWidth;i++) {
1886 register unsigned int xx=xpos>>16;
1887 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1888 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1893 // *** horizontal scale Y line to temp buffer
/*
 * hyscale(): convert one luma (or alpha, isAlpha) line to planar 8-bit if
 * needed (toYV12 into formatConvBuffer), horizontally scale it (generic
 * hScale or fast-bilinear path), then apply range conversion for luma.
 * NOTE(review): excerpt — the guards around toYV12/convertRange calls are
 * elided; code lines kept verbatim.
 */
1894 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1895 const uint8_t *src, int srcW, int xInc,
1896 const int16_t *hLumFilter,
1897 const int16_t *hLumFilterPos, int hLumFilterSize,
1898 uint8_t *formatConvBuffer,
1899 uint32_t *pal, int isAlpha)
/* Alpha lines use the alpha converter and skip range conversion. */
1901 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1902 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1905 toYV12(formatConvBuffer, src, srcW, pal);
1906 src= formatConvBuffer;
1909 if (!c->hyscale_fast) {
1910 c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1911 } else { // fast bilinear upscale / crap downscale
1912 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1916 convertRange(dst, dstWidth);
/*
 * hcscale_fast_c(): fast-bilinear horizontal chroma scale for both planes
 * at once. Weighting uses (xalpha^127) for the left sample, i.e. 127 -
 * xalpha for 7-bit xalpha. NOTE(review): excerpt — `xpos += xInc` and
 * braces are elided; code lines kept verbatim.
 */
1919 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1920 int dstWidth, const uint8_t *src1,
1921 const uint8_t *src2, int srcW, int xInc)
1924 unsigned int xpos=0;
1925 for (i=0;i<dstWidth;i++) {
1926 register unsigned int xx=xpos>>16;
1927 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1928 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1929 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * hcscale(): convert one pair of chroma lines to planar 8-bit if needed
 * (chrToYV12 into formatConvBuffer, second plane 16-byte aligned after
 * the first), horizontally scale both planes, then optionally apply the
 * chroma range conversion. NOTE(review): excerpt — the guard around
 * chrToYV12 and `src2 = buf2` are elided; code lines kept verbatim.
 */
1934 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1935 const uint8_t *src1, const uint8_t *src2,
1936 int srcW, int xInc, const int16_t *hChrFilter,
1937 const int16_t *hChrFilterPos, int hChrFilterSize,
1938 uint8_t *formatConvBuffer, uint32_t *pal)
1941 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1942 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1943 src1= formatConvBuffer;
1947 if (!c->hcscale_fast) {
1948 c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1949 c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1950 } else { // fast bilinear upscale / crap downscale
1951 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1954 if (c->chrConvertRange)
1955 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * find_c_packed_planar_out_funcs(): selects the C output functions for
 * the context's destination format — planar writers (yuv2yuv1/yuv2yuvX)
 * by bit depth, and packed writers (_1/_2/_X) via a switch on dstFormat.
 * Full-chroma interpolation (SWS_FULL_CHR_H_INT) routes everything
 * through yuv2rgbX_c_full instead. NOTE(review): excerpt — `break`
 * statements, some case labels, CONFIG_SMALL branches and closing braces
 * are elided; code lines kept verbatim.
 */
1958 static av_always_inline void
1959 find_c_packed_planar_out_funcs(SwsContext *c,
1960 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1961 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1962 yuv2packedX_fn *yuv2packedX)
1964 enum PixelFormat dstFormat = c->dstFormat;
/* Planar selection: NV12/21, 16-bit, 9/10-bit (by depth), else 8-bit. */
1966 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1967 *yuv2yuvX = yuv2nv12X_c;
1968 } else if (is16BPS(dstFormat)) {
1969 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1970 } else if (is9_OR_10BPS(dstFormat)) {
1971 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
1972 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
1974 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1977 *yuv2yuv1 = yuv2yuv1_c;
1978 *yuv2yuvX = yuv2yuvX_c;
/* Packed selection: full-chroma path or per-format writer trio. */
1980 if(c->flags & SWS_FULL_CHR_H_INT) {
1981 *yuv2packedX = yuv2rgbX_c_full;
1983 switch (dstFormat) {
1984 case PIX_FMT_GRAY16BE:
1985 *yuv2packed1 = yuv2gray16BE_1_c;
1986 *yuv2packed2 = yuv2gray16BE_2_c;
1987 *yuv2packedX = yuv2gray16BE_X_c;
1989 case PIX_FMT_GRAY16LE:
1990 *yuv2packed1 = yuv2gray16LE_1_c;
1991 *yuv2packed2 = yuv2gray16LE_2_c;
1992 *yuv2packedX = yuv2gray16LE_X_c;
1994 case PIX_FMT_MONOWHITE:
1995 *yuv2packed1 = yuv2monowhite_1_c;
1996 *yuv2packed2 = yuv2monowhite_2_c;
1997 *yuv2packedX = yuv2monowhite_X_c;
1999 case PIX_FMT_MONOBLACK:
2000 *yuv2packed1 = yuv2monoblack_1_c;
2001 *yuv2packed2 = yuv2monoblack_2_c;
2002 *yuv2packedX = yuv2monoblack_X_c;
2004 case PIX_FMT_YUYV422:
2005 *yuv2packed1 = yuv2yuyv422_1_c;
2006 *yuv2packed2 = yuv2yuyv422_2_c;
2007 *yuv2packedX = yuv2yuyv422_X_c;
2009 case PIX_FMT_UYVY422:
2010 *yuv2packed1 = yuv2uyvy422_1_c;
2011 *yuv2packed2 = yuv2uyvy422_2_c;
2012 *yuv2packedX = yuv2uyvy422_X_c;
/* RGB48/BGR48 little-endian writers not implemented yet (commented out). */
2014 case PIX_FMT_RGB48LE:
2015 //*yuv2packed1 = yuv2rgb48le_1_c;
2016 //*yuv2packed2 = yuv2rgb48le_2_c;
2017 //*yuv2packedX = yuv2rgb48le_X_c;
2019 case PIX_FMT_RGB48BE:
2020 *yuv2packed1 = yuv2rgb48be_1_c;
2021 *yuv2packed2 = yuv2rgb48be_2_c;
2022 *yuv2packedX = yuv2rgb48be_X_c;
2024 case PIX_FMT_BGR48LE:
2025 //*yuv2packed1 = yuv2bgr48le_1_c;
2026 //*yuv2packed2 = yuv2bgr48le_2_c;
2027 //*yuv2packedX = yuv2bgr48le_X_c;
2029 case PIX_FMT_BGR48BE:
2030 *yuv2packed1 = yuv2bgr48be_1_c;
2031 *yuv2packed2 = yuv2bgr48be_2_c;
2032 *yuv2packedX = yuv2bgr48be_X_c;
/* 32-bit RGB: runtime-alpha writers (CONFIG_SMALL) or dedicated a32/x32. */
2037 *yuv2packed1 = yuv2rgb32_1_c;
2038 *yuv2packed2 = yuv2rgb32_2_c;
2039 *yuv2packedX = yuv2rgb32_X_c;
2041 #if CONFIG_SWSCALE_ALPHA
2043 *yuv2packed1 = yuv2rgba32_1_c;
2044 *yuv2packed2 = yuv2rgba32_2_c;
2045 *yuv2packedX = yuv2rgba32_X_c;
2047 #endif /* CONFIG_SWSCALE_ALPHA */
2049 *yuv2packed1 = yuv2rgbx32_1_c;
2050 *yuv2packed2 = yuv2rgbx32_2_c;
2051 *yuv2packedX = yuv2rgbx32_X_c;
2053 #endif /* !CONFIG_SMALL */
2055 case PIX_FMT_RGB32_1:
2056 case PIX_FMT_BGR32_1:
2058 *yuv2packed1 = yuv2rgb32_1_1_c;
2059 *yuv2packed2 = yuv2rgb32_1_2_c;
2060 *yuv2packedX = yuv2rgb32_1_X_c;
2062 #if CONFIG_SWSCALE_ALPHA
2064 *yuv2packed1 = yuv2rgba32_1_1_c;
2065 *yuv2packed2 = yuv2rgba32_1_2_c;
2066 *yuv2packedX = yuv2rgba32_1_X_c;
2068 #endif /* CONFIG_SWSCALE_ALPHA */
2070 *yuv2packed1 = yuv2rgbx32_1_1_c;
2071 *yuv2packed2 = yuv2rgbx32_1_2_c;
2072 *yuv2packedX = yuv2rgbx32_1_X_c;
2074 #endif /* !CONFIG_SMALL */
2077 *yuv2packed1 = yuv2rgb24_1_c;
2078 *yuv2packed2 = yuv2rgb24_2_c;
2079 *yuv2packedX = yuv2rgb24_X_c;
2082 *yuv2packed1 = yuv2bgr24_1_c;
2083 *yuv2packed2 = yuv2bgr24_2_c;
2084 *yuv2packedX = yuv2bgr24_X_c;
2086 case PIX_FMT_RGB565:
2087 case PIX_FMT_BGR565:
2088 *yuv2packed1 = yuv2rgb16_1_c;
2089 *yuv2packed2 = yuv2rgb16_2_c;
2090 *yuv2packedX = yuv2rgb16_X_c;
2092 case PIX_FMT_RGB555:
2093 case PIX_FMT_BGR555:
2094 *yuv2packed1 = yuv2rgb15_1_c;
2095 *yuv2packed2 = yuv2rgb15_2_c;
2096 *yuv2packedX = yuv2rgb15_X_c;
2098 case PIX_FMT_RGB444:
2099 case PIX_FMT_BGR444:
2100 *yuv2packed1 = yuv2rgb12_1_c;
2101 *yuv2packed2 = yuv2rgb12_2_c;
2102 *yuv2packedX = yuv2rgb12_X_c;
2106 *yuv2packed1 = yuv2rgb8_1_c;
2107 *yuv2packed2 = yuv2rgb8_2_c;
2108 *yuv2packedX = yuv2rgb8_X_c;
2112 *yuv2packed1 = yuv2rgb4_1_c;
2113 *yuv2packed2 = yuv2rgb4_2_c;
2114 *yuv2packedX = yuv2rgb4_X_c;
2116 case PIX_FMT_RGB4_BYTE:
2117 case PIX_FMT_BGR4_BYTE:
2118 *yuv2packed1 = yuv2rgb4b_1_c;
2119 *yuv2packed2 = yuv2rgb4b_2_c;
2120 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for ring-buffer tracing; DEBUG_BUFFERS compiles to
 * nothing when DEBUG_SWSCALE_BUFFERS is 0 (the `if` is constant-folded). */
2126 #define DEBUG_SWSCALE_BUFFERS 0
2127 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2129 static int swScale(SwsContext *c, const uint8_t* src[],
2130 int srcStride[], int srcSliceY,
2131 int srcSliceH, uint8_t* dst[], int dstStride[])
2133 /* load a few things into local vars to make the code more readable? and faster */
2134 const int srcW= c->srcW;
2135 const int dstW= c->dstW;
2136 const int dstH= c->dstH;
2137 const int chrDstW= c->chrDstW;
2138 const int chrSrcW= c->chrSrcW;
2139 const int lumXInc= c->lumXInc;
2140 const int chrXInc= c->chrXInc;
2141 const enum PixelFormat dstFormat= c->dstFormat;
2142 const int flags= c->flags;
2143 int16_t *vLumFilterPos= c->vLumFilterPos;
2144 int16_t *vChrFilterPos= c->vChrFilterPos;
2145 int16_t *hLumFilterPos= c->hLumFilterPos;
2146 int16_t *hChrFilterPos= c->hChrFilterPos;
2147 int16_t *vLumFilter= c->vLumFilter;
2148 int16_t *vChrFilter= c->vChrFilter;
2149 int16_t *hLumFilter= c->hLumFilter;
2150 int16_t *hChrFilter= c->hChrFilter;
2151 int32_t *lumMmxFilter= c->lumMmxFilter;
2152 int32_t *chrMmxFilter= c->chrMmxFilter;
2153 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2154 const int vLumFilterSize= c->vLumFilterSize;
2155 const int vChrFilterSize= c->vChrFilterSize;
2156 const int hLumFilterSize= c->hLumFilterSize;
2157 const int hChrFilterSize= c->hChrFilterSize;
2158 int16_t **lumPixBuf= c->lumPixBuf;
2159 int16_t **chrUPixBuf= c->chrUPixBuf;
2160 int16_t **chrVPixBuf= c->chrVPixBuf;
2161 int16_t **alpPixBuf= c->alpPixBuf;
2162 const int vLumBufSize= c->vLumBufSize;
2163 const int vChrBufSize= c->vChrBufSize;
2164 uint8_t *formatConvBuffer= c->formatConvBuffer;
2165 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2166 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2168 uint32_t *pal=c->pal_yuv;
2169 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2170 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2171 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2172 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2173 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2175 /* vars which will change and which we need to store back in the context */
2177 int lumBufIndex= c->lumBufIndex;
2178 int chrBufIndex= c->chrBufIndex;
2179 int lastInLumBuf= c->lastInLumBuf;
2180 int lastInChrBuf= c->lastInChrBuf;
2182 if (isPacked(c->srcFormat)) {
2190 srcStride[3]= srcStride[0];
2192 srcStride[1]<<= c->vChrDrop;
2193 srcStride[2]<<= c->vChrDrop;
2195 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2196 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2197 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2198 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2199 srcSliceY, srcSliceH, dstY, dstH);
2200 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2201 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
// NOTE(review): this span is the TAIL of the main swScale() worker; the
// function header and several interior lines (closing braces, the vertical
// scaler call heads, #if guards) are elided from this view.  Code below is
// byte-identical to the original; only comments have been added.
2203 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2204 static int warnedAlready=0; //FIXME move this into the context perhaps
2205 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2206 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2207 " ->cannot do aligned memory accesses anymore\n");
2212 /* Note the user might start scaling the picture in the middle so this
2213 will not get executed. This is not really intended but works
2214 currently, so people might do it. */
2215 if (srcSliceY ==0) {
// Main vertical loop: each iteration tries to emit one destination line.
// dstY is resumed from the context, so scaling may continue across slices.
2225 for (;dstY < dstH; dstY++) {
2226 unsigned char *dest =dst[0]+dstStride[0]*dstY;
2227 const int chrDstY= dstY>>c->chrDstVSubSample;
2228 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2229 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
2230 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
// Compute the window of source lines the vertical filter needs for this
// output line (luma, the last luma line of the chroma group, and chroma).
2232 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2233 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2234 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2235 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2236 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2237 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2240 //handle holes (FAST_BILINEAR & weird filters)
2241 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2242 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2243 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2244 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2246 DEBUG_BUFFERS("dstY: %d\n", dstY);
2247 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2248 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2249 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2250 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2252 // Do we have enough lines in this slice to output the dstY line
2253 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2255 if (!enough_lines) {
// Not enough input yet: clamp to the slice end and only buffer lines.
2256 lastLumSrcY = srcSliceY + srcSliceH - 1;
2257 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2258 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2259 lastLumSrcY, lastChrSrcY);
2262 //Do horizontal scaling
2263 while(lastInLumBuf < lastLumSrcY) {
2264 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2265 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2267 assert(lumBufIndex < 2*vLumBufSize);
2268 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2269 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2270 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2271 hLumFilter, hLumFilterPos, hLumFilterSize,
// Alpha plane (src[3]) is horizontally scaled with the luma scaler.
2274 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2275 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2276 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2280 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2281 lumBufIndex, lastInLumBuf);
// Same as above, for the two chroma planes (src[1]/src[2]).
2283 while(lastInChrBuf < lastChrSrcY) {
2284 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2285 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2287 assert(chrBufIndex < 2*vChrBufSize);
2288 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2289 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2290 //FIXME replace parameters through context struct (some at least)
2292 if (c->needs_hcscale)
2293 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2294 chrDstW, src1, src2, chrSrcW, chrXInc,
2295 hChrFilter, hChrFilterPos, hChrFilterSize,
2296 formatConvBuffer, pal);
2298 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2299 chrBufIndex, lastInChrBuf);
2301 //wrap buf index around to stay inside the ring buffer
2302 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2303 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2305 break; //we can't output a dstY line so let's try with the next slice
// NOTE(review): presumably refreshes the MMX dither state for this output
// line before the vertical scale — implementation is elsewhere; confirm.
2308 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2310 if (dstY >= dstH-2) {
2311 // hmm looks like we can't use MMX here without overwriting this array's tail
2312 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2313 &yuv2packed1, &yuv2packed2,
// Pointers into the ring buffers positioned at the first line of the
// vertical filter window for this output line.
2318 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2319 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2320 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2321 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2322 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2323 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2324 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
2325 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2326 const int16_t *lumBuf = lumSrcPtr[0];
2327 const int16_t *chrUBuf= chrUSrcPtr[0];
2328 const int16_t *chrVBuf= chrVSrcPtr[0];
2329 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2330 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
2331 uDest, vDest, aDest, dstW, chrDstW);
2332 } else { //General YV12
2334 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2335 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
2336 chrVSrcPtr, vChrFilterSize,
2337 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2340 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2341 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2342 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2343 int chrAlpha= vChrFilter[2*dstY+1];
2344 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
2345 *chrVSrcPtr, *(chrVSrcPtr+1),
2346 alpPixBuf ? *alpSrcPtr : NULL,
2347 dest, dstW, chrAlpha, dstFormat, flags, dstY);
2348 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2349 int lumAlpha= vLumFilter[2*dstY+1];
2350 int chrAlpha= vChrFilter[2*dstY+1];
// 0x10001 duplicates the 16-bit coefficient into both halves of a 32-bit
// word for the MMX filter tables.
2352 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2354 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
2355 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
2356 *chrVSrcPtr, *(chrVSrcPtr+1),
2357 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2358 dest, dstW, lumAlpha, chrAlpha, dstY);
2359 } else { //general RGB
2361 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2362 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2363 alpSrcPtr, dest, dstW, dstY);
// YUVA output without a source alpha plane: fill the alpha plane opaque.
2369 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2370 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
// NOTE(review): SFENCE after the MMX2 path — presumably to order
// non-temporal stores issued by the SIMD output functions; confirm.
2373 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2374 __asm__ volatile("sfence":::"memory");
2378 /* store changed local vars back in the context */
2380 c->lumBufIndex= lumBufIndex;
2381 c->chrBufIndex= chrBufIndex;
2382 c->lastInLumBuf= lastInLumBuf;
2383 c->lastInChrBuf= lastInChrBuf;
// Return the number of destination lines written by this call.
2385 return dstY - lastDstY;
// Initialize the plain-C implementations of all per-format function
// pointers in the SwsContext: output functions, horizontal scalers,
// input unpackers (chrToYV12/lumToYV12/alpToYV12) and range converters.
// NOTE(review): several structural lines (opening brace, the
// `switch (srcFormat)` headers and some closing braces) are elided from
// this view; code is kept byte-identical, comments only added.
2388 static av_cold void sws_init_swScale_c(SwsContext *c)
2390 enum PixelFormat srcFormat = c->srcFormat;
2392 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2393 &c->yuv2packed1, &c->yuv2packed2,
2396 c->hScale = hScale_c;
// Fast-bilinear mode replaces the generic hScale with cheaper scalers.
2398 if (c->flags & SWS_FAST_BILINEAR) {
2399 c->hyscale_fast = hyscale_fast_c;
2400 c->hcscale_fast = hcscale_fast_c;
// Select the chroma input unpacker (source format -> planar UV).
2403 c->chrToYV12 = NULL;
2405 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2406 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2407 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2408 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2412 case PIX_FMT_BGR4_BYTE:
2413 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2414 case PIX_FMT_YUV444P9BE:
2415 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
2416 case PIX_FMT_YUV444P9LE:
2417 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
2418 case PIX_FMT_YUV444P10BE:
2419 case PIX_FMT_YUV422P10BE:
2420 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
2421 case PIX_FMT_YUV422P10LE:
2422 case PIX_FMT_YUV444P10LE:
2423 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
2424 case PIX_FMT_YUV420P16BE:
2425 case PIX_FMT_YUV422P16BE:
2426 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
2427 case PIX_FMT_YUV420P16LE:
2428 case PIX_FMT_YUV422P16LE:
2429 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
// When chroma is horizontally subsampled, use the *_half_ variants that
// average two source pixels per chroma sample; otherwise the plain ones.
2431 if (c->chrSrcHSubSample) {
2433 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2434 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2435 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2436 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2437 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2438 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2439 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2440 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2441 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2442 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2443 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2444 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2445 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2446 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2447 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2448 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2449 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2450 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2454 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2455 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2456 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2457 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2458 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2459 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2460 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2461 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2462 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2463 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2464 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2465 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2466 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2467 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2468 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2469 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2470 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2471 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
// Select the luma (and later alpha) input unpackers.
2475 c->lumToYV12 = NULL;
2476 c->alpToYV12 = NULL;
2477 switch (srcFormat) {
2478 case PIX_FMT_YUV444P9BE:
2479 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2480 case PIX_FMT_YUV444P9LE:
2481 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2482 case PIX_FMT_YUV444P10BE:
2483 case PIX_FMT_YUV422P10BE:
2484 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2485 case PIX_FMT_YUV444P10LE:
2486 case PIX_FMT_YUV422P10LE:
2487 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
// YUYV-style layouts and big-endian 16-bit share the same Y extractor
// (luma sits at the same byte offsets); likewise for the UYVY group.
2488 case PIX_FMT_YUYV422 :
2489 case PIX_FMT_YUV420P16BE:
2490 case PIX_FMT_YUV422P16BE:
2491 case PIX_FMT_YUV444P16BE:
2492 case PIX_FMT_Y400A :
2493 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2494 case PIX_FMT_UYVY422 :
2495 case PIX_FMT_YUV420P16LE:
2496 case PIX_FMT_YUV422P16LE:
2497 case PIX_FMT_YUV444P16LE:
2498 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2499 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2500 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2501 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2502 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2503 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2504 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2505 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2506 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2507 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2508 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2512 case PIX_FMT_BGR4_BYTE:
2513 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2514 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2515 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2516 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2517 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2518 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2519 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2520 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2521 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2522 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2523 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
// Alpha extractor for formats carrying an alpha channel.
2526 switch (srcFormat) {
2528 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2530 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2531 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
// Range (full/limited a.k.a. JPEG/MPEG) conversion, only meaningful for
// non-RGB destinations.
2535 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2537 c->lumConvertRange = lumRangeFromJpeg_c;
2538 c->chrConvertRange = chrRangeFromJpeg_c;
2540 c->lumConvertRange = lumRangeToJpeg_c;
2541 c->chrConvertRange = chrRangeToJpeg_c;
// Gray/mono formats carry no chroma, so horizontal chroma scaling can be
// skipped entirely; everything else needs it.
2545 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2546 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2547 c->needs_hcscale = 1;
// Return the scaling function for this context: install the C baseline
// first, then let architecture-specific initializers override pointers.
// NOTE(review): body is truncated in this view (the return statement and
// presumably #if HAVE_* guards around the mmx/altivec calls are elided).
2550 SwsFunc ff_getSwsFunc(SwsContext *c)
2552 sws_init_swScale_c(c);
2555 ff_sws_init_swScale_mmx(c);
2557 ff_sws_init_swScale_altivec(c);