/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24

  tested special converters (most are tested actually, but I did not write it down ...)

  untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients (Q15, ITU-R BT.601).
 * Luma coefficients are scaled by 219/255 (limited range), chroma by 224/255. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB->YUV coefficient sets, indexed by colorspace id.
 * Each row: {Gy, By, Ry, Gu, Bu, Ru, Gv, Bv, Rv}-style luma/chroma weights
 * as used by the colorspace selection code. Duplicate rows fill the unused
 * table slots with the ITU601 default. */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
/*
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
optimize C code (YV12 / minmax)
add support for packed pixel YUV input & output
add support for Y8 output
optimize BGR24 & BGR32
add BGR4 output support
write special BGR->BGR scaler
*/
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/**
 * Expand to one yuv2yuvX wrapper for a given (bit depth, endianness) pair.
 * The generated function reinterprets the byte destinations as uint16_t
 * and forwards to yuv2yuvX16_c_template with is_be/bits as compile-time
 * constants.
 */
#define yuv2NBPS(bits, BE_LE, is_be) \
static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                              const int16_t **lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **chrUSrc, \
                              const int16_t **chrVSrc, \
                              int chrFilterSize, const int16_t **alpSrc, \
                              uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
                              uint8_t *_aDest, int dstW, int chrDstW) \
{ \
    uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
             *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, \
                          dest, uDest, vDest, aDest, \
                          dstW, chrDstW, is_be, bits); \
}
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
332 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
333 const int16_t *chrUSrc, const int16_t *chrVSrc,
334 const int16_t *alpSrc,
335 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
336 uint8_t *aDest, int dstW, int chrDstW)
339 for (i=0; i<dstW; i++) {
340 int val= (lumSrc[i]+64)>>7;
341 dest[i]= av_clip_uint8(val);
345 for (i=0; i<chrDstW; i++) {
346 int u=(chrUSrc[i]+64)>>7;
347 int v=(chrVSrc[i]+64)>>7;
348 uDest[i]= av_clip_uint8(u);
349 vDest[i]= av_clip_uint8(v);
352 if (CONFIG_SWSCALE_ALPHA && aDest)
353 for (i=0; i<dstW; i++) {
354 int val= (alpSrc[i]+64)>>7;
355 aDest[i]= av_clip_uint8(val);
359 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
360 const int16_t **lumSrc, int lumFilterSize,
361 const int16_t *chrFilter, const int16_t **chrUSrc,
362 const int16_t **chrVSrc, int chrFilterSize,
363 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
364 uint8_t *vDest, uint8_t *aDest,
365 int dstW, int chrDstW)
367 enum PixelFormat dstFormat = c->dstFormat;
369 //FIXME Optimize (just quickly written not optimized..)
371 for (i=0; i<dstW; i++) {
374 for (j=0; j<lumFilterSize; j++)
375 val += lumSrc[j][i] * lumFilter[j];
377 dest[i]= av_clip_uint8(val>>19);
383 if (dstFormat == PIX_FMT_NV12)
384 for (i=0; i<chrDstW; i++) {
388 for (j=0; j<chrFilterSize; j++) {
389 u += chrUSrc[j][i] * chrFilter[j];
390 v += chrVSrc[j][i] * chrFilter[j];
393 uDest[2*i]= av_clip_uint8(u>>19);
394 uDest[2*i+1]= av_clip_uint8(v>>19);
397 for (i=0; i<chrDstW; i++) {
401 for (j=0; j<chrFilterSize; j++) {
402 u += chrUSrc[j][i] * chrFilter[j];
403 v += chrVSrc[j][i] * chrFilter[j];
406 uDest[2*i]= av_clip_uint8(v>>19);
407 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Store one 16-bit gray sample with the byte order selected by `target`. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
418 static av_always_inline void
419 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
420 const int16_t **lumSrc, int lumFilterSize,
421 const int16_t *chrFilter, const int16_t **chrUSrc,
422 const int16_t **chrVSrc, int chrFilterSize,
423 const int16_t **alpSrc, uint8_t *dest, int dstW,
424 int y, enum PixelFormat target)
428 for (i = 0; i < (dstW >> 1); i++) {
432 const int i2 = 2 * i;
434 for (j = 0; j < lumFilterSize; j++) {
435 Y1 += lumSrc[j][i2] * lumFilter[j];
436 Y2 += lumSrc[j][i2+1] * lumFilter[j];
440 if ((Y1 | Y2) & 0x10000) {
441 Y1 = av_clip_uint16(Y1);
442 Y2 = av_clip_uint16(Y2);
444 output_pixel(&dest[2 * i2 + 0], Y1);
445 output_pixel(&dest[2 * i2 + 2], Y2);
449 static av_always_inline void
450 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
451 const uint16_t *buf1, const uint16_t *ubuf0,
452 const uint16_t *ubuf1, const uint16_t *vbuf0,
453 const uint16_t *vbuf1, const uint16_t *abuf0,
454 const uint16_t *abuf1, uint8_t *dest, int dstW,
455 int yalpha, int uvalpha, int y,
456 enum PixelFormat target)
458 int yalpha1 = 4095 - yalpha; \
461 for (i = 0; i < (dstW >> 1); i++) {
462 const int i2 = 2 * i;
463 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
464 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
466 output_pixel(&dest[2 * i2 + 0], Y1);
467 output_pixel(&dest[2 * i2 + 2], Y2);
471 static av_always_inline void
472 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
473 const uint16_t *ubuf0, const uint16_t *ubuf1,
474 const uint16_t *vbuf0, const uint16_t *vbuf1,
475 const uint16_t *abuf0, uint8_t *dest, int dstW,
476 int uvalpha, enum PixelFormat dstFormat,
477 int flags, int y, enum PixelFormat target)
481 for (i = 0; i < (dstW >> 1); i++) {
482 const int i2 = 2 * i;
483 int Y1 = buf0[i2 ] << 1;
484 int Y2 = buf0[i2+1] << 1;
486 output_pixel(&dest[2 * i2 + 0], Y1);
487 output_pixel(&dest[2 * i2 + 2], Y2);
/**
 * Expand to the three packed-output entry points (_X_c: full vertical
 * filter, _2_c: two-line bilinear blend, _1_c: single line) for one
 * output format, each forwarding to the corresponding *_c_template with
 * `fmt` as a compile-time constant.
 */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
                                const uint16_t *buf1, const uint16_t *ubuf0, \
                                const uint16_t *ubuf1, const uint16_t *vbuf0, \
                                const uint16_t *vbuf1, const uint16_t *abuf0, \
                                const uint16_t *abuf1, uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
                                  vbuf0, vbuf1, abuf0, abuf1, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
                                const uint16_t *ubuf0, const uint16_t *ubuf1, \
                                const uint16_t *vbuf0, const uint16_t *vbuf1, \
                                const uint16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, enum PixelFormat dstFormat, \
                                int flags, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
                                  vbuf1, abuf0, dest, dstW, uvalpha, \
                                  dstFormat, flags, y, fmt); \
}
530 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
531 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* Store one byte of 8 packed 1-bit pixels; MONOWHITE uses the inverted
 * accumulator. (Release the previous output_pixel definition first.) */
#undef output_pixel
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
540 static av_always_inline void
541 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
542 const int16_t **lumSrc, int lumFilterSize,
543 const int16_t *chrFilter, const int16_t **chrUSrc,
544 const int16_t **chrVSrc, int chrFilterSize,
545 const int16_t **alpSrc, uint8_t *dest, int dstW,
546 int y, enum PixelFormat target)
548 const uint8_t * const d128=dither_8x8_220[y&7];
549 uint8_t *g = c->table_gU[128] + c->table_gV[128];
553 for (i = 0; i < dstW - 1; i += 2) {
558 for (j = 0; j < lumFilterSize; j++) {
559 Y1 += lumSrc[j][i] * lumFilter[j];
560 Y2 += lumSrc[j][i+1] * lumFilter[j];
564 if ((Y1 | Y2) & 0x100) {
565 Y1 = av_clip_uint8(Y1);
566 Y2 = av_clip_uint8(Y2);
568 acc += acc + g[Y1 + d128[(i + 0) & 7]];
569 acc += acc + g[Y2 + d128[(i + 1) & 7]];
571 output_pixel(*dest++, acc);
576 static av_always_inline void
577 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
578 const uint16_t *buf1, const uint16_t *ubuf0,
579 const uint16_t *ubuf1, const uint16_t *vbuf0,
580 const uint16_t *vbuf1, const uint16_t *abuf0,
581 const uint16_t *abuf1, uint8_t *dest, int dstW,
582 int yalpha, int uvalpha, int y,
583 enum PixelFormat target)
585 const uint8_t * const d128 = dither_8x8_220[y & 7];
586 uint8_t *g = c->table_gU[128] + c->table_gV[128];
587 int yalpha1 = 4095 - yalpha;
590 for (i = 0; i < dstW - 7; i += 8) {
591 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
592 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
593 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
594 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
595 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
596 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
597 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
598 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
599 output_pixel(*dest++, acc);
603 static av_always_inline void
604 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
605 const uint16_t *ubuf0, const uint16_t *ubuf1,
606 const uint16_t *vbuf0, const uint16_t *vbuf1,
607 const uint16_t *abuf0, uint8_t *dest, int dstW,
608 int uvalpha, enum PixelFormat dstFormat,
609 int flags, int y, enum PixelFormat target)
611 const uint8_t * const d128 = dither_8x8_220[y & 7];
612 uint8_t *g = c->table_gU[128] + c->table_gV[128];
615 for (i = 0; i < dstW - 7; i += 8) {
616 int acc = g[(buf0[i ] >> 7) + d128[0]];
617 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
618 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
619 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
620 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
621 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
622 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
623 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
624 output_pixel(*dest++, acc);
630 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
631 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store one 2-pixel group of packed 4:2:2: Y1 U Y2 V for YUYV,
 * U Y1 V Y2 for UYVY. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U; \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V; \
    } else { \
        dest[pos + 0] = U; \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V; \
        dest[pos + 3] = Y2; \
    }
646 static av_always_inline void
647 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
648 const int16_t **lumSrc, int lumFilterSize,
649 const int16_t *chrFilter, const int16_t **chrUSrc,
650 const int16_t **chrVSrc, int chrFilterSize,
651 const int16_t **alpSrc, uint8_t *dest, int dstW,
652 int y, enum PixelFormat target)
656 for (i = 0; i < (dstW >> 1); i++) {
663 for (j = 0; j < lumFilterSize; j++) {
664 Y1 += lumSrc[j][i * 2] * lumFilter[j];
665 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
667 for (j = 0; j < chrFilterSize; j++) {
668 U += chrUSrc[j][i] * chrFilter[j];
669 V += chrVSrc[j][i] * chrFilter[j];
675 if ((Y1 | Y2 | U | V) & 0x100) {
676 Y1 = av_clip_uint8(Y1);
677 Y2 = av_clip_uint8(Y2);
678 U = av_clip_uint8(U);
679 V = av_clip_uint8(V);
681 output_pixels(4*i, Y1, U, Y2, V);
685 static av_always_inline void
686 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
687 const uint16_t *buf1, const uint16_t *ubuf0,
688 const uint16_t *ubuf1, const uint16_t *vbuf0,
689 const uint16_t *vbuf1, const uint16_t *abuf0,
690 const uint16_t *abuf1, uint8_t *dest, int dstW,
691 int yalpha, int uvalpha, int y,
692 enum PixelFormat target)
694 int yalpha1 = 4095 - yalpha;
695 int uvalpha1 = 4095 - uvalpha;
698 for (i = 0; i < (dstW >> 1); i++) {
699 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
700 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
701 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
702 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
704 output_pixels(i * 4, Y1, U, Y2, V);
708 static av_always_inline void
709 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
710 const uint16_t *ubuf0, const uint16_t *ubuf1,
711 const uint16_t *vbuf0, const uint16_t *vbuf1,
712 const uint16_t *abuf0, uint8_t *dest, int dstW,
713 int uvalpha, enum PixelFormat dstFormat,
714 int flags, int y, enum PixelFormat target)
718 if (uvalpha < 2048) {
719 for (i = 0; i < (dstW >> 1); i++) {
720 int Y1 = buf0[i * 2] >> 7;
721 int Y2 = buf0[i * 2 + 1] >> 7;
722 int U = ubuf1[i] >> 7;
723 int V = vbuf1[i] >> 7;
725 output_pixels(i * 4, Y1, U, Y2, V);
728 for (i = 0; i < (dstW >> 1); i++) {
729 int Y1 = buf0[i * 2] >> 7;
730 int Y2 = buf0[i * 2 + 1] >> 7;
731 int U = (ubuf0[i] + ubuf1[i]) >> 8;
732 int V = (vbuf0[i] + vbuf1[i]) >> 8;
734 output_pixels(i * 4, Y1, U, Y2, V);
741 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
742 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Swap the red/blue lookup tables for BGR48 vs RGB48 output. */
#define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
#define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
747 static av_always_inline void
748 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
749 const int16_t **lumSrc, int lumFilterSize,
750 const int16_t *chrFilter, const int16_t **chrUSrc,
751 const int16_t **chrVSrc, int chrFilterSize,
752 const int16_t **alpSrc, uint8_t *dest, int dstW,
753 int y, enum PixelFormat target)
757 for (i = 0; i < (dstW >> 1); i++) {
763 const uint8_t *r, *g, *b;
765 for (j = 0; j < lumFilterSize; j++) {
766 Y1 += lumSrc[j][i * 2] * lumFilter[j];
767 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
769 for (j = 0; j < chrFilterSize; j++) {
770 U += chrUSrc[j][i] * chrFilter[j];
771 V += chrVSrc[j][i] * chrFilter[j];
777 if ((Y1 | Y2 | U | V) & 0x100) {
778 Y1 = av_clip_uint8(Y1);
779 Y2 = av_clip_uint8(Y2);
780 U = av_clip_uint8(U);
781 V = av_clip_uint8(V);
784 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
785 r = (const uint8_t *) c->table_rV[V];
786 g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
787 b = (const uint8_t *) c->table_bU[U];
789 dest[ 0] = dest[ 1] = r_b[Y1];
790 dest[ 2] = dest[ 3] = g[Y1];
791 dest[ 4] = dest[ 5] = b_r[Y1];
792 dest[ 6] = dest[ 7] = r_b[Y2];
793 dest[ 8] = dest[ 9] = g[Y2];
794 dest[10] = dest[11] = b_r[Y2];
799 static av_always_inline void
800 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
801 const uint16_t *buf1, const uint16_t *ubuf0,
802 const uint16_t *ubuf1, const uint16_t *vbuf0,
803 const uint16_t *vbuf1, const uint16_t *abuf0,
804 const uint16_t *abuf1, uint8_t *dest, int dstW,
805 int yalpha, int uvalpha, int y,
806 enum PixelFormat target)
808 int yalpha1 = 4095 - yalpha;
809 int uvalpha1 = 4095 - uvalpha;
812 for (i = 0; i < (dstW >> 1); i++) {
813 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
814 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
815 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
816 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
817 const uint8_t *r = (const uint8_t *) c->table_rV[V],
818 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
819 *b = (const uint8_t *) c->table_bU[U];
821 dest[ 0] = dest[ 1] = r_b[Y1];
822 dest[ 2] = dest[ 3] = g[Y1];
823 dest[ 4] = dest[ 5] = b_r[Y1];
824 dest[ 6] = dest[ 7] = r_b[Y2];
825 dest[ 8] = dest[ 9] = g[Y2];
826 dest[10] = dest[11] = b_r[Y2];
831 static av_always_inline void
832 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
833 const uint16_t *ubuf0, const uint16_t *ubuf1,
834 const uint16_t *vbuf0, const uint16_t *vbuf1,
835 const uint16_t *abuf0, uint8_t *dest, int dstW,
836 int uvalpha, enum PixelFormat dstFormat,
837 int flags, int y, enum PixelFormat target)
841 if (uvalpha < 2048) {
842 for (i = 0; i < (dstW >> 1); i++) {
843 int Y1 = buf0[i * 2] >> 7;
844 int Y2 = buf0[i * 2 + 1] >> 7;
845 int U = ubuf1[i] >> 7;
846 int V = vbuf1[i] >> 7;
847 const uint8_t *r = (const uint8_t *) c->table_rV[V],
848 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
849 *b = (const uint8_t *) c->table_bU[U];
851 dest[ 0] = dest[ 1] = r_b[Y1];
852 dest[ 2] = dest[ 3] = g[Y1];
853 dest[ 4] = dest[ 5] = b_r[Y1];
854 dest[ 6] = dest[ 7] = r_b[Y2];
855 dest[ 8] = dest[ 9] = g[Y2];
856 dest[10] = dest[11] = b_r[Y2];
860 for (i = 0; i < (dstW >> 1); i++) {
861 int Y1 = buf0[i * 2] >> 7;
862 int Y2 = buf0[i * 2 + 1] >> 7;
863 int U = (ubuf0[i] + ubuf1[i]) >> 8;
864 int V = (vbuf0[i] + vbuf1[i]) >> 8;
865 const uint8_t *r = (const uint8_t *) c->table_rV[V],
866 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
867 *b = (const uint8_t *) c->table_bU[U];
869 dest[ 0] = dest[ 1] = r_b[Y1];
870 dest[ 2] = dest[ 3] = g[Y1];
871 dest[ 4] = dest[ 5] = b_r[Y1];
872 dest[ 6] = dest[ 7] = r_b[Y2];
873 dest[ 8] = dest[ 9] = g[Y2];
874 dest[10] = dest[11] = b_r[Y2];
/* Release the rgb48-specific red/blue swap macros before yuv2rgb_write
 * redefines them, then instantiate the big-endian 48-bit outputs
 * (the little-endian variants are intentionally disabled). */
#undef r_b
#undef b_r

YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
//YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
//YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
888 static av_always_inline void
889 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
890 int U, int V, int A1, int A2,
891 const void *_r, const void *_g, const void *_b, int y,
892 enum PixelFormat target, int hasAlpha)
894 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
895 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
896 uint32_t *dest = (uint32_t *) _dest;
897 const uint32_t *r = (const uint32_t *) _r;
898 const uint32_t *g = (const uint32_t *) _g;
899 const uint32_t *b = (const uint32_t *) _b;
902 int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
904 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
905 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
908 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
910 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
911 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
913 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
914 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
917 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
918 uint8_t *dest = (uint8_t *) _dest;
919 const uint8_t *r = (const uint8_t *) _r;
920 const uint8_t *g = (const uint8_t *) _g;
921 const uint8_t *b = (const uint8_t *) _b;
923 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
924 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
925 dest[i * 6 + 0] = r_b[Y1];
926 dest[i * 6 + 1] = g[Y1];
927 dest[i * 6 + 2] = b_r[Y1];
928 dest[i * 6 + 3] = r_b[Y2];
929 dest[i * 6 + 4] = g[Y2];
930 dest[i * 6 + 5] = b_r[Y2];
933 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
934 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
935 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
936 uint16_t *dest = (uint16_t *) _dest;
937 const uint16_t *r = (const uint16_t *) _r;
938 const uint16_t *g = (const uint16_t *) _g;
939 const uint16_t *b = (const uint16_t *) _b;
940 int dr1, dg1, db1, dr2, dg2, db2;
942 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
943 dr1 = dither_2x2_8[ y & 1 ][0];
944 dg1 = dither_2x2_4[ y & 1 ][0];
945 db1 = dither_2x2_8[(y & 1) ^ 1][0];
946 dr2 = dither_2x2_8[ y & 1 ][1];
947 dg2 = dither_2x2_4[ y & 1 ][1];
948 db2 = dither_2x2_8[(y & 1) ^ 1][1];
949 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
950 dr1 = dither_2x2_8[ y & 1 ][0];
951 dg1 = dither_2x2_8[ y & 1 ][1];
952 db1 = dither_2x2_8[(y & 1) ^ 1][0];
953 dr2 = dither_2x2_8[ y & 1 ][1];
954 dg2 = dither_2x2_8[ y & 1 ][0];
955 db2 = dither_2x2_8[(y & 1) ^ 1][1];
957 dr1 = dither_4x4_16[ y & 3 ][0];
958 dg1 = dither_4x4_16[ y & 3 ][1];
959 db1 = dither_4x4_16[(y & 3) ^ 3][0];
960 dr2 = dither_4x4_16[ y & 3 ][1];
961 dg2 = dither_4x4_16[ y & 3 ][0];
962 db2 = dither_4x4_16[(y & 3) ^ 3][1];
965 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
966 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
967 } else /* 8/4-bit */ {
968 uint8_t *dest = (uint8_t *) _dest;
969 const uint8_t *r = (const uint8_t *) _r;
970 const uint8_t *g = (const uint8_t *) _g;
971 const uint8_t *b = (const uint8_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
974 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
975 const uint8_t * const d64 = dither_8x8_73[y & 7];
976 const uint8_t * const d32 = dither_8x8_32[y & 7];
977 dr1 = dg1 = d32[(i * 2 + 0) & 7];
978 db1 = d64[(i * 2 + 0) & 7];
979 dr2 = dg2 = d32[(i * 2 + 1) & 7];
980 db2 = d64[(i * 2 + 1) & 7];
982 const uint8_t * const d64 = dither_8x8_73 [y & 7];
983 const uint8_t * const d128 = dither_8x8_220[y & 7];
984 dr1 = db1 = d128[(i * 2 + 0) & 7];
985 dg1 = d64[(i * 2 + 0) & 7];
986 dr2 = db2 = d128[(i * 2 + 1) & 7];
987 dg2 = d64[(i * 2 + 1) & 7];
990 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
991 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
992 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
994 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
995 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1000 static av_always_inline void
1001 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1002 const int16_t **lumSrc, int lumFilterSize,
1003 const int16_t *chrFilter, const int16_t **chrUSrc,
1004 const int16_t **chrVSrc, int chrFilterSize,
1005 const int16_t **alpSrc, uint8_t *dest, int dstW,
1006 int y, enum PixelFormat target, int hasAlpha)
1010 for (i = 0; i < (dstW >> 1); i++) {
1016 int av_unused A1, A2;
1017 const void *r, *g, *b;
1019 for (j = 0; j < lumFilterSize; j++) {
1020 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1021 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1023 for (j = 0; j < chrFilterSize; j++) {
1024 U += chrUSrc[j][i] * chrFilter[j];
1025 V += chrVSrc[j][i] * chrFilter[j];
1031 if ((Y1 | Y2 | U | V) & 0x100) {
1032 Y1 = av_clip_uint8(Y1);
1033 Y2 = av_clip_uint8(Y2);
1034 U = av_clip_uint8(U);
1035 V = av_clip_uint8(V);
1040 for (j = 0; j < lumFilterSize; j++) {
1041 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1042 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1046 if ((A1 | A2) & 0x100) {
1047 A1 = av_clip_uint8(A1);
1048 A2 = av_clip_uint8(A2);
1052 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1054 g = (c->table_gU[U] + c->table_gV[V]);
1057 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1058 r, g, b, y, target, hasAlpha);
1062 static av_always_inline void
1063 yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0,
1064 const uint16_t *buf1, const uint16_t *ubuf0,
1065 const uint16_t *ubuf1, const uint16_t *vbuf0,
1066 const uint16_t *vbuf1, const uint16_t *abuf0,
1067 const uint16_t *abuf1, uint8_t *dest, int dstW,
1068 int yalpha, int uvalpha, int y,
1069 enum PixelFormat target, int hasAlpha)
1071 int yalpha1 = 4095 - yalpha;
1072 int uvalpha1 = 4095 - uvalpha;
1075 for (i = 0; i < (dstW >> 1); i++) {
1076 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1077 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1078 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1079 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1081 const void *r = c->table_rV[V],
1082 *g = (c->table_gU[U] + c->table_gV[V]),
1083 *b = c->table_bU[U];
1086 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1087 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1090 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1091 r, g, b, y, target, hasAlpha);
/* Render one packed-RGB output line from a single (unblended) input line.
 * Two chroma paths: when uvalpha < 2048 only one chroma line (ubuf1/vbuf1)
 * is used; otherwise the two chroma lines are averaged.
 * NOTE(review): elided lines include the trailing "int hasAlpha" parameter,
 * braces, and declarations of i/A1/A2. */
1095 static av_always_inline void
1096 yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0,
1097                      const uint16_t *ubuf0, const uint16_t *ubuf1,
1098                      const uint16_t *vbuf0, const uint16_t *vbuf1,
1099                      const uint16_t *abuf0, uint8_t *dest, int dstW,
1100                      int uvalpha, enum PixelFormat dstFormat,
1101                      int flags, int y, enum PixelFormat target,
/* chroma weight below midpoint: take the second chroma buffer only */
1106 if (uvalpha < 2048) {
1107 for (i = 0; i < (dstW >> 1); i++) {
/* inputs are 15-bit; >>7 brings them to 8-bit table indices */
1108 int Y1 = buf0[i * 2] >> 7;
1109 int Y2 = buf0[i * 2 + 1] >> 7;
1110 int U = ubuf1[i] >> 7;
1111 int V = vbuf1[i] >> 7;
1113 const void *r = c->table_rV[V],
1114 *g = (c->table_gU[U] + c->table_gV[V]),
1115 *b = c->table_bU[U];
1118 A1 = abuf0[i * 2 ] >> 7;
1119 A2 = abuf0[i * 2 + 1] >> 7;
1122 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1123 r, g, b, y, target, hasAlpha);
/* else-branch: average the two chroma lines ((a+b)>>8 == avg of >>7 values) */
1126 for (i = 0; i < (dstW >> 1); i++) {
1127 int Y1 = buf0[i * 2] >> 7;
1128 int Y2 = buf0[i * 2 + 1] >> 7;
1129 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1130 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1132 const void *r = c->table_rV[V],
1133 *g = (c->table_gU[U] + c->table_gV[V]),
1134 *b = c->table_bU[U];
1137 A1 = abuf0[i * 2 ] >> 7;
1138 A2 = abuf0[i * 2 + 1] >> 7;
1141 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1142 r, g, b, y, target, hasAlpha);
/* Macro factories that stamp out the three output entry points for a pixel
 * format: _X_c (multi-tap vertical filter), _2_c (two-line bilinear blend)
 * and _1_c (single line).  YUV2RGBWRAPPERX generates only the _X_c variant;
 * YUV2RGBWRAPPER generates all three.  Each wrapper forwards to the
 * corresponding *_c_template with the format and hasAlpha baked in.
 * NOTE(review): some continuation lines (e.g. "int y" parameters, braces)
 * are elided in this listing. */
1147 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1148 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1149                                 const int16_t **lumSrc, int lumFilterSize, \
1150                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
1151                                 const int16_t **chrVSrc, int chrFilterSize, \
1152                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1155     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1156                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1157                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
1159 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1160 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1161 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
1162                                 const uint16_t *buf1, const uint16_t *ubuf0, \
1163                                 const uint16_t *ubuf1, const uint16_t *vbuf0, \
1164                                 const uint16_t *vbuf1, const uint16_t *abuf0, \
1165                                 const uint16_t *abuf1, uint8_t *dest, int dstW, \
1166                                 int yalpha, int uvalpha, int y) \
1168     name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
1169                                   vbuf0, vbuf1, abuf0, abuf1, \
1170                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1173 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
1174                                 const uint16_t *ubuf0, const uint16_t *ubuf1, \
1175                                 const uint16_t *vbuf0, const uint16_t *vbuf1, \
1176                                 const uint16_t *abuf0, uint8_t *dest, int dstW, \
1177                                 int uvalpha, enum PixelFormat dstFormat, \
1180     name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
1181                                   vbuf1, abuf0, dest, dstW, uvalpha, \
1182                                   dstFormat, flags, y, fmt, hasAlpha); \
/* Instantiations of the output wrappers for every supported packed RGB
 * destination.  The 32-bit variants come in runtime-alpha (hasAlpha decided
 * by c->alpPixBuf), forced-alpha (a32*, 1) and no-alpha (x32*, 0) flavors;
 * the latter two are guarded by CONFIG_SWSCALE_ALPHA in the original file. */
1186 YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1187 YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1189 #if CONFIG_SWSCALE_ALPHA
1190 YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1);
1191 YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1);
1193 YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0);
1194 YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0);
1196 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
1197 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
1198 YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0);
1199 YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0);
1200 YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0);
1201 YUV2RGBWRAPPER(yuv2rgb,,  8,     PIX_FMT_RGB8,      0);
1202 YUV2RGBWRAPPER(yuv2rgb,,  4,     PIX_FMT_RGB4,      0);
1203 YUV2RGBWRAPPER(yuv2rgb,,  4b,    PIX_FMT_RGB4_BYTE, 0);
/* Full-horizontal-chroma variant: one chroma sample per output pixel
 * (no 2x subsampling).  Applies the vertical filters per pixel, converts
 * via the per-context coefficients (c->yuv2rgb_*_coeff) rather than lookup
 * tables, clips to 30-bit range, and stores 3 or 4 bytes per pixel
 * depending on target.
 * NOTE(review): elided lines include accumulator initializations, the
 * hasAlpha guard, the R/G/B >>-and-store statements, and the dest += step
 * advance. */
1205 static av_always_inline void
1206 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1207                           const int16_t **lumSrc, int lumFilterSize,
1208                           const int16_t *chrFilter, const int16_t **chrUSrc,
1209                           const int16_t **chrVSrc, int chrFilterSize,
1210                           const int16_t **alpSrc, uint8_t *dest,
1211                           int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 24-bit formats pack 3 bytes/pixel, the 32-bit ones 4 */
1214 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1216 for (i = 0; i < dstW; i++) {
1224 for (j = 0; j < lumFilterSize; j++) {
1225 Y += lumSrc[j][i] * lumFilter[j];
1227 for (j = 0; j < chrFilterSize; j++) {
1228 U += chrUSrc[j][i] * chrFilter[j];
1229 V += chrVSrc[j][i] * chrFilter[j];
/* alpha uses the luma filter taps */
1236 for (j = 0; j < lumFilterSize; j++) {
1237 A += alpSrc[j][i] * lumFilter[j];
1241 A = av_clip_uint8(A);
/* fixed-point YUV -> RGB using the context's conversion coefficients */
1243 Y -= c->yuv2rgb_y_offset;
1244 Y *= c->yuv2rgb_y_coeff;
1246 R = Y + V*c->yuv2rgb_v2r_coeff;
1247 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1248 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when some component overflowed the 30-bit working range */
1249 if ((R | G | B) & 0xC0000000) {
1250 R = av_clip_uintp2(R, 30);
1251 G = av_clip_uintp2(G, 30);
1252 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on target (leading for ARGB/ABGR,
 * trailing for RGBA/BGRA); opaque 255 when no alpha plane */
1257 dest[0] = hasAlpha ? A : 255;
1271 dest[3] = hasAlpha ? A : 255;
1274 dest[0] = hasAlpha ? A : 255;
1289 dest[3] = hasAlpha ? A : 255;
/* Full-chroma wrapper instantiations (only the _X_c variant exists for
 * these): runtime-alpha, forced-alpha, no-alpha, and 24-bit formats. */
1297 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1298 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1299 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1300 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1302 #if CONFIG_SWSCALE_ALPHA
1303 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1);
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1);
1305 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1);
1306 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1);
1308 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0);
1309 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0);
1310 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0);
1311 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0);
1313 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
1314 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
/* Fill a rectangular region of a plane with a constant byte value,
 * starting at row y (parameters y/val are on lines elided from this
 * listing; the ptr += stride advance is likewise elided). */
1316 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1317                                        int width, int height,
1321 uint8_t *ptr = plane + stride*y;
1322 for (i=0; i<height; i++) {
1323 memset(ptr, val, width);
/* 48-bit RGB (16 bits/component) input readers.  input_pixel() reads a
 * 16-bit component honoring the format's endianness; the r/b macros swap
 * the r_b/b_r temporaries for BGR48 so the same arithmetic serves both
 * component orders. */
1328 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1330 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1331 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Luma: take the high byte of each 16-bit component and apply the
 * RY/GY/BY fixed-point weights with rounding. */
1333 static av_always_inline void
1334 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
1335                     enum PixelFormat origin)
1338 for (i = 0; i < width; i++) {
1339 int r_b = input_pixel(&src[i*6+0]) >> 8;
1340 int g   = input_pixel(&src[i*6+2]) >> 8;
1341 int b_r = input_pixel(&src[i*6+4]) >> 8;
1343 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma, one output sample per input pixel (no horizontal averaging). */
1347 static av_always_inline void
1348 rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1349                      const uint8_t *src1, const uint8_t *src2,
1350                      int width, enum PixelFormat origin)
1354 for (i = 0; i < width; i++) {
1355 int r_b = input_pixel(&src1[i*6+0]) >> 8;
1356 int g   = input_pixel(&src1[i*6+2]) >> 8;
1357 int b_r = input_pixel(&src1[i*6+4]) >> 8;
1359 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1360 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma with 2:1 horizontal averaging: each output sample sums two
 * adjacent input pixels, hence the wider rounding term and the extra
 * +1 in the final shift. */
1364 static av_always_inline void
1365 rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1366                           const uint8_t *src1, const uint8_t *src2,
1367                           int width, enum PixelFormat origin)
1371 for (i = 0; i < width; i++) {
1372 int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
1373 int g   = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
1374 int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
1376 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1377 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Stamp out the three per-format 48-bit reader entry points (ToY, ToUV,
 * ToUV_half) for a given pattern (rgb/bgr) and endianness, each simply
 * forwarding to the template with the concrete origin format. */
1385 #define rgb48funcs(pattern, BE_LE, origin) \
1386 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
1387                                             int width, uint32_t *unused) \
1389     rgb48ToY_c_template(dst, src, width, origin); \
1392 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1393                                              const uint8_t *src1, const uint8_t *src2, \
1394                                              int width, uint32_t *unused) \
1396     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1399 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1400                                                   const uint8_t *src1, const uint8_t *src2, \
1401                                                   int width, uint32_t *unused) \
1403     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1406 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1407 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1408 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1409 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Generic 16/32-bit packed RGB -> luma reader.  input_pixel() loads an
 * aligned 32-bit word for the 4-byte formats and a (possibly byteswapped)
 * 16-bit word otherwise.  shr/shg/shb/shp and the masks describe the
 * component layout; rsh/gsh/bsh scale the RY/GY/BY weights so one template
 * handles all depths, with S the final normalization shift. */
1411 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1412                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1413                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1415 static av_always_inline void
1416 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1417                        int width, enum PixelFormat origin,
1418                        int shr, int shg, int shb, int shp,
1419                        int maskr, int maskg, int maskb,
1420                        int rsh, int gsh, int bsh, int S)
/* pre-scaled weights and rounding constant (33 matches the other ToY paths) */
1422 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1423           rnd = 33 << (S - 1);
1426 for (i = 0; i < width; i++) {
1427 int px = input_pixel(i) >> shp;
1428 int b = (px & maskb) >> shb;
1429 int g = (px & maskg) >> shg;
1430 int r = (px & maskr) >> shr;
1432 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> chroma reader, one U/V sample per input
 * pixel.  Same layout-parameterization scheme as rgb16_32ToY_c_template;
 * rounding constant 257 matches the other ToUV paths. */
1436 static av_always_inline void
1437 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1438                         const uint8_t *src, int width,
1439                         enum PixelFormat origin,
1440                         int shr, int shg, int shb, int shp,
1441                         int maskr, int maskg, int maskb,
1442                         int rsh, int gsh, int bsh, int S)
1444 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1445           rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1446           rnd = 257 << (S - 1);
1449 for (i = 0; i < width; i++) {
1450 int px = input_pixel(i) >> shp;
1451 int b = (px & maskb) >> shb;
1452 int g = (px & maskg) >> shg;
1453 int r = (px & maskr) >> shr;
1455 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1456 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic 16/32-bit packed RGB -> chroma reader with 2:1 horizontal
 * averaging.  The trick: adjacent pixels are summed as raw words; the
 * green field is summed separately through maskgx (~(maskr|maskb)) so the
 * per-component sums can be separated with the widened masks afterwards.
 * The final shift is S+1 to divide the two-pixel sum back down.
 * NOTE(review): the branch at 1481 adjusts green extraction for 565-style
 * layouts; its alternate path and some surrounding lines are elided here. */
1460 static av_always_inline void
1461 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1462                              const uint8_t *src, int width,
1463                              enum PixelFormat origin,
1464                              int shr, int shg, int shb, int shp,
1465                              int maskr, int maskg, int maskb,
1466                              int rsh, int gsh, int bsh, int S)
1468 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1469           rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1470           rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen masks by one bit so they can hold the sum of two pixels' fields */
1473 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1474 for (i = 0; i < width; i++) {
1475 int px0 = input_pixel(2 * i + 0) >> shp;
1476 int px1 = input_pixel(2 * i + 1) >> shp;
1477 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1478 int rb = px0 + px1 - g;
1480 b = (rb & maskb) >> shb;
1481 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1482     origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1485 g = (g & maskg) >> shg;
1487 r = (rb & maskr) >> shr;
1489 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1490 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Stamp out ToY / ToUV / ToUV_half entry points for one packed RGB layout,
 * then instantiate for all supported 15/16/32-bit formats.  The mask and
 * shift arguments encode each format's bit layout; note 555 formats use
 * RGB2YUV_SHIFT+7 (5-bit fields) vs +8 for 565 and 8-bit-per-component. */
1496 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1497                          maskg, maskb, rsh, gsh, bsh, S) \
1498 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1499                           int width, uint32_t *unused) \
1501     rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1502                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
1505 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1506                            const uint8_t *src, const uint8_t *dummy, \
1507                            int width, uint32_t *unused) \
1509     rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1510                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
1513 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1514                                 const uint8_t *src, const uint8_t *dummy, \
1515                                 int width, uint32_t *unused) \
1517     rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1518                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
1521 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1522 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1523 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1524 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1525 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1526 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1527 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1528 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
1529 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1530 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1531 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1532 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* Small per-pixel input readers: alpha extraction, palette lookup, and
 * 1-bit monochrome expansion.
 * NOTE(review): the loop bodies of abgrToA_c/rgbaToA_c and several
 * declaration/extraction lines (e.g. the "int d = src[i]" reads, the
 * palette U/V unpacking) are elided from this listing — the mono* pair
 * differ only in how d is derived from src[i], which is not visible here. */
1534 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1537 for (i=0; i<width; i++) {
1542 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1545 for (i=0; i<width; i++) {
/* PAL8 luma: low byte of the palette entry */
1550 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1553 for (i=0; i<width; i++) {
1556 dst[i]= pal[d] & 0xFF;
1560 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1561                       const uint8_t *src1, const uint8_t *src2,
1562                       int width, uint32_t *pal)
/* both plane pointers must refer to the same palette-indexed data */
1565 assert(src1 == src2);
1566 for (i=0; i<width; i++) {
1567 int p= pal[src1[i]];
/* expand each bit of the input byte into a full 0/255 output sample */
1574 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1575                           int width, uint32_t *unused)
1578 for (i=0; i<width/8; i++) {
1581 dst[8*i+j]= ((d>>(7-j))&1)*255;
1585 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1586                           int width, uint32_t *unused)
1589 for (i=0; i<width/8; i++) {
1592 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* Packed-YUV and NV12/NV21 input de-interleavers.  YUY2 stores Y0 U Y1 V,
 * UYVY stores U Y0 V Y1; LEToUV_c/BEToUV_c pick one byte of 16-bit chroma
 * samples; nvXXtoUV_c splits interleaved UV (swapped for NV21). */
1596 //FIXME yuy2* can read up to 7 samples too much
1598 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1602 for (i=0; i<width; i++)
1606 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1607                        const uint8_t *src2, int width, uint32_t *unused)
1610 for (i=0; i<width; i++) {
1611 dstU[i]= src1[4*i + 1];
1612 dstV[i]= src1[4*i + 3];
1614 assert(src1 == src2);
/* little-endian 16-bit samples: high byte is at odd offsets */
1617 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1618                      const uint8_t *src2, int width, uint32_t *unused)
1621 for (i=0; i<width; i++) {
1622 dstU[i]= src1[2*i + 1];
1623 dstV[i]= src2[2*i + 1];
1627 /* This is almost identical to the previous, end exists only because
1628  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1629 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1633 for (i=0; i<width; i++)
1637 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1638                        const uint8_t *src2, int width, uint32_t *unused)
1641 for (i=0; i<width; i++) {
1642 dstU[i]= src1[4*i + 0];
1643 dstV[i]= src1[4*i + 2];
1645 assert(src1 == src2);
/* big-endian counterpart of LEToUV_c (loop body elided in this listing) */
1648 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1649                      const uint8_t *src2, int width, uint32_t *unused)
1652 for (i=0; i<width; i++) {
/* split interleaved chroma; callers choose dst1/dst2 order for NV12 vs NV21 */
1658 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1659                                         const uint8_t *src, int width)
1662 for (i = 0; i < width; i++) {
1663 dst1[i] = src[2*i+0];
1664 dst2[i] = src[2*i+1];
1668 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1669                        const uint8_t *src1, const uint8_t *src2,
1670                        int width, uint32_t *unused)
1672 nvXXtoUV_c(dstU, dstV, src1, width);
1675 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1676                        const uint8_t *src1, const uint8_t *src2,
1677                        int width, uint32_t *unused)
/* NV21 has V first, so swap the destination planes */
1679 nvXXtoUV_c(dstV, dstU, src1, width);
/* 9/10-bit planar YUV -> 8-bit readers: load each 16-bit sample with the
 * format's endianness and truncate by (depth - 8) bits.  YUV_NBPS stamps
 * out the per-depth/per-endianness entry points. */
1682 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1684 // FIXME Maybe dither instead.
1685 static av_always_inline void
1686 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1687                           const uint8_t *_srcU, const uint8_t *_srcV,
1688                           int width, enum PixelFormat origin, int depth)
1691 const uint16_t *srcU = (const uint16_t *) _srcU;
1692 const uint16_t *srcV = (const uint16_t *) _srcV;
1694 for (i = 0; i < width; i++) {
1695 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1696 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1700 static av_always_inline void
1701 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1702                          int width, enum PixelFormat origin, int depth)
1705 const uint16_t *srcY = (const uint16_t*)_srcY;
1707 for (i = 0; i < width; i++)
1708 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
1713 #define YUV_NBPS(depth, BE_LE, origin) \
1714 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1715                                      const uint8_t *srcU, const uint8_t *srcV, \
1716                                      int width, uint32_t *unused) \
1718     yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1720 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1721                                     int width, uint32_t *unused) \
1723     yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1726 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1727 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1728 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1729 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
/* 24-bit RGB/BGR input readers.  The bgr24* and rgb24* pairs differ only
 * in component order at the byte offsets; *_half_c variants average two
 * horizontally adjacent pixels (summed components, wider rounding, +1 on
 * the final shift).
 * NOTE(review): the component reads of bgr24ToY_c/rgb24ToY_c and some
 * braces/declarations are elided from this listing. */
1731 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1732                        int width, uint32_t *unused)
1735 for (i=0; i<width; i++) {
1740 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1744 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1745                         const uint8_t *src2, int width, uint32_t *unused)
1748 for (i=0; i<width; i++) {
/* BGR byte order: blue first */
1749 int b= src1[3*i + 0];
1750 int g= src1[3*i + 1];
1751 int r= src1[3*i + 2];
1753 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1754 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1756 assert(src1 == src2);
1759 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1760                              const uint8_t *src2, int width, uint32_t *unused)
1763 for (i=0; i<width; i++) {
1764 int b= src1[6*i + 0] + src1[6*i + 3];
1765 int g= src1[6*i + 1] + src1[6*i + 4];
1766 int r= src1[6*i + 2] + src1[6*i + 5];
1768 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1769 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1771 assert(src1 == src2);
1774 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1778 for (i=0; i<width; i++) {
1783 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1787 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1788                         const uint8_t *src2, int width, uint32_t *unused)
1792 for (i=0; i<width; i++) {
/* RGB byte order: red first */
1793 int r= src1[3*i + 0];
1794 int g= src1[3*i + 1];
1795 int b= src1[3*i + 2];
1797 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1798 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1802 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1803                              const uint8_t *src2, int width, uint32_t *unused)
1807 for (i=0; i<width; i++) {
1808 int r= src1[6*i + 0] + src1[6*i + 3];
1809 int g= src1[6*i + 1] + src1[6*i + 4];
1810 int b= src1[6*i + 2] + src1[6*i + 5];
1812 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1813 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal FIR scaler and JPEG<->MPEG range converters. */
1817 // bilinear / bicubic scaling
/* hScale_c: for each output sample, convolve filterSize input samples
 * starting at filterPos[i] with the matching filter row, then clip the
 * >>7 result to the int16 range (filterSize parameter line is elided). */
1818 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1819                      const int16_t *filter, const int16_t *filterPos,
1823 for (i=0; i<dstW; i++) {
1825 int srcPos= filterPos[i];
1827 for (j=0; j<filterSize; j++) {
1828 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1830 //filter += hFilterSize;
1831 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1836 //FIXME all pal and rgb srcFormats could do this convertion as well
1837 //FIXME all scalers more complex than bilinear could do half of this transform
/* fixed-point affine remap of limited-range chroma to full (JPEG) range;
 * the FFMIN caps the input to avoid overflowing the multiply */
1838 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1841 for (i = 0; i < width; i++) {
1842 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1843 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* inverse mapping: full range back to limited range */
1846 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1849 for (i = 0; i < width; i++) {
1850 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1851 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1854 static void lumRangeToJpeg_c(int16_t *dst, int width)
1857 for (i = 0; i < width; i++)
1858 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1860 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1863 for (i = 0; i < width; i++)
1864 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Horizontal scaling drivers for luma (hyscale) and chroma (hcscale),
 * plus their fast-bilinear fallbacks. */
/* hyscale_fast_c: 16.16 fixed-point bilinear resample of one luma line;
 * xalpha is the 7-bit fractional weight between src[xx] and src[xx+1]
 * (the xpos += xInc advance is elided in this listing). */
1867 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1868                            const uint8_t *src, int srcW, int xInc)
1871 unsigned int xpos=0;
1872 for (i=0;i<dstWidth;i++) {
1873 register unsigned int xx=xpos>>16;
1874 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1875 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1880 // *** horizontal scale Y line to temp buffer
/* hyscale: convert the input line to 8-bit Y (toYV12) if needed, scale it
 * with either the generic FIR hScale or the fast bilinear path, then apply
 * the optional luma range conversion.  Alpha lines reuse this function
 * with isAlpha set (no range conversion for alpha). */
1881 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1882                                      const uint8_t *src, int srcW, int xInc,
1883                                      const int16_t *hLumFilter,
1884                                      const int16_t *hLumFilterPos, int hLumFilterSize,
1885                                      uint8_t *formatConvBuffer,
1886                                      uint32_t *pal, int isAlpha)
1888 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1889 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1892 toYV12(formatConvBuffer, src, srcW, pal);
1893 src= formatConvBuffer;
1896 if (!c->hyscale_fast) {
1897 c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1898 } else { // fast bilinear upscale / crap downscale
1899 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1903 convertRange(dst, dstWidth);
/* hcscale_fast_c: same bilinear scheme for both chroma planes at once;
 * (xalpha^127) is the complementary weight of the 7-bit xalpha */
1906 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1907                            int dstWidth, const uint8_t *src1,
1908                            const uint8_t *src2, int srcW, int xInc)
1911 unsigned int xpos=0;
1912 for (i=0;i<dstWidth;i++) {
1913 register unsigned int xx=xpos>>16;
1914 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1915 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1916 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* hcscale: chroma counterpart of hyscale; converts both chroma lines into
 * formatConvBuffer/buf2 when a chrToYV12 reader is set, then scales and
 * optionally range-converts them */
1921 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1922                                      const uint8_t *src1, const uint8_t *src2,
1923                                      int srcW, int xInc, const int16_t *hChrFilter,
1924                                      const int16_t *hChrFilterPos, int hChrFilterSize,
1925                                      uint8_t *formatConvBuffer, uint32_t *pal)
1928 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1929 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1930 src1= formatConvBuffer;
1934 if (!c->hcscale_fast) {
1935 c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1936 c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1937 } else { // fast bilinear upscale / crap downscale
1938 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1941 if (c->chrConvertRange)
1942 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions (planar yuv2yuv1/X and packed
 * yuv2packed1/2/X) for the context's destination format.  Full-chroma
 * interpolation (SWS_FULL_CHR_H_INT) uses the *_full_X_c writers; the
 * default path dispatches on dstFormat through the big switch below.
 * For RGB32 variants the choice between alpha/no-alpha writers depends on
 * CONFIG_SWSCALE_ALPHA and c->alpPixBuf (the if/else lines making that
 * choice are elided in this listing, as are most case labels and breaks). */
1945 static av_always_inline void
1946 find_c_packed_planar_out_funcs(SwsContext *c,
1947                                yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1948                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1949                                yuv2packedX_fn *yuv2packedX)
1951 enum PixelFormat dstFormat = c->dstFormat;
/* planar outputs: NV12/21, 16-bit, 9/10-bit, then the generic 8-bit path */
1953 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1954 *yuv2yuvX     = yuv2nv12X_c;
1955 } else if (is16BPS(dstFormat)) {
1956 *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
1957 } else if (is9_OR_10BPS(dstFormat)) {
1958 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
1959 *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
1961 *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1964 *yuv2yuv1     = yuv2yuv1_c;
1965 *yuv2yuvX     = yuv2yuvX_c;
/* full horizontal chroma: only the _X_c packed writers exist */
1967 if(c->flags & SWS_FULL_CHR_H_INT) {
1968 switch (dstFormat) {
1971 *yuv2packedX = yuv2rgba32_full_X_c;
1973 #if CONFIG_SWSCALE_ALPHA
1975 *yuv2packedX = yuv2rgba32_full_X_c;
1977 #endif /* CONFIG_SWSCALE_ALPHA */
1979 *yuv2packedX = yuv2rgbx32_full_X_c;
1981 #endif /* !CONFIG_SMALL */
1985 *yuv2packedX = yuv2argb32_full_X_c;
1987 #if CONFIG_SWSCALE_ALPHA
1989 *yuv2packedX = yuv2argb32_full_X_c;
1991 #endif /* CONFIG_SWSCALE_ALPHA */
1993 *yuv2packedX = yuv2xrgb32_full_X_c;
1995 #endif /* !CONFIG_SMALL */
1999 *yuv2packedX = yuv2bgra32_full_X_c;
2001 #if CONFIG_SWSCALE_ALPHA
2003 *yuv2packedX = yuv2bgra32_full_X_c;
2005 #endif /* CONFIG_SWSCALE_ALPHA */
2007 *yuv2packedX = yuv2bgrx32_full_X_c;
2009 #endif /* !CONFIG_SMALL */
2013 *yuv2packedX = yuv2abgr32_full_X_c;
2015 #if CONFIG_SWSCALE_ALPHA
2017 *yuv2packedX = yuv2abgr32_full_X_c;
2019 #endif /* CONFIG_SWSCALE_ALPHA */
2021 *yuv2packedX = yuv2xbgr32_full_X_c;
2023 #endif /* !CONFIG_SMALL */
2026 *yuv2packedX = yuv2rgb24_full_X_c;
2029 *yuv2packedX = yuv2bgr24_full_X_c;
/* standard (2x-subsampled-chroma) packed writers */
2033 switch (dstFormat) {
2034 case PIX_FMT_GRAY16BE:
2035 *yuv2packed1 = yuv2gray16BE_1_c;
2036 *yuv2packed2 = yuv2gray16BE_2_c;
2037 *yuv2packedX = yuv2gray16BE_X_c;
2039 case PIX_FMT_GRAY16LE:
2040 *yuv2packed1 = yuv2gray16LE_1_c;
2041 *yuv2packed2 = yuv2gray16LE_2_c;
2042 *yuv2packedX = yuv2gray16LE_X_c;
2044 case PIX_FMT_MONOWHITE:
2045 *yuv2packed1 = yuv2monowhite_1_c;
2046 *yuv2packed2 = yuv2monowhite_2_c;
2047 *yuv2packedX = yuv2monowhite_X_c;
2049 case PIX_FMT_MONOBLACK:
2050 *yuv2packed1 = yuv2monoblack_1_c;
2051 *yuv2packed2 = yuv2monoblack_2_c;
2052 *yuv2packedX = yuv2monoblack_X_c;
2054 case PIX_FMT_YUYV422:
2055 *yuv2packed1 = yuv2yuyv422_1_c;
2056 *yuv2packed2 = yuv2yuyv422_2_c;
2057 *yuv2packedX = yuv2yuyv422_X_c;
2059 case PIX_FMT_UYVY422:
2060 *yuv2packed1 = yuv2uyvy422_1_c;
2061 *yuv2packed2 = yuv2uyvy422_2_c;
2062 *yuv2packedX = yuv2uyvy422_X_c;
/* 48-bit little-endian writers are disabled (commented out upstream) */
2064 case PIX_FMT_RGB48LE:
2065 //*yuv2packed1 = yuv2rgb48le_1_c;
2066 //*yuv2packed2 = yuv2rgb48le_2_c;
2067 //*yuv2packedX = yuv2rgb48le_X_c;
2069 case PIX_FMT_RGB48BE:
2070 *yuv2packed1 = yuv2rgb48be_1_c;
2071 *yuv2packed2 = yuv2rgb48be_2_c;
2072 *yuv2packedX = yuv2rgb48be_X_c;
2074 case PIX_FMT_BGR48LE:
2075 //*yuv2packed1 = yuv2bgr48le_1_c;
2076 //*yuv2packed2 = yuv2bgr48le_2_c;
2077 //*yuv2packedX = yuv2bgr48le_X_c;
2079 case PIX_FMT_BGR48BE:
2080 *yuv2packed1 = yuv2bgr48be_1_c;
2081 *yuv2packed2 = yuv2bgr48be_2_c;
2082 *yuv2packedX = yuv2bgr48be_X_c;
/* RGB32/BGR32: runtime alpha decision, then alpha/no-alpha variants */
2087 *yuv2packed1 = yuv2rgb32_1_c;
2088 *yuv2packed2 = yuv2rgb32_2_c;
2089 *yuv2packedX = yuv2rgb32_X_c;
2091 #if CONFIG_SWSCALE_ALPHA
2093 *yuv2packed1 = yuv2rgba32_1_c;
2094 *yuv2packed2 = yuv2rgba32_2_c;
2095 *yuv2packedX = yuv2rgba32_X_c;
2097 #endif /* CONFIG_SWSCALE_ALPHA */
2099 *yuv2packed1 = yuv2rgbx32_1_c;
2100 *yuv2packed2 = yuv2rgbx32_2_c;
2101 *yuv2packedX = yuv2rgbx32_X_c;
2103 #endif /* !CONFIG_SMALL */
2105 case PIX_FMT_RGB32_1:
2106 case PIX_FMT_BGR32_1:
2108 *yuv2packed1 = yuv2rgb32_1_1_c;
2109 *yuv2packed2 = yuv2rgb32_1_2_c;
2110 *yuv2packedX = yuv2rgb32_1_X_c;
2112 #if CONFIG_SWSCALE_ALPHA
2114 *yuv2packed1 = yuv2rgba32_1_1_c;
2115 *yuv2packed2 = yuv2rgba32_1_2_c;
2116 *yuv2packedX = yuv2rgba32_1_X_c;
2118 #endif /* CONFIG_SWSCALE_ALPHA */
2120 *yuv2packed1 = yuv2rgbx32_1_1_c;
2121 *yuv2packed2 = yuv2rgbx32_1_2_c;
2122 *yuv2packedX = yuv2rgbx32_1_X_c;
2124 #endif /* !CONFIG_SMALL */
2127 *yuv2packed1 = yuv2rgb24_1_c;
2128 *yuv2packed2 = yuv2rgb24_2_c;
2129 *yuv2packedX = yuv2rgb24_X_c;
2132 *yuv2packed1 = yuv2bgr24_1_c;
2133 *yuv2packed2 = yuv2bgr24_2_c;
2134 *yuv2packedX = yuv2bgr24_X_c;
2136 case PIX_FMT_RGB565:
2137 case PIX_FMT_BGR565:
2138 *yuv2packed1 = yuv2rgb16_1_c;
2139 *yuv2packed2 = yuv2rgb16_2_c;
2140 *yuv2packedX = yuv2rgb16_X_c;
2142 case PIX_FMT_RGB555:
2143 case PIX_FMT_BGR555:
2144 *yuv2packed1 = yuv2rgb15_1_c;
2145 *yuv2packed2 = yuv2rgb15_2_c;
2146 *yuv2packedX = yuv2rgb15_X_c;
2148 case PIX_FMT_RGB444:
2149 case PIX_FMT_BGR444:
2150 *yuv2packed1 = yuv2rgb12_1_c;
2151 *yuv2packed2 = yuv2rgb12_2_c;
2152 *yuv2packedX = yuv2rgb12_X_c;
2156 *yuv2packed1 = yuv2rgb8_1_c;
2157 *yuv2packed2 = yuv2rgb8_2_c;
2158 *yuv2packedX = yuv2rgb8_X_c;
2162 *yuv2packed1 = yuv2rgb4_1_c;
2163 *yuv2packed2 = yuv2rgb4_2_c;
2164 *yuv2packedX = yuv2rgb4_X_c;
2166 case PIX_FMT_RGB4_BYTE:
2167 case PIX_FMT_BGR4_BYTE:
2168 *yuv2packed1 = yuv2rgb4b_1_c;
2169 *yuv2packed2 = yuv2rgb4b_2_c;
2170 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for buffer-management tracing: with
 * DEBUG_SWSCALE_BUFFERS == 0 the DEBUG_BUFFERS() calls compile away. */
2176 #define DEBUG_SWSCALE_BUFFERS 0
2177 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2179 static int swScale(SwsContext *c, const uint8_t* src[],
2180 int srcStride[], int srcSliceY,
2181 int srcSliceH, uint8_t* dst[], int dstStride[])
2183 /* load a few things into local vars to make the code more readable? and faster */
2184 const int srcW= c->srcW;
2185 const int dstW= c->dstW;
2186 const int dstH= c->dstH;
2187 const int chrDstW= c->chrDstW;
2188 const int chrSrcW= c->chrSrcW;
2189 const int lumXInc= c->lumXInc;
2190 const int chrXInc= c->chrXInc;
2191 const enum PixelFormat dstFormat= c->dstFormat;
2192 const int flags= c->flags;
2193 int16_t *vLumFilterPos= c->vLumFilterPos;
2194 int16_t *vChrFilterPos= c->vChrFilterPos;
2195 int16_t *hLumFilterPos= c->hLumFilterPos;
2196 int16_t *hChrFilterPos= c->hChrFilterPos;
2197 int16_t *vLumFilter= c->vLumFilter;
2198 int16_t *vChrFilter= c->vChrFilter;
2199 int16_t *hLumFilter= c->hLumFilter;
2200 int16_t *hChrFilter= c->hChrFilter;
2201 int32_t *lumMmxFilter= c->lumMmxFilter;
2202 int32_t *chrMmxFilter= c->chrMmxFilter;
2203 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2204 const int vLumFilterSize= c->vLumFilterSize;
2205 const int vChrFilterSize= c->vChrFilterSize;
2206 const int hLumFilterSize= c->hLumFilterSize;
2207 const int hChrFilterSize= c->hChrFilterSize;
2208 int16_t **lumPixBuf= c->lumPixBuf;
2209 int16_t **chrUPixBuf= c->chrUPixBuf;
2210 int16_t **chrVPixBuf= c->chrVPixBuf;
2211 int16_t **alpPixBuf= c->alpPixBuf;
2212 const int vLumBufSize= c->vLumBufSize;
2213 const int vChrBufSize= c->vChrBufSize;
2214 uint8_t *formatConvBuffer= c->formatConvBuffer;
2215 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2216 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2218 uint32_t *pal=c->pal_yuv;
2219 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2220 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2221 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2222 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2223 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2225 /* vars which will change and which we need to store back in the context */
2227 int lumBufIndex= c->lumBufIndex;
2228 int chrBufIndex= c->chrBufIndex;
2229 int lastInLumBuf= c->lastInLumBuf;
2230 int lastInChrBuf= c->lastInChrBuf;
2232 if (isPacked(c->srcFormat)) {
2240 srcStride[3]= srcStride[0];
2242 srcStride[1]<<= c->vChrDrop;
2243 srcStride[2]<<= c->vChrDrop;
2245 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2246 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2247 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2248 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2249 srcSliceY, srcSliceH, dstY, dstH);
2250 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2251 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2253 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2254 static int warnedAlready=0; //FIXME move this into the context perhaps
2255 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2256 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2257 " ->cannot do aligned memory accesses anymore\n");
2262 /* Note the user might start scaling the picture in the middle so this
2263 will not get executed. This is not really intended but works
2264 currently, so people might do it. */
2265 if (srcSliceY ==0) {
2275 for (;dstY < dstH; dstY++) {
2276 unsigned char *dest =dst[0]+dstStride[0]*dstY;
2277 const int chrDstY= dstY>>c->chrDstVSubSample;
2278 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2279 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
2280 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2282 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2283 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2284 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2285 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2286 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2287 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2290 //handle holes (FAST_BILINEAR & weird filters)
2291 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2292 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2293 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2294 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2296 DEBUG_BUFFERS("dstY: %d\n", dstY);
2297 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2298 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2299 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2300 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2302 // Do we have enough lines in this slice to output the dstY line
2303 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2305 if (!enough_lines) {
2306 lastLumSrcY = srcSliceY + srcSliceH - 1;
2307 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2308 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2309 lastLumSrcY, lastChrSrcY);
2312 //Do horizontal scaling
2313 while(lastInLumBuf < lastLumSrcY) {
2314 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2315 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2317 assert(lumBufIndex < 2*vLumBufSize);
2318 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2319 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2320 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2321 hLumFilter, hLumFilterPos, hLumFilterSize,
2324 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2325 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2326 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2330 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2331 lumBufIndex, lastInLumBuf);
2333 while(lastInChrBuf < lastChrSrcY) {
2334 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2335 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2337 assert(chrBufIndex < 2*vChrBufSize);
2338 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2339 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2340 //FIXME replace parameters through context struct (some at least)
2342 if (c->needs_hcscale)
2343 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2344 chrDstW, src1, src2, chrSrcW, chrXInc,
2345 hChrFilter, hChrFilterPos, hChrFilterSize,
2346 formatConvBuffer, pal);
2348 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2349 chrBufIndex, lastInChrBuf);
2351 //wrap buf index around to stay inside the ring buffer
2352 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2353 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2355 break; //we can't output a dstY line so let's try with the next slice
2358 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2360 if (dstY >= dstH-2) {
2361 // hmm looks like we can't use MMX here without overwriting this array's tail
2362 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2363 &yuv2packed1, &yuv2packed2,
2368 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2369 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2370 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2371 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2372 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2373 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2374 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
2375 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2376 const int16_t *lumBuf = lumSrcPtr[0];
2377 const int16_t *chrUBuf= chrUSrcPtr[0];
2378 const int16_t *chrVBuf= chrVSrcPtr[0];
2379 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2380 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
2381 uDest, vDest, aDest, dstW, chrDstW);
2382 } else { //General YV12
2384 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2385 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
2386 chrVSrcPtr, vChrFilterSize,
2387 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2390 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2391 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2392 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2393 int chrAlpha= vChrFilter[2*dstY+1];
2394 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
2395 *chrVSrcPtr, *(chrVSrcPtr+1),
2396 alpPixBuf ? *alpSrcPtr : NULL,
2397 dest, dstW, chrAlpha, dstFormat, flags, dstY);
2398 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2399 int lumAlpha= vLumFilter[2*dstY+1];
2400 int chrAlpha= vChrFilter[2*dstY+1];
2402 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2404 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
2405 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
2406 *chrVSrcPtr, *(chrVSrcPtr+1),
2407 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2408 dest, dstW, lumAlpha, chrAlpha, dstY);
2409 } else { //general RGB
2411 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2412 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2413 alpSrcPtr, dest, dstW, dstY);
2419 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2420 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2423 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2424 __asm__ volatile("sfence":::"memory");
2428 /* store changed local vars back in the context */
2430 c->lumBufIndex= lumBufIndex;
2431 c->chrBufIndex= chrBufIndex;
2432 c->lastInLumBuf= lastInLumBuf;
2433 c->lastInChrBuf= lastInChrBuf;
2435 return dstY - lastDstY;
2438 static av_cold void sws_init_swScale_c(SwsContext *c)
/* One-time (av_cold) setup of the plain-C function pointers in the
 * SwsContext: the packed/planar output writers, the generic horizontal
 * scaler, the optional fast-bilinear scalers, and the per-source-format
 * input converters that unpack one line into the internal planar
 * intermediate (c->chrToYV12 / c->lumToYV12 / c->alpToYV12), plus the
 * luma/chroma range-conversion hooks and the needs_hcscale flag.
 *
 * NOTE(review): this excerpt is missing several structural lines —
 * the opening brace, a number of `switch (srcFormat) {` headers,
 * `else` lines and closing braces were dropped during extraction.
 * The code below is kept byte-identical; only comments were added. */
2440 enum PixelFormat srcFormat = c->srcFormat;
/* Pick the C implementations of the vertical/output stage
 * (unscaled 1-tap, filtered X-tap, and the packed-RGB writers). */
2442 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2443 &c->yuv2packed1, &c->yuv2packed2,
/* Generic filtered horizontal scaler. */
2446 c->hScale = hScale_c;
/* Faster, lower-quality horizontal scalers, only when the caller
 * explicitly asked for SWS_FAST_BILINEAR. */
2448 if (c->flags & SWS_FAST_BILINEAR) {
2449 c->hyscale_fast = hyscale_fast_c;
2450 c->hcscale_fast = hcscale_fast_c;
/* Unscaled chroma input converter, chosen by source pixel format.
 * Stays NULL for formats whose chroma needs no special unpacking. */
2453 c->chrToYV12 = NULL;
2455 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2456 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2457 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2458 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Palette-based 4-bit formats go through the palette lookup. */
2462 case PIX_FMT_BGR4_BYTE:
2463 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10/16-bit planar YUV: converters differ only by bit depth and
 * endianness (BE/LE pairs). */
2464 case PIX_FMT_YUV444P9BE:
2465 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
2466 case PIX_FMT_YUV444P9LE:
2467 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
2468 case PIX_FMT_YUV444P10BE:
2469 case PIX_FMT_YUV422P10BE:
2470 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
2471 case PIX_FMT_YUV422P10LE:
2472 case PIX_FMT_YUV444P10LE:
2473 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
2474 case PIX_FMT_YUV420P16BE:
2475 case PIX_FMT_YUV422P16BE:
2476 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
2477 case PIX_FMT_YUV420P16LE:
2478 case PIX_FMT_YUV422P16LE:
2479 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB-family sources with horizontal chroma subsampling use the
 * *_half converter variants (presumably combining two adjacent
 * source pixels per output chroma sample — TODO confirm in the
 * converter implementations). */
2481 if (c->chrSrcHSubSample) {
2483 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2484 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2485 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2486 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
/* NOTE(review): RGB32 mapping to bgr32* (and BGR32 to rgb32*) looks
 * deliberate — the converters appear to be named after in-memory byte
 * order rather than the PIX_FMT name; verify against the converter
 * definitions before "fixing". */
2487 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2488 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2489 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2490 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2491 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2492 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2493 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2494 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2495 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2496 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2497 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2498 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2499 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2500 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Same RGB-family formats at full chroma resolution (no _half). */
2504 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2505 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2506 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2507 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2508 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2509 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2510 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2511 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2512 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2513 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2514 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2515 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2516 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2517 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2518 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2519 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2520 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2521 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha input converters, again chosen by source format. */
2525 c->lumToYV12 = NULL;
2526 c->alpToYV12 = NULL;
2527 switch (srcFormat) {
2528 case PIX_FMT_YUV444P9BE:
2529 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2530 case PIX_FMT_YUV444P9LE:
2531 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2532 case PIX_FMT_YUV444P10BE:
2533 case PIX_FMT_YUV422P10BE:
2534 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2535 case PIX_FMT_YUV444P10LE:
2536 case PIX_FMT_YUV422P10LE:
2537 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
/* yuy2ToY_c / uyvyToY_c are reused for 16-bit planar and gray16
 * inputs: presumably they just pick every even/odd byte, which
 * matches the byte layout of these formats — TODO confirm. */
2538 case PIX_FMT_YUYV422 :
2539 case PIX_FMT_YUV420P16BE:
2540 case PIX_FMT_YUV422P16BE:
2541 case PIX_FMT_YUV444P16BE:
2542 case PIX_FMT_Y400A :
2543 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2544 case PIX_FMT_UYVY422 :
2545 case PIX_FMT_YUV420P16LE:
2546 case PIX_FMT_YUV422P16LE:
2547 case PIX_FMT_YUV444P16LE:
2548 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2549 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2550 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2551 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2552 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2553 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2554 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2555 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2556 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2557 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2558 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2562 case PIX_FMT_BGR4_BYTE:
2563 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2564 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2565 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2566 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2567 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2568 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2569 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2570 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2571 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2572 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2573 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha plane extraction for formats that carry alpha. */
2576 switch (srcFormat) {
2578 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2580 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* NOTE(review): Y400A alpha reuses uyvyToY_c — presumably because the
 * alpha bytes sit at odd offsets, the same layout as UYVY luma;
 * confirm against the Y400A format definition. */
2581 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Range conversion (full/JPEG <-> limited/MPEG) is only needed when
 * source and destination ranges differ and the destination is YUV.
 * NOTE(review): the `if (c->srcRange)` / `else` lines selecting the
 * From- vs To-Jpeg direction are elided from this excerpt. */
2585 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2587 c->lumConvertRange = lumRangeFromJpeg_c;
2588 c->chrConvertRange = chrRangeFromJpeg_c;
2590 c->lumConvertRange = lumRangeToJpeg_c;
2591 c->chrConvertRange = chrRangeToJpeg_c;
/* Horizontal chroma scaling is skipped for gray and 1-bit
 * monochrome sources/destinations, which carry no real chroma. */
2595 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2596 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2597 c->needs_hcscale = 1;
2600 SwsFunc ff_getSwsFunc(SwsContext *c)
2602 sws_init_swScale_c(c);
2605 ff_sws_init_swScale_mmx(c);
2607 ff_sws_init_swScale_altivec(c);