2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients in Q15 (RGB2YUV_SHIFT).
 * Luma terms are scaled by 219/255 (limited-range Y, 16..235), chroma
 * terms by 224/255 (U/V, 16..240); +0.5 rounds to nearest.
 * The weights (0.299/0.587/0.114 etc.) match ITU-R BT.601. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB->YUV coefficient sets, indexed by colorspace id
 * (see the trailing comments: ITU-R BT.709, BT.601/SMPTE 170M, FCC,
 * SMPTE 240M). Each row holds 9 floating-point matrix coefficients;
 * duplicated rows cover ids that share the same matrix. */
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither matrices used when reducing to low-bit-depth RGB/BGR
 * output (1/4/8/15/16 bpp). DECLARE_ALIGNED keeps each row 8-byte
 * aligned. The matrix size (2x2, 4x4, 8x8) and the number in the name
 * (e.g. _32, _73, _220) reflect the value spread of the pattern. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 {  1,   3,   1,   3,   1,   3,   1,   3, },
111 {  2,   0,   2,   0,   2,   0,   2,   0, },
/* 2x2 pattern with a larger amplitude, used for the 5-bit channels. */
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 {  6,   2,   6,   2,   6,   2,   6,   2, },
116 {  0,   4,   0,   4,   0,   4,   0,   4, },
/* 4x4 Bayer-style matrix, non-static: also referenced from asm/SIMD code. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 {  8,   4,  11,   7,   8,   4,  11,   7, },
121 {  2,  14,   1,  13,   2,  14,   1,  13, },
122 { 10,   6,   9,   5,  10,   6,   9,   5, },
123 {  0,  12,   3,  15,   0,  12,   3,  15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17,   9,  23,  15,  16,   8,  22,  14, },
128 {  5,  29,   3,  27,   4,  28,   2,  26, },
129 { 21,  13,  19,  11,  20,  12,  18,  10, },
130 {  0,  24,   6,  30,   1,  25,   7,  31, },
131 { 16,   8,  22,  14,  17,   9,  23,  15, },
132 {  4,  28,   2,  26,   5,  29,   3,  27, },
133 { 20,  12,  18,  10,  21,  13,  19,  11, },
134 {  1,  25,   7,  31,   0,  24,   6,  30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 {  0,  55,  14,  68,   3,  58,  17,  72, },
139 { 37,  18,  50,  32,  40,  22,  54,  35, },
140 {  9,  64,   5,  59,  13,  67,   8,  63, },
141 { 46,  27,  41,  23,  49,  31,  44,  26, },
142 {  2,  57,  16,  71,   1,  56,  15,  70, },
143 { 39,  21,  52,  34,  38,  19,  51,  33, },
144 { 11,  66,   7,  62,  10,  65,   6,  60, },
145 { 48,  30,  43,  25,  47,  29,  42,  24, },
/* 8x8 matrix with ~220 amplitude for 1-bit (mono) output. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117,  62, 158, 103, 113,  58, 155, 100, },
151 { 34, 199,  21, 186,  31, 196,  17, 182, },
152 {144,  89, 131,  76, 141,  86, 127,  72, },
153 {  0, 165,  41, 206,  10, 175,  52, 217, },
154 {110,  55, 151,  96, 120,  65, 162, 107, },
155 { 28, 193,  14, 179,  38, 203,  24, 189, },
156 {138,  83, 124,  69, 148,  93, 134,  79, },
157 {  7, 172,  48, 213,   3, 168,  45, 210, },
/* NOTE(review): dither_8x8_220 is re-declared three more times below
 * (gamma 1.5 / 2.0 / 2.5 variants). Upstream these are alternatives
 * selected by preprocessor conditionals; confirm the #if/#elif/#endif
 * guards were not lost from this copy, as four same-named definitions
 * in one translation unit would not compile. */
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 {  0, 143,  18, 200,   2, 156,  25, 215, },
163 { 78,  28, 125,  64,  89,  36, 138,  74, },
164 { 10, 180,   3, 161,  16, 195,   8, 175, },
165 {109,  51,  93,  38, 121,  60, 105,  47, },
166 {  1, 152,  23, 210,   0, 147,  20, 205, },
167 { 85,  33, 134,  71,  81,  30, 130,  67, },
168 { 14, 190,   6, 171,  12, 185,   5, 166, },
169 {117,  57, 101,  44, 113,  54,  97,  41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 {  0, 124,   8, 193,   0, 140,  12, 213, },
175 { 55,  14, 104,  42,  66,  19, 119,  52, },
176 {  3, 168,   1, 145,   6, 187,   3, 162, },
177 { 86,  31,  70,  21,  99,  39,  82,  28, },
178 {  0, 134,  11, 206,   0, 129,   9, 200, },
179 { 62,  17, 114,  48,  58,  16, 109,  45, },
180 {  5, 181,   2, 157,   4, 175,   1, 151, },
181 { 95,  36,  78,  26,  90,  34,  74,  24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 {  0, 107,   3, 187,   0, 125,   6, 212, },
187 { 39,   7,  86,  28,  49,  11, 102,  36, },
188 {  1, 158,   0, 131,   3, 180,   1, 151, },
189 { 68,  19,  52,  12,  81,  25,  64,  17, },
190 {  0, 119,   5, 203,   0, 113,   4, 195, },
191 { 45,   9,  96,  33,  42,   8,  91,  30, },
192 {  2, 172,   1, 144,   2, 165,   0, 137, },
193 { 77,  23,  60,  15,  72,  21,  56,  14, },
/* Vertical scaling to >8-bit planar YUV: convolves the source line
 * pointers (lumSrc/chrUSrc/chrVSrc/alpSrc) with the given vertical
 * filters and writes 9..16-bit samples into dest/uDest/vDest/aDest.
 * big_endian and output_bits are compile-time constants for each
 * instantiation (see the yuv2NBPS macro below), so the branches in
 * output_pixel() fold away. Alpha is only written when
 * CONFIG_SWSCALE_ALPHA is enabled and aDest is non-NULL. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199                       int lumFilterSize, const int16_t *chrFilter,
200                       const int16_t **chrUSrc, const int16_t **chrVSrc,
201                       int chrFilterSize, const int16_t **alpSrc,
202                       uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203                       uint16_t *aDest, int dstW, int chrDstW,
204                       int big_endian, int output_bits)
206     //FIXME Optimize (just quickly written not optimized..)
208     int shift = 11 + 16 - output_bits;
/* Store one sample, clipping to the target bit depth and choosing the
 * byte order at compile time (AV_WB16 = big endian, AV_WL16 = little). */
210 #define output_pixel(pos, val) \
212         if (output_bits == 16) { \
213             AV_WB16(pos, av_clip_uint16(val >> shift)); \
215             AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218         if (output_bits == 16) { \
219             AV_WL16(pos, av_clip_uint16(val >> shift)); \
221             AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224     for (i = 0; i < dstW; i++) {
225         int val = 1 << (26-output_bits);
228         for (j = 0; j < lumFilterSize; j++)
229             val += lumSrc[j][i] * lumFilter[j];
231         output_pixel(&dest[i], val);
/* Chroma plane: one U and one V sample per chroma-subsampled column. */
235         for (i = 0; i < chrDstW; i++) {
236             int u = 1 << (26-output_bits);
237             int v = 1 << (26-output_bits);
240             for (j = 0; j < chrFilterSize; j++) {
241                 u += chrUSrc[j][i] * chrFilter[j];
242                 v += chrVSrc[j][i] * chrFilter[j];
245             output_pixel(&uDest[i], u);
246             output_pixel(&vDest[i], v);
/* Optional alpha plane, filtered with the luma filter. */
250     if (CONFIG_SWSCALE_ALPHA && aDest) {
251         for (i = 0; i < dstW; i++) {
252             int val = 1 << (26-output_bits);
255             for (j = 0; j < lumFilterSize; j++)
256                 val += alpSrc[j][i] * lumFilter[j];
258             output_pixel(&aDest[i], val);
/* Instantiates a concrete yuv2yuvX<bits><BE|LE>_c wrapper around
 * yuv2yuvX16_c_template for one bit depth and byte order; the uint8_t*
 * plane pointers from the common prototype are reinterpreted as
 * uint16_t*. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266                               const int16_t **lumSrc, int lumFilterSize, \
267                               const int16_t *chrFilter, const int16_t **chrUSrc, \
268                               const int16_t **chrVSrc, \
269                               int chrFilterSize, const int16_t **alpSrc, \
270                               uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271                               uint8_t *_aDest, int dstW, int chrDstW) \
273     uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274              *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278                           dest, uDest, vDest, aDest, \
279                           dstW, chrDstW, is_be, bits); \
288 static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
289                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
290                                 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
291                                 enum PixelFormat dstFormat)
/* Runtime dispatcher for >8-bit planar output: picks the BE or LE
 * instantiation (generated by yuv2NBPS above) matching dstFormat's
 * endianness and bit depth. */
293 #define conv16(bits) \
294     if (isBE(dstFormat)) { \
295         yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
296                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
298                                  dest, uDest, vDest, aDest, \
301         yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
302                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
304                                  dest, uDest, vDest, aDest, \
307     if (is16BPS(dstFormat)) {
309     } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
/* Vertical scaling to 8-bit planar YUV(A): convolves the source lines
 * with the vertical filters and writes clipped 8-bit samples. The
 * accumulators are in a fixed-point format where >>19 yields the final
 * 8-bit value. Alpha is written only if aDest is non-NULL (and alpha
 * support is compiled in). */
317 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
318                               const int16_t **lumSrc, int lumFilterSize,
319                               const int16_t *chrFilter, const int16_t **chrUSrc,
320                               const int16_t **chrVSrc,
321                               int chrFilterSize, const int16_t **alpSrc,
322                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
323                               uint8_t *aDest, int dstW, int chrDstW)
325     //FIXME Optimize (just quickly written not optimized..)
327     for (i=0; i<dstW; i++) {
330         for (j=0; j<lumFilterSize; j++)
331             val += lumSrc[j][i] * lumFilter[j];
333         dest[i]= av_clip_uint8(val>>19);
/* Chroma plane, one U/V pair per chroma-subsampled column. */
337         for (i=0; i<chrDstW; i++) {
341             for (j=0; j<chrFilterSize; j++) {
342                 u += chrUSrc[j][i] * chrFilter[j];
343                 v += chrVSrc[j][i] * chrFilter[j];
346             uDest[i]= av_clip_uint8(u>>19);
347             vDest[i]= av_clip_uint8(v>>19);
/* Optional alpha plane, filtered with the luma filter. */
350     if (CONFIG_SWSCALE_ALPHA && aDest)
351         for (i=0; i<dstW; i++) {
354             for (j=0; j<lumFilterSize; j++)
355                 val += alpSrc[j][i] * lumFilter[j];
357             aDest[i]= av_clip_uint8(val>>19);
/* Vertical scaling to semi-planar NV12/NV21: luma goes to dest as
 * usual; U and V are interleaved into the single chroma plane uDest
 * (vDest is unused for these formats). */
362 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
363                                const int16_t **lumSrc, int lumFilterSize,
364                                const int16_t *chrFilter, const int16_t **chrUSrc,
365                                const int16_t **chrVSrc, int chrFilterSize,
366                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
367                                uint8_t *vDest, uint8_t *aDest,
368                                int dstW, int chrDstW)
370     enum PixelFormat dstFormat = c->dstFormat;
372     //FIXME Optimize (just quickly written not optimized..)
374     for (i=0; i<dstW; i++) {
377         for (j=0; j<lumFilterSize; j++)
378             val += lumSrc[j][i] * lumFilter[j];
380         dest[i]= av_clip_uint8(val>>19);
/* NV12 stores U first, then V, per chroma sample pair... */
386         if (dstFormat == PIX_FMT_NV12)
387             for (i=0; i<chrDstW; i++) {
391                 for (j=0; j<chrFilterSize; j++) {
392                     u += chrUSrc[j][i] * chrFilter[j];
393                     v += chrVSrc[j][i] * chrFilter[j];
396                 uDest[2*i]= av_clip_uint8(u>>19);
397                 uDest[2*i+1]= av_clip_uint8(v>>19);
/* ...otherwise (NV21) the order is swapped: V first, then U. */
400             for (i=0; i<chrDstW; i++) {
404                 for (j=0; j<chrFilterSize; j++) {
405                     u += chrUSrc[j][i] * chrFilter[j];
406                     v += chrVSrc[j][i] * chrFilter[j];
409                 uDest[2*i]= av_clip_uint8(v>>19);
410                 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Loop headers for packed output with full vertical filtering
 * (the "X" variants): each iteration computes two luma samples
 * (Y1, Y2) and one shared U/V pair per 2-pixel group by convolving
 * with lumFilter/chrFilter. The _NOCLIP variant leaves the values
 * unclipped; YSCALE_YUV_2_PACKEDX_C below adds clipping; the
 * YSCALE_YUV_2_RGBX_* variants additionally look up r/g/b tables or
 * apply the full RGB matrix. These macros deliberately leave their
 * loop bodies open — the caller (YSCALE_YUV_2_ANYRGB_C) supplies the
 * per-format store code. */
414 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
415     for (i=0; i<(dstW>>1); i++) {\
421         int av_unused A1, A2;\
422         type av_unused *r, *b, *g;\
425         for (j=0; j<lumFilterSize; j++) {\
426             Y1 += lumSrc[j][i2] * lumFilter[j];\
427             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
429         for (j=0; j<chrFilterSize; j++) {\
430             U += chrUSrc[j][i] * chrFilter[j];\
431             V += chrVSrc[j][i] * chrFilter[j];\
440             for (j=0; j<lumFilterSize; j++) {\
441                 A1 += alpSrc[j][i2  ] * lumFilter[j];\
442                 A2 += alpSrc[j][i2+1] * lumFilter[j];\
448 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
449     YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
450     if ((Y1|Y2|U|V)&256) {\
451         if (Y1>255)   Y1=255; \
452         else if (Y1<0)Y1=0;   \
453         if (Y2>255)   Y2=255; \
454         else if (Y2<0)Y2=0;   \
460         if (alpha && ((A1|A2)&256)) {\
461             A1=av_clip_uint8(A1);\
462             A2=av_clip_uint8(A2);\
465 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
466     for (i=0; i<dstW; i++) {\
474         for (j=0; j<lumFilterSize; j++) {\
475             Y += lumSrc[j][i     ] * lumFilter[j];\
477         for (j=0; j<chrFilterSize; j++) {\
478             U += chrUSrc[j][i] * chrFilter[j];\
479             V += chrVSrc[j][i] * chrFilter[j];\
486             for (j=0; j<lumFilterSize; j++)\
487                 A += alpSrc[j][i     ] * lumFilter[j];\
490                 A = av_clip_uint8(A);\
493 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
494     YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
495         Y-= c->yuv2rgb_y_offset;\
496         Y*= c->yuv2rgb_y_coeff;\
498         R= Y + V*c->yuv2rgb_v2r_coeff;\
499         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
500         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
501         if ((R|G|B)&(0xC0000000)) {\
502             if (R>=(256<<22))   R=(256<<22)-1; \
504             if (G>=(256<<22))   G=(256<<22)-1; \
506             if (B>=(256<<22))   B=(256<<22)-1; \
510 #define YSCALE_YUV_2_GRAY16_C \
511     for (i=0; i<(dstW>>1); i++) {\
520         for (j=0; j<lumFilterSize; j++) {\
521             Y1 += lumSrc[j][i2] * lumFilter[j];\
522             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
526         if ((Y1|Y2|U|V)&65536) {\
527             if (Y1>65535)   Y1=65535; \
528             else if (Y1<0)Y1=0;   \
529             if (Y2>65535)   Y2=65535; \
530             else if (Y2<0)Y2=0;   \
533 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
534     YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
535     r = (type *)c->table_rV[V];   \
536     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
537     b = (type *)c->table_bU[U];
/* Loop headers for the "2" variants: vertical *bilinear* interpolation
 * between two source lines (buf0/buf1, weighted by yalpha/yalpha1 and
 * uvalpha/uvalpha1) instead of a full filter convolution. */
539 #define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
540     for (i=0; i<(dstW>>1); i++) { \
542         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
543         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
544         int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
545         int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
546         type av_unused *r, *b, *g;                                    \
547         int av_unused A1, A2;                                         \
549             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
550             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
553 #define YSCALE_YUV_2_GRAY16_2_C   \
554     for (i=0; i<(dstW>>1); i++) { \
556         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
557         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
/* Bilinear variant + RGB table lookups. */
559 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
560     YSCALE_YUV_2_PACKED2_C(type,alpha)\
561     r = (type *)c->table_rV[V];\
562     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
563     b = (type *)c->table_bU[U];
/* Loop headers for the "1" variants: a single source line, no vertical
 * interpolation at all (chroma taken from ubuf1/vbuf1). */
565 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
566     for (i=0; i<(dstW>>1); i++) {\
568         int Y1= buf0[i2  ]>>7;\
569         int Y2= buf0[i2+1]>>7;\
570         int U= (ubuf1[i])>>7;\
571         int V= (vbuf1[i])>>7;\
572         type av_unused *r, *b, *g;\
573         int av_unused A1, A2;\
579 #define YSCALE_YUV_2_GRAY16_1_C \
580     for (i=0; i<(dstW>>1); i++) {\
582         int Y1= buf0[i2  ]<<1;\
583         int Y2= buf0[i2+1]<<1;
/* Single-line variant + RGB table lookups. */
585 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
586     YSCALE_YUV_2_PACKED1_C(type,alpha)\
587     r = (type *)c->table_rV[V];\
588     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
589     b = (type *)c->table_bU[U];
/* "1B" variant: single luma line but chroma averaged from two lines
 * ((ubuf0+ubuf1)>>8) — used when uvalpha is near the midpoint. */
591 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
592     for (i=0; i<(dstW>>1); i++) {\
594         int Y1= buf0[i2  ]>>7;\
595         int Y2= buf0[i2+1]>>7;\
596         int U= (ubuf0[i] + ubuf1[i])>>8;\
597         int V= (vbuf0[i] + vbuf1[i])>>8;\
598         type av_unused *r, *b, *g;\
599         int av_unused A1, A2;\
605 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
606     YSCALE_YUV_2_PACKED1B_C(type,alpha)\
607     r = (type *)c->table_rV[V];\
608     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
609     b = (type *)c->table_bU[U];
/* Monochrome (1 bpp) output, bilinear vertical blend: dithers 8 luma
 * samples with dither_8x8_220 and packs them into one output byte,
 * MSB first; MONOWHITE output is bit-inverted (~acc). */
611 #define YSCALE_YUV_2_MONO2_C \
612     const uint8_t * const d128=dither_8x8_220[y&7];\
613     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
614     for (i=0; i<dstW-7; i+=8) {\
616         acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
617         acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
618         acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
619         acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
620         acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
621         acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
622         acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
623         acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
624         ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
628 #define YSCALE_YUV_2_MONOX_C \
629     const uint8_t * const d128=dither_8x8_220[y&7];\
630     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
632     for (i=0; i<dstW-1; i+=2) {\
637         for (j=0; j<lumFilterSize; j++) {\
638             Y1 += lumSrc[j][i] * lumFilter[j];\
639             Y2 += lumSrc[j][i+1] * lumFilter[j];\
649         acc+= acc + g[Y1+d128[(i+0)&7]];\
650         acc+= acc + g[Y2+d128[(i+1)&7]];\
652             ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Format dispatcher: switches on c->dstFormat and expands one of the  \
 * loop-header macros (func = RGB-table variant, func2 = plain packed, \
 * func_g16 = 16-bit gray, func_monoblack = 1 bpp) followed by the     \
 * per-format store code — 48-bit RGB/BGR, 32/24-bit RGB, dithered     \
 * 16/15/12/8/4/1-bit formats, YUYV/UYVY and GRAY16BE/LE.              \
 */ \
657 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
658     switch(c->dstFormat) {\
659     case PIX_FMT_RGB48BE:\
660     case PIX_FMT_RGB48LE:\
662             ((uint8_t*)dest)[ 0]= r[Y1];\
663             ((uint8_t*)dest)[ 1]= r[Y1];\
664             ((uint8_t*)dest)[ 2]= g[Y1];\
665             ((uint8_t*)dest)[ 3]= g[Y1];\
666             ((uint8_t*)dest)[ 4]= b[Y1];\
667             ((uint8_t*)dest)[ 5]= b[Y1];\
668             ((uint8_t*)dest)[ 6]= r[Y2];\
669             ((uint8_t*)dest)[ 7]= r[Y2];\
670             ((uint8_t*)dest)[ 8]= g[Y2];\
671             ((uint8_t*)dest)[ 9]= g[Y2];\
672             ((uint8_t*)dest)[10]= b[Y2];\
673             ((uint8_t*)dest)[11]= b[Y2];\
677     case PIX_FMT_BGR48BE:\
678     case PIX_FMT_BGR48LE:\
680             ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
681             ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
682             ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
683             ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
684             ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
685             ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
692             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
693             func(uint32_t,needAlpha)\
694                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
695                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
698             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
700                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
701                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
705                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
706                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
714             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
715             func(uint32_t,needAlpha)\
716                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
717                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
720             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
722                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
723                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
727                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
728                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
735             ((uint8_t*)dest)[0]= r[Y1];\
736             ((uint8_t*)dest)[1]= g[Y1];\
737             ((uint8_t*)dest)[2]= b[Y1];\
738             ((uint8_t*)dest)[3]= r[Y2];\
739             ((uint8_t*)dest)[4]= g[Y2];\
740             ((uint8_t*)dest)[5]= b[Y2];\
746             ((uint8_t*)dest)[0]= b[Y1];\
747             ((uint8_t*)dest)[1]= g[Y1];\
748             ((uint8_t*)dest)[2]= r[Y1];\
749             ((uint8_t*)dest)[3]= b[Y2];\
750             ((uint8_t*)dest)[4]= g[Y2];\
751             ((uint8_t*)dest)[5]= r[Y2];\
755     case PIX_FMT_RGB565BE:\
756     case PIX_FMT_RGB565LE:\
757     case PIX_FMT_BGR565BE:\
758     case PIX_FMT_BGR565LE:\
760             const int dr1= dither_2x2_8[y&1    ][0];\
761             const int dg1= dither_2x2_4[y&1    ][0];\
762             const int db1= dither_2x2_8[(y&1)^1][0];\
763             const int dr2= dither_2x2_8[y&1    ][1];\
764             const int dg2= dither_2x2_4[y&1    ][1];\
765             const int db2= dither_2x2_8[(y&1)^1][1];\
767                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
768                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
772     case PIX_FMT_RGB555BE:\
773     case PIX_FMT_RGB555LE:\
774     case PIX_FMT_BGR555BE:\
775     case PIX_FMT_BGR555LE:\
777             const int dr1= dither_2x2_8[y&1    ][0];\
778             const int dg1= dither_2x2_8[y&1    ][1];\
779             const int db1= dither_2x2_8[(y&1)^1][0];\
780             const int dr2= dither_2x2_8[y&1    ][1];\
781             const int dg2= dither_2x2_8[y&1    ][0];\
782             const int db2= dither_2x2_8[(y&1)^1][1];\
784                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
785                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
789     case PIX_FMT_RGB444BE:\
790     case PIX_FMT_RGB444LE:\
791     case PIX_FMT_BGR444BE:\
792     case PIX_FMT_BGR444LE:\
794             const int dr1= dither_4x4_16[y&3    ][0];\
795             const int dg1= dither_4x4_16[y&3    ][1];\
796             const int db1= dither_4x4_16[(y&3)^3][0];\
797             const int dr2= dither_4x4_16[y&3    ][1];\
798             const int dg2= dither_4x4_16[y&3    ][0];\
799             const int db2= dither_4x4_16[(y&3)^3][1];\
801                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
802                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
809             const uint8_t * const d64= dither_8x8_73[y&7];\
810             const uint8_t * const d32= dither_8x8_32[y&7];\
812                 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
813                 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
820             const uint8_t * const d64= dither_8x8_73 [y&7];\
821             const uint8_t * const d128=dither_8x8_220[y&7];\
823                 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
824                                    + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
828     case PIX_FMT_RGB4_BYTE:\
829     case PIX_FMT_BGR4_BYTE:\
831             const uint8_t * const d64= dither_8x8_73 [y&7];\
832             const uint8_t * const d128=dither_8x8_220[y&7];\
834                 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
835                 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
839     case PIX_FMT_MONOBLACK:\
840     case PIX_FMT_MONOWHITE:\
845     case PIX_FMT_YUYV422:\
847             ((uint8_t*)dest)[2*i2+0]= Y1;\
848             ((uint8_t*)dest)[2*i2+1]= U;\
849             ((uint8_t*)dest)[2*i2+2]= Y2;\
850             ((uint8_t*)dest)[2*i2+3]= V;\
853     case PIX_FMT_UYVY422:\
855             ((uint8_t*)dest)[2*i2+0]= U;\
856             ((uint8_t*)dest)[2*i2+1]= Y1;\
857             ((uint8_t*)dest)[2*i2+2]= V;\
858             ((uint8_t*)dest)[2*i2+3]= Y2;\
861     case PIX_FMT_GRAY16BE:\
863             ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
864             ((uint8_t*)dest)[2*i2+1]= Y1;\
865             ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
866             ((uint8_t*)dest)[2*i2+3]= Y2;\
869     case PIX_FMT_GRAY16LE:\
871             ((uint8_t*)dest)[2*i2+0]= Y1;\
872             ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
873             ((uint8_t*)dest)[2*i2+2]= Y2;\
874             ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
879 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
880                           const int16_t **lumSrc, int lumFilterSize,
881                           const int16_t *chrFilter, const int16_t **chrUSrc,
882                           const int16_t **chrVSrc, int chrFilterSize,
883                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
/* Full vertical filtering to any packed output format: expands the
 * per-format switch with the "X" (multi-tap filter) loop headers. */
886     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution path (no chroma subsampling on output):
 * converts with the full YUV->RGB matrix per pixel instead of table
 * lookups. Pixel size is derived from c->dstFormatBpp; the branches
 * pick the alpha handling per destination format (aidx presumably set
 * in elided code — the alpha byte gets A or opaque 255). */
889 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
890                                    const int16_t **lumSrc, int lumFilterSize,
891                                    const int16_t *chrFilter, const int16_t **chrUSrc,
892                                    const int16_t **chrVSrc, int chrFilterSize,
893                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
896     int step= c->dstFormatBpp/8;
899     switch(c->dstFormat) {
/* Alpha known only at runtime: test the alpha plane once per call. */
907             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
908             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
909                 dest[aidx]= needAlpha ? A : 255;
916             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
917                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
925                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Same structure for the opposite component order. */
942             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
943             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
944                 dest[aidx]= needAlpha ? A : 255;
951             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
952                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
960                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill `height` rows of `plane` (starting at row `y`, row pitch
 * `stride`) with the constant byte `val` — used e.g. to write a flat
 * chroma/alpha plane. */
975 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
978     uint8_t *ptr = plane + stride*y;
979     for (i=0; i<height; i++) {
980         memset(ptr, val, width);
/* Generates input converters for 48-bit RGB/BGR (16 bits per
 * component): a ToY luma extractor, a ToUV chroma extractor, and a
 * ToUV_half variant that averages horizontal pixel pairs. rfunc is the
 * 16-bit reader (AV_RL16/AV_RB16); compA/compB/compC name the
 * component order, so the same template serves rgb48 and bgr48 — see
 * the four instantiations after the macro. Only the top 8 bits of each
 * 16-bit component are used (>> 8). */
985 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
986 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
987                        uint8_t *dst, const uint8_t *src, int width, \
991     for (i = 0; i < width; i++) { \
992         int compA = rfunc(&src[i*6+0]) >> 8; \
993         int compB = rfunc(&src[i*6+2]) >> 8; \
994         int compC = rfunc(&src[i*6+4]) >> 8; \
996         dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1000 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
1001                        uint8_t *dstU, uint8_t *dstV, \
1002                        const uint8_t *src1, const uint8_t *src2, \
1003                        int width, uint32_t *unused) \
1006     assert(src1==src2); \
1007     for (i = 0; i < width; i++) { \
1008         int compA = rfunc(&src1[6*i + 0]) >> 8; \
1009         int compB = rfunc(&src1[6*i + 2]) >> 8; \
1010         int compC = rfunc(&src1[6*i + 4]) >> 8; \
1012         dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1013         dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1017 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
1018                        uint8_t *dstU, uint8_t *dstV, \
1019                        const uint8_t *src1, const uint8_t *src2, \
1020                        int width, uint32_t *unused) \
1023     assert(src1==src2); \
1024     for (i = 0; i < width; i++) { \
1025         int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
1026         int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
1027         int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1029         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1030         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1033 rgb48funcs(LE, AV_RL16, r, g, b);
1034 rgb48funcs(BE, AV_RB16, r, g, b);
1035 rgb48funcs(LE, AV_RL16, b, g, r);
1036 rgb48funcs(BE, AV_RB16, b, g, r);
/* Generates a luma extractor for one packed RGB/BGR layout: each pixel
 * of `type` is shifted/masked into r, g, b, then combined with the
 * (pre-shifted) fixed-point luma weights at scale S. The
 * instantiations below cover 32/16/15-bit layouts in both component
 * orders (the "1" variants read the byte-swapped 32-bit layouts). */
1038 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1039 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1040                        int width, uint32_t *unused)\
1043     for (i=0; i<width; i++) {\
1044         int b= (((const type*)src)[i]>>shb)&maskb;\
1045         int g= (((const type*)src)[i]>>shg)&maskg;\
1046         int r= (((const type*)src)[i]>>shr)&maskr;\
1048         dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1052 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1053 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1054 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1055 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1056 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1057 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1058 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1059 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel from packed ABGR input (body elided here). */
1061 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1064     for (i=0; i<width; i++) {
/* Extract the alpha channel from packed RGBA input (body elided here). */
1069 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1072     for (i=0; i<width; i++) {
/* Generates two chroma extractors per packed RGB/BGR layout:
 * name_c (one U/V pair per pixel) and name_half_c, which averages two
 * adjacent pixels using a masked-sum trick: green is isolated first so
 * r and b pair-sums can be extracted from (pix0+pix1-g) without the
 * per-channel carries colliding. shp pre-shifts layouts whose
 * components start above bit 0 (the "1" 32-bit variants). */
1077 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1078 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1079                        const uint8_t *src, const uint8_t *dummy, \
1080                        int width, uint32_t *unused)\
1083     for (i=0; i<width; i++) {\
1084         int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1085         int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1086         int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1088         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1089         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1092 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1093                             const uint8_t *src, const uint8_t *dummy, \
1094                             int width, uint32_t *unused)\
1097     for (i=0; i<width; i++) {\
1098         int pix0= ((const type*)src)[2*i+0]>>shp;\
1099         int pix1= ((const type*)src)[2*i+1]>>shp;\
1100         int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1101         int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1102         int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1103         g&= maskg|(2*maskg);\
1107         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1108         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1112 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1113 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1114 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1115 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1116 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F,  0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1117 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F,  0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1118 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800,  0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1119 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00,  0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 input: look each index up in the palette `pal` and keep the
 * low byte as luma (palette entries are pre-converted upstream —
 * presumably to a Y-in-low-byte layout; confirm against the caller). */
1121 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1124     for (i=0; i<width; i++) {
1127         dst[i]= pal[d] & 0xFF;
/* PAL8 input: extract U/V for each pixel from the palette entry. */
1131 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1132                       const uint8_t *src1, const uint8_t *src2,
1133                       int width, uint32_t *pal)
1136     assert(src1 == src2);
1137     for (i=0; i<width; i++) {
1138         int p= pal[src1[i]];
/* 1 bpp input, white=0 convention: expand each bit (MSB first) to a
 * full 0/255 luma byte. */
1145 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1146                           int width, uint32_t *unused)
1149     for (i=0; i<width/8; i++) {
1152             dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp input, black=0 convention: same expansion (the polarity
 * difference is handled in elided code — presumably an inverted `d`). */
1156 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1157                           int width, uint32_t *unused)
1160     for (i=0; i<width/8; i++) {
1163             dst[8*i+j]= ((d>>(7-j))&1)*255;
/* Unfiltered (1:1 vertical) planar output: each 14-ish-bit
 * intermediate sample is rounded (+64) and shifted down by 7 to 8 bits,
 * with clipping. Alpha only when compiled in and aDest is non-NULL. */
1167 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1168                        const int16_t *chrUSrc, const int16_t *chrVSrc,
1169                        const int16_t *alpSrc,
1170                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1171                        uint8_t *aDest, int dstW, int chrDstW)
1174     for (i=0; i<dstW; i++) {
1175         int val= (lumSrc[i]+64)>>7;
1176         dest[i]= av_clip_uint8(val);
1180         for (i=0; i<chrDstW; i++) {
1181             int u=(chrUSrc[i]+64)>>7;
1182             int v=(chrVSrc[i]+64)>>7;
1183             uDest[i]= av_clip_uint8(u);
1184             vDest[i]= av_clip_uint8(v);
1187     if (CONFIG_SWSCALE_ALPHA && aDest)
1188         for (i=0; i<dstW; i++) {
1189             int val= (alpSrc[i]+64)>>7;
1190             aDest[i]= av_clip_uint8(val);
1195 * vertical bilinear scale YV12 to RGB
1197 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1198                           const uint16_t *buf1, const uint16_t *ubuf0,
1199                           const uint16_t *ubuf1, const uint16_t *vbuf0,
1200                           const uint16_t *vbuf1, const uint16_t *abuf0,
1201                           const uint16_t *abuf1, uint8_t *dest, int dstW,
1202                           int yalpha, int uvalpha, int y)
    /* Bilinear blend between two source lines: weights are 12-bit
     * (0..4095), so the complement weight is 4095 - alpha. */
1204     int  yalpha1=4095- yalpha;
1205     int uvalpha1=4095-uvalpha;
    /* Dispatch on c->dstFormat using the "2" (bilinear) loop headers. */
1208     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1212 * YV12 to RGB without scaling or interpolating
1214 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1215                           const uint16_t *ubuf0, const uint16_t *ubuf1,
1216                           const uint16_t *vbuf0, const uint16_t *vbuf1,
1217                           const uint16_t *abuf0, uint8_t *dest, int dstW,
1218                           int uvalpha, enum PixelFormat dstFormat,
    /* Single-line path: no vertical luma interpolation (yalpha fixed). */
1221     const int yalpha1=0;
1224     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1225     const int yalpha= 4096; //FIXME ...
    /* Chroma: below the midpoint weight use one chroma line ("1"
     * variants), otherwise average both chroma lines ("1B" variants). */
1227     if (uvalpha < 2048) {
1228         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1230         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1234 //FIXME yuy2* can read up to 7 samples too much
// Extract the luma channel from packed YUYV input.
// NOTE(review): the per-pixel store (presumably dst[i] = src[2*i], the
// even bytes, matching yuy2ToUV_c reading offsets 1 and 3) is missing
// from this extract — confirm against the full source.
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* Deinterleave chroma from packed YUYV (Y0 U Y1 V): U is byte 1 and V
 * is byte 3 of each 4-byte group. Only src1 is read; src1 and src2 must
 * alias the same line. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++) {
        const uint8_t *group = src1 + 4 * i;
        dstU[i] = group[1];
        dstV[i] = group[3];
    }
    assert(src1 == src2);
}
/* Reduce little-endian 16-bit chroma samples to 8 bits by taking the
 * high byte of each sample: src1 feeds U, src2 feeds V. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    const uint8_t *uhi = src1 + 1; /* high byte of each LE 16-bit sample */
    const uint8_t *vhi = src2 + 1;
    for (i = 0; i < width; i++, uhi += 2, vhi += 2) {
        dstU[i] = *uhi;
        dstV[i] = *vhi;
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
// Extract the luma channel from packed UYVY input.
// NOTE(review): the per-pixel store (presumably dst[i] = src[2*i+1],
// the odd bytes, matching uyvyToUV_c reading offsets 0 and 2) is
// missing from this extract — confirm against the full source.
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* Deinterleave chroma from packed UYVY (U Y0 V Y1): U is byte 0 and V
 * is byte 2 of each 4-byte group. Only src1 is read; src1 and src2 must
 * alias the same line. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++) {
        const uint8_t *group = src1 + 4 * i;
        dstU[i] = group[0];
        dstV[i] = group[2];
    }
    assert(src1 == src2);
}
// Reduce big-endian 16-bit chroma samples to 8 bits.
// NOTE(review): the per-pixel stores (presumably dstU[i] = src1[2*i]
// and dstV[i] = src2[2*i], the high byte of each BE sample — the
// mirror of LEToUV_c above) are missing from this extract — confirm.
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
1296 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1297 const uint8_t *src, int width)
1300 for (i = 0; i < width; i++) {
1301 dst1[i] = src[2*i+0];
1302 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — even bytes are U. src2 is unused. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — destinations swapped vs NV12.
 * src2 is unused. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
1320 // FIXME Maybe dither instead.
/* Generate ToY/ToUV input converters for 9- and 10-bit planar YUV:
 * each 16-bit sample (read with the endianness-specific rfunc) is
 * reduced to 8 bits by dropping the (depth-8) low bits — truncation,
 * not dithering (see FIXME above). Comments cannot be placed inside
 * the macro body without breaking the line continuations. */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \

/* Instantiate LE9/BE9/LE10/BE10 ToY_c and ToUV_c converters. */
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
/* Packed BGR24 to 8-bit luma using the BT.601 coefficients RY/GY/BY
 * (15-bit fixed point). 33<<(RGB2YUV_SHIFT-1) == 16.5<<RGB2YUV_SHIFT:
 * rounds to nearest and adds the limited-range luma offset of 16.
 * NOTE(review): the b/g/r byte loads (presumably src[3*i+0..2] in
 * B,G,R order as in bgr24ToUV_c below) are missing from this extract. */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1362 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1363 const uint8_t *src2, int width, uint32_t *unused)
1366 for (i=0; i<width; i++) {
1367 int b= src1[3*i + 0];
1368 int g= src1[3*i + 1];
1369 int r= src1[3*i + 2];
1371 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1372 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1374 assert(src1 == src2);
1377 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1378 const uint8_t *src2, int width, uint32_t *unused)
1381 for (i=0; i<width; i++) {
1382 int b= src1[6*i + 0] + src1[6*i + 3];
1383 int g= src1[6*i + 1] + src1[6*i + 4];
1384 int r= src1[6*i + 2] + src1[6*i + 5];
1386 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1387 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1389 assert(src1 == src2);
/* Packed RGB24 to 8-bit luma — same math as bgr24ToY_c, byte order
 * R,G,B instead of B,G,R.
 * NOTE(review): the r/g/b byte loads (presumably src[3*i+0..2] in
 * R,G,B order as in rgb24ToUV_c below) are missing from this extract. */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1405 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1406 const uint8_t *src2, int width, uint32_t *unused)
1410 for (i=0; i<width; i++) {
1411 int r= src1[3*i + 0];
1412 int g= src1[3*i + 1];
1413 int b= src1[3*i + 2];
1415 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1416 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1420 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1421 const uint8_t *src2, int width, uint32_t *unused)
1425 for (i=0; i<width; i++) {
1426 int r= src1[6*i + 0] + src1[6*i + 3];
1427 int g= src1[6*i + 1] + src1[6*i + 4];
1428 int b= src1[6*i + 2] + src1[6*i + 5];
1430 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1431 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1436 // bilinear / bicubic scaling
/* Generic horizontal scaler: for each output sample, convolve
 * filterSize consecutive 8-bit input samples starting at filterPos[i]
 * with the i-th row of the 16-bit filter table, then scale to the
 * intermediate 9.7 format and clip to the int16 maximum.
 * NOTE(review): the signature tail (srcW/xInc/filterSize parameters)
 * and the `val` accumulator initialisation are missing from this
 * extract — confirm against the full source. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
1458 static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1461 for (i = 0; i < width; i++) {
1462 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1463 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma to limited-range (MPEG) in place,
 * in the 16-bit intermediate domain: scale by 1799/2048 and add the
 * limited-range offset. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i];
        int v = dstV[i];
        dstU[i] = (u * 1799 + 4081085) >> 11;
        dstV[i] = (v * 1799 + 4081085) >> 11;
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG) in place, in
 * the 16-bit intermediate domain (8-bit values scaled by 128: 16<<7
 * maps to 0, 235<<7 maps to 255<<7). Input is clamped to 30189 so the
 * 32-bit product cannot overflow. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int y = dst[i] > 30189 ? 30189 : dst[i]; /* overflow clamp */
        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG) in place, in
 * the 16-bit intermediate domain: 0 maps to 16<<7, 255<<7 to 235<<7. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i = 0;
    while (i < width) {
        dst[i] = (dst[i] * 14071 + 33561947) >> 14;
        i++;
    }
}
1487 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1488 const uint8_t *src, int srcW, int xInc)
1491 unsigned int xpos=0;
1492 for (i=0;i<dstWidth;i++) {
1493 register unsigned int xx=xpos>>16;
1494 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1495 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1500 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) line into the
 * 16-bit intermediate buffer: first convert the input line to 8-bit
 * Y via the context's lumToYV12/alpToYV12 hook (into formatConvBuffer),
 * then run either the generic hScale or the fast bilinear path, and
 * finally apply the luma range conversion if configured.
 * NOTE(review): this extract is missing the guards that upstream wraps
 * around the toYV12 call and the final convertRange call (both hooks
 * may be NULL) — confirm against the full source. */
static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc,
                           const int16_t *hLumFilter,
                           const int16_t *hLumFilterPos, int hLumFilterSize,
                           uint8_t *formatConvBuffer,
                           uint32_t *pal, int isAlpha)
    /* Pick the per-format input converter and the range converter
     * (range conversion is luma-only, never applied to alpha). */
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;   /* scale from the converted line */
    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        convertRange(dst, dstWidth);
1526 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1527 int dstWidth, const uint8_t *src1,
1528 const uint8_t *src2, int srcW, int xInc)
1531 unsigned int xpos=0;
1532 for (i=0;i<dstWidth;i++) {
1533 register unsigned int xx=xpos>>16;
1534 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1535 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1536 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one pair of chroma lines into the 16-bit
 * intermediate buffers: optionally convert the input via the context's
 * chrToYV12 hook (U into formatConvBuffer, V into buf2), then run the
 * generic hScale on both planes or the fast bilinear path, and finally
 * apply the chroma range conversion if configured.
 * NOTE(review): upstream redirects src2 to buf2 after the conversion
 * (`src2 = buf2;`) — that line is missing from this extract; confirm. */
static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                           const uint8_t *src1, const uint8_t *src2,
                           int srcW, int xInc, const int16_t *hChrFilter,
                           const int16_t *hChrFilterPos, int hChrFilterSize,
                           uint8_t *formatConvBuffer, uint32_t *pal)
        /* V goes into a second, 16-byte-aligned area of the buffer. */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
1565 #define DEBUG_SWSCALE_BUFFERS 0
1566 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main C scaling driver for one input slice: horizontally scales the
 * source lines that are needed into per-line ring buffers, then
 * vertically scales/converts buffered lines into the destination.
 * Returns the number of destination lines written for this slice.
 *
 * NOTE(review): this extract is missing several original lines (the
 * declarations of dstY/lastDstY/enough_lines, various closing braces);
 * the comments below describe only what is visible here.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* Load frequently used context fields into locals for readability
     * and to help the compiler keep them in registers. */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* Negate-shift-negate rounds the chroma slice height UP. */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        /* Packed input has a single plane; replicate its stride so the
         * plane-indexed code below works uniformly. */
        srcStride[3]= srcStride[0];
    /* Vertical chroma decimation is implemented by widening the stride. */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
    /* Warn once if any destination stride breaks 8-byte alignment. */
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");
    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        /* firstLumSrcY2: first luma line needed by the LAST output row of
         * the current chroma group, so a whole group is buffered together. */
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        if (!enough_lines) {
            /* Not enough input yet: buffer everything this slice provides. */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY < dstH-2) {
            /* Normal case: point into the ring buffers at the rows the
             * vertical filter for this output line needs. */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                                uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                        chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                   *chrVSrcPtr, *(chrVSrcPtr+1),
                                   alpPixBuf ? *alpSrcPtr : NULL,
                                   dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    /* *0x10001 duplicates the 16-bit coefficient into both
                     * halves of a 32-bit word for the MMX filter tables. */
                    lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                   *chrVSrcPtr, *(chrVSrcPtr+1),
                                   alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                   dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);
        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
                            lumSrcPtr, vLumFilterSize,
                            vChrFilter+chrDstY*vChrFilterSize,
                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize, NULL,
                            dest, uDest, NULL, NULL, dstW, chrDstW);
            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
                    yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
                               lumSrcPtr, vLumFilterSize,
                               vChrFilter+chrDstY*vChrFilterSize,
                               chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                               alpSrcPtr, dest, uDest, vDest, aDest,
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if(flags & SWS_FULL_CHR_H_INT) {
                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, dstW, dstY);
                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, dstW, dstY);
    /* Input had no alpha plane: fill the destination alpha with opaque. */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
    /* Make non-temporal MMX2 stores visible before returning. */
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;
    return dstY - lastDstY;
/**
 * Select the C implementations of all scaling/conversion hooks in the
 * context, based on source/destination pixel formats and flags. The
 * per-arch init functions may later override these with SIMD versions.
 * NOTE(review): this extract is missing several structural lines of the
 * original (else branches, `switch (srcFormat) {` headers, closing
 * braces); the visible assignments are kept verbatim.
 */
static void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat,
                     dstFormat = c->dstFormat;

    /* --- vertical scaler / output writer selection --- */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        c->yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        c->yuv2yuv1 = yuv2yuv1_c;
        c->yuv2yuvX = yuv2yuvX_c;
    /* Packed (RGB-style) output writers. */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        c->yuv2packedX = yuv2rgbX_c_full;
        c->yuv2packed1 = yuv2packed1_c;
        c->yuv2packed2 = yuv2packed2_c;
        c->yuv2packedX = yuv2packedX_c;

    /* --- horizontal scaler selection --- */
    c->hScale = hScale_c;
    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;

    /* --- input chroma converter, keyed on the source format --- */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
    case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
    case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
    case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    /* With horizontal chroma subsampling, use the _half converters that
     * average two input pixels per output chroma sample. */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;

    /* --- input luma and alpha converters --- */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    /* yuy2ToY_c/uyvyToY_c take every other byte, which also serves the
     * 16-bit planar and gray16 formats (byte picked by endianness). */
    case PIX_FMT_YUYV422 :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    /* Y400A stores alpha in the odd bytes — uyvyToY_c extracts those. */
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;

    /* --- range conversion (MPEG <-> JPEG) for non-RGB destinations --- */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;

    /* Gray and 1-bit inputs carry no chroma to scale. */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2016 SwsFunc ff_getSwsFunc(SwsContext *c)
2018 sws_init_swScale_c(c);
2021 ff_sws_init_swScale_mmx(c);
2023 ff_sws_init_swScale_altivec(c);