2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in fixed point with RGB2YUV_SHIFT
 * fractional bits.  The luma weights (BY/GY/RY) are scaled by 219/255
 * (limited-range Y), the chroma weights by 224/255; the 0.114/0.587/0.299
 * etc. factors are the BT.601 matrix entries.  +0.5 rounds to nearest. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB -> YUV coefficient tables (floating point), one row
 * per colorspace id.  Each 9-entry row appears to hold the three luma
 * weights (G, B, R order) followed by two groups of chroma weights --
 * NOTE(review): confirm the exact column ordering against the code that
 * consumes this table; it is not visible in this view.  The closing "};"
 * of the initializer is also elided here. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither matrices used by the low-depth RGB/BGR output paths.
 * The "_N" suffix is the number of distinct threshold levels; rows are
 * 8 bytes wide so a whole row can be loaded with one aligned 64-bit read.
 * NOTE(review): each table's closing "};" is elided from this view. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 {  1,   3,   1,   3,   1,   3,   1,   3, },
111 {  2,   0,   2,   0,   2,   0,   2,   0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 {  6,   2,   6,   2,   6,   2,   6,   2, },
116 {  0,   4,   0,   4,   0,   4,   0,   4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 {  8,   4,  11,   7,   8,   4,  11,   7, },
121 {  2,  14,   1,  13,   2,  14,   1,  13, },
122 { 10,   6,   9,   5,  10,   6,   9,   5, },
123 {  0,  12,   3,  15,   0,  12,   3,  15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17,   9,  23,  15,  16,   8,  22,  14, },
128 {  5,  29,   3,  27,   4,  28,   2,  26, },
129 { 21,  13,  19,  11,  20,  12,  18,  10, },
130 {  0,  24,   6,  30,   1,  25,   7,  31, },
131 { 16,   8,  22,  14,  17,   9,  23,  15, },
132 {  4,  28,   2,  26,   5,  29,   3,  27, },
133 { 20,  12,  18,  10,  21,  13,  19,  11, },
134 {  1,  25,   7,  31,   0,  24,   6,  30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 {  0,  55,  14,  68,   3,  58,  17,  72, },
139 { 37,  18,  50,  32,  40,  22,  54,  35, },
140 {  9,  64,   5,  59,  13,  67,   8,  63, },
141 { 46,  27,  41,  23,  49,  31,  44,  26, },
142 {  2,  57,  16,  71,   1,  56,  15,  70, },
143 { 39,  21,  52,  34,  38,  19,  51,  33, },
144 { 11,  66,   7,  62,  10,  65,   6,  60, },
145 { 48,  30,  43,  25,  47,  29,  42,  24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117,  62, 158, 103, 113,  58, 155, 100, },
151 { 34, 199,  21, 186,  31, 196,  17, 182, },
152 {144,  89, 131,  76, 141,  86, 127,  72, },
153 {  0, 165,  41, 206,  10, 175,  52, 217, },
154 {110,  55, 151,  96, 120,  65, 162, 107, },
155 { 28, 193,  14, 179,  38, 203,  24, 189, },
156 {138,  83, 124,  69, 148,  93, 134,  79, },
157 {  7, 172,  48, 213,   3, 168,  45, 210, },
/* NOTE(review): the three tables below are also named dither_8x8_220 --
 * presumably alternative gamma-corrected variants originally selected by
 * preprocessor conditionals that are elided from this view; as shown,
 * the redefinitions would not compile.  Verify the guards in the full file. */
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 {  0, 143,  18, 200,   2, 156,  25, 215, },
163 { 78,  28, 125,  64,  89,  36, 138,  74, },
164 { 10, 180,   3, 161,  16, 195,   8, 175, },
165 {109,  51,  93,  38, 121,  60, 105,  47, },
166 {  1, 152,  23, 210,   0, 147,  20, 205, },
167 { 85,  33, 134,  71,  81,  30, 130,  67, },
168 { 14, 190,   6, 171,  12, 185,   5, 166, },
169 {117,  57, 101,  44, 113,  54,  97,  41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 {  0, 124,   8, 193,   0, 140,  12, 213, },
175 { 55,  14, 104,  42,  66,  19, 119,  52, },
176 {  3, 168,   1, 145,   6, 187,   3, 162, },
177 { 86,  31,  70,  21,  99,  39,  82,  28, },
178 {  0, 134,  11, 206,   0, 129,   9, 200, },
179 { 62,  17, 114,  48,  58,  16, 109,  45, },
180 {  5, 181,   2, 157,   4, 175,   1, 151, },
181 { 95,  36,  78,  26,  90,  34,  74,  24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 {  0, 107,   3, 187,   0, 125,   6, 212, },
187 { 39,   7,  86,  28,  49,  11, 102,  36, },
188 {  1, 158,   0, 131,   3, 180,   1, 151, },
189 { 68,  19,  52,  12,  81,  25,  64,  17, },
190 {  0, 119,   5, 203,   0, 113,   4, 195, },
191 { 45,   9,  96,  33,  42,   8,  91,  30, },
192 {  2, 172,   1, 144,   2, 165,   0, 137, },
193 { 77,  23,  60,  15,  72,  21,  56,  14, },
/* Vertical filtering to planar YUV output with more than 8 bits per
 * component.  Applies the luma filter to lumSrc, the chroma filter to
 * chrUSrc/chrVSrc, and (when alpha is enabled and aDest is set) the luma
 * filter to alpSrc, storing clipped 16-bit samples in the requested
 * endianness.  big_endian and output_bits are compile-time constants so
 * each instantiation (see yuv2NBPS below) is fully specialized.
 * NOTE(review): loop-variable declarations, braces and the big_endian
 * if/else around output_pixel are elided from this view. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199                       int lumFilterSize, const int16_t *chrFilter,
200                       const int16_t **chrUSrc, const int16_t **chrVSrc,
201                       int chrFilterSize, const int16_t **alpSrc,
202                       uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203                       uint16_t *aDest, int dstW, int chrDstW,
204                       int big_endian, int output_bits)
206     //FIXME Optimize (just quickly written not optimized..)
/* filter coefficients are 12-bit, sources 15-bit -> 27-bit sums; shifting
 * by (11 + 16 - output_bits) leaves output_bits significant bits */
208     int shift = 11 + 16 - output_bits;
/* store one filtered sample, clipped to the output depth, in the
 * endianness selected by the (compile-time) big_endian flag */
210 #define output_pixel(pos, val) \
212         if (output_bits == 16) { \
213             AV_WB16(pos, av_clip_uint16(val >> shift)); \
215             AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218         if (output_bits == 16) { \
219             AV_WL16(pos, av_clip_uint16(val >> shift)); \
221             AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224     for (i = 0; i < dstW; i++) {
225         int val = 1 << (26-output_bits); /* rounding bias */
228         for (j = 0; j < lumFilterSize; j++)
229             val += lumSrc[j][i] * lumFilter[j];
231         output_pixel(&dest[i], val);
235         for (i = 0; i < chrDstW; i++) {
236             int u = 1 << (26-output_bits);
237             int v = 1 << (26-output_bits);
240             for (j = 0; j < chrFilterSize; j++) {
241                 u += chrUSrc[j][i] * chrFilter[j];
242                 v += chrVSrc[j][i] * chrFilter[j];
245             output_pixel(&uDest[i], u);
246             output_pixel(&vDest[i], v);
250     if (CONFIG_SWSCALE_ALPHA && aDest) {
251         for (i = 0; i < dstW; i++) {
252             int val = 1 << (26-output_bits);
255             for (j = 0; j < lumFilterSize; j++)
256                 val += alpSrc[j][i] * lumFilter[j];
258             output_pixel(&aDest[i], val);
/* Expands to a yuv2yuvX<bits><BE|LE>_c wrapper: casts the byte pointers to
 * uint16_t and calls yuv2yuvX16_c_template with constant endianness and
 * bit depth so the template is specialized per instantiation.
 * NOTE(review): the yuv2NBPS(...) instantiations themselves are elided
 * from this view. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266                               const int16_t **lumSrc, int lumFilterSize, \
267                               const int16_t *chrFilter, const int16_t **chrUSrc, \
268                               const int16_t **chrVSrc, \
269                               int chrFilterSize, const int16_t **alpSrc, \
270                               uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271                               uint8_t *_aDest, int dstW, int chrDstW) \
273     uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274              *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278                           dest, uDest, vDest, aDest, \
279                           dstW, chrDstW, is_be, bits); \
/* Runtime dispatcher for >8bpc planar output: inspects dstFormat's
 * endianness and depth and forwards to the matching specialized
 * yuv2yuvX<bits><BE|LE>_c function generated by yuv2NBPS above.
 * NOTE(review): the conv16(...) invocations and several branches are
 * elided; the visible depth_minus1 == 8 test selects the 9-bit path. */
288 static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
289                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
290                                 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
291                                 enum PixelFormat dstFormat)
293 #define conv16(bits) \
294     if (isBE(dstFormat)) { \
295         yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
296                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
298                                  dest, uDest, vDest, aDest, \
301         yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
302                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
304                                  dest, uDest, vDest, aDest, \
307     if (is16BPS(dstFormat)) {
309     } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
/* Vertical filtering to 8-bit planar YUV (plus optional alpha plane).
 * Sources are 15-bit, filter coefficients 12-bit; the 27-bit accumulator
 * is shifted down by 19 and clipped to [0,255].
 * NOTE(review): accumulator declarations/initializers and several braces
 * (including the uDest guard around the chroma loop) are elided here. */
317 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
318                               const int16_t **lumSrc, int lumFilterSize,
319                               const int16_t *chrFilter, const int16_t **chrUSrc,
320                               const int16_t **chrVSrc,
321                               int chrFilterSize, const int16_t **alpSrc,
322                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
323                               uint8_t *aDest, int dstW, int chrDstW)
325     //FIXME Optimize (just quickly written not optimized..)
327     for (i=0; i<dstW; i++) {
330         for (j=0; j<lumFilterSize; j++)
331             val += lumSrc[j][i] * lumFilter[j];
333         dest[i]= av_clip_uint8(val>>19);
337         for (i=0; i<chrDstW; i++) {
341             for (j=0; j<chrFilterSize; j++) {
342                 u += chrUSrc[j][i] * chrFilter[j];
343                 v += chrVSrc[j][i] * chrFilter[j];
346             uDest[i]= av_clip_uint8(u>>19);
347             vDest[i]= av_clip_uint8(v>>19);
350     if (CONFIG_SWSCALE_ALPHA && aDest)
351         for (i=0; i<dstW; i++) {
354             for (j=0; j<lumFilterSize; j++)
355                 val += alpSrc[j][i] * lumFilter[j];
357             aDest[i]= av_clip_uint8(val>>19);
/* Vertical filtering to NV12/NV21: planar 8-bit luma plus one interleaved
 * chroma plane.  For NV12 the interleaved order is U,V; on the other
 * (presumably NV21) path it is V,U.  Same 19-bit downshift and uint8 clip
 * as yuv2yuvX_c.
 * NOTE(review): accumulator declarations, the uDest guard, and closing
 * braces are elided from this view. */
362 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
363                                const int16_t **lumSrc, int lumFilterSize,
364                                const int16_t *chrFilter, const int16_t **chrUSrc,
365                                const int16_t **chrVSrc, int chrFilterSize,
366                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
367                                uint8_t *vDest, uint8_t *aDest,
368                                int dstW, int chrDstW)
370     enum PixelFormat dstFormat = c->dstFormat;
372     //FIXME Optimize (just quickly written not optimized..)
374     for (i=0; i<dstW; i++) {
377         for (j=0; j<lumFilterSize; j++)
378             val += lumSrc[j][i] * lumFilter[j];
380         dest[i]= av_clip_uint8(val>>19);
386     if (dstFormat == PIX_FMT_NV12)
387         for (i=0; i<chrDstW; i++) {
391             for (j=0; j<chrFilterSize; j++) {
392                 u += chrUSrc[j][i] * chrFilter[j];
393                 v += chrVSrc[j][i] * chrFilter[j];
396             uDest[2*i]= av_clip_uint8(u>>19);
397             uDest[2*i+1]= av_clip_uint8(v>>19);
400         for (i=0; i<chrDstW; i++) {
404             for (j=0; j<chrFilterSize; j++) {
405                 u += chrUSrc[j][i] * chrFilter[j];
406                 v += chrVSrc[j][i] * chrFilter[j];
409             uDest[2*i]= av_clip_uint8(v>>19);
410             uDest[2*i+1]= av_clip_uint8(u>>19);
/* Pixel-pair loop bodies for the multi-tap ("X") vertical scaler feeding
 * the packed-output code.  PACKEDX_NOCLIP computes two filtered luma
 * samples (Y1/Y2) sharing one chroma pair (U/V), plus optional alpha;
 * PACKEDX adds saturation to [0,255] (cheap pre-test on bit 8 of the OR).
 * The _FULL variants process one pixel per iteration at full chroma
 * resolution, and RGBX_FULL converts to R/G/B with the per-context
 * yuv2rgb coefficients, clipping to 30-bit range.  GRAY16 keeps 16-bit
 * luma; RGBX looks up the r/g/b output tables after clipping.
 * NOTE(review): throughout this region many macro lines (accumulator
 * declarations, closing braces, the alpha guards) are elided. */
414 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
415     for (i=0; i<(dstW>>1); i++) {\
421         int av_unused A1, A2;\
422         type av_unused *r, *b, *g;\
425         for (j=0; j<lumFilterSize; j++) {\
426             Y1 += lumSrc[j][i2] * lumFilter[j];\
427             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
429         for (j=0; j<chrFilterSize; j++) {\
430             U += chrUSrc[j][i] * chrFilter[j];\
431             V += chrVSrc[j][i] * chrFilter[j];\
440             for (j=0; j<lumFilterSize; j++) {\
441                 A1 += alpSrc[j][i2  ] * lumFilter[j];\
442                 A2 += alpSrc[j][i2+1] * lumFilter[j];\
448 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
449     YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
450     if ((Y1|Y2|U|V)&256) {\
451         if (Y1>255)   Y1=255; \
452         else if (Y1<0)Y1=0;   \
453         if (Y2>255)   Y2=255; \
454         else if (Y2<0)Y2=0;   \
460     if (alpha && ((A1|A2)&256)) {\
461         A1=av_clip_uint8(A1);\
462         A2=av_clip_uint8(A2);\
465 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
466     for (i=0; i<dstW; i++) {\
474         for (j=0; j<lumFilterSize; j++) {\
475             Y += lumSrc[j][i     ] * lumFilter[j];\
477         for (j=0; j<chrFilterSize; j++) {\
478             U += chrUSrc[j][i] * chrFilter[j];\
479             V += chrVSrc[j][i] * chrFilter[j];\
486             for (j=0; j<lumFilterSize; j++)\
487                 A += alpSrc[j][i     ] * lumFilter[j];\
490                 A = av_clip_uint8(A);\
493 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
494     YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
495         Y-= c->yuv2rgb_y_offset;\
496         Y*= c->yuv2rgb_y_coeff;\
498         R= Y + V*c->yuv2rgb_v2r_coeff;\
499         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
500         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
501         if ((R|G|B)&(0xC0000000)) {\
502             if (R>=(256<<22))   R=(256<<22)-1; \
504             if (G>=(256<<22))   G=(256<<22)-1; \
506             if (B>=(256<<22))   B=(256<<22)-1; \
510 #define YSCALE_YUV_2_GRAY16_C \
511     for (i=0; i<(dstW>>1); i++) {\
520         for (j=0; j<lumFilterSize; j++) {\
521             Y1 += lumSrc[j][i2] * lumFilter[j];\
522             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
526         if ((Y1|Y2|U|V)&65536) {\
527             if (Y1>65535)   Y1=65535; \
528             else if (Y1<0)Y1=0;   \
529             if (Y2>65535)   Y2=65535; \
530             else if (Y2<0)Y2=0;   \
533 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
534     YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
535     r = (type *)c->table_rV[V];   \
536     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
537     b = (type *)c->table_bU[U];
/* Pixel-pair loop bodies for the 2-tap ("2") bilinear vertical scaler and
 * the 1-input ("1"/"1B") unscaled paths.  PACKED2 blends buf0/buf1 with
 * yalpha (luma) and uvalpha (chroma) weights, >>19 to 8 bits.  PACKED1
 * reads buf0 only (>>7); PACKED1B additionally averages the two chroma
 * buffers (>>8).  GRAY16_2/_1 are the 16-bit-luma counterparts, and the
 * RGB* wrappers add the r/g/b table lookups.
 * NOTE(review): A1/A2 alpha-blend guards and closing braces are elided. */
539 #define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
540     for (i=0; i<(dstW>>1); i++) { \
542         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
543         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
544         int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
545         int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
546         type av_unused *r, *b, *g;                                    \
547         int av_unused A1, A2;                                         \
549             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
550             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
553 #define YSCALE_YUV_2_GRAY16_2_C   \
554     for (i=0; i<(dstW>>1); i++) { \
556         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
557         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
559 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
560     YSCALE_YUV_2_PACKED2_C(type,alpha)\
561     r = (type *)c->table_rV[V];\
562     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
563     b = (type *)c->table_bU[U];
565 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
566     for (i=0; i<(dstW>>1); i++) {\
568         int Y1= buf0[i2  ]>>7;\
569         int Y2= buf0[i2+1]>>7;\
570         int U= (ubuf1[i])>>7;\
571         int V= (vbuf1[i])>>7;\
572         type av_unused *r, *b, *g;\
573         int av_unused A1, A2;\
579 #define YSCALE_YUV_2_GRAY16_1_C \
580     for (i=0; i<(dstW>>1); i++) {\
582         int Y1= buf0[i2  ]<<1;\
583         int Y2= buf0[i2+1]<<1;
585 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
586     YSCALE_YUV_2_PACKED1_C(type,alpha)\
587     r = (type *)c->table_rV[V];\
588     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
589     b = (type *)c->table_bU[U];
591 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
592     for (i=0; i<(dstW>>1); i++) {\
594         int Y1= buf0[i2  ]>>7;\
595         int Y2= buf0[i2+1]>>7;\
596         int U= (ubuf0[i] + ubuf1[i])>>8;\
597         int V= (vbuf0[i] + vbuf1[i])>>8;\
598         type av_unused *r, *b, *g;\
599         int av_unused A1, A2;\
605 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
606     YSCALE_YUV_2_PACKED1B_C(type,alpha)\
607     r = (type *)c->table_rV[V];\
608     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
609     b = (type *)c->table_bU[U];
/* 1-bit monochrome output: each iteration packs 8 (MONO2, bilinear input)
 * or 2 (MONOX, multi-tap input) dithered luma samples into an accumulator
 * of bits via "acc += acc + bit"; the byte is inverted for MONOWHITE.
 * g points at the gray ramp in the gamma/dither lookup table.
 * NOTE(review): MONOX's accumulator setup, clipping and flush-every-8-bits
 * logic are elided from this view. */
611 #define YSCALE_YUV_2_MONO2_C \
612     const uint8_t * const d128=dither_8x8_220[y&7];\
613     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
614     for (i=0; i<dstW-7; i+=8) {\
616         acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
617         acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
618         acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
619         acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
620         acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
621         acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
622         acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
623         acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
624         ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
628 #define YSCALE_YUV_2_MONOX_C \
629     const uint8_t * const d128=dither_8x8_220[y&7];\
630     uint8_t *g= c->table_gU[128] + c->table_gV[128];\
632     for (i=0; i<dstW-1; i+=2) {\
637         for (j=0; j<lumFilterSize; j++) {\
638             Y1 += lumSrc[j][i] * lumFilter[j];\
639             Y2 += lumSrc[j][i+1] * lumFilter[j];\
649         acc+= acc + g[Y1+d128[(i+0)&7]];\
650         acc+= acc + g[Y2+d128[(i+1)&7]];\
652             ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Master output dispatcher: switches on c->dstFormat and expands the
 * supplied per-pixel-pair macro (func = RGB path with table lookups,
 * func2 = raw YUV path, func_g16 = 16-bit gray, func_monoblack = 1-bit)
 * inside the appropriate store code for each packed output format.
 * 16/15/12-bit RGB paths apply 2x2 or 4x4 ordered dither; 8/4/1-bit
 * paths use the 8x8 matrices declared above.
 * NOTE(review): case labels, loop-variable declarations and closing
 * braces for several formats are elided from this view. */
657 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
658     switch(c->dstFormat) {\
659     case PIX_FMT_RGB48BE:\
660     case PIX_FMT_RGB48LE:\
662             ((uint8_t*)dest)[ 0]= r[Y1];\
663             ((uint8_t*)dest)[ 1]= r[Y1];\
664             ((uint8_t*)dest)[ 2]= g[Y1];\
665             ((uint8_t*)dest)[ 3]= g[Y1];\
666             ((uint8_t*)dest)[ 4]= b[Y1];\
667             ((uint8_t*)dest)[ 5]= b[Y1];\
668             ((uint8_t*)dest)[ 6]= r[Y2];\
669             ((uint8_t*)dest)[ 7]= r[Y2];\
670             ((uint8_t*)dest)[ 8]= g[Y2];\
671             ((uint8_t*)dest)[ 9]= g[Y2];\
672             ((uint8_t*)dest)[10]= b[Y2];\
673             ((uint8_t*)dest)[11]= b[Y2];\
677     case PIX_FMT_BGR48BE:\
678     case PIX_FMT_BGR48LE:\
680             ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
681             ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
682             ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
683             ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
684             ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
685             ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
692                 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
693                 func(uint32_t,needAlpha)\
694                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
695                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
698                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
700                         ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
701                         ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
705                         ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
706                         ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
714                 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
715                 func(uint32_t,needAlpha)\
716                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
717                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
720                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
722                         ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
723                         ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
727                         ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
728                         ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
735             ((uint8_t*)dest)[0]= r[Y1];\
736             ((uint8_t*)dest)[1]= g[Y1];\
737             ((uint8_t*)dest)[2]= b[Y1];\
738             ((uint8_t*)dest)[3]= r[Y2];\
739             ((uint8_t*)dest)[4]= g[Y2];\
740             ((uint8_t*)dest)[5]= b[Y2];\
746             ((uint8_t*)dest)[0]= b[Y1];\
747             ((uint8_t*)dest)[1]= g[Y1];\
748             ((uint8_t*)dest)[2]= r[Y1];\
749             ((uint8_t*)dest)[3]= b[Y2];\
750             ((uint8_t*)dest)[4]= g[Y2];\
751             ((uint8_t*)dest)[5]= r[Y2];\
755     case PIX_FMT_RGB565BE:\
756     case PIX_FMT_RGB565LE:\
757     case PIX_FMT_BGR565BE:\
758     case PIX_FMT_BGR565LE:\
760             const int dr1= dither_2x2_8[y&1    ][0];\
761             const int dg1= dither_2x2_4[y&1    ][0];\
762             const int db1= dither_2x2_8[(y&1)^1][0];\
763             const int dr2= dither_2x2_8[y&1    ][1];\
764             const int dg2= dither_2x2_4[y&1    ][1];\
765             const int db2= dither_2x2_8[(y&1)^1][1];\
767                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
768                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
772     case PIX_FMT_RGB555BE:\
773     case PIX_FMT_RGB555LE:\
774     case PIX_FMT_BGR555BE:\
775     case PIX_FMT_BGR555LE:\
777             const int dr1= dither_2x2_8[y&1    ][0];\
778             const int dg1= dither_2x2_8[y&1    ][1];\
779             const int db1= dither_2x2_8[(y&1)^1][0];\
780             const int dr2= dither_2x2_8[y&1    ][1];\
781             const int dg2= dither_2x2_8[y&1    ][0];\
782             const int db2= dither_2x2_8[(y&1)^1][1];\
784                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
785                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
789     case PIX_FMT_RGB444BE:\
790     case PIX_FMT_RGB444LE:\
791     case PIX_FMT_BGR444BE:\
792     case PIX_FMT_BGR444LE:\
794             const int dr1= dither_4x4_16[y&3    ][0];\
795             const int dg1= dither_4x4_16[y&3    ][1];\
796             const int db1= dither_4x4_16[(y&3)^3][0];\
797             const int dr2= dither_4x4_16[y&3    ][1];\
798             const int dg2= dither_4x4_16[y&3    ][0];\
799             const int db2= dither_4x4_16[(y&3)^3][1];\
801                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
802                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
809             const uint8_t * const d64= dither_8x8_73[y&7];\
810             const uint8_t * const d32= dither_8x8_32[y&7];\
812                 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
813                 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
820             const uint8_t * const d64= dither_8x8_73 [y&7];\
821             const uint8_t * const d128=dither_8x8_220[y&7];\
823                 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
824                                    + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
828     case PIX_FMT_RGB4_BYTE:\
829     case PIX_FMT_BGR4_BYTE:\
831             const uint8_t * const d64= dither_8x8_73 [y&7];\
832             const uint8_t * const d128=dither_8x8_220[y&7];\
834                 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
835                 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
839     case PIX_FMT_MONOBLACK:\
840     case PIX_FMT_MONOWHITE:\
845     case PIX_FMT_YUYV422:\
847             ((uint8_t*)dest)[2*i2+0]= Y1;\
848             ((uint8_t*)dest)[2*i2+1]= U;\
849             ((uint8_t*)dest)[2*i2+2]= Y2;\
850             ((uint8_t*)dest)[2*i2+3]= V;\
853     case PIX_FMT_UYVY422:\
855             ((uint8_t*)dest)[2*i2+0]= U;\
856             ((uint8_t*)dest)[2*i2+1]= Y1;\
857             ((uint8_t*)dest)[2*i2+2]= V;\
858             ((uint8_t*)dest)[2*i2+3]= Y2;\
861     case PIX_FMT_GRAY16BE:\
863             ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
864             ((uint8_t*)dest)[2*i2+1]= Y1;\
865             ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
866             ((uint8_t*)dest)[2*i2+3]= Y2;\
869     case PIX_FMT_GRAY16LE:\
871             ((uint8_t*)dest)[2*i2+0]= Y1;\
872             ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
873             ((uint8_t*)dest)[2*i2+2]= Y2;\
874             ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/* Multi-tap vertically filtered output to any packed format: expands the
 * dispatcher with the "X" (full filter) pixel loops.  y is the output
 * line number, used for dither-row selection.
 * NOTE(review): local declarations and braces are elided here. */
879 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
880                           const int16_t **lumSrc, int lumFilterSize,
881                           const int16_t *chrFilter, const int16_t **chrUSrc,
882                           const int16_t **chrVSrc, int chrFilterSize,
883                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
886     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution vertically filtered YUV -> packed RGB writer.
 * step is the bytes-per-pixel of the destination; the switch branches
 * (mostly elided in this view) select the per-format store order, each
 * expanding YSCALE_YUV_2_RGBX_FULL_C with a 1<<21 rounding constant and
 * the appropriate alpha policy (computed alpha, forced, or none).
 * NOTE(review): the case labels, aidx setup and store statements between
 * the visible lines are elided. */
889 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
890                                    const int16_t **lumSrc, int lumFilterSize,
891                                    const int16_t *chrFilter, const int16_t **chrUSrc,
892                                    const int16_t **chrVSrc, int chrFilterSize,
893                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
896     int step= c->dstFormatBpp/8;
899     switch(c->dstFormat) {
907             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
908             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
909                 dest[aidx]= needAlpha ? A : 255;
916             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
917                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
925                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
942             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
943             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
944                 dest[aidx]= needAlpha ? A : 255;
951             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
952                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
960                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill `height` rows of `width` bytes with a constant value, starting at
 * row y of a plane with the given stride.
 * NOTE(review): the per-row "ptr += stride;" advance and the closing
 * braces are elided from this view. */
975 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
978     uint8_t *ptr = plane + stride*y;
979     for (i=0; i<height; i++) {
980         memset(ptr, val, width);
/* Generates the 48-bit (16 bits per component) RGB/BGR input converters:
 * ToY (luma), ToUV (chroma at full width) and ToUV_half (chroma averaging
 * horizontal pixel pairs).  rfunc is AV_RL16 or AV_RB16; compA/B/C name
 * the component order in memory, so the same body serves RGB48 and BGR48.
 * Each 16-bit sample is reduced to 8 bits (>>8) before the fixed-point
 * matrix.  The rounding constants: 33<<(SHIFT-1) == 16.5<<SHIFT adds the
 * +16 luma offset plus 0.5 rounding; 257<<(SHIFT-1) == 128.5<<SHIFT adds
 * the +128 chroma offset plus rounding (the _half variant uses one extra
 * shift since r/g/b there hold the SUM of two pixels).
 * NOTE(review): int i declarations and closing braces are elided. */
985 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
986 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
987                        uint8_t *dst, const uint8_t *src, int width, \
991     for (i = 0; i < width; i++) { \
992         int compA = rfunc(&src[i*6+0]) >> 8; \
993         int compB = rfunc(&src[i*6+2]) >> 8; \
994         int compC = rfunc(&src[i*6+4]) >> 8; \
996         dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1000 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
1001                         uint8_t *dstU, uint8_t *dstV, \
1002                         const uint8_t *src1, const uint8_t *src2, \
1003                         int width, uint32_t *unused) \
1006     assert(src1==src2); \
1007     for (i = 0; i < width; i++) { \
1008         int compA = rfunc(&src1[6*i + 0]) >> 8; \
1009         int compB = rfunc(&src1[6*i + 2]) >> 8; \
1010         int compC = rfunc(&src1[6*i + 4]) >> 8; \
1012         dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1013         dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1017 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
1018                           uint8_t *dstU, uint8_t *dstV, \
1019                           const uint8_t *src1, const uint8_t *src2, \
1020                           int width, uint32_t *unused) \
1023     assert(src1==src2); \
1024     for (i = 0; i < width; i++) { \
1025         int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
1026         int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
1027         int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1029         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1030         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
/* instantiate for RGB48 and BGR48, little- and big-endian */
1033 rgb48funcs(LE, AV_RL16, r, g, b);
1034 rgb48funcs(BE, AV_RB16, r, g, b);
1035 rgb48funcs(LE, AV_RL16, b, g, r);
1036 rgb48funcs(BE, AV_RB16, b, g, r);
/* Generates a packed-RGB/BGR -> luma converter: extract r/g/b with the
 * given shifts and masks, apply the pre-shifted fixed-point weights, add
 * the 16.5-equivalent rounding/offset constant, shift down by S.  The
 * weight shifts in each instantiation compensate for components that were
 * left unshifted in the mask extraction (e.g. the <<11 for 565 red).
 * NOTE(review): int i declaration and closing braces are elided. */
1038 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1039 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1040                        int width, uint32_t *unused)\
1043     for (i=0; i<width; i++) {\
1044         int b= (((const type*)src)[i]>>shb)&maskb;\
1045         int g= (((const type*)src)[i]>>shg)&maskg;\
1046         int r= (((const type*)src)[i]>>shr)&maskr;\
1048         dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1052 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1053 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1054 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1055 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1056 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1057 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1058 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1059 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel from packed ABGR / RGBA input into dst.
 * NOTE(review): the per-pixel store statements and closing braces are
 * elided from this view, so the exact byte offset read cannot be
 * confirmed here. */
1061 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1064     for (i=0; i<width; i++) {
1069 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1072     for (i=0; i<width; i++) {
/* Generates packed-RGB/BGR -> chroma converters.  name##_c converts one
 * pixel per output sample; name##_half_c averages horizontal pixel pairs:
 * it sums the two pixels, isolates each component with widened masks
 * (mask|2*mask holds a two-pixel sum without overflow into the neighbour
 * field), and uses one extra downshift for the /2.  shp pre-shifts formats
 * whose components don't start at bit 0 (e.g. the "1" 32-bit variants).
 * 257<<(S-1) / 257<<S encode the +128 chroma offset plus rounding.
 * NOTE(review): int i declarations and closing braces are elided. */
1077 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1078 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1079                        const uint8_t *src, const uint8_t *dummy, \
1080                        int width, uint32_t *unused)\
1083     for (i=0; i<width; i++) {\
1084         int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1085         int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1086         int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1088         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1089         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1092 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1093                             const uint8_t *src, const uint8_t *dummy, \
1094                             int width, uint32_t *unused)\
1097     for (i=0; i<width; i++) {\
1098         int pix0= ((const type*)src)[2*i+0]>>shp;\
1099         int pix1= ((const type*)src)[2*i+1]>>shp;\
1100         int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1101         int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1102         int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1103         g&= maskg|(2*maskg);\
1107         dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1108         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1112 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1113 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1114 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1115 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1116 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1117 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1118 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1119 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 and 1-bit monochrome input readers.  palToY_c looks each index up
 * in the 32-bit palette and keeps the low byte as luma; palToUV_c unpacks
 * chroma from the palette entry (store statements elided in this view).
 * monowhite2Y_c / monoblack2Y_c expand each input bit, MSB first, to a
 * 0/255 luma byte; the two variants presumably differ in the bit
 * inversion applied in the elided lines -- TODO confirm in the full file.
 * NOTE(review): loop-variable declarations, the inner j loops' headers,
 * and closing braces are elided throughout. */
1121 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1124     for (i=0; i<width; i++) {
1127         dst[i]= pal[d] & 0xFF;
1131 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1132                       const uint8_t *src1, const uint8_t *src2,
1133                       int width, uint32_t *pal)
1136     assert(src1 == src2);
1137     for (i=0; i<width; i++) {
1138         int p= pal[src1[i]];
1145 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1146                           int width, uint32_t *unused)
1149     for (i=0; i<width/8; i++) {
1152             dst[8*i+j]= ((d>>(7-j))&1)*255;
1156 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1157                           int width, uint32_t *unused)
1160     for (i=0; i<width/8; i++) {
1163             dst[8*i+j]= ((d>>(7-j))&1)*255;
/* Unscaled vertical pass: one 15-bit source line per plane is rounded
 * ((x+64)>>7) and clipped straight to 8-bit planar output, with optional
 * alpha.  Used when no vertical filtering is needed.
 * NOTE(review): the uDest guard around the chroma loop and closing braces
 * are elided from this view. */
1167 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1168                        const int16_t *chrUSrc, const int16_t *chrVSrc,
1169                        const int16_t *alpSrc,
1170                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1171                        uint8_t *aDest, int dstW, int chrDstW)
1174     for (i=0; i<dstW; i++) {
1175         int val= (lumSrc[i]+64)>>7;
1176         dest[i]= av_clip_uint8(val);
1180         for (i=0; i<chrDstW; i++) {
1181             int u=(chrUSrc[i]+64)>>7;
1182             int v=(chrVSrc[i]+64)>>7;
1183             uDest[i]= av_clip_uint8(u);
1184             vDest[i]= av_clip_uint8(v);
1187     if (CONFIG_SWSCALE_ALPHA && aDest)
1188         for (i=0; i<dstW; i++) {
1189             int val= (alpSrc[i]+64)>>7;
1190             aDest[i]= av_clip_uint8(val);
/**
 * vertical bilinear scale YV12 to RGB: blends two source lines with
 * yalpha/uvalpha weights (their complements are precomputed against the
 * 4096 fixed-point scale) and expands the packed-output dispatcher with
 * the 2-tap pixel loops.
 */
1197 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1198                           const uint16_t *buf1, const uint16_t *ubuf0,
1199                           const uint16_t *ubuf1, const uint16_t *vbuf0,
1200                           const uint16_t *vbuf1, const uint16_t *abuf0,
1201                           const uint16_t *abuf1, uint8_t *dest, int dstW,
1202                           int yalpha, int uvalpha, int y)
1204     int  yalpha1=4095- yalpha;
1205     int uvalpha1=4095-uvalpha;
1208     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
/**
 * YV12 to RGB without scaling or interpolating: reads a single luma line;
 * when uvalpha < 2048 (chroma aligned with this line) it uses ubuf1/vbuf1
 * directly ("1" path), otherwise it averages the two chroma lines
 * ("1B" path).  buf1/yalpha exist only to satisfy the shared macros.
 */
1214 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1215                           const uint16_t *ubuf0, const uint16_t *ubuf1,
1216                           const uint16_t *vbuf0, const uint16_t *vbuf1,
1217                           const uint16_t *abuf0, uint8_t *dest, int dstW,
1218                           int uvalpha, enum PixelFormat dstFormat,
1221     const int yalpha1=0;
1224     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1225     const int yalpha= 4096; //FIXME ...
1227     if (uvalpha < 2048) {
1228         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1230         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1234 //FIXME yuy2* can read up to 7 samples too much
/* Extract the luma bytes (even offsets) from one packed YUYV line.
 * NOTE(review): the loop-body copy statement is outside this view;
 * presumably dst[i] = src[2*i]. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* Extract U (byte offset 1) and V (byte offset 3) from one packed YUYV
 * line.  Both chroma samples come from src1; the assert documents the
 * contract that callers pass the same pointer for src1 and src2. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    assert(src1 == src2);
/* Planar 16-bit little-endian chroma to 8 bit: take the high byte
 * (offset 1) of each LE sample, U from src1 and V from src2. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1];
        dstV[i]= src2[2*i + 1];
/* This is almost identical to the previous, and exists only because
 * yuy2To{Y,UV}(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract the luma bytes (odd offsets) from one packed UYVY line.
 * NOTE(review): the loop-body copy statement is outside this view;
 * presumably dst[i] = src[2*i + 1]. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* Extract U (byte offset 0) and V (byte offset 2) from one packed UYVY
 * line.  As with yuy2ToUV_c, src1 and src2 must be the same pointer. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    assert(src1 == src2);
/* Planar 16-bit big-endian chroma to 8 bit.
 * NOTE(review): the loop-body statements are outside this view;
 * presumably the high byte (offset 0 of each BE sample) is taken,
 * mirroring LEToUV_c — confirm against the full file. */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
1296 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1297 const uint8_t *src, int width)
1300 for (i = 0; i < width; i++) {
1301 dst1[i] = src[2*i+0];
1302 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is ordered U,V — deinterleave straight into
 * dstU/dstV.  src2 and the palette are unused for this format. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is ordered V,U — same helper as NV12 with
 * the destination planes swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
// FIXME Maybe dither instead.
/* Generate 9/10-bit planar YUV -> 8-bit input converters.  'rfunc' is
 * the endian-aware 16-bit load (AV_RL16/AV_RB16); the extra precision
 * is simply truncated away by >>(depth-8).  Comments cannot go inside
 * the macro body without breaking the line continuations. */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \

/* Instantiate the four converters used by sws_init_swScale_c(). */
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
/* Packed BGR24 -> 8-bit luma, fixed point with RGB2YUV_SHIFT fractional
 * bits.  The constant 33<<(RGB2YUV_SHIFT-1) equals 16.5<<RGB2YUV_SHIFT:
 * the +16 limited-range luma offset plus 0.5 for rounding.
 * NOTE(review): the b/g/r byte loads are outside this view; presumably
 * b,g,r are read from offsets 0,1,2 of each 3-byte pixel. */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> 8-bit chroma.  257<<(RGB2YUV_SHIFT-1) equals
 * 128.5<<RGB2YUV_SHIFT: the +128 chroma bias plus 0.5 for rounding.
 * src1 and src2 must be the same pointer (see assert). */
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
    assert(src1 == src2);
/* Packed BGR24 -> chroma with 2:1 horizontal subsampling: each output
 * sample is computed from the sum of two adjacent pixels, hence the
 * doubled bias (257<<RGB2YUV_SHIFT) and the extra >>1 in the shift. */
static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
    assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma; identical arithmetic to bgr24ToY_c but
 * with the r/g/b byte order of RGB24.
 * NOTE(review): the r/g/b byte loads are outside this view. */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> 8-bit chroma; mirror of bgr24ToUV_c with r and b
 * swapped in the byte layout. */
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> chroma with 2:1 horizontal subsampling; mirror of
 * bgr24ToUV_half_c with r and b swapped in the byte layout. */
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1436 // bilinear / bicubic scaling
/* Generic horizontal FIR scaler: for each output sample i, apply
 * filterSize taps starting at source position filterPos[i] and store
 * the result as 15-bit intermediate (8-bit input * 12-bit coefficients,
 * then >>7), clipped at 2^15-1 because bicubic filters can overshoot.
 * NOTE(review): the parameter lines declaring srcW/xInc/filterSize and
 * the accumulator initialisation are outside this view. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1456 //FIXME all pal and rgb srcFormats could do this convertion as well
1457 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) chroma, in
 * place, on 15-bit intermediate samples (8-bit value << 7).  The input
 * is clamped at 30775 so the scaled result stays within 15 bits; the
 * constants implement (x - 128<<7) * 255/224 + 128<<7 in fixed point. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int u = dstU[n] > 30775 ? 30775 : dstU[n];
        int v = dstV[n] > 30775 ? 30775 : dstV[n];
        dstU[n] = (u*4663 - 9289992)>>12; //-264
        dstV[n] = (v*4663 - 9289992)>>12; //-264
    }
}
/* Compress full-range (JPEG) chroma to limited range (MPEG), in place,
 * on 15-bit intermediate samples: (x - 128<<7) * 224/255 + 128<<7 in
 * fixed point.  No clamp is needed since the output range is narrower. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int n = 0;
    while (n < width) {
        int u = dstU[n];
        int v = dstV[n];
        dstU[n] = (u*1799 + 4081085)>>11; //1469
        dstV[n] = (v*1799 + 4081085)>>11; //1469
        n++;
    }
}
/* Expand limited-range (MPEG) luma to full range (JPEG), in place, on
 * 15-bit intermediate samples: (x - 16<<7) * 255/219 in fixed point.
 * Input is clamped at 30189 so the result stays within 15 bits. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int y = dst[n] < 30189 ? dst[n] : 30189;
        dst[n] = (y*19077 - 39057361)>>14;
    }
}
/* Compress full-range (JPEG) luma to limited range (MPEG), in place,
 * on 15-bit intermediate samples: x * 219/255 + 16<<7 in fixed point.
 * The output range is narrower, so no clamp is required. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int n = 0;
    while (n < width) {
        dst[n] = (dst[n]*14071 + 33561947)>>14; //1469
        n++;
    }
}
/* Fast bilinear horizontal luma scale.  xpos is a 16.16 fixed-point
 * source position advanced by xInc per output sample; xalpha is the
 * top 7 fractional bits used as interpolation weight.  Output is the
 * usual 15-bit intermediate format (src<<7 plus weighted delta).
 * Note: reads src[xx+1], i.e. one byte past the last sample position.
 * NOTE(review): the 'xpos += xInc' loop update is outside this view. */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1500 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) input line into
 * the 15-bit intermediate buffer 'dst'.  If the source format needs a
 * conversion to 8-bit grey first, it goes through formatConvBuffer via
 * the per-format toYV12 callback; range conversion (MPEG<->JPEG) is
 * applied afterwards for luma only.
 * NOTE(review): the guards around the toYV12 and convertRange calls
 * (NULL checks) are outside this view — confirm in the full file. */
static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc,
                           const int16_t *hLumFilter,
                           const int16_t *hLumFilterPos, int hLumFilterSize,
                           uint8_t *formatConvBuffer,
                           uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
    convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale, processing the U (src1) and V
 * (src2) planes in one pass.  xalpha is in [0,127], so xalpha^127 is
 * exactly 127-xalpha, i.e. the complementary weight — the sum of both
 * weights is 127, giving the same <<7 scale as the luma path.
 * NOTE(review): the 'xpos += xInc' loop update is outside this view. */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one pair of chroma lines into the 15-bit
 * intermediate buffers dst1 (U) and dst2 (V).  Formats that need it are
 * first converted to planar 8-bit chroma via c->chrToYV12, using
 * formatConvBuffer for U and a 16-byte-aligned second half (buf2) for V.
 * NOTE(review): the guard around the chrToYV12 conversion and the
 * 'src2 = buf2' assignment are outside this view. */
static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                           const uint8_t *src1, const uint8_t *src2,
                           int srcW, int xInc, const int16_t *hChrFilter,
                           const int16_t *hChrFilterPos, int hChrFilterSize,
                           uint8_t *formatConvBuffer, uint32_t *pal)
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    /* chroma range conversion is optional, unlike the luma path */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions for the context's destination format:
 * planar writers (yuv2yuv1/yuv2yuvX) by bit depth and endianness, and
 * packed writers (yuv2packed{1,2,X}) unless full-chroma interpolation
 * is requested, which only has the generic X variant.
 * NOTE(review): several closing braces/else lines of this selection
 * ladder are outside this view. */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
    /* 8-bit planar: the only depth with an unscaled (yuv2yuv1) path */
    *yuv2yuv1 = yuv2yuv1_c;
    *yuv2yuvX = yuv2yuvX_c;
    if(c->flags & SWS_FULL_CHR_H_INT) {
        *yuv2packedX = yuv2rgbX_c_full;
        *yuv2packed1 = yuv2packed1_c;
        *yuv2packed2 = yuv2packed2_c;
        *yuv2packedX = yuv2packedX_c;
/* Compile-time switch for very verbose ring-buffer tracing in swScale().
 * The do/while(0) wrapper makes DEBUG_BUFFERS() safe as a single
 * statement (e.g. as an unbraced if/else body); the bare-if form it
 * replaces could capture a following 'else'. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) do { if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
/* Scale one horizontal slice of the source picture.  Input lines are
 * horizontally scaled into per-line ring buffers (lumPixBuf/chrUPixBuf/
 * chrVPixBuf/alpPixBuf, 15-bit intermediate); once enough lines are
 * buffered for a destination line, the vertical filter + output writer
 * is applied.  Returns the number of destination lines produced.
 * NOTE(review): this view of the function is incomplete — several
 * declarations (e.g. dstY, lastDstY, enough_lines), closing braces and
 * slice-setup statements are not visible here. */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* Load frequently used context fields into locals: clearer and lets
     * the compiler keep them in registers. */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* round the chroma slice height up: a slice may end mid chroma line */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;
    /* packed input lives in plane 0 only; replicate its stride so the
     * per-plane arithmetic below works uniformly */
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
    /* Note: the user might start scaling in the middle of the picture,
     * in which case this reset will not run.  Not really intended, but
     * it works, so people may rely on it. */
    if (srcSliceY ==0) {
    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        if (!enough_lines) {
            /* buffer what we have; the remaining lines come with the next slice */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            /* alpha shares the luma ring-buffer index and filters */
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY >= dstH-2) {
            /* For the last two output lines fall back to the C output
             * functions: the MMX variants would write past this array's
             * tail. */
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                             uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                    vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                    chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
            assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
            assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
            if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                int chrAlpha= vChrFilter[2*dstY+1];
                yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                            *chrVSrcPtr, *(chrVSrcPtr+1),
                            alpPixBuf ? *alpSrcPtr : NULL,
                            dest, dstW, chrAlpha, dstFormat, flags, dstY);
            } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                int lumAlpha= vLumFilter[2*dstY+1];
                int chrAlpha= vChrFilter[2*dstY+1];
                lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                            *chrVSrcPtr, *(chrVSrcPtr+1),
                            alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                            dest, dstW, lumAlpha, chrAlpha, dstY);
            } else { //general RGB
                vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                alpSrcPtr, dest, dstW, dstY);
    /* destination wants an alpha plane but the source has none: fill
     * the produced lines with opaque alpha */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;
    return dstY - lastDstY;
/* Fill the SwsContext function-pointer table with the plain-C
 * implementations: output writers, horizontal scaler, per-source-format
 * input converters (chroma, luma, alpha) and range converters.
 * Architecture-specific init (MMX/Altivec) may override these later in
 * ff_getSwsFunc().
 * NOTE(review): several switch(...) headers and closing braces of this
 * function are outside this view. */
static void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;
    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,
    c->hScale       = hScale_c;
    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;
    /* input chroma -> planar 8-bit converter, by source format */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
    case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
    case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
    case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    /* RGB sources: pick the _half_ variants when chroma is horizontally
     * subsampled, so two pixels are averaged per chroma sample */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c;  break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c;  break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c;  break;
        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c;  break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c;  break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c;  break;
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c;  break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c;  break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c;  break;
        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c;  break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c;  break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c;  break;
    /* input luma (and alpha) -> planar 8-bit converter, by source format */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    /* yuy2ToY_c takes even bytes, uyvyToY_c odd bytes: this also covers
     * the 16-bit grey / 16-bit planar cases per endianness */
    case PIX_FMT_YUYV422  :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A    :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422  :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    /* alpha extraction; uyvyToY_c reads the odd bytes of Y400A */
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    /* MPEG<->JPEG range conversion, applied on the 15-bit intermediate;
     * for RGB destinations the range is handled in the yuv2rgb tables */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        c->lumConvertRange = lumRangeFromJpeg_c;
        c->chrConvertRange = chrRangeFromJpeg_c;
        c->lumConvertRange = lumRangeToJpeg_c;
        c->chrConvertRange = chrRangeToJpeg_c;
    /* grey and 1 bpp formats carry no chroma to scale */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
1996 SwsFunc ff_getSwsFunc(SwsContext *c)
1998 sws_init_swScale_c(c);
2001 ff_sws_init_swScale_mmx(c);
2003 ff_sws_init_swScale_altivec(c);