2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
// Fixed-point RGB->YUV coefficients with RGB2YUV_SHIFT fractional bits.
// Luma (xY) terms are scaled by 219/255 (limited-range Y), chroma (xU/xV)
// by 224/255; the float constants are the BT.601 matrix entries.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
// Per-colorspace float coefficient sets, 9 values per row; one row per
// selectable colorspace (see the trailing comments on each row).
// NOTE(review): the closing "};" of this array is not visible here — the
// paste appears to have dropped lines.
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
// Ordered-dither matrices used when reducing bit depth on packed-RGB output.
// NOTE(review): the closing "};" of each table is not visible in this paste,
// and the later tables reusing the name dither_8x8_220 ("gamma" variants)
// were almost certainly wrapped in #if 0/#endif guards that were dropped —
// as shown they would be duplicate definitions. Confirm against the tree.
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 matrix with a larger amplitude (values 0..6).
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 matrix, values 0..15 (used for 444/12-bit style output).
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 matrix, values 0..31.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 matrix, values 0..72.
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 matrix, values 0..217 (used for 1/2/4-bit output and mono dithering).
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/*
 * Vertical filter pass producing >8-bit planar YUV (9/10/16 bpc).
 * Each output sample accumulates lumFilterSize (resp. chrFilterSize)
 * filter taps and is written as a 16-bit big- or little-endian value
 * clipped to output_bits via the output_pixel() helper macro.
 * NOTE(review): braces and else-branches of output_pixel and the loop
 * bodies are partially missing from this paste — compare with the tree
 * before editing.
 */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
// Chroma: both planes filtered in one loop.
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
// Optional alpha plane, reusing the luma filter.
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
// Instantiates a BE or LE wrapper around the template for a given depth.
263 #define yuv2NBPS(bits, BE_LE, is_be) \
264 static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
265 const int16_t **lumSrc, int lumFilterSize, \
266 const int16_t *chrFilter, const int16_t **chrUSrc, \
267 const int16_t **chrVSrc, \
268 int chrFilterSize, const int16_t **alpSrc, \
269 uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
270 uint16_t *aDest, int dstW, int chrDstW) \
272 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
273 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
275 dest, uDest, vDest, aDest, \
276 dstW, chrDstW, is_be, bits); \
/*
 * Depth/endianness dispatcher: picks the yuv2yuvX<bits><BE|LE>_c variant
 * matching dstFormat (16 bpc, or 9/10 via depth_minus1) using the conv16()
 * helper macro.
 * NOTE(review): the dispatch branches/else lines are partially missing
 * from this paste.
 */
285 static inline void yuv2yuvX16_c(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
286 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
287 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
288 enum PixelFormat dstFormat)
290 #define conv16(bits) \
291 if (isBE(dstFormat)) { \
292 yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
293 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
295 dest, uDest, vDest, aDest, \
298 yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
299 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
301 dest, uDest, vDest, aDest, \
304 if (is16BPS(dstFormat)) {
306 } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
/*
 * Vertical filter pass producing 8-bit planar YUV (+ optional alpha).
 * Accumulators carry 19 fractional bits: each sample is the sum of
 * src[j][i] * filter[j] taps, then >>19 and clipped to 0..255.
 * NOTE(review): accumulator initializations and several braces are
 * missing from this paste.
 */
314 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
315 const int16_t **lumSrc, int lumFilterSize,
316 const int16_t *chrFilter, const int16_t **chrUSrc,
317 const int16_t **chrVSrc,
318 int chrFilterSize, const int16_t **alpSrc,
319 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
320 uint8_t *aDest, int dstW, int chrDstW)
322 //FIXME Optimize (just quickly written not optimized..)
// Luma plane.
324 for (i=0; i<dstW; i++) {
327 for (j=0; j<lumFilterSize; j++)
328 val += lumSrc[j][i] * lumFilter[j];
330 dest[i]= av_clip_uint8(val>>19);
// Chroma planes (both filtered with the same chrFilter).
334 for (i=0; i<chrDstW; i++) {
338 for (j=0; j<chrFilterSize; j++) {
339 u += chrUSrc[j][i] * chrFilter[j];
340 v += chrVSrc[j][i] * chrFilter[j];
343 uDest[i]= av_clip_uint8(u>>19);
344 vDest[i]= av_clip_uint8(v>>19);
// Optional alpha plane, reusing the luma filter.
347 if (CONFIG_SWSCALE_ALPHA && aDest)
348 for (i=0; i<dstW; i++) {
351 for (j=0; j<lumFilterSize; j++)
352 val += alpSrc[j][i] * lumFilter[j];
354 aDest[i]= av_clip_uint8(val>>19);
/*
 * Vertical filter pass producing NV12/NV21: planar 8-bit luma plus one
 * interleaved chroma plane. NV12 stores U,V pairs; NV21 swaps them
 * (the second chroma loop writes v then u).
 * NOTE(review): accumulator initializations, the uDest guard and several
 * braces are missing from this paste.
 */
359 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
360 const int16_t **lumSrc, int lumFilterSize,
361 const int16_t *chrFilter, const int16_t **chrUSrc,
362 const int16_t **chrVSrc,
363 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
364 int dstW, int chrDstW, enum PixelFormat dstFormat)
366 //FIXME Optimize (just quickly written not optimized..)
// Luma plane, 19 fractional bits like yuv2yuvX_c.
368 for (i=0; i<dstW; i++) {
371 for (j=0; j<lumFilterSize; j++)
372 val += lumSrc[j][i] * lumFilter[j];
374 dest[i]= av_clip_uint8(val>>19);
// Interleaved chroma: U,V order for NV12 ...
380 if (dstFormat == PIX_FMT_NV12)
381 for (i=0; i<chrDstW; i++) {
385 for (j=0; j<chrFilterSize; j++) {
386 u += chrUSrc[j][i] * chrFilter[j];
387 v += chrVSrc[j][i] * chrFilter[j];
390 uDest[2*i]= av_clip_uint8(u>>19);
391 uDest[2*i+1]= av_clip_uint8(v>>19);
// ... V,U order otherwise (NV21).
394 for (i=0; i<chrDstW; i++) {
398 for (j=0; j<chrFilterSize; j++) {
399 u += chrUSrc[j][i] * chrFilter[j];
400 v += chrVSrc[j][i] * chrFilter[j];
403 uDest[2*i]= av_clip_uint8(v>>19);
404 uDest[2*i+1]= av_clip_uint8(u>>19);
/*
 * YSCALE_YUV_2_* macro family: per-pixel-pair loop headers that compute
 * Y1/Y2/U/V (and optionally A1/A2) for one output pixel pair, in three
 * flavors — X (full multi-tap vertical filter), 2 (bilinear blend of two
 * lines via yalpha/uvalpha), 1/1B (single line, nearest or averaged
 * chroma) — plus RGB* variants that additionally look up the r/g/b
 * component tables, GRAY16 and MONO variants, and finally
 * YSCALE_YUV_2_ANYRGB_C which switches on c->dstFormat and emits the
 * packed output (RGB48/BGR48, 32-bit with/without alpha, 24-bit, dithered
 * 565/555/444/8/4/1-bit, YUYV/UYVY, GRAY16).
 * NOTE(review): this paste dropped many continuation lines of these
 * macros (closing braces, case labels, `func2(...)` invocations, the
 * `break;` lines); as shown the backslash continuations run macros into
 * each other. Do not edit without diffing against the original tree.
 * No comments are inserted below because nearly every line is part of a
 * backslash-continued macro body.
 */
408 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
409 for (i=0; i<(dstW>>1); i++) {\
415 int av_unused A1, A2;\
416 type av_unused *r, *b, *g;\
419 for (j=0; j<lumFilterSize; j++) {\
420 Y1 += lumSrc[j][i2] * lumFilter[j];\
421 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
423 for (j=0; j<chrFilterSize; j++) {\
424 U += chrUSrc[j][i] * chrFilter[j];\
425 V += chrVSrc[j][i] * chrFilter[j];\
434 for (j=0; j<lumFilterSize; j++) {\
435 A1 += alpSrc[j][i2 ] * lumFilter[j];\
436 A2 += alpSrc[j][i2+1] * lumFilter[j];\
442 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
443 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
444 if ((Y1|Y2|U|V)&256) {\
445 if (Y1>255) Y1=255; \
446 else if (Y1<0)Y1=0; \
447 if (Y2>255) Y2=255; \
448 else if (Y2<0)Y2=0; \
454 if (alpha && ((A1|A2)&256)) {\
455 A1=av_clip_uint8(A1);\
456 A2=av_clip_uint8(A2);\
459 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
460 for (i=0; i<dstW; i++) {\
468 for (j=0; j<lumFilterSize; j++) {\
469 Y += lumSrc[j][i ] * lumFilter[j];\
471 for (j=0; j<chrFilterSize; j++) {\
472 U += chrUSrc[j][i] * chrFilter[j];\
473 V += chrVSrc[j][i] * chrFilter[j];\
480 for (j=0; j<lumFilterSize; j++)\
481 A += alpSrc[j][i ] * lumFilter[j];\
484 A = av_clip_uint8(A);\
487 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
488 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
489 Y-= c->yuv2rgb_y_offset;\
490 Y*= c->yuv2rgb_y_coeff;\
492 R= Y + V*c->yuv2rgb_v2r_coeff;\
493 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
494 B= Y + U*c->yuv2rgb_u2b_coeff;\
495 if ((R|G|B)&(0xC0000000)) {\
496 if (R>=(256<<22)) R=(256<<22)-1; \
498 if (G>=(256<<22)) G=(256<<22)-1; \
500 if (B>=(256<<22)) B=(256<<22)-1; \
504 #define YSCALE_YUV_2_GRAY16_C \
505 for (i=0; i<(dstW>>1); i++) {\
514 for (j=0; j<lumFilterSize; j++) {\
515 Y1 += lumSrc[j][i2] * lumFilter[j];\
516 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
520 if ((Y1|Y2|U|V)&65536) {\
521 if (Y1>65535) Y1=65535; \
522 else if (Y1<0)Y1=0; \
523 if (Y2>65535) Y2=65535; \
524 else if (Y2<0)Y2=0; \
527 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
528 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
529 r = (type *)c->table_rV[V]; \
530 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
531 b = (type *)c->table_bU[U];
533 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
534 for (i=0; i<(dstW>>1); i++) { \
536 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
537 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
538 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
539 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
540 type av_unused *r, *b, *g; \
541 int av_unused A1, A2; \
543 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
544 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
547 #define YSCALE_YUV_2_GRAY16_2_C \
548 for (i=0; i<(dstW>>1); i++) { \
550 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
551 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
553 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
554 YSCALE_YUV_2_PACKED2_C(type,alpha)\
555 r = (type *)c->table_rV[V];\
556 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
557 b = (type *)c->table_bU[U];
559 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
560 for (i=0; i<(dstW>>1); i++) {\
562 int Y1= buf0[i2 ]>>7;\
563 int Y2= buf0[i2+1]>>7;\
564 int U= (ubuf1[i])>>7;\
565 int V= (vbuf1[i])>>7;\
566 type av_unused *r, *b, *g;\
567 int av_unused A1, A2;\
573 #define YSCALE_YUV_2_GRAY16_1_C \
574 for (i=0; i<(dstW>>1); i++) {\
576 int Y1= buf0[i2 ]<<1;\
577 int Y2= buf0[i2+1]<<1;
579 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
580 YSCALE_YUV_2_PACKED1_C(type,alpha)\
581 r = (type *)c->table_rV[V];\
582 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
583 b = (type *)c->table_bU[U];
585 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
586 for (i=0; i<(dstW>>1); i++) {\
588 int Y1= buf0[i2 ]>>7;\
589 int Y2= buf0[i2+1]>>7;\
590 int U= (ubuf0[i] + ubuf1[i])>>8;\
591 int V= (vbuf0[i] + vbuf1[i])>>8;\
592 type av_unused *r, *b, *g;\
593 int av_unused A1, A2;\
599 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
600 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
601 r = (type *)c->table_rV[V];\
602 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
603 b = (type *)c->table_bU[U];
605 #define YSCALE_YUV_2_MONO2_C \
606 const uint8_t * const d128=dither_8x8_220[y&7];\
607 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
608 for (i=0; i<dstW-7; i+=8) {\
610 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
611 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
612 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
613 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
614 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
615 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
616 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
617 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
618 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
622 #define YSCALE_YUV_2_MONOX_C \
623 const uint8_t * const d128=dither_8x8_220[y&7];\
624 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
626 for (i=0; i<dstW-1; i+=2) {\
631 for (j=0; j<lumFilterSize; j++) {\
632 Y1 += lumSrc[j][i] * lumFilter[j];\
633 Y2 += lumSrc[j][i+1] * lumFilter[j];\
643 acc+= acc + g[Y1+d128[(i+0)&7]];\
644 acc+= acc + g[Y2+d128[(i+1)&7]];\
646 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
651 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
652 switch(c->dstFormat) {\
653 case PIX_FMT_RGB48BE:\
654 case PIX_FMT_RGB48LE:\
656 ((uint8_t*)dest)[ 0]= r[Y1];\
657 ((uint8_t*)dest)[ 1]= r[Y1];\
658 ((uint8_t*)dest)[ 2]= g[Y1];\
659 ((uint8_t*)dest)[ 3]= g[Y1];\
660 ((uint8_t*)dest)[ 4]= b[Y1];\
661 ((uint8_t*)dest)[ 5]= b[Y1];\
662 ((uint8_t*)dest)[ 6]= r[Y2];\
663 ((uint8_t*)dest)[ 7]= r[Y2];\
664 ((uint8_t*)dest)[ 8]= g[Y2];\
665 ((uint8_t*)dest)[ 9]= g[Y2];\
666 ((uint8_t*)dest)[10]= b[Y2];\
667 ((uint8_t*)dest)[11]= b[Y2];\
671 case PIX_FMT_BGR48BE:\
672 case PIX_FMT_BGR48LE:\
674 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
675 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
676 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
677 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
678 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
679 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
686 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
687 func(uint32_t,needAlpha)\
688 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
689 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
692 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
694 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
695 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
699 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
700 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
708 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
709 func(uint32_t,needAlpha)\
710 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
711 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
714 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
716 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
717 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
721 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
722 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
729 ((uint8_t*)dest)[0]= r[Y1];\
730 ((uint8_t*)dest)[1]= g[Y1];\
731 ((uint8_t*)dest)[2]= b[Y1];\
732 ((uint8_t*)dest)[3]= r[Y2];\
733 ((uint8_t*)dest)[4]= g[Y2];\
734 ((uint8_t*)dest)[5]= b[Y2];\
740 ((uint8_t*)dest)[0]= b[Y1];\
741 ((uint8_t*)dest)[1]= g[Y1];\
742 ((uint8_t*)dest)[2]= r[Y1];\
743 ((uint8_t*)dest)[3]= b[Y2];\
744 ((uint8_t*)dest)[4]= g[Y2];\
745 ((uint8_t*)dest)[5]= r[Y2];\
749 case PIX_FMT_RGB565BE:\
750 case PIX_FMT_RGB565LE:\
751 case PIX_FMT_BGR565BE:\
752 case PIX_FMT_BGR565LE:\
754 const int dr1= dither_2x2_8[y&1 ][0];\
755 const int dg1= dither_2x2_4[y&1 ][0];\
756 const int db1= dither_2x2_8[(y&1)^1][0];\
757 const int dr2= dither_2x2_8[y&1 ][1];\
758 const int dg2= dither_2x2_4[y&1 ][1];\
759 const int db2= dither_2x2_8[(y&1)^1][1];\
761 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
762 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
766 case PIX_FMT_RGB555BE:\
767 case PIX_FMT_RGB555LE:\
768 case PIX_FMT_BGR555BE:\
769 case PIX_FMT_BGR555LE:\
771 const int dr1= dither_2x2_8[y&1 ][0];\
772 const int dg1= dither_2x2_8[y&1 ][1];\
773 const int db1= dither_2x2_8[(y&1)^1][0];\
774 const int dr2= dither_2x2_8[y&1 ][1];\
775 const int dg2= dither_2x2_8[y&1 ][0];\
776 const int db2= dither_2x2_8[(y&1)^1][1];\
778 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
779 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
783 case PIX_FMT_RGB444BE:\
784 case PIX_FMT_RGB444LE:\
785 case PIX_FMT_BGR444BE:\
786 case PIX_FMT_BGR444LE:\
788 const int dr1= dither_4x4_16[y&3 ][0];\
789 const int dg1= dither_4x4_16[y&3 ][1];\
790 const int db1= dither_4x4_16[(y&3)^3][0];\
791 const int dr2= dither_4x4_16[y&3 ][1];\
792 const int dg2= dither_4x4_16[y&3 ][0];\
793 const int db2= dither_4x4_16[(y&3)^3][1];\
795 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
796 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
803 const uint8_t * const d64= dither_8x8_73[y&7];\
804 const uint8_t * const d32= dither_8x8_32[y&7];\
806 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
807 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
814 const uint8_t * const d64= dither_8x8_73 [y&7];\
815 const uint8_t * const d128=dither_8x8_220[y&7];\
817 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
818 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
822 case PIX_FMT_RGB4_BYTE:\
823 case PIX_FMT_BGR4_BYTE:\
825 const uint8_t * const d64= dither_8x8_73 [y&7];\
826 const uint8_t * const d128=dither_8x8_220[y&7];\
828 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
829 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
833 case PIX_FMT_MONOBLACK:\
834 case PIX_FMT_MONOWHITE:\
839 case PIX_FMT_YUYV422:\
841 ((uint8_t*)dest)[2*i2+0]= Y1;\
842 ((uint8_t*)dest)[2*i2+1]= U;\
843 ((uint8_t*)dest)[2*i2+2]= Y2;\
844 ((uint8_t*)dest)[2*i2+3]= V;\
847 case PIX_FMT_UYVY422:\
849 ((uint8_t*)dest)[2*i2+0]= U;\
850 ((uint8_t*)dest)[2*i2+1]= Y1;\
851 ((uint8_t*)dest)[2*i2+2]= V;\
852 ((uint8_t*)dest)[2*i2+3]= Y2;\
855 case PIX_FMT_GRAY16BE:\
857 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
858 ((uint8_t*)dest)[2*i2+1]= Y1;\
859 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
860 ((uint8_t*)dest)[2*i2+3]= Y2;\
863 case PIX_FMT_GRAY16LE:\
865 ((uint8_t*)dest)[2*i2+0]= Y1;\
866 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
867 ((uint8_t*)dest)[2*i2+2]= Y2;\
868 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/*
 * Multi-tap vertical filter to any packed output format: delegates the
 * whole body to YSCALE_YUV_2_ANYRGB_C with the "X" (full filter) variants.
 * NOTE(review): the opening brace and local declarations are missing from
 * this paste.
 */
873 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
874 const int16_t **lumSrc, int lumFilterSize,
875 const int16_t *chrFilter, const int16_t **chrUSrc,
876 const int16_t **chrVSrc, int chrFilterSize,
877 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
880 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/*
 * Full-chroma-resolution vertical filter to packed RGB/BGR: computes one
 * R,G,B (and optionally A) per output pixel via YSCALE_YUV_2_RGBX_FULL_C
 * instead of sharing chroma between pixel pairs. Walks dest in steps of
 * c->dstFormatBpp/8 bytes per pixel.
 * NOTE(review): the case labels, per-pixel component stores and closing
 * braces are missing from this paste; the two near-identical halves below
 * are presumably the RGB and BGR byte orders.
 */
883 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
884 const int16_t **lumSrc, int lumFilterSize,
885 const int16_t *chrFilter, const int16_t **chrUSrc,
886 const int16_t **chrVSrc, int chrFilterSize,
887 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
890 int step= c->dstFormatBpp/8;
893 switch(c->dstFormat) {
// Alpha-capable path: write real alpha when an alpha plane is present.
901 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
902 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
903 dest[aidx]= needAlpha ? A : 255;
910 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
911 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
919 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
936 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
937 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
938 dest[aidx]= needAlpha ? A : 255;
945 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
946 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
954 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/**
 * Fill a rectangle of a plane with a constant byte value.
 * Writes the first `width` bytes of rows y .. y+height-1 of `plane`,
 * where consecutive rows are `stride` bytes apart.
 */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
        ptr += stride;  // advance to the next row
    }
}
/*
 * RGB48/BGR48 (16 bits per component, 6 bytes per pixel) -> 8-bit Y/U/V.
 * Only the high byte of each component is used (byte offsets 0/2/4 within
 * the pixel; BGR variants read the components in the opposite order).
 * The *_half_c variants sum two horizontally adjacent pixels (12*i byte
 * stride) and divide by shifting one extra bit, i.e. they average for
 * 2:1 horizontal chroma subsampling.
 * NOTE(review): second signature lines, the r/g/b loads in the ToY
 * functions, and closing braces are missing from this paste.
 */
979 static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
983 for (i = 0; i < width; i++) {
988 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
992 static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
993 const uint8_t *src1, const uint8_t *src2,
994 int width, uint32_t *unused)
998 for (i = 0; i < width; i++) {
999 int r = src1[6*i + 0];
1000 int g = src1[6*i + 2];
1001 int b = src1[6*i + 4];
1003 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1004 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1008 static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1009 const uint8_t *src1, const uint8_t *src2,
1010 int width, uint32_t *unused)
1014 for (i = 0; i < width; i++) {
1015 int r= src1[12*i + 0] + src1[12*i + 6];
1016 int g= src1[12*i + 2] + src1[12*i + 8];
1017 int b= src1[12*i + 4] + src1[12*i + 10];
1019 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1020 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
// Same converters for the BGR component order.
1024 static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
1028 for (i = 0; i < width; i++) {
1033 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1037 static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
1038 const uint8_t *src1, const uint8_t *src2,
1039 int width, uint32_t *unused)
1042 for (i = 0; i < width; i++) {
1043 int b = src1[6*i + 0];
1044 int g = src1[6*i + 2];
1045 int r = src1[6*i + 4];
1047 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1048 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1052 static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1053 const uint8_t *src1, const uint8_t *src2,
1054 int width, uint32_t *unused)
1057 for (i = 0; i < width; i++) {
1058 int b= src1[12*i + 0] + src1[12*i + 6];
1059 int g= src1[12*i + 2] + src1[12*i + 8];
1060 int r= src1[12*i + 4] + src1[12*i + 10];
1062 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1063 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/*
 * BGR2Y: generates a packed-RGB/BGR -> 8-bit luma converter for a given
 * pixel word type, per-component shift/mask, pre-shifted coefficients and
 * total shift S. The instantiations below cover 32-, 16- and 15-bit
 * packed formats in both component orders ("321" variants read the pixel
 * with an 8-bit displaced layout, cf. the shp parameter of BGR2UV).
 * NOTE(review): braces and closing lines of the macro bodies are missing
 * from this paste.
 */
1067 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1068 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1069 int width, uint32_t *unused)\
1072 for (i=0; i<width; i++) {\
1073 int b= (((const type*)src)[i]>>shb)&maskb;\
1074 int g= (((const type*)src)[i]>>shg)&maskg;\
1075 int r= (((const type*)src)[i]>>shr)&maskr;\
1077 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1081 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1082 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1083 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1084 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
1085 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1086 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1087 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1088 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
// Extract the alpha channel of packed 32-bit ABGR-layout pixels.
// NOTE(review): the store into dst[] is not visible in this paste.
1090 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1093 for (i=0; i<width; i++) {
/*
 * BGR2UV: generates the matching chroma converters. name##_c handles one
 * pixel per output sample; name##_half_c sums two adjacent pixels using
 * mask arithmetic (summing inside the masked fields, hence the
 * mask|(2*mask) patterns) and shifts one extra bit to average.
 */
1098 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1099 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1100 const uint8_t *src, const uint8_t *dummy, \
1101 int width, uint32_t *unused)\
1104 for (i=0; i<width; i++) {\
1105 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1106 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1107 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1109 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1110 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1113 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1114 const uint8_t *src, const uint8_t *dummy, \
1115 int width, uint32_t *unused)\
1118 for (i=0; i<width; i++) {\
1119 int pix0= ((const type*)src)[2*i+0]>>shp;\
1120 int pix1= ((const type*)src)[2*i+1]>>shp;\
1121 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1122 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1123 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1124 g&= maskg|(2*maskg);\
1128 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1129 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1133 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1134 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1135 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1136 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1137 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1138 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1139 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1140 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/**
 * 8-bit paletted input -> luma: look each index up in pal[] and take the
 * low byte of the palette entry as Y.
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/**
 * 8-bit paletted input -> chroma: U from bits 8-15 and V from bits 16-23
 * of the palette entry (matches palToY_c taking Y from the low byte).
 * src1 and src2 must point at the same line.
 * NOTE(review): interior lines were missing from the paste; the U/V byte
 * positions were reconstructed — confirm against the tree.
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/**
 * Expand 1 bpp monochrome (white is 0) to 8-bit luma: each input bit,
 * MSB first, becomes 0 or 255 with inverted polarity. Only whole groups
 * of 8 pixels are converted (width/8 input bytes).
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];  // invert: set bits mean black in monowhite
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/**
 * Expand 1 bpp monochrome (black is 0) to 8-bit luma: each input bit,
 * MSB first, becomes 0 or 255. Only whole groups of 8 pixels are
 * converted (width/8 input bytes).
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/*
 * Unscaled vertical pass: convert 14-bit intermediate samples
 * ((x+64)>>7 with rounding) straight to 8-bit planar YUV(+A).
 * NOTE(review): the braces and the guard around the chroma loop
 * (presumably `if (uDest)`) are missing from this paste.
 */
1188 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1189 const int16_t *chrUSrc, const int16_t *chrVSrc,
1190 const int16_t *alpSrc,
1191 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1192 uint8_t *aDest, int dstW, int chrDstW)
// Luma: round (add 64) and drop 7 fractional bits, then clip.
1195 for (i=0; i<dstW; i++) {
1196 int val= (lumSrc[i]+64)>>7;
1197 dest[i]= av_clip_uint8(val);
// Chroma planes, same rounding.
1201 for (i=0; i<chrDstW; i++) {
1202 int u=(chrUSrc[i]+64)>>7;
1203 int v=(chrVSrc[i]+64)>>7;
1204 uDest[i]= av_clip_uint8(u);
1205 vDest[i]= av_clip_uint8(v);
// Optional alpha plane.
1208 if (CONFIG_SWSCALE_ALPHA && aDest)
1209 for (i=0; i<dstW; i++) {
1210 int val= (alpSrc[i]+64)>>7;
1211 aDest[i]= av_clip_uint8(val);
// (continuation of a doc comment whose opening "/**" was dropped in this paste)
1216 * vertical bilinear scale YV12 to RGB
// Blends two source lines with weights yalpha/uvalpha (out of 4096) and
// hands the per-format packing to YSCALE_YUV_2_ANYRGB_C with the "2"
// (two-line blend) variants.
1218 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1219 const uint16_t *buf1, const uint16_t *ubuf0,
1220 const uint16_t *ubuf1, const uint16_t *vbuf0,
1221 const uint16_t *vbuf1, const uint16_t *abuf0,
1222 const uint16_t *abuf1, uint8_t *dest, int dstW,
1223 int yalpha, int uvalpha, int y)
// Complementary weights for the buf0 line (weights sum to ~4096).
1225 int yalpha1=4095- yalpha;
1226 int uvalpha1=4095-uvalpha;
1229 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
// (continuation of a doc comment whose opening "/**" was dropped in this paste)
1233 * YV12 to RGB without scaling or interpolating
// Single-line output: uses the "1" variants for nearest chroma when
// uvalpha < 2048, otherwise the "1B" variants that average the two
// chroma buffers.
1235 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1236 const uint16_t *ubuf0, const uint16_t *ubuf1,
1237 const uint16_t *vbuf0, const uint16_t *vbuf1,
1238 const uint16_t *abuf0, uint8_t *dest, int dstW,
1239 int uvalpha, enum PixelFormat dstFormat,
1242 const int yalpha1=0;
1245 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1246 const int yalpha= 4096; //FIXME ...
1248 if (uvalpha < 2048) {
1249 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1251 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1255 //FIXME yuy2* can read up to 7 samples too much
/* Extract the luma plane from packed YUY2 input (Y samples sit at even
 * byte offsets).  NOTE(review): the loop body is on a line not visible
 * here — expected to be dst[i] = src[2*i]; confirm. */
1257 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1261 for (i=0; i<width; i++)
/**
 * Extract the chroma samples from one packed YUYV (Y0 U Y1 V) line.
 * width is the number of output chroma samples (one per 4 input bytes).
 * Only src1 is read; the assert documents that both source pointers
 * must alias the same data.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *in = src1;
    int n = 0;
    while (n < width) {
        dstU[n] = in[1]; /* U is the 2nd byte of each 4-byte group */
        dstV[n] = in[3]; /* V is the 4th */
        in += 4;
        n++;
    }
    assert(src1 == src2);
}
/**
 * Reduce 16-bit little-endian planar chroma to 8 bits by keeping the
 * most significant byte of each sample (offset 1 in little-endian
 * order).  src1/src2 are the separate U and V planes.
 */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[2 * n + 1]; /* MSB of the little-endian sample */
        dstV[n] = src2[2 * n + 1];
    }
}
1286 /* This is almost identical to the previous, and exists only because
1287 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract the luma plane from packed UYVY input (Y samples sit at odd
 * byte offsets).  NOTE(review): the loop body is not visible here —
 * expected to be dst[i] = src[2*i+1]; confirm. */
1288 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1292 for (i=0; i<width; i++)
/**
 * Extract the chroma samples from one packed UYVY (U Y0 V Y1) line.
 * width counts output chroma samples (one per 4 input bytes).  Only
 * src1 is read; the assert documents that both inputs must alias.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *group = src1 + 4 * n;
        dstU[n] = group[0]; /* U leads each group */
        dstV[n] = group[2]; /* V is the 3rd byte */
    }
    assert(src1 == src2);
}
/* Reduce 16-bit big-endian planar chroma to 8 bits.
 * NOTE(review): the loop body is not visible here — expected to keep
 * each sample's most significant byte (offset 0 for big-endian). */
1307 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1308 const uint8_t *src2, int width, uint32_t *unused)
1311 for (i=0; i<width; i++) {
/* De-interleave one packed NV12/NV21-style chroma line: even bytes go
 * to dst1, odd bytes to dst2.  Shared by the nv12/nv21 wrappers below. */
1317 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1318 const uint8_t *src, int width)
1321 for (i = 0; i < width; i++) {
1322 dst1[i] = src[2*i+0];
1323 dst2[i] = src[2*i+1];
/* NV12: chroma is interleaved U first, so U receives the even bytes. */
1327 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1328 const uint8_t *src1, const uint8_t *src2,
1329 int width, uint32_t *unused)
1331 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: chroma is interleaved V first, so the destinations are swapped. */
1334 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1335 const uint8_t *src1, const uint8_t *src2,
1336 int width, uint32_t *unused)
1338 nvXXtoUV_c(dstV, dstU, src1, width);
1341 // FIXME Maybe dither instead.
/* Template generating the {LE,BE}{9,10}ToY_c / ToUV_c input converters
 * for 9- and 10-bit planar YUV.  rfunc reads one 16-bit sample in the
 * given endianness; the value is reduced to 8 bits by discarding the
 * (depth-8) low bits — plain truncation, see the dithering FIXME above.
 * (No comments inside the macro body: the line continuations must stay
 * intact.) */
1342 #define YUV_NBPS(depth, endianness, rfunc) \
1343 static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1344 const uint8_t *_srcU, const uint8_t *_srcV, \
1345 int width, uint32_t *unused) \
1348 const uint16_t *srcU = (const uint16_t*)_srcU; \
1349 const uint16_t *srcV = (const uint16_t*)_srcV; \
1350 for (i = 0; i < width; i++) { \
1351 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
1352 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
1356 static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
1357 int width, uint32_t *unused) \
1360 const uint16_t *srcY = (const uint16_t*)_srcY; \
1361 for (i = 0; i < width; i++) \
1362 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
1365 YUV_NBPS( 9, LE, AV_RL16)
1366 YUV_NBPS( 9, BE, AV_RB16)
1367 YUV_NBPS(10, LE, AV_RL16)
1368 YUV_NBPS(10, BE, AV_RB16)
/* Packed BGR24 -> 8-bit luma using RGB2YUV_SHIFT-bit fixed-point
 * coefficients.  NOTE(review): the b/g/r byte loads are on lines not
 * visible here. */
1370 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1371 int width, uint32_t *unused)
1374 for (i=0; i<width; i++) {
/* 33<<(SHIFT-1) = luma offset 16<<SHIFT plus one half for rounding */
1379 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> U/V, one chroma sample per input pixel.  Only src1 is
 * read; the trailing assert documents that both inputs must alias. */
1383 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1384 const uint8_t *src2, int width, uint32_t *unused)
1387 for (i=0; i<width; i++) {
1388 int b= src1[3*i + 0];
1389 int g= src1[3*i + 1];
1390 int r= src1[3*i + 2];
/* 257<<(SHIFT-1) = chroma bias 128<<SHIFT plus one half for rounding */
1392 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1393 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1395 assert(src1 == src2);
/* Packed BGR24 -> U/V at half horizontal resolution: each channel sums
 * two adjacent pixels, so the final shift is SHIFT+1 (the /2 for the
 * average is folded into the shift). */
1398 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1399 const uint8_t *src2, int width, uint32_t *unused)
1402 for (i=0; i<width; i++) {
1403 int b= src1[6*i + 0] + src1[6*i + 3];
1404 int g= src1[6*i + 1] + src1[6*i + 4];
1405 int r= src1[6*i + 2] + src1[6*i + 5];
/* 257<<SHIFT = doubled chroma bias (128<<(SHIFT+1)) plus rounding */
1407 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1408 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1410 assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma; mirror of bgr24ToY_c with the channel
 * order reversed.  NOTE(review): the r/g/b loads are on lines not
 * visible here. */
1413 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1417 for (i=0; i<width; i++) {
1422 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> U/V; same math as bgr24ToUV_c but with r/g/b read in
 * RGB byte order. */
1426 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1427 const uint8_t *src2, int width, uint32_t *unused)
1431 for (i=0; i<width; i++) {
1432 int r= src1[3*i + 0];
1433 int g= src1[3*i + 1];
1434 int b= src1[3*i + 2];
/* 257<<(SHIFT-1) = chroma bias 128<<SHIFT plus one half for rounding */
1436 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1437 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> U/V at half horizontal resolution: sums adjacent
 * pixel pairs per channel; final shift SHIFT+1 folds in the /2. */
1441 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1442 const uint8_t *src2, int width, uint32_t *unused)
1446 for (i=0; i<width; i++) {
1447 int r= src1[6*i + 0] + src1[6*i + 3];
1448 int g= src1[6*i + 1] + src1[6*i + 4];
1449 int b= src1[6*i + 2] + src1[6*i + 5];
1451 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1452 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1457 // bilinear / bicubic scaling
/**
 * Generic horizontal FIR scaler (bilinear/bicubic/...): output sample i
 * is the dot product of filterSize coefficients with the source samples
 * starting at filterPos[i].  The accumulator is reduced by 7 bits and
 * clipped to the 15-bit intermediate range.
 */
1458 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1460 const int16_t *filter, const int16_t *filterPos,
1464 for (i=0; i<dstW; i++) {
1466 int srcPos= filterPos[i];
1468 for (j=0; j<filterSize; j++) {
1469 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1471 //filter += hFilterSize;
1472 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1477 //FIXME all pal and rgb srcFormats could do this conversion as well
1478 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place remap of horizontally scaled chroma from MPEG (limited) range
 * to JPEG (full) range, in 15-bit intermediate precision.  Inputs are
 * clamped to 30775 so the 12-bit fixed-point multiply cannot overflow
 * (the output then caps at 32767).
 */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int u = dstU[n] < 30775 ? dstU[n] : 30775;
        int v = dstV[n] < 30775 ? dstV[n] : 30775;
        dstU[n] = (u * 4663 - 9289992) >> 12;
        dstV[n] = (v * 4663 - 9289992) >> 12;
    }
}
/* Apply the 11-bit fixed-point JPEG->MPEG chroma range remap to one plane. */
static void chrRangeFromJpeg_plane(uint16_t *p, int width)
{
    int n;
    for (n = 0; n < width; n++)
        p[n] = (p[n] * 1799 + 4081085) >> 11;
}

/**
 * In-place remap of horizontally scaled chroma from JPEG (full) range
 * back to MPEG (limited) range, in 15-bit intermediate precision:
 * x -> (x*1799 + 4081085) >> 11, an affine scale-and-offset.
 */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    chrRangeFromJpeg_plane(dstU, width);
    chrRangeFromJpeg_plane(dstV, width);
}
/**
 * In-place remap of horizontally scaled luma from MPEG (limited) range
 * to JPEG (full) range, in 15-bit intermediate precision.  Samples are
 * clamped to 30189 first so the 14-bit fixed-point multiply cannot
 * overflow (the output then caps at 32767).
 */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int y = dst[n];
        if (y > 30189)
            y = 30189;
        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
/**
 * In-place remap of horizontally scaled luma from JPEG (full) range to
 * MPEG (limited) range: y -> (y*14071 + 33561947) >> 14, i.e. a scale
 * by ~0.859 plus the 2048 (16<<7) black-level offset in 14-bit fixed
 * point.
 */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    uint16_t *p = dst;
    int remaining = width;
    while (remaining > 0) {
        *p = (*p * 14071 + 33561947) >> 14;
        p++;
        remaining--;
    }
}
/**
 * Fast bilinear horizontal luma scaler: walks the source with a 16.16
 * fixed-point position (xpos stepped by xInc) and emits 7-bit
 * fixed-point samples, linearly blending each neighbour pair with the
 * top 7 bits of the fractional position.
 * NOTE(review): the per-iteration advance (xpos += xInc) is on a line
 * not visible here — confirm.
 */
1508 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1509 const uint8_t *src, int srcW, int xInc)
1512 unsigned int xpos=0;
1513 for (i=0;i<dstWidth;i++) {
1514 register unsigned int xx=xpos>>16;
/* top 7 bits of the 16-bit fraction: blend weight in [0,127] */
1515 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1516 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1521 // *** horizontal scale Y line to temp buffer
/**
 * Horizontal scaling front-end for the luma (or alpha) plane of one
 * input line: optionally converts the input to 8-bit YV12-style data
 * via lumToYV12/alpToYV12 into formatConvBuffer, scales with either the
 * generic hScale or the fast bilinear path, then applies the optional
 * luma range conversion (alpha never gets range conversion).
 * NOTE(review): the guards around the toYV12 and convertRange calls are
 * on lines not visible here — presumably if (toYV12) / if (convertRange).
 */
1522 static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1523 const uint8_t *src, int srcW, int xInc,
1524 const int16_t *hLumFilter,
1525 const int16_t *hLumFilterPos, int hLumFilterSize,
1526 uint8_t *formatConvBuffer,
1527 uint32_t *pal, int isAlpha)
/* pick the per-format converter and range function for this plane */
1529 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1530 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* skip to the first relevant byte inside each pixel (e.g. alpha byte) */
1532 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
1535 toYV12(formatConvBuffer, src, srcW, pal);
1536 src= formatConvBuffer;
1539 if (!c->hyscale_fast) {
1540 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1541 } else { // fast bilinear upscale / crap downscale
1542 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1546 convertRange(dst, dstWidth);
/**
 * Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk
 * as the luma version, processing the U and V planes together.  Since
 * xalpha is a 7-bit value, (xalpha^127) == 127-xalpha, so each output
 * is a 7-bit fixed-point blend of two neighbouring samples.
 * NOTE(review): the per-iteration advance (xpos += xInc) is on a line
 * not visible here — confirm.
 */
1549 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1550 int dstWidth, const uint8_t *src1,
1551 const uint8_t *src2, int srcW, int xInc)
1554 unsigned int xpos=0;
1555 for (i=0;i<dstWidth;i++) {
1556 register unsigned int xx=xpos>>16;
1557 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1558 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1559 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Horizontal scaling front-end for one chroma line pair: applies the
 * optional chrToYV12 input conversion (U into formatConvBuffer, V into
 * a second buffer at a 16-byte-aligned offset), scales both planes with
 * the generic or fast bilinear path, then applies the optional chroma
 * range conversion.
 * NOTE(review): the if (c->chrToYV12) guard around the conversion is on
 * lines not visible here — confirm.
 */
1564 static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1565 const uint8_t *src1, const uint8_t *src2,
1566 int srcW, int xInc, const int16_t *hChrFilter,
1567 const int16_t *hChrFilterPos, int hChrFilterSize,
1568 uint8_t *formatConvBuffer, uint32_t *pal)
/* skip to the first relevant byte inside each pixel */
1571 src1 += c->chrSrcOffset;
1572 src2 += c->chrSrcOffset;
1575 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1576 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1577 src1= formatConvBuffer;
1581 if (!c->hcscale_fast) {
1582 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1583 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1584 } else { // fast bilinear upscale / crap downscale
1585 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1588 if (c->chrConvertRange)
1589 c->chrConvertRange(dst1, dst2, dstWidth);
/* Ring-buffer tracing for swScale(): with DEBUG_SWSCALE_BUFFERS == 0
 * the av_log call is constant-false dead code the compiler discards. */
1592 #define DEBUG_SWSCALE_BUFFERS 0
1593 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main C scaling loop: consumes one horizontal slice of the source
 * picture (srcSliceY .. srcSliceY+srcSliceH-1) and produces as many
 * destination lines as the slice allows.  Horizontally scaled lines are
 * kept in per-plane ring buffers (lum/chrU/chrV/alpPixBuf) so the
 * vertical filter can span slice boundaries.
 * @return the number of destination lines written by this call
 */
1595 static int swScale(SwsContext *c, const uint8_t* src[],
1596 int srcStride[], int srcSliceY,
1597 int srcSliceH, uint8_t* dst[], int dstStride[])
1599 /* load a few things into local vars to make the code more readable and faster */
1600 const int srcW= c->srcW;
1601 const int dstW= c->dstW;
1602 const int dstH= c->dstH;
1603 const int chrDstW= c->chrDstW;
1604 const int chrSrcW= c->chrSrcW;
1605 const int lumXInc= c->lumXInc;
1606 const int chrXInc= c->chrXInc;
1607 const enum PixelFormat dstFormat= c->dstFormat;
1608 const int flags= c->flags;
1609 int16_t *vLumFilterPos= c->vLumFilterPos;
1610 int16_t *vChrFilterPos= c->vChrFilterPos;
1611 int16_t *hLumFilterPos= c->hLumFilterPos;
1612 int16_t *hChrFilterPos= c->hChrFilterPos;
1613 int16_t *vLumFilter= c->vLumFilter;
1614 int16_t *vChrFilter= c->vChrFilter;
1615 int16_t *hLumFilter= c->hLumFilter;
1616 int16_t *hChrFilter= c->hChrFilter;
1617 int32_t *lumMmxFilter= c->lumMmxFilter;
1618 int32_t *chrMmxFilter= c->chrMmxFilter;
1619 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1620 const int vLumFilterSize= c->vLumFilterSize;
1621 const int vChrFilterSize= c->vChrFilterSize;
1622 const int hLumFilterSize= c->hLumFilterSize;
1623 const int hChrFilterSize= c->hChrFilterSize;
1624 int16_t **lumPixBuf= c->lumPixBuf;
1625 int16_t **chrUPixBuf= c->chrUPixBuf;
1626 int16_t **chrVPixBuf= c->chrVPixBuf;
1627 int16_t **alpPixBuf= c->alpPixBuf;
1628 const int vLumBufSize= c->vLumBufSize;
1629 const int vChrBufSize= c->vChrBufSize;
1630 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* chroma slice geometry; -((-h)>>sub) rounds the height up (ceil) */
1631 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1632 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1634 uint32_t *pal=c->pal_yuv;
1636 /* vars which will change and which we need to store back in the context */
1638 int lumBufIndex= c->lumBufIndex;
1639 int chrBufIndex= c->chrBufIndex;
1640 int lastInLumBuf= c->lastInLumBuf;
1641 int lastInChrBuf= c->lastInChrBuf;
/* packed input: all planes alias plane 0, so replicate its stride */
1643 if (isPacked(c->srcFormat)) {
1651 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma source lines by widening their stride */
1653 srcStride[1]<<= c->vChrDrop;
1654 srcStride[2]<<= c->vChrDrop;
1656 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1657 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1658 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1659 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1660 srcSliceY, srcSliceH, dstY, dstH);
1661 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1662 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* unaligned output strides defeat aligned SIMD stores; warn once */
1664 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1665 static int warnedAlready=0; //FIXME move this into the context perhaps
1666 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1667 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1668 " ->cannot do aligned memory accesses anymore\n");
1673 /* Note: the user might start scaling in the middle of the picture, so
1674 this will not get executed.  That is not really intended, but it
1675 works currently, so people might do it. */
1676 if (srcSliceY ==0) {
/* per-output-line loop: runs until the slice cannot feed another line */
1686 for (;dstY < dstH; dstY++) {
1687 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1688 const int chrDstY= dstY>>c->chrDstVSubSample;
1689 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1690 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1691 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
/* source-line window the vertical filters need for this output line */
1693 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1694 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1695 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1696 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1697 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1698 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1701 //handle holes (FAST_BILINEAR & weird filters)
1702 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1703 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1704 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1705 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1707 DEBUG_BUFFERS("dstY: %d\n", dstY);
1708 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1709 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1710 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1711 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1713 // Do we have enough lines in this slice to output the dstY line
1714 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input: scale what we have into the ring buffers and bail */
1716 if (!enough_lines) {
1717 lastLumSrcY = srcSliceY + srcSliceH - 1;
1718 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1719 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1720 lastLumSrcY, lastChrSrcY);
1723 //Do horizontal scaling
1724 while(lastInLumBuf < lastLumSrcY) {
1725 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1726 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1728 assert(lumBufIndex < 2*vLumBufSize);
1729 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1730 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1731 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1732 hLumFilter, hLumFilterPos, hLumFilterSize,
/* alpha rides in the same ring-buffer slot index as luma */
1735 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1736 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1737 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1741 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1742 lumBufIndex, lastInLumBuf);
1744 while(lastInChrBuf < lastChrSrcY) {
1745 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1746 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1748 assert(chrBufIndex < 2*vChrBufSize);
1749 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1750 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1751 //FIXME replace parameters through context struct (some at least)
/* gray/mono sources have no real chroma to scale */
1753 if (c->needs_hcscale)
1754 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1755 chrDstW, src1, src2, chrSrcW, chrXInc,
1756 hChrFilter, hChrFilterPos, hChrFilterSize,
1757 formatConvBuffer, pal);
1759 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1760 chrBufIndex, lastInChrBuf);
1762 //wrap buf index around to stay inside the ring buffer
1763 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1764 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1766 break; //we can't output a dstY line so let's try with the next slice
/* keep the MMX-specific dither/filter tables in sync with the buffers */
1769 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* normal case; the last two output lines take the careful path below */
1771 if (dstY < dstH-2) {
/* windows of ring-buffer line pointers for the vertical filters */
1772 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1773 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1774 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1775 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1776 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1777 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1778 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1780 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1781 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1782 dest, uDest, dstW, chrDstW, dstFormat);
1783 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1784 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1785 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1786 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1787 yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1788 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1789 chrVSrcPtr, vChrFilterSize,
1790 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
1791 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1793 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1794 const int16_t *lumBuf = lumSrcPtr[0];
1795 const int16_t *chrUBuf= chrUSrcPtr[0];
1796 const int16_t *chrVBuf= chrVSrcPtr[0];
1797 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1798 c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1799 uDest, vDest, aDest, dstW, chrDstW);
1800 } else { //General YV12
1802 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1803 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1804 chrVSrcPtr, vChrFilterSize,
1805 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1808 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1809 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1810 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1811 int chrAlpha= vChrFilter[2*dstY+1];
1812 if(flags & SWS_FULL_CHR_H_INT) {
1813 yuv2rgbX_c_full(c, //FIXME write a packed1_full function
1814 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1815 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
1816 chrVSrcPtr, vChrFilterSize,
1817 alpSrcPtr, dest, dstW, dstY);
1819 c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1820 *chrVSrcPtr, *(chrVSrcPtr+1),
1821 alpPixBuf ? *alpSrcPtr : NULL,
1822 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1824 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1825 int lumAlpha= vLumFilter[2*dstY+1];
1826 int chrAlpha= vChrFilter[2*dstY+1];
1828 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1830 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1831 if(flags & SWS_FULL_CHR_H_INT) {
1832 yuv2rgbX_c_full(c, //FIXME write a packed2_full function
1833 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1834 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1835 alpSrcPtr, dest, dstW, dstY);
1837 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1838 *chrVSrcPtr, *(chrVSrcPtr+1),
1839 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1840 dest, dstW, lumAlpha, chrAlpha, dstY);
1842 } else { //general RGB
1843 if(flags & SWS_FULL_CHR_H_INT) {
1845 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1846 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1847 alpSrcPtr, dest, dstW, dstY);
1850 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1851 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1852 alpSrcPtr, dest, dstW, dstY);
1856 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
1857 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1858 const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1859 const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1860 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1861 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1862 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1863 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1864 yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
1865 lumSrcPtr, vLumFilterSize,
1866 vChrFilter+chrDstY*vChrFilterSize,
1867 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1868 dest, uDest, dstW, chrDstW, dstFormat);
1869 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
1870 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1871 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1872 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1873 yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1874 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1875 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1878 yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
1879 lumSrcPtr, vLumFilterSize,
1880 vChrFilter+chrDstY*vChrFilterSize,
1881 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1882 alpSrcPtr, dest, uDest, vDest, aDest,
1886 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1887 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1888 if(flags & SWS_FULL_CHR_H_INT) {
1890 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1891 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1892 alpSrcPtr, dest, dstW, dstY);
1895 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1896 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1897 alpSrcPtr, dest, dstW, dstY);
/* source had no alpha: fill the destination alpha plane with opaque */
1903 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1904 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* sfence: make sure all prior (possibly nontemporal) stores are done */
1907 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1908 __asm__ volatile("sfence":::"memory");
1912 /* store changed local vars back in the context */
1914 c->lumBufIndex= lumBufIndex;
1915 c->chrBufIndex= chrBufIndex;
1916 c->lastInLumBuf= lastInLumBuf;
1917 c->lastInChrBuf= lastInChrBuf;
1919 return dstY - lastDstY;
/**
 * Populate the context with the C implementations of all per-format
 * function pointers: vertical output (yuv2*), horizontal scalers, the
 * per-source-format input converters (lum/chr/alpToYV12), source byte
 * offsets and the optional range-conversion hooks.
 */
1922 static void sws_init_swScale_c(SwsContext *c)
1924 enum PixelFormat srcFormat = c->srcFormat;
/* generic C vertical-output and horizontal-scaling entry points */
1926 c->yuv2nv12X = yuv2nv12X_c;
1927 c->yuv2yuv1 = yuv2yuv1_c;
1928 c->yuv2yuvX = yuv2yuvX_c;
1929 c->yuv2packed1 = yuv2packed1_c;
1930 c->yuv2packed2 = yuv2packed2_c;
1931 c->yuv2packedX = yuv2packedX_c;
1933 c->hScale = hScale_c;
/* fast bilinear replaces the generic FIR path when requested */
1935 if (c->flags & SWS_FAST_BILINEAR)
1937 c->hyscale_fast = hyscale_fast_c;
1938 c->hcscale_fast = hcscale_fast_c;
/* chroma input converter, keyed on the source pixel format */
1941 c->chrToYV12 = NULL;
1943 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1944 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1945 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1946 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1950 case PIX_FMT_BGR4_BYTE:
1951 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1952 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1953 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1954 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1955 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1956 case PIX_FMT_YUV420P16BE:
1957 case PIX_FMT_YUV422P16BE:
1958 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1959 case PIX_FMT_YUV420P16LE:
1960 case PIX_FMT_YUV422P16LE:
1961 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB sources: use the _half variants when chroma is horizontally
 * subsampled — they average adjacent pixel pairs while converting */
1963 if (c->chrSrcHSubSample) {
1965 case PIX_FMT_RGB48BE:
1966 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
1967 case PIX_FMT_BGR48BE:
1968 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
1969 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1970 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1971 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1972 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1973 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1974 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1975 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1976 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1977 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1978 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1982 case PIX_FMT_RGB48BE:
1983 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
1984 case PIX_FMT_BGR48BE:
1985 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
1986 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
1987 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1988 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
1989 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1990 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1991 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
1992 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
1993 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
1994 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
1995 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
/* luma (and, below, alpha) input converters */
1999 c->lumToYV12 = NULL;
2000 c->alpToYV12 = NULL;
2001 switch (srcFormat) {
2002 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2003 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2004 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2005 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2006 case PIX_FMT_YUYV422 :
2007 case PIX_FMT_YUV420P16BE:
2008 case PIX_FMT_YUV422P16BE:
2009 case PIX_FMT_YUV444P16BE:
2010 case PIX_FMT_Y400A :
2011 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2012 case PIX_FMT_UYVY422 :
2013 case PIX_FMT_YUV420P16LE:
2014 case PIX_FMT_YUV422P16LE:
2015 case PIX_FMT_YUV444P16LE:
2016 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2017 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2018 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
2019 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
2020 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2021 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
2022 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
2026 case PIX_FMT_BGR4_BYTE:
2027 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2028 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2029 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2030 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2031 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2032 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2033 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2034 case PIX_FMT_RGB48BE:
2035 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
2036 case PIX_FMT_BGR48BE:
2037 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
/* alpha extractor for source formats that carry an alpha channel */
2040 switch (srcFormat) {
2041 case PIX_FMT_RGB32 :
2042 case PIX_FMT_RGB32_1:
2043 case PIX_FMT_BGR32 :
2044 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
2045 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
/* byte offset of the first relevant sample within each packed pixel */
2049 switch (srcFormat) {
2050 case PIX_FMT_Y400A :
2051 c->alpSrcOffset = 1;
2053 case PIX_FMT_RGB32 :
2054 case PIX_FMT_BGR32 :
2055 c->alpSrcOffset = 3;
2057 case PIX_FMT_RGB48LE:
2058 case PIX_FMT_BGR48LE:
2059 c->lumSrcOffset = 1;
2060 c->chrSrcOffset = 1;
2061 c->alpSrcOffset = 1;
/* limited<->full range conversion hooks; skipped for RGB output */
2065 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2067 c->lumConvertRange = lumRangeFromJpeg_c;
2068 c->chrConvertRange = chrRangeFromJpeg_c;
2070 c->lumConvertRange = lumRangeToJpeg_c;
2071 c->chrConvertRange = chrRangeToJpeg_c;
/* gray/mono sources carry no real chroma, so hcscale can be skipped */
2075 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2076 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2077 c->needs_hcscale = 1;
2080 SwsFunc ff_getSwsFunc(SwsContext *c)
2082 sws_init_swScale_c(c);
2085 ff_sws_init_swScale_mmx(c);
2087 ff_sws_init_swScale_altivec(c);