2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* True when the pixel format is a packed (interleaved) YUV/gray format
 * handled by the packed code paths.
 * NOTE(review): the first list entry and the closing of this macro are on
 * elided lines — confirm the full format list against the complete file. */
72 #define isPacked(x) ( \
74 || (x)==PIX_FMT_YUYV422 \
75 || (x)==PIX_FMT_UYVY422 \
76 || (x)==PIX_FMT_Y400A \
/* Fixed-point RGB->YUV conversion coefficients with RGB2YUV_SHIFT fractional
 * bits. The 219/255 (luma) and 224/255 (chroma) factors map full-range 8-bit
 * RGB into limited-range YCbCr (Y: 16..235, Cb/Cr: 16..240); the base weights
 * (0.299/0.587/0.114, etc.) are the ITU-R BT.601 coefficients. */
80 #define RGB2YUV_SHIFT 15
81 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
82 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
83 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
84 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
85 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
86 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
87 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
88 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
89 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB->YUV coefficient sets per colorspace, indexed by the SWS_CS_* constant.
 * Each row is {GY, BY, RY, GU, BU, RU, GV, BV, RV} as floating-point weights.
 * Several indices deliberately repeat the BT.601 defaults. */
91 static const double rgb2yuv_table[8][9]={
92 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
93 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
94 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
95 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
96 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
97 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
98 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
99 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
104 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
107 more intelligent misalignment avoidance for the horizontal scaler
108 write special vertical cubic upscale version
109 optimize C code (YV12 / minmax)
110 add support for packed pixel YUV input & output
111 add support for Y8 output
112 optimize BGR24 & BGR32
113 add BGR4 output support
114 write special BGR->BGR scaler
/* Ordered-dither (Bayer-style) matrices used when quantizing to low-depth
 * RGB/BGR output. The suffix is the number of distinct dither levels. */
117 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
118 { 1, 3, 1, 3, 1, 3, 1, 3, },
119 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 matrix with 8 levels (used for the 5-bit channels of RGB565/555). */
122 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
123 { 6, 2, 6, 2, 6, 2, 6, 2, },
124 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 matrix with 16 levels (used for 4-bit-per-channel RGB444/BGR444). */
127 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
128 { 8, 4, 11, 7, 8, 4, 11, 7, },
129 { 2, 14, 1, 13, 2, 14, 1, 13, },
130 { 10, 6, 9, 5, 10, 6, 9, 5, },
131 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 matrix with 32 levels (RGB8/BGR8 3-3-2 style output). */
134 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
135 { 17, 9, 23, 15, 16, 8, 22, 14, },
136 { 5, 29, 3, 27, 4, 28, 2, 26, },
137 { 21, 13, 19, 11, 20, 12, 18, 10, },
138 { 0, 24, 6, 30, 1, 25, 7, 31, },
139 { 16, 8, 22, 14, 17, 9, 23, 15, },
140 { 4, 28, 2, 26, 5, 29, 3, 27, },
141 { 20, 12, 18, 10, 21, 13, 19, 11, },
142 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 matrix with 73 levels (RGB4/BGR4 output). */
145 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
146 { 0, 55, 14, 68, 3, 58, 17, 72, },
147 { 37, 18, 50, 32, 40, 22, 54, 35, },
148 { 9, 64, 5, 59, 13, 67, 8, 63, },
149 { 46, 27, 41, 23, 49, 31, 44, 26, },
150 { 2, 57, 16, 71, 1, 56, 15, 70, },
151 { 39, 21, 52, 34, 38, 19, 51, 33, },
152 { 11, 66, 7, 62, 10, 65, 6, 60, },
153 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 matrix with 220 levels (monochrome output).
 * NOTE(review): four tables named dither_8x8_220 follow — presumably
 * mutually exclusive alternatives (the later ones gamma-corrected, per their
 * comments) selected by preprocessor conditionals on elided lines; confirm
 * the #if/#elif guards against the complete file. */
157 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
158 {117, 62, 158, 103, 113, 58, 155, 100, },
159 { 34, 199, 21, 186, 31, 196, 17, 182, },
160 {144, 89, 131, 76, 141, 86, 127, 72, },
161 { 0, 165, 41, 206, 10, 175, 52, 217, },
162 {110, 55, 151, 96, 120, 65, 162, 107, },
163 { 28, 193, 14, 179, 38, 203, 24, 189, },
164 {138, 83, 124, 69, 148, 93, 134, 79, },
165 { 7, 172, 48, 213, 3, 168, 45, 210, },
168 // tries to correct a gamma of 1.5
169 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
170 { 0, 143, 18, 200, 2, 156, 25, 215, },
171 { 78, 28, 125, 64, 89, 36, 138, 74, },
172 { 10, 180, 3, 161, 16, 195, 8, 175, },
173 {109, 51, 93, 38, 121, 60, 105, 47, },
174 { 1, 152, 23, 210, 0, 147, 20, 205, },
175 { 85, 33, 134, 71, 81, 30, 130, 67, },
176 { 14, 190, 6, 171, 12, 185, 5, 166, },
177 {117, 57, 101, 44, 113, 54, 97, 41, },
180 // tries to correct a gamma of 2.0
181 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
182 { 0, 124, 8, 193, 0, 140, 12, 213, },
183 { 55, 14, 104, 42, 66, 19, 119, 52, },
184 { 3, 168, 1, 145, 6, 187, 3, 162, },
185 { 86, 31, 70, 21, 99, 39, 82, 28, },
186 { 0, 134, 11, 206, 0, 129, 9, 200, },
187 { 62, 17, 114, 48, 58, 16, 109, 45, },
188 { 5, 181, 2, 157, 4, 175, 1, 151, },
189 { 95, 36, 78, 26, 90, 34, 74, 24, },
192 // tries to correct a gamma of 2.5
193 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
194 { 0, 107, 3, 187, 0, 125, 6, 212, },
195 { 39, 7, 86, 28, 49, 11, 102, 36, },
196 { 1, 158, 0, 131, 3, 180, 1, 151, },
197 { 68, 19, 52, 12, 81, 25, 64, 17, },
198 { 0, 119, 5, 203, 0, 113, 4, 195, },
199 { 45, 9, 96, 33, 42, 8, 91, 30, },
200 { 2, 172, 1, 144, 2, 165, 0, 137, },
201 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* Vertically filter planar YUV(A) into a >8-bit-per-sample planar output
 * (9/10/16 bit), clipping each sample to output_bits and storing it as a
 * big- or little-endian 16-bit word according to big_endian.
 * Being always_inline with constant big_endian/output_bits, each yuv2NBPS()
 * instantiation compiles down to a specialized version.
 * NOTE(review): loop-variable declarations and some braces fall on elided
 * lines in this view. */
205 static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
206 const int16_t *chrFilter, const int16_t **chrUSrc,
207 const int16_t **chrVSrc, int chrFilterSize,
208 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
209 int dstW, int chrDstW, int big_endian, int output_bits)
211 //FIXME Optimize (just quickly written not optimized..)
213 int shift = 11 + 16 - output_bits;
/* output_pixel(): clip the accumulated value to the target bit depth and
 * store one 16-bit sample, honoring the big_endian flag. */
215 #define output_pixel(pos, val) \
217 if (output_bits == 16) { \
218 AV_WB16(pos, av_clip_uint16(val >> shift)); \
220 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
223 if (output_bits == 16) { \
224 AV_WL16(pos, av_clip_uint16(val >> shift)); \
226 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* luma plane: accumulate lumFilterSize taps per output pixel */
229 for (i = 0; i < dstW; i++) {
230 int val = 1 << (26-output_bits);
233 for (j = 0; j < lumFilterSize; j++)
234 val += lumSrc[j][i] * lumFilter[j];
236 output_pixel(&dest[i], val);
/* chroma planes: U and V filtered together over chrFilterSize taps */
240 for (i = 0; i < chrDstW; i++) {
241 int u = 1 << (26-output_bits);
242 int v = 1 << (26-output_bits);
245 for (j = 0; j < chrFilterSize; j++) {
246 u += chrUSrc[j][i] * chrFilter[j];
247 v += chrVSrc[j][i] * chrFilter[j];
250 output_pixel(&uDest[i], u);
251 output_pixel(&vDest[i], v);
/* optional alpha plane, filtered with the luma filter */
255 if (CONFIG_SWSCALE_ALPHA && aDest) {
256 for (i = 0; i < dstW; i++) {
257 int val = 1 << (26-output_bits);
260 for (j = 0; j < lumFilterSize; j++)
261 val += alpSrc[j][i] * lumFilter[j];
263 output_pixel(&aDest[i], val);
/* Instantiate a concrete yuv2yuvX<bits><BE|LE>_c() entry point that forwards
 * to yuv2yuvX16inC_template() with compile-time-constant endianness and bit
 * depth, so the template specializes per format. */
268 #define yuv2NBPS(bits, BE_LE, is_be) \
269 static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
270 const int16_t **lumSrc, int lumFilterSize, \
271 const int16_t *chrFilter, const int16_t **chrUSrc, \
272 const int16_t **chrVSrc, \
273 int chrFilterSize, const int16_t **alpSrc, \
274 uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
275 uint16_t *aDest, int dstW, int chrDstW) \
277 yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
278 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
280 dest, uDest, vDest, aDest, \
281 dstW, chrDstW, is_be, bits); \
/* Runtime dispatcher for >8-bit planar output: picks the BE or LE
 * specialization matching dstFormat's endianness and bit depth (9/10/16).
 * NOTE(review): the depth-selection if/else chain is partially elided. */
290 static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
291 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
292 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
293 enum PixelFormat dstFormat)
/* conv16(bits): call the endianness-appropriate specialization */
295 #define conv16(bits) \
296 if (isBE(dstFormat)) { \
297 yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
298 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
300 dest, uDest, vDest, aDest, \
303 yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
304 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
306 dest, uDest, vDest, aDest, \
309 if (is16BPS(dstFormat)) {
311 } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
/* Vertically filter into 8-bit planar YUV(A): accumulate the filter taps in
 * a high-precision int and clip with a >>19 (filter coefficients are Q12,
 * samples Q7). NULL uDest/vDest/aDest skip the respective plane(s).
 * NOTE(review): accumulator initializations fall on elided lines. */
319 static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
320 const int16_t *chrFilter, const int16_t **chrUSrc,
321 const int16_t **chrVSrc, int chrFilterSize,
322 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
324 //FIXME Optimize (just quickly written not optimized..)
/* luma */
326 for (i=0; i<dstW; i++) {
329 for (j=0; j<lumFilterSize; j++)
330 val += lumSrc[j][i] * lumFilter[j];
332 dest[i]= av_clip_uint8(val>>19);
/* chroma (U and V together) */
336 for (i=0; i<chrDstW; i++) {
340 for (j=0; j<chrFilterSize; j++) {
341 u += chrUSrc[j][i] * chrFilter[j];
342 v += chrVSrc[j][i] * chrFilter[j];
345 uDest[i]= av_clip_uint8(u>>19);
346 vDest[i]= av_clip_uint8(v>>19);
/* optional alpha, filtered with the luma filter */
349 if (CONFIG_SWSCALE_ALPHA && aDest)
350 for (i=0; i<dstW; i++) {
353 for (j=0; j<lumFilterSize; j++)
354 val += alpSrc[j][i] * lumFilter[j];
356 aDest[i]= av_clip_uint8(val>>19);
/* Vertically filter into NV12/NV21: planar luma plus one interleaved chroma
 * plane. NV12 stores U,V pairs; NV21 stores V,U (handled by the else branch).
 * NOTE(review): accumulator declarations fall on elided lines. */
361 static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
362 const int16_t *chrFilter, const int16_t **chrUSrc,
363 const int16_t **chrVSrc, int chrFilterSize,
364 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
366 //FIXME Optimize (just quickly written not optimized..)
/* luma */
368 for (i=0; i<dstW; i++) {
371 for (j=0; j<lumFilterSize; j++)
372 val += lumSrc[j][i] * lumFilter[j];
374 dest[i]= av_clip_uint8(val>>19);
/* interleaved chroma: NV12 = U,V order ... */
380 if (dstFormat == PIX_FMT_NV12)
381 for (i=0; i<chrDstW; i++) {
385 for (j=0; j<chrFilterSize; j++) {
386 u += chrUSrc[j][i] * chrFilter[j];
387 v += chrVSrc[j][i] * chrFilter[j];
390 uDest[2*i]= av_clip_uint8(u>>19);
391 uDest[2*i+1]= av_clip_uint8(v>>19);
/* ... otherwise (NV21) = V,U order */
394 for (i=0; i<chrDstW; i++) {
398 for (j=0; j<chrFilterSize; j++) {
399 u += chrUSrc[j][i] * chrFilter[j];
400 v += chrVSrc[j][i] * chrFilter[j];
403 uDest[2*i]= av_clip_uint8(v>>19);
404 uDest[2*i+1]= av_clip_uint8(u>>19);
/* The YSCALE_* macro family below generates the per-pixel-pair loop bodies
 * for packed RGB/YUV output. Naming scheme: X = full vertical multi-tap
 * filtering, 2 = bilinear blend of two source lines, 1/1B = single source
 * line (1B averages the two chroma buffers); _C variants compute Y1/Y2/U/V
 * (and A1/A2 when alpha is enabled), RGBX variants additionally look up the
 * r/g/b gamma/dither tables. */
408 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
409 for (i=0; i<(dstW>>1); i++) {\
415 int av_unused A1, A2;\
416 type av_unused *r, *b, *g;\
419 for (j=0; j<lumFilterSize; j++) {\
420 Y1 += lumSrc[j][i2] * lumFilter[j];\
421 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
423 for (j=0; j<chrFilterSize; j++) {\
424 U += chrUSrc[j][i] * chrFilter[j];\
425 V += chrVSrc[j][i] * chrFilter[j];\
434 for (j=0; j<lumFilterSize; j++) {\
435 A1 += alpSrc[j][i2 ] * lumFilter[j];\
436 A2 += alpSrc[j][i2+1] * lumFilter[j];\
/* As above, plus clipping of Y/U/V (and A) to 0..255 — the &256 test is a
 * cheap "any value out of 8-bit range" check on the 9-bit overflow bit. */
442 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
443 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
444 if ((Y1|Y2|U|V)&256) {\
445 if (Y1>255) Y1=255; \
446 else if (Y1<0)Y1=0; \
447 if (Y2>255) Y2=255; \
448 else if (Y2<0)Y2=0; \
454 if (alpha && ((A1|A2)&256)) {\
455 A1=av_clip_uint8(A1);\
456 A2=av_clip_uint8(A2);\
/* "FULL" variants process one pixel per iteration (full chroma resolution). */
459 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
460 for (i=0; i<dstW; i++) {\
468 for (j=0; j<lumFilterSize; j++) {\
469 Y += lumSrc[j][i ] * lumFilter[j];\
471 for (j=0; j<chrFilterSize; j++) {\
472 U += chrUSrc[j][i] * chrFilter[j];\
473 V += chrVSrc[j][i] * chrFilter[j];\
480 for (j=0; j<lumFilterSize; j++)\
481 A += alpSrc[j][i ] * lumFilter[j];\
484 A = av_clip_uint8(A);\
/* Full-resolution YUV->RGB using the context's fixed-point coefficients;
 * R/G/B are clipped against the 22-bit working range. */
487 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
488 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
489 Y-= c->yuv2rgb_y_offset;\
490 Y*= c->yuv2rgb_y_coeff;\
492 R= Y + V*c->yuv2rgb_v2r_coeff;\
493 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
494 B= Y + U*c->yuv2rgb_u2b_coeff;\
495 if ((R|G|B)&(0xC0000000)) {\
496 if (R>=(256<<22)) R=(256<<22)-1; \
498 if (G>=(256<<22)) G=(256<<22)-1; \
500 if (B>=(256<<22)) B=(256<<22)-1; \
/* 16-bit grayscale: keep 16 significant bits of luma and clip to 0..65535. */
504 #define YSCALE_YUV_2_GRAY16_C \
505 for (i=0; i<(dstW>>1); i++) {\
514 for (j=0; j<lumFilterSize; j++) {\
515 Y1 += lumSrc[j][i2] * lumFilter[j];\
516 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
520 if ((Y1|Y2|U|V)&65536) {\
521 if (Y1>65535) Y1=65535; \
522 else if (Y1<0)Y1=0; \
523 if (Y2>65535) Y2=65535; \
524 else if (Y2<0)Y2=0; \
/* Compute Y1/Y2/U/V then set r/g/b to the per-U/V lookup tables. */
527 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
528 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
529 r = (type *)c->table_rV[V]; \
530 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
531 b = (type *)c->table_bU[U];
/* Bilinear (two-line) variants: blend buf0/buf1 with yalpha, ubuf/vbuf with
 * uvalpha; yalpha1/uvalpha1 are the complementary weights. */
533 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
534 for (i=0; i<(dstW>>1); i++) { \
536 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
537 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
538 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
539 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
540 type av_unused *r, *b, *g; \
541 int av_unused A1, A2; \
543 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
544 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
547 #define YSCALE_YUV_2_GRAY16_2_C \
548 for (i=0; i<(dstW>>1); i++) { \
550 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
551 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
/* Bilinear RGB: blend, then table lookups. */
553 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
554 YSCALE_YUV_2_PACKED2_C(type,alpha)\
555 r = (type *)c->table_rV[V];\
556 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
557 b = (type *)c->table_bU[U];
/* Single-line variants: no vertical interpolation, chroma from ubuf1/vbuf1. */
559 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
560 for (i=0; i<(dstW>>1); i++) {\
562 int Y1= buf0[i2 ]>>7;\
563 int Y2= buf0[i2+1]>>7;\
564 int U= (ubuf1[i])>>7;\
565 int V= (vbuf1[i])>>7;\
566 type av_unused *r, *b, *g;\
567 int av_unused A1, A2;\
573 #define YSCALE_YUV_2_GRAY16_1_C \
574 for (i=0; i<(dstW>>1); i++) {\
576 int Y1= buf0[i2 ]<<1;\
577 int Y2= buf0[i2+1]<<1;
/* Single-line RGB: lookups from the U/V tables. */
579 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
580 YSCALE_YUV_2_PACKED1_C(type,alpha)\
581 r = (type *)c->table_rV[V];\
582 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
583 b = (type *)c->table_bU[U];
/* "1B" variants: single luma line but chroma averaged from both buffers. */
585 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
586 for (i=0; i<(dstW>>1); i++) {\
588 int Y1= buf0[i2 ]>>7;\
589 int Y2= buf0[i2+1]>>7;\
590 int U= (ubuf0[i] + ubuf1[i])>>8;\
591 int V= (vbuf0[i] + vbuf1[i])>>8;\
592 type av_unused *r, *b, *g;\
593 int av_unused A1, A2;\
599 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
600 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
601 r = (type *)c->table_rV[V];\
602 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
603 b = (type *)c->table_bU[U];
/* Monochrome (1 bpp) output: dither 8 luma samples through the 220-level
 * matrix and pack them MSB-first into one byte; MONOWHITE inverts (~acc). */
605 #define YSCALE_YUV_2_MONO2_C \
606 const uint8_t * const d128=dither_8x8_220[y&7];\
607 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
608 for (i=0; i<dstW-7; i+=8) {\
610 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
611 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
612 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
613 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
614 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
615 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
616 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
617 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
618 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
622 #define YSCALE_YUV_2_MONOX_C \
623 const uint8_t * const d128=dither_8x8_220[y&7];\
624 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
626 for (i=0; i<dstW-1; i+=2) {\
631 for (j=0; j<lumFilterSize; j++) {\
632 Y1 += lumSrc[j][i] * lumFilter[j];\
633 Y2 += lumSrc[j][i+1] * lumFilter[j];\
643 acc+= acc + g[Y1+d128[(i+0)&7]];\
644 acc+= acc + g[Y2+d128[(i+1)&7]];\
646 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Master dispatch macro: switch on c->dstFormat and expand one of the
 * func/func2/func_g16/func_monoblack per-pixel macros inside the matching
 * store loop. Covers RGB48/BGR48 (each component's byte duplicated to fill
 * 16 bits), RGBA/ARGB (with optional alpha), RGB24/BGR24, dithered
 * 565/555/444/8/4/4_BYTE, monochrome, YUYV/UYVY, and GRAY16BE/LE.
 * NOTE(review): the `case` labels and braces for some formats fall on
 * elided lines in this view. */
651 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
652 switch(c->dstFormat) {\
653 case PIX_FMT_RGB48BE:\
654 case PIX_FMT_RGB48LE:\
656 ((uint8_t*)dest)[ 0]= r[Y1];\
657 ((uint8_t*)dest)[ 1]= r[Y1];\
658 ((uint8_t*)dest)[ 2]= g[Y1];\
659 ((uint8_t*)dest)[ 3]= g[Y1];\
660 ((uint8_t*)dest)[ 4]= b[Y1];\
661 ((uint8_t*)dest)[ 5]= b[Y1];\
662 ((uint8_t*)dest)[ 6]= r[Y2];\
663 ((uint8_t*)dest)[ 7]= r[Y2];\
664 ((uint8_t*)dest)[ 8]= g[Y2];\
665 ((uint8_t*)dest)[ 9]= g[Y2];\
666 ((uint8_t*)dest)[10]= b[Y2];\
667 ((uint8_t*)dest)[11]= b[Y2];\
671 case PIX_FMT_BGR48BE:\
672 case PIX_FMT_BGR48LE:\
674 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
675 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
676 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
677 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
678 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
679 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
686 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
687 func(uint32_t,needAlpha)\
688 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
689 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
692 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
694 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
695 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
699 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
700 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
708 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
709 func(uint32_t,needAlpha)\
710 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
711 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
714 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
716 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
717 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
721 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
722 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
729 ((uint8_t*)dest)[0]= r[Y1];\
730 ((uint8_t*)dest)[1]= g[Y1];\
731 ((uint8_t*)dest)[2]= b[Y1];\
732 ((uint8_t*)dest)[3]= r[Y2];\
733 ((uint8_t*)dest)[4]= g[Y2];\
734 ((uint8_t*)dest)[5]= b[Y2];\
740 ((uint8_t*)dest)[0]= b[Y1];\
741 ((uint8_t*)dest)[1]= g[Y1];\
742 ((uint8_t*)dest)[2]= r[Y1];\
743 ((uint8_t*)dest)[3]= b[Y2];\
744 ((uint8_t*)dest)[4]= g[Y2];\
745 ((uint8_t*)dest)[5]= r[Y2];\
749 case PIX_FMT_RGB565BE:\
750 case PIX_FMT_RGB565LE:\
751 case PIX_FMT_BGR565BE:\
752 case PIX_FMT_BGR565LE:\
754 const int dr1= dither_2x2_8[y&1 ][0];\
755 const int dg1= dither_2x2_4[y&1 ][0];\
756 const int db1= dither_2x2_8[(y&1)^1][0];\
757 const int dr2= dither_2x2_8[y&1 ][1];\
758 const int dg2= dither_2x2_4[y&1 ][1];\
759 const int db2= dither_2x2_8[(y&1)^1][1];\
761 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
762 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
766 case PIX_FMT_RGB555BE:\
767 case PIX_FMT_RGB555LE:\
768 case PIX_FMT_BGR555BE:\
769 case PIX_FMT_BGR555LE:\
771 const int dr1= dither_2x2_8[y&1 ][0];\
772 const int dg1= dither_2x2_8[y&1 ][1];\
773 const int db1= dither_2x2_8[(y&1)^1][0];\
774 const int dr2= dither_2x2_8[y&1 ][1];\
775 const int dg2= dither_2x2_8[y&1 ][0];\
776 const int db2= dither_2x2_8[(y&1)^1][1];\
778 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
779 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
783 case PIX_FMT_RGB444BE:\
784 case PIX_FMT_RGB444LE:\
785 case PIX_FMT_BGR444BE:\
786 case PIX_FMT_BGR444LE:\
788 const int dr1= dither_4x4_16[y&3 ][0];\
789 const int dg1= dither_4x4_16[y&3 ][1];\
790 const int db1= dither_4x4_16[(y&3)^3][0];\
791 const int dr2= dither_4x4_16[y&3 ][1];\
792 const int dg2= dither_4x4_16[y&3 ][0];\
793 const int db2= dither_4x4_16[(y&3)^3][1];\
795 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
796 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
803 const uint8_t * const d64= dither_8x8_73[y&7];\
804 const uint8_t * const d32= dither_8x8_32[y&7];\
806 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
807 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
814 const uint8_t * const d64= dither_8x8_73 [y&7];\
815 const uint8_t * const d128=dither_8x8_220[y&7];\
817 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
818 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
822 case PIX_FMT_RGB4_BYTE:\
823 case PIX_FMT_BGR4_BYTE:\
825 const uint8_t * const d64= dither_8x8_73 [y&7];\
826 const uint8_t * const d128=dither_8x8_220[y&7];\
828 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
829 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
833 case PIX_FMT_MONOBLACK:\
834 case PIX_FMT_MONOWHITE:\
839 case PIX_FMT_YUYV422:\
841 ((uint8_t*)dest)[2*i2+0]= Y1;\
842 ((uint8_t*)dest)[2*i2+1]= U;\
843 ((uint8_t*)dest)[2*i2+2]= Y2;\
844 ((uint8_t*)dest)[2*i2+3]= V;\
847 case PIX_FMT_UYVY422:\
849 ((uint8_t*)dest)[2*i2+0]= U;\
850 ((uint8_t*)dest)[2*i2+1]= Y1;\
851 ((uint8_t*)dest)[2*i2+2]= V;\
852 ((uint8_t*)dest)[2*i2+3]= Y2;\
855 case PIX_FMT_GRAY16BE:\
857 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
858 ((uint8_t*)dest)[2*i2+1]= Y1;\
859 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
860 ((uint8_t*)dest)[2*i2+3]= Y2;\
863 case PIX_FMT_GRAY16LE:\
865 ((uint8_t*)dest)[2*i2+0]= Y1;\
866 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
867 ((uint8_t*)dest)[2*i2+2]= Y2;\
868 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/* Vertical multi-tap filtering straight into any packed output format:
 * expands the ANYRGB dispatch with the filtered (X) per-pixel macros. */
873 static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter,
874 const int16_t **lumSrc, int lumFilterSize,
875 const int16_t *chrFilter, const int16_t **chrUSrc,
876 const int16_t **chrVSrc, int chrFilterSize,
877 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
880 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution YUV->RGB/RGBA output (one chroma sample per pixel,
 * no 2:1 subsampling of the output). `step` is the bytes-per-pixel stride;
 * alpha is emitted when the format has it and an alpha plane is present,
 * otherwise the alpha byte is forced to 255.
 * NOTE(review): the case labels and aidx/dest advance code fall on elided
 * lines; the two visible halves presumably handle the RGBA- and BGRA-family
 * formats — confirm against the full file. */
883 static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter,
884 const int16_t **lumSrc, int lumFilterSize,
885 const int16_t *chrFilter, const int16_t **chrUSrc,
886 const int16_t **chrVSrc, int chrFilterSize,
887 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
890 int step= c->dstFormatBpp/8;
893 switch(c->dstFormat) {
901 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
902 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
903 dest[aidx]= needAlpha ? A : 255;
910 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
911 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
919 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
936 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
937 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
938 dest[aidx]= needAlpha ? A : 255;
945 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
946 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
954 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill `height` rows of a plane with a constant byte value, starting at
 * row `y`. `stride` is the plane's row pitch in bytes.
 * NOTE(review): the per-row pointer advance and closing braces are on
 * elided lines. */
969 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
972 uint8_t *ptr = plane + stride*y;
973 for (i=0; i<height; i++) {
974 memset(ptr, val, width);
/* Input converters for 48-bit RGB/BGR (16 bits per component): the [6*i+N]
 * indexing reads only the most significant byte of each component. The
 * rounding constants 33<<(SHIFT-1) / 257<<(SHIFT-1) fold in the +16 luma /
 * +128 chroma offsets of limited-range YUV. The _half variants average two
 * horizontally adjacent pixels (12 bytes per pair) for subsampled chroma. */
979 static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width,
983 for (i = 0; i < width; i++) {
988 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* chroma from full-resolution 48-bit RGB; src2 is unused (assumed == src1) */
992 static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
993 const uint8_t *src1, const uint8_t *src2,
994 int width, uint32_t *unused)
998 for (i = 0; i < width; i++) {
999 int r = src1[6*i + 0];
1000 int g = src1[6*i + 2];
1001 int b = src1[6*i + 4];
1003 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1004 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* chroma with 2:1 horizontal averaging (note the extra +1 in the shift) */
1008 static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
1009 const uint8_t *src1, const uint8_t *src2,
1010 int width, uint32_t *unused)
1014 for (i = 0; i < width; i++) {
1015 int r= src1[12*i + 0] + src1[12*i + 6];
1016 int g= src1[12*i + 2] + src1[12*i + 8];
1017 int b= src1[12*i + 4] + src1[12*i + 10];
1019 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1020 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* same three converters for 48-bit BGR (component order swapped) */
1024 static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, int width,
1028 for (i = 0; i < width; i++) {
1033 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1037 static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
1038 const uint8_t *src1, const uint8_t *src2,
1039 int width, uint32_t *unused)
1042 for (i = 0; i < width; i++) {
1043 int b = src1[6*i + 0];
1044 int g = src1[6*i + 2];
1045 int r = src1[6*i + 4];
1047 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1048 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1052 static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
1053 const uint8_t *src1, const uint8_t *src2,
1054 int width, uint32_t *unused)
1057 for (i = 0; i < width; i++) {
1058 int b= src1[12*i + 0] + src1[12*i + 6];
1059 int g= src1[12*i + 2] + src1[12*i + 8];
1060 int r= src1[12*i + 4] + src1[12*i + 10];
1062 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1063 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Generate a packed-RGB/BGR -> luma converter: extract r/g/b via per-channel
 * shift+mask, then apply the fixed-point weights. S is the total shift; the
 * instantiations pre-scale the RY/GY/BY weights so that the channel values
 * (which carry different bit positions per format) line up. */
1067 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1068 static inline void name(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)\
1071 for (i=0; i<width; i++) {\
1072 int b= (((const type*)src)[i]>>shb)&maskb;\
1073 int g= (((const type*)src)[i]>>shg)&maskg;\
1074 int r= (((const type*)src)[i]>>shr)&maskr;\
1076 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
/* one converter per 32/16/15-bit RGB/BGR layout */
1080 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1081 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1082 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1083 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1084 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1085 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1086 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1087 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel from packed ABGR into a planar 8-bit buffer.
 * NOTE(review): the loop body is on elided lines. */
1089 static inline void abgrToA(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1092 for (i=0; i<width; i++) {
/* Generate packed-RGB/BGR -> chroma converters. The plain version converts
 * one pixel per output sample; the generated name##_half version averages
 * two adjacent pixels: g is isolated first by masking out r and b, then r/b
 * are recovered from (pix0+pix1-g), with masks widened (mask|2*mask) to keep
 * the carry bit of the sum. shp pre-shifts formats with an alpha byte in the
 * low bits (the *321* layouts). */
1097 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1098 static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
1101 for (i=0; i<width; i++) {\
1102 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1103 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1104 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1106 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1107 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1110 static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
1113 for (i=0; i<width; i++) {\
1114 int pix0= ((const type*)src)[2*i+0]>>shp;\
1115 int pix1= ((const type*)src)[2*i+1]>>shp;\
1116 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1117 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1118 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1119 g&= maskg|(2*maskg);\
1123 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1124 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
/* one converter pair per 32/16/15-bit RGB/BGR layout */
1128 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1129 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1130 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1131 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1132 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1133 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1134 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1135 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 input: look each index up in the 32-bit palette. Luma is the low
 * byte of the palette entry; chroma bytes are extracted from the same entry
 * (extraction lines elided here). */
1137 static inline void palToY(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1140 for (i=0; i<width; i++) {
1143 dst[i]= pal[d] & 0xFF;
1147 static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
1148 const uint8_t *src1, const uint8_t *src2,
1149 int width, uint32_t *pal)
/* both source pointers must alias the same palette-index buffer */
1152 assert(src1 == src2);
1153 for (i=0; i<width; i++) {
1154 int p= pal[src1[i]];
/* 1 bpp input: expand each bit (MSB first) to a 0/255 luma byte.
 * NOTE(review): the byte-load and inner-loop lines are elided; the
 * white/black distinction presumably lies in an inversion on those lines —
 * the visible bodies are identical. */
1161 static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1164 for (i=0; i<width/8; i++) {
1167 dst[8*i+j]= ((d>>(7-j))&1)*255;
1171 static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1174 for (i=0; i<width/8; i++) {
1177 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* C entry point for vertically filtered 8-bit planar output: thin wrapper
 * forwarding to the generic yuv2yuvXinC() implementation. */
1181 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
1182 const int16_t **lumSrc, int lumFilterSize,
1183 const int16_t *chrFilter, const int16_t **chrUSrc,
1184 const int16_t **chrVSrc,
1185 int chrFilterSize, const int16_t **alpSrc,
1186 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1187 uint8_t *aDest, int dstW, int chrDstW)
1189 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
1190 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
1191 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
/* C entry point for NV12/NV21 output: thin wrapper forwarding to the
 * generic yuv2nv12XinC() implementation. */
1194 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
1195 const int16_t **lumSrc, int lumFilterSize,
1196 const int16_t *chrFilter, const int16_t **chrUSrc,
1197 const int16_t **chrVSrc,
1198 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
1199 int dstW, int chrDstW, enum PixelFormat dstFormat)
1201 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
1202 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
1203 dest, uDest, dstW, chrDstW, dstFormat);
/* 1:1 vertical scale (no filtering): round the Q7 intermediate samples back
 * to 8 bits ((x+64)>>7) and clip. NULL uDest/aDest skip those planes. */
1206 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1207 const int16_t *chrUSrc, const int16_t *chrVSrc,
1208 const int16_t *alpSrc,
1209 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1210 uint8_t *aDest, int dstW, int chrDstW)
/* luma */
1213 for (i=0; i<dstW; i++) {
1214 int val= (lumSrc[i]+64)>>7;
1215 dest[i]= av_clip_uint8(val);
/* chroma */
1219 for (i=0; i<chrDstW; i++) {
1220 int u=(chrUSrc[i]+64)>>7;
1221 int v=(chrVSrc[i]+64)>>7;
1222 uDest[i]= av_clip_uint8(u);
1223 vDest[i]= av_clip_uint8(v);
/* optional alpha */
1226 if (CONFIG_SWSCALE_ALPHA && aDest)
1227 for (i=0; i<dstW; i++) {
1228 int val= (alpSrc[i]+64)>>7;
1229 aDest[i]= av_clip_uint8(val);
/* General (N-tap) vertical scale from planar YUV to a packed output
 * format (RGB and friends); forwards to the generic yuv2packedXinC(). */
1235 * vertical scale YV12 to RGB
1237 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
1238 const int16_t **lumSrc, int lumFilterSize,
1239 const int16_t *chrFilter, const int16_t **chrUSrc,
1240 const int16_t **chrVSrc,
1241 int chrFilterSize, const int16_t **alpSrc,
1242 uint8_t *dest, int dstW, int dstY)
1244 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1245 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
1246 alpSrc, dest, dstW, dstY);
/* Two-tap (bilinear) vertical blend of adjacent source lines into a packed
 * output line, dispatched per output format by YSCALE_YUV_2_ANYRGB_C.
 * yalpha/uvalpha are 12-bit blend weights; yalpha1/uvalpha1 are their
 * complements (4095 - weight). */
1250 * vertical bilinear scale YV12 to RGB
1252 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1253 const uint16_t *buf1, const uint16_t *ubuf0,
1254 const uint16_t *ubuf1, const uint16_t *vbuf0,
1255 const uint16_t *vbuf1, const uint16_t *abuf0,
1256 const uint16_t *abuf1, uint8_t *dest, int dstW,
1257 int yalpha, int uvalpha, int y)
1259 int yalpha1=4095- yalpha;
1260 int uvalpha1=4095-uvalpha;
1263 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
/* One-tap (unscaled) conversion of a single planar line to packed output.
 * uvalpha < 2048 selects the variant that uses a single chroma line;
 * otherwise the two chroma lines are averaged (the *1B* macros).
 * yalpha1/yalpha are fixed dummies because only one luma line exists. */
1267 * YV12 to RGB without scaling or interpolating
1269 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1270 const uint16_t *ubuf0, const uint16_t *ubuf1,
1271 const uint16_t *vbuf0, const uint16_t *vbuf1,
1272 const uint16_t *abuf0, uint8_t *dest, int dstW,
1273 int uvalpha, enum PixelFormat dstFormat,
1276 const int yalpha1=0;
1279 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1280 const int yalpha= 4096; //FIXME ...
1282 if (uvalpha < 2048) {
1283 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1285 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
/* Extract the luma component from a packed YUYV line (Y at even bytes).
 * NOTE(review): the loop body line (presumably `dst[i]= src[2*i];`) is
 * elided from this listing — confirm against the full file. */
1289 //FIXME yuy2* can read up to 7 samples too much
1291 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1295 for (i=0; i<width; i++)
/* Extract U and V from a packed YUYV line: U at byte 1, V at byte 3 of
 * each 4-byte group. Both source pointers must alias the same line. */
1299 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1300 const uint8_t *src2, int width, uint32_t *unused)
1303 for (i=0; i<width; i++) {
1304 dstU[i]= src1[4*i + 1];
1305 dstV[i]= src1[4*i + 3];
1307 assert(src1 == src2);
/* Reduce little-endian 16-bit planar chroma to 8 bits by taking the high
 * byte of each sample (byte 1 of every LE 16-bit value). src1/src2 are
 * separate U and V planes here, unlike the packed-input readers above. */
1310 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1311 const uint8_t *src2, int width, uint32_t *unused)
1314 // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
1315 // we need to skip each second pixel. Same for BEToUV.
1316 for (i=0; i<width; i++) {
1317 dstU[i]= src1[2*i + 1];
1318 dstV[i]= src2[2*i + 1];
/* Extract luma from a packed UYVY line (Y at odd bytes).
 * NOTE(review): the loop body line (presumably `dst[i]= src[2*i+1];`) is
 * elided from this listing — confirm against the full file. */
1322 /* This is almost identical to the previous, end exists only because
1323 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1324 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1328 for (i=0; i<width; i++)
/* Extract U and V from a packed UYVY line: U at byte 0, V at byte 2 of
 * each 4-byte group. Both source pointers must alias the same line. */
1332 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1333 const uint8_t *src2, int width, uint32_t *unused)
1336 for (i=0; i<width; i++) {
1337 dstU[i]= src1[4*i + 0];
1338 dstV[i]= src1[4*i + 2];
1340 assert(src1 == src2);
/* Big-endian counterpart of LEToUV_c: reduce BE 16-bit planar chroma to
 * 8 bits (high byte is byte 0 of each sample).
 * NOTE(review): the loop body lines are elided from this listing —
 * confirm against the full file. */
1343 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1344 const uint8_t *src2, int width, uint32_t *unused)
1347 for (i=0; i<width; i++) {
/* De-interleave a semi-planar chroma line (UVUV... or VUVU...) into two
 * planes; callers pick the output order to handle NV12 vs NV21. */
1353 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1354 const uint8_t *src, int width)
1357 for (i = 0; i < width; i++) {
1358 dst1[i] = src[2*i+0];
1359 dst2[i] = src[2*i+1];
/* NV12 chroma reader: interleaved data is U,V,U,V..., so dstU gets the
 * even bytes and dstV the odd ones. src2 is unused. */
1363 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1364 const uint8_t *src1, const uint8_t *src2,
1365 int width, uint32_t *unused)
1367 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma reader: interleaved data is V,U,V,U..., so the destination
 * planes are swapped relative to nv12ToUV_c. src2 is unused. */
1370 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1371 const uint8_t *src1, const uint8_t *src2,
1372 int width, uint32_t *unused)
1374 nvXXtoUV_c(dstV, dstU, src1, width);
/* Generates ToUV_c/ToY_c readers for 9- and 10-bit planar YUV in either
 * endianness. `rfunc` (AV_RL16/AV_RB16) reads one 16-bit sample; the
 * shift by (depth-8) truncates it to 8 bits (no dithering — see FIXME). */
1377 // FIXME Maybe dither instead.
1378 #define YUV_NBPS(depth, endianness, rfunc) \
1379 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1380 const uint8_t *_srcU, const uint8_t *_srcV, \
1381 int width, uint32_t *unused) \
1384 const uint16_t *srcU = (const uint16_t*)_srcU; \
1385 const uint16_t *srcV = (const uint16_t*)_srcV; \
1386 for (i = 0; i < width; i++) { \
1387 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
1388 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
1392 static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, int width, uint32_t *unused) \
1395 const uint16_t *srcY = (const uint16_t*)_srcY; \
1396 for (i = 0; i < width; i++) \
1397 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
/* Instantiate readers for 9/10-bit little- and big-endian input. */
1400 YUV_NBPS( 9, LE, AV_RL16)
1401 YUV_NBPS( 9, BE, AV_RB16)
1402 YUV_NBPS(10, LE, AV_RL16)
1403 YUV_NBPS(10, BE, AV_RB16)
/* Convert packed BGR24 to 8-bit luma with the fixed-point RY/GY/BY
 * coefficients; the +(33<<(RGB2YUV_SHIFT-1)) term adds rounding plus the
 * standard 16-level luma offset.
 * NOTE(review): the per-pixel b/g/r load lines are elided from this
 * listing — confirm against the full file. */
1405 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1406 int width, uint32_t *unused)
1409 for (i=0; i<width; i++) {
1414 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Convert packed BGR24 (byte order b,g,r) to 8-bit U/V with fixed-point
 * coefficients; +(257<<(RGB2YUV_SHIFT-1)) provides rounding plus the
 * 128 chroma bias. Both source pointers must alias the same line. */
1418 static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1419 const uint8_t *src2, int width, uint32_t *unused)
1422 for (i=0; i<width; i++) {
1423 int b= src1[3*i + 0];
1424 int g= src1[3*i + 1];
1425 int r= src1[3*i + 2];
1427 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1428 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1430 assert(src1 == src2);
/* Horizontally-subsampling variant of bgr24ToUV_c: sums each pair of
 * adjacent BGR pixels (hence the extra >>1 in the final shift) so one
 * chroma sample covers two luma samples. */
1433 static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1434 const uint8_t *src2, int width, uint32_t *unused)
1437 for (i=0; i<width; i++) {
1438 int b= src1[6*i + 0] + src1[6*i + 3];
1439 int g= src1[6*i + 1] + src1[6*i + 4];
1440 int r= src1[6*i + 2] + src1[6*i + 5];
1442 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1443 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1445 assert(src1 == src2);
/* Convert packed RGB24 to 8-bit luma; mirror of bgr24ToY_c with the
 * opposite component order.
 * NOTE(review): the per-pixel r/g/b load lines are elided from this
 * listing — confirm against the full file. */
1448 static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1452 for (i=0; i<width; i++) {
1457 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Convert packed RGB24 (byte order r,g,b) to 8-bit U/V; mirror of
 * bgr24ToUV_c with the opposite component order. */
1461 static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1462 const uint8_t *src2, int width, uint32_t *unused)
1466 for (i=0; i<width; i++) {
1467 int r= src1[3*i + 0];
1468 int g= src1[3*i + 1];
1469 int b= src1[3*i + 2];
1471 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1472 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Horizontally-subsampling variant of rgb24ToUV_c: sums adjacent pixel
 * pairs so one chroma sample covers two luma samples (extra >>1). */
1476 static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1477 const uint8_t *src2, int width, uint32_t *unused)
1481 for (i=0; i<width; i++) {
1482 int r= src1[6*i + 0] + src1[6*i + 3];
1483 int g= src1[6*i + 1] + src1[6*i + 4];
1484 int b= src1[6*i + 2] + src1[6*i + 5];
1486 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1487 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Generic horizontal scaler: for each output sample, convolves
 * `filterSize` 8-bit input samples starting at filterPos[i] with 16-bit
 * filter coefficients, producing a clipped 15-bit intermediate. */
1492 // bilinear / bicubic scaling
1493 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1495 const int16_t *filter, const int16_t *filterPos,
1499 for (i=0; i<dstW; i++) {
1501 int srcPos= filterPos[i];
1503 for (j=0; j<filterSize; j++) {
1504 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1506 //filter += hFilterSize;
/* Cap at 2^15-1; the cubic filter can overshoot the 15-bit range. */
1507 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain; input is clamped first so the fixed-point multiply
 * cannot overflow. */
1512 //FIXME all pal and rgb srcFormats could do this convertion as well
1513 //FIXME all scalers more complex than bilinear could do half of this transform
1514 static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1517 for (i = 0; i < width; i++) {
1518 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1519 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma to limited-range (MPEG) in the
 * 15-bit intermediate domain; inverse of chrRangeToJpeg_c. */
1522 static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1525 for (i = 0; i < width; i++) {
1526 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1527 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand limited-range luma to full range in the 15-bit intermediate
 * domain; input is clamped first to avoid fixed-point overflow. */
1530 static void lumRangeToJpeg_c(uint16_t *dst, int width)
1533 for (i = 0; i < width; i++)
1534 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress full-range luma to limited range in the 15-bit intermediate
 * domain; inverse of lumRangeToJpeg_c. */
1536 static void lumRangeFromJpeg_c(uint16_t *dst, int width)
1539 for (i = 0; i < width; i++)
1540 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Fast-bilinear horizontal luma scaler: walks the source with a 16.16
 * fixed-point position (xpos), blending the two neighbouring samples
 * with a 7-bit weight to produce a 15-bit intermediate.
 * NOTE(review): the `xpos+=xInc;` advance line appears to be elided from
 * this listing — confirm against the full file. */
1543 static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1544 const uint8_t *src, int srcW, int xInc)
1547 unsigned int xpos=0;
1548 for (i=0;i<dstWidth;i++) {
1549 register unsigned int xx=xpos>>16;
1550 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1551 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/* Horizontally scale one luma (or alpha, when isAlpha) line into the
 * 15-bit temp buffer: optionally convert the input pixel format to YV12
 * first (via toYV12 into formatConvBuffer), then run either the general
 * hScale or the fast-bilinear path, and finally apply range conversion
 * for luma if configured. */
1556 // *** horizontal scale Y line to temp buffer
1557 static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
1558 const uint8_t *src, int srcW, int xInc,
1559 const int16_t *hLumFilter,
1560 const int16_t *hLumFilterPos, int hLumFilterSize,
1561 uint8_t *formatConvBuffer,
1562 uint32_t *pal, int isAlpha)
/* Select the per-format input converter and the range converter; alpha
 * never gets range conversion. */
1564 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1565 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1567 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
/* Convert the input line to 8-bit luma if a converter is installed. */
1570 toYV12(formatConvBuffer, src, srcW, pal);
1571 src= formatConvBuffer;
1574 if (!c->hyscale_fast) {
1575 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1576 } else { // fast bilinear upscale / crap downscale
1577 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
/* MPEG<->JPEG range conversion on the scaled 15-bit samples. */
1581 convertRange(dst, dstWidth);
/* Fast-bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c, applied to both chroma planes at once. Note the weight
 * form (w^127 vs 128-w) differs slightly from the luma path.
 * NOTE(review): the `xpos+=xInc;` advance line appears to be elided from
 * this listing — confirm against the full file. */
1584 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1585 int dstWidth, const uint8_t *src1,
1586 const uint8_t *src2, int srcW, int xInc)
1589 unsigned int xpos=0;
1590 for (i=0;i<dstWidth;i++) {
1591 register unsigned int xx=xpos>>16;
1592 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1593 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1594 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one pair of chroma lines into the 15-bit temp
 * buffers: optionally convert the input format to planar U/V first
 * (chrToYV12 into formatConvBuffer/buf2), then run the general hScale
 * on each plane or the fast-bilinear path, and finally the chroma
 * range conversion if configured. */
1599 inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1600 const uint8_t *src1, const uint8_t *src2,
1601 int srcW, int xInc, const int16_t *hChrFilter,
1602 const int16_t *hChrFilterPos, int hChrFilterSize,
1603 uint8_t *formatConvBuffer, uint32_t *pal)
1606 src1 += c->chrSrcOffset;
1607 src2 += c->chrSrcOffset;
/* Second half of the conversion buffer holds the converted V plane;
 * aligned to 16 bytes. */
1610 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1611 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1612 src1= formatConvBuffer;
1616 if (!c->hcscale_fast) {
1617 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1618 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1619 } else { // fast bilinear upscale / crap downscale
1620 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1623 if (c->chrConvertRange)
1624 c->chrConvertRange(dst1, dst2, dstWidth);
/* Compile-time switch for verbose ring-buffer tracing in swScale_c;
 * DEBUG_BUFFERS() compiles to nothing unless DEBUG_SWSCALE_BUFFERS is 1. */
1627 #define DEBUG_SWSCALE_BUFFERS 0
1628 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling entry point: scales one horizontal slice of the source
 * picture into the destination. Horizontally-scaled lines are kept in
 * ring buffers (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf) so the
 * vertical filter can consume them; returns the number of destination
 * lines written (dstY - lastDstY).
 * NOTE(review): this listing elides many lines throughout (declarations
 * such as `int i;`/`dstY`/`lastDstY`, closing braces, and several
 * statements inside the per-slice setup) — confirm against the full file
 * before relying on exact control flow. */
1630 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
1631 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
1633 /* load a few things into local vars to make the code more readable? and faster */
1634 const int srcW= c->srcW;
1635 const int dstW= c->dstW;
1636 const int dstH= c->dstH;
1637 const int chrDstW= c->chrDstW;
1638 const int chrSrcW= c->chrSrcW;
1639 const int lumXInc= c->lumXInc;
1640 const int chrXInc= c->chrXInc;
1641 const enum PixelFormat dstFormat= c->dstFormat;
1642 const int flags= c->flags;
1643 int16_t *vLumFilterPos= c->vLumFilterPos;
1644 int16_t *vChrFilterPos= c->vChrFilterPos;
1645 int16_t *hLumFilterPos= c->hLumFilterPos;
1646 int16_t *hChrFilterPos= c->hChrFilterPos;
1647 int16_t *vLumFilter= c->vLumFilter;
1648 int16_t *vChrFilter= c->vChrFilter;
1649 int16_t *hLumFilter= c->hLumFilter;
1650 int16_t *hChrFilter= c->hChrFilter;
1651 int32_t *lumMmxFilter= c->lumMmxFilter;
1652 int32_t *chrMmxFilter= c->chrMmxFilter;
1653 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1654 const int vLumFilterSize= c->vLumFilterSize;
1655 const int vChrFilterSize= c->vChrFilterSize;
1656 const int hLumFilterSize= c->hLumFilterSize;
1657 const int hChrFilterSize= c->hChrFilterSize;
1658 int16_t **lumPixBuf= c->lumPixBuf;
1659 int16_t **chrUPixBuf= c->chrUPixBuf;
1660 int16_t **chrVPixBuf= c->chrVPixBuf;
1661 int16_t **alpPixBuf= c->alpPixBuf;
1662 const int vLumBufSize= c->vLumBufSize;
1663 const int vChrBufSize= c->vChrBufSize;
1664 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice geometry in source lines (rounded up for odd heights). */
1665 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1666 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1668 uint32_t *pal=c->pal_yuv;
1670 /* vars which will change and which we need to store back in the context */
1672 int lumBufIndex= c->lumBufIndex;
1673 int chrBufIndex= c->chrBufIndex;
1674 int lastInLumBuf= c->lastInLumBuf;
1675 int lastInChrBuf= c->lastInChrBuf;
/* Packed input: all planes alias plane 0 (elided lines presumably set
 * src[1..3] and srcStride[1..2] — confirm in the full file). */
1677 if (isPacked(c->srcFormat)) {
1685 srcStride[3]= srcStride[0];
1687 srcStride[1]<<= c->vChrDrop;
1688 srcStride[2]<<= c->vChrDrop;
1690 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1691 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1692 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1693 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1694 srcSliceY, srcSliceH, dstY, dstH);
1695 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1696 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Warn once when destination strides break 8-byte alignment (slower,
 * unaligned accesses). */
1698 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1699 static int warnedAlready=0; //FIXME move this into the context perhaps
1700 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1701 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1702 " ->cannot do aligned memory accesses anymore\n");
1707 /* Note the user might start scaling the picture in the middle so this
1708 will not get executed. This is not really intended but works
1709 currently, so people might do it. */
/* First slice: reset ring-buffer state (reset statements elided here). */
1710 if (srcSliceY ==0) {
/* Main per-output-line loop. */
1720 for (;dstY < dstH; dstY++) {
1721 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1722 const int chrDstY= dstY>>c->chrDstVSubSample;
1723 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1724 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1725 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1727 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1728 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1729 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1730 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1731 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1732 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1735 //handle holes (FAST_BILINEAR & weird filters)
1736 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1737 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1738 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1739 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1741 DEBUG_BUFFERS("dstY: %d\n", dstY);
1742 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1743 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1744 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1745 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1747 // Do we have enough lines in this slice to output the dstY line
1748 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: clamp to what this slice provides and buffer. */
1750 if (!enough_lines) {
1751 lastLumSrcY = srcSliceY + srcSliceH - 1;
1752 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1753 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1754 lastLumSrcY, lastChrSrcY);
1757 //Do horizontal scaling
/* Horizontally scale all luma (and alpha) input lines needed but not yet
 * in the ring buffer (index increments are elided in this listing). */
1758 while(lastInLumBuf < lastLumSrcY) {
1759 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1760 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1762 assert(lumBufIndex < 2*vLumBufSize);
1763 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1764 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1765 hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1766 hLumFilter, hLumFilterPos, hLumFilterSize,
1769 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1770 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1771 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1775 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1776 lumBufIndex, lastInLumBuf);
/* Same for chroma lines. */
1778 while(lastInChrBuf < lastChrSrcY) {
1779 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1780 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1782 assert(chrBufIndex < 2*vChrBufSize);
1783 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1784 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1785 //FIXME replace parameters through context struct (some at least)
1787 if (c->needs_hcscale)
1788 hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1789 chrDstW, src1, src2, chrSrcW, chrXInc,
1790 hChrFilter, hChrFilterPos, hChrFilterSize,
1791 formatConvBuffer, pal);
1793 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1794 chrBufIndex, lastInChrBuf);
1796 //wrap buf index around to stay inside the ring buffer
1797 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1798 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1800 break; //we can't output a dstY line so let's try with the next slice
1803 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Vertical scaling & output. The dstY < dstH-2 branch may use the
 * accelerated function pointers; the tail branch (else below) must use
 * the plain C paths to avoid overwriting past the buffer end. */
1805 if (dstY < dstH-2) {
1806 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1807 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1808 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1809 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1810 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1811 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1812 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1814 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1815 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1816 dest, uDest, dstW, chrDstW, dstFormat);
1817 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1818 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1819 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1820 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1821 yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1822 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1823 chrVSrcPtr, vChrFilterSize,
1824 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
1825 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1827 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1828 const int16_t *lumBuf = lumSrcPtr[0];
1829 const int16_t *chrUBuf= chrUSrcPtr[0];
1830 const int16_t *chrVBuf= chrVSrcPtr[0];
1831 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1832 c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1833 uDest, vDest, aDest, dstW, chrDstW);
1834 } else { //General YV12
1836 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1837 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1838 chrVSrcPtr, vChrFilterSize,
1839 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1842 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1843 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1844 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1845 int chrAlpha= vChrFilter[2*dstY+1];
1846 if(flags & SWS_FULL_CHR_H_INT) {
1847 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
1848 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1849 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
1850 chrVSrcPtr, vChrFilterSize,
1851 alpSrcPtr, dest, dstW, dstY);
1853 c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1854 *chrVSrcPtr, *(chrVSrcPtr+1),
1855 alpPixBuf ? *alpSrcPtr : NULL,
1856 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1858 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1859 int lumAlpha= vLumFilter[2*dstY+1];
1860 int chrAlpha= vChrFilter[2*dstY+1];
1862 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1864 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1865 if(flags & SWS_FULL_CHR_H_INT) {
1866 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
1867 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1868 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1869 alpSrcPtr, dest, dstW, dstY);
1871 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1872 *chrVSrcPtr, *(chrVSrcPtr+1),
1873 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1874 dest, dstW, lumAlpha, chrAlpha, dstY);
1876 } else { //general RGB
1877 if(flags & SWS_FULL_CHR_H_INT) {
1879 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1880 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1881 alpSrcPtr, dest, dstW, dstY);
1884 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1885 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1886 alpSrcPtr, dest, dstW, dstY);
1890 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
1891 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1892 const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1893 const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1894 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1895 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1896 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1897 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1899 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1900 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1901 dest, uDest, dstW, chrDstW, dstFormat);
1902 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
1903 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1904 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1905 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1907 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1908 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1909 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1913 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1914 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1915 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1918 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1919 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1920 if(flags & SWS_FULL_CHR_H_INT) {
1922 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1923 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1924 alpSrcPtr, dest, dstW, dstY);
1927 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1928 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1929 alpSrcPtr, dest, dstW, dstY);
/* YUVA output without an alpha ring buffer: fill the alpha plane opaque. */
1935 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1936 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush non-temporal stores on MMX2-capable CPUs before returning. */
1939 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1940 __asm__ volatile("sfence":::"memory");
1944 /* store changed local vars back in the context */
1946 c->lumBufIndex= lumBufIndex;
1947 c->chrBufIndex= chrBufIndex;
1948 c->lastInLumBuf= lastInLumBuf;
1949 c->lastInChrBuf= lastInChrBuf;
1951 return dstY - lastDstY;
/* Install the plain-C implementations of every function pointer in the
 * SwsContext: vertical/horizontal scalers, per-input-format converters
 * (chrToYV12 / lumToYV12 / alpToYV12), source byte offsets, range
 * converters, and the needs_hcscale flag. Architecture-specific init
 * (MMX/AltiVec) may later override these.
 * NOTE(review): this listing elides lines (several `switch (srcFormat)`
 * headers, `case PIX_FMT_PAL8:`-style labels preceding the palToUV/palToY
 * cases, closing braces, and some `break;`s) — confirm against the full
 * file. */
1954 static void sws_init_swScale_c(SwsContext *c)
1956 enum PixelFormat srcFormat = c->srcFormat;
/* Vertical output paths. */
1958 c->yuv2nv12X = yuv2nv12X_c;
1959 c->yuv2yuv1 = yuv2yuv1_c;
1960 c->yuv2yuvX = yuv2yuvX_c;
1961 c->yuv2packed1 = yuv2packed1_c;
1962 c->yuv2packed2 = yuv2packed2_c;
1963 c->yuv2packedX = yuv2packedX_c;
/* Horizontal scaler; the fast-bilinear variants only when requested. */
1965 c->hScale = hScale_c;
1967 if (c->flags & SWS_FAST_BILINEAR)
1969 c->hyscale_fast = hyscale_fast_c;
1970 c->hcscale_fast = hcscale_fast_c;
/* Per-input-format chroma converter selection. */
1973 c->chrToYV12 = NULL;
1975 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1976 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1977 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1978 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1982 case PIX_FMT_BGR4_BYTE:
1983 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
1984 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1985 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1986 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1987 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1988 case PIX_FMT_YUV420P16BE:
1989 case PIX_FMT_YUV422P16BE:
1990 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1991 case PIX_FMT_YUV420P16LE:
1992 case PIX_FMT_YUV422P16LE:
1993 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB input: pick half-resolution chroma readers when the source is
 * horizontally subsampled for chroma, full-resolution ones otherwise. */
1995 if (c->chrSrcHSubSample) {
1997 case PIX_FMT_RGB48BE:
1998 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
1999 case PIX_FMT_BGR48BE:
2000 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
2001 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
2002 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
2003 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2004 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
2005 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
2006 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
2007 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
2008 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2009 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
2010 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
2014 case PIX_FMT_RGB48BE:
2015 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
2016 case PIX_FMT_BGR48BE:
2017 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
2018 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
2019 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
2020 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2021 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
2022 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
2023 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
2024 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
2025 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2026 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
2027 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
/* Per-input-format luma (and alpha) converter selection. */
2031 c->lumToYV12 = NULL;
2032 c->alpToYV12 = NULL;
2033 switch (srcFormat) {
2034 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2035 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2036 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2037 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2038 case PIX_FMT_YUYV422 :
2039 case PIX_FMT_YUV420P16BE:
2040 case PIX_FMT_YUV422P16BE:
2041 case PIX_FMT_YUV444P16BE:
2042 case PIX_FMT_Y400A :
2043 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2044 case PIX_FMT_UYVY422 :
2045 case PIX_FMT_YUV420P16LE:
2046 case PIX_FMT_YUV422P16LE:
2047 case PIX_FMT_YUV444P16LE:
2048 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2049 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2050 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
2051 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
2052 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2053 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
2054 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
2058 case PIX_FMT_BGR4_BYTE:
2059 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
2060 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
2061 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
2062 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
2063 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
2064 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
2065 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
2066 case PIX_FMT_RGB48BE:
2067 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
2068 case PIX_FMT_BGR48BE:
2069 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
/* Alpha extraction for formats that carry an alpha channel. */
2072 switch (srcFormat) {
2073 case PIX_FMT_RGB32 :
2074 case PIX_FMT_RGB32_1:
2075 case PIX_FMT_BGR32 :
2076 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
2077 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
/* Byte offsets into the source line for formats whose components are
 * read via a shared extractor at a fixed offset. */
2081 switch (srcFormat) {
2082 case PIX_FMT_Y400A :
2083 c->alpSrcOffset = 1;
2085 case PIX_FMT_RGB32 :
2086 case PIX_FMT_BGR32 :
2087 c->alpSrcOffset = 3;
2089 case PIX_FMT_RGB48LE:
2090 case PIX_FMT_BGR48LE:
2091 c->lumSrcOffset = 1;
2092 c->chrSrcOffset = 1;
2093 c->alpSrcOffset = 1;
/* MPEG<->JPEG range conversion only when ranges differ and the output
 * path is not RGB (yuv2rgb handles ranges itself). */
2097 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2099 c->lumConvertRange = lumRangeFromJpeg_c;
2100 c->chrConvertRange = chrRangeFromJpeg_c;
2102 c->lumConvertRange = lumRangeToJpeg_c;
2103 c->chrConvertRange = chrRangeToJpeg_c;
/* Gray and mono formats have no chroma to scale horizontally. */
2107 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2108 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2109 c->needs_hcscale = 1;
/* Return the scaling function to use: install the C defaults, then let
 * the architecture-specific initializers (MMX, AltiVec) override them.
 * NOTE(review): the `{`, conditional guards around the arch init calls
 * and the `return` statement are elided from this listing. */
2112 SwsFunc ff_getSwsFunc(SwsContext *c)
2114 sws_init_swScale_c(c);
2117 ff_sws_init_swScale_mmx(c);
2119 ff_sws_init_swScale_altivec(c);
/* Copy one plane of a slice: a single memcpy when source and destination
 * strides match (and are positive), otherwise a per-line copy of `width`
 * bytes (the per-line pointer advances are elided in this listing). */
2124 static void copyPlane(const uint8_t *src, int srcStride,
2125 int srcSliceY, int srcSliceH, int width,
2126 uint8_t *dst, int dstStride)
2128 dst += dstStride * srcSliceY;
2129 if (dstStride == srcStride && srcStride > 0) {
2130 memcpy(dst, src, srcSliceH * dstStride);
2133 for (i=0; i<srcSliceH; i++) {
2134 memcpy(dst, src, width);
/* Unscaled special converter: planar YV12/I420 -> NV12/NV21. The luma
 * plane is copied as-is; the chroma planes are interleaved, with the
 * U/V argument order swapped for NV21. */
2141 static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2142 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2144 uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2;
2146 copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
2147 dstParam[0], dstStride[0]);
2149 if (c->dstFormat == PIX_FMT_NV12)
2150 interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
2152 interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
/* Unscaled special converter: planar YV12 -> packed YUY2 via the
 * rgb2rgb helper yv12toyuy2(). */
2157 static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2158 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2160 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2162 yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
/* Unscaled special converter: planar YV12 -> packed UYVY via the
 * rgb2rgb helper yv12touyvy(). */
2167 static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2168 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2170 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2172 yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
/* YUV422P -> packed YUYV422 (full-height chroma): uses yuv422ptoyuy2(). */
2177 static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2178 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2180 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2182 yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
/* YUV422P -> packed UYVY422 (full-height chroma): uses yuv422ptouyvy(). */
2187 static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2188 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2190 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2192 yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
/*
 * Packed YUYV422 -> planar YUV420P: deinterleave into Y/U/V planes
 * (chroma vertically halved).  If the destination carries an alpha
 * plane, it is filled with opaque (255).
 */
2197 static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2198 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2200 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2201 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
2202 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
2204 yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
2207 fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
/* Packed YUYV422 -> planar YUV422P: deinterleave, chroma at full height. */
2212 static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2213 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2215 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2216 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
2217 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
2219 yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
/*
 * Packed UYVY422 -> planar YUV420P: deinterleave into Y/U/V planes
 * (chroma vertically halved); alpha plane, if present, filled with 255.
 */
2224 static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2225 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2227 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2228 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
2229 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
2231 uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
2234 fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
/* Packed UYVY422 -> planar YUV422P: deinterleave, chroma at full height. */
2239 static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2240 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2242 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2243 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
2244 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
2246 uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
/**
 * Expand a gray8a line (luma+alpha byte pairs) to packed 32-bit pixels:
 * the palette supplies the low 24 bits for the luma value and the source
 * alpha byte lands in bits 24-31.
 */
static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    int n;
    uint32_t *out = (uint32_t *) dst;
    const uint32_t *pal = (const uint32_t *) palette;

    for (n = 0; n < num_pixels; n++)
        out[n] = pal[src[2*n]] | (src[2*n + 1] << 24);
}
/**
 * Expand a gray8a line (luma+alpha byte pairs) to packed 32-bit pixels
 * with the alpha byte in the LOW byte; the palette (pre-shifted by the
 * caller) supplies the upper bits.
 */
static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    int n;
    uint32_t *out = (uint32_t *) dst;
    const uint32_t *pal = (const uint32_t *) palette;

    for (n = 0; n < num_pixels; n++)
        out[n] = pal[src[2*n]] | src[2*n + 1];
}
/**
 * Expand a gray8a line (luma+alpha byte pairs) to packed 24-bit pixels:
 * three palette bytes per output pixel, the source alpha is discarded.
 */
static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    int n;

    for (n = 0; n < num_pixels; n++) {
        const uint8_t *entry = palette + 4*src[2*n];
        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
        dst += 3;
    }
}
/*
 * Paletted or gray-alpha input -> packed RGB/BGR output.  Selects a
 * per-line conversion routine for the destination format and runs it on
 * every line of the slice using the palette prepared in c->pal_rgb.
 */
2279 static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2280 int srcSliceH, uint8_t* dst[], int dstStride[])
2282 const enum PixelFormat srcFormat= c->srcFormat;
2283 const enum PixelFormat dstFormat= c->dstFormat;
2284 void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
2285 const uint8_t *palette)=NULL;
2287 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2288 const uint8_t *srcPtr= src[0];
/* gray8a carries a real per-pixel alpha byte, so it needs the dedicated
 * gray8aToPacked* converters instead of the plain palette expanders */
2290 if (srcFormat == PIX_FMT_Y400A) {
2291 switch (dstFormat) {
2292 case PIX_FMT_RGB32 : conv = gray8aToPacked32; break;
2293 case PIX_FMT_BGR32 : conv = gray8aToPacked32; break;
2294 case PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break;
2295 case PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break;
2296 case PIX_FMT_RGB24 : conv = gray8aToPacked24; break;
2297 case PIX_FMT_BGR24 : conv = gray8aToPacked24; break;
2299 } else if (usePal(srcFormat)) {
2300 switch (dstFormat) {
2301 case PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break;
2302 case PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break;
2303 case PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break;
2304 case PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break;
2305 case PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break;
2306 case PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break;
2311 av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2312 sws_format_name(srcFormat), sws_format_name(dstFormat));
2314 for (i=0; i<srcSliceH; i++) {
2315 conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
2316 srcPtr+= srcStride[0];
2317 dstPtr+= dstStride[0];
/* True for the four 8-bit-per-channel 32-bit layouts that carry alpha. */
2324 #define isRGBA32(x) ( \
2325 (x) == PIX_FMT_ARGB \
2326 || (x) == PIX_FMT_RGBA \
2327 || (x) == PIX_FMT_BGRA \
2328 || (x) == PIX_FMT_ABGR \
2331 /* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
/*
 * Packed RGB/BGR -> packed RGB/BGR dispatcher.  srcId/dstId encode the
 * bit depth (bpp >> 2) and are combined into a single switch key
 * (srcId | dstId<<4); the 32-bit RGBA byte-shuffle cases are matched
 * exactly first, then the same-channel-order and swapped-channel-order
 * depth conversions.
 */
2332 static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2333 int srcSliceH, uint8_t* dst[], int dstStride[])
2335 const enum PixelFormat srcFormat= c->srcFormat;
2336 const enum PixelFormat dstFormat= c->dstFormat;
2337 const int srcBpp= (c->srcFormatBpp + 7) >> 3;
2338 const int dstBpp= (c->dstFormatBpp + 7) >> 3;
2339 const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
2340 const int dstId= c->dstFormatBpp >> 2;
2341 void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
/* helper: matches one exact (source, destination) format pair */
2343 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
2345 if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
2346 if ( CONV_IS(ABGR, RGBA)
2347 || CONV_IS(ARGB, BGRA)
2348 || CONV_IS(BGRA, ARGB)
2349 || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210;
2350 else if (CONV_IS(ABGR, ARGB)
2351 || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321;
2352 else if (CONV_IS(ABGR, BGRA)
2353 || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230;
2354 else if (CONV_IS(BGRA, RGBA)
2355 || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
2356 else if (CONV_IS(BGRA, ABGR)
2357 || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
2360 if ( (isBGRinInt(srcFormat) && isBGRinInt(dstFormat))
2361 || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
2362 switch(srcId | (dstId<<4)) {
2363 case 0x34: conv= rgb16to15; break;
2364 case 0x36: conv= rgb24to15; break;
2365 case 0x38: conv= rgb32to15; break;
2366 case 0x43: conv= rgb15to16; break;
2367 case 0x46: conv= rgb24to16; break;
2368 case 0x48: conv= rgb32to16; break;
2369 case 0x63: conv= rgb15to24; break;
2370 case 0x64: conv= rgb16to24; break;
2371 case 0x68: conv= rgb32to24; break;
2372 case 0x83: conv= rgb15to32; break;
2373 case 0x84: conv= rgb16to32; break;
2374 case 0x86: conv= rgb24to32; break;
2376 } else if ( (isBGRinInt(srcFormat) && isRGBinInt(dstFormat))
2377 || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
2378 switch(srcId | (dstId<<4)) {
2379 case 0x33: conv= rgb15tobgr15; break;
2380 case 0x34: conv= rgb16tobgr15; break;
2381 case 0x36: conv= rgb24tobgr15; break;
2382 case 0x38: conv= rgb32tobgr15; break;
2383 case 0x43: conv= rgb15tobgr16; break;
2384 case 0x44: conv= rgb16tobgr16; break;
2385 case 0x46: conv= rgb24tobgr16; break;
2386 case 0x48: conv= rgb32tobgr16; break;
2387 case 0x63: conv= rgb15tobgr24; break;
2388 case 0x64: conv= rgb16tobgr24; break;
2389 case 0x66: conv= rgb24tobgr24; break;
2390 case 0x68: conv= rgb32tobgr24; break;
2391 case 0x83: conv= rgb15tobgr32; break;
2392 case 0x84: conv= rgb16tobgr32; break;
2393 case 0x86: conv= rgb24tobgr32; break;
2398 av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2399 sws_format_name(srcFormat), sws_format_name(dstFormat));
2401 const uint8_t *srcPtr= src[0];
2402 uint8_t *dstPtr= dst[0];
/* alpha-last 32-bit variants need a one-byte offset so the RGB bytes
 * line up with the converter's expectations */
2403 if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat))
2404 srcPtr += ALT32_CORR;
2406 if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat))
2407 dstPtr += ALT32_CORR;
2409 if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
2410 conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
2413 dstPtr += dstStride[0]*srcSliceY;
2415 for (i=0; i<srcSliceH; i++) {
2416 conv(srcPtr, dstPtr, c->srcW*srcBpp);
2417 srcPtr+= srcStride[0];
2418 dstPtr+= dstStride[0];
/*
 * BGR24 -> YUV420P: converts the slice via rgb24toyv12 (the call head is
 * elided in this excerpt); chroma rows are vertically halved.  If the
 * destination has an alpha plane it is filled with opaque (255).
 */
2425 static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2426 int srcSliceH, uint8_t* dst[], int dstStride[])
2430 dst[0]+ srcSliceY *dstStride[0],
2431 dst[1]+(srcSliceY>>1)*dstStride[1],
2432 dst[2]+(srcSliceY>>1)*dstStride[2],
2434 dstStride[0], dstStride[1], srcStride[0]);
2436 fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
/*
 * YUV410P -> YUV420P: luma is copied as-is, each quarter-resolution
 * chroma plane is doubled with planar2x; alpha plane, if present, is
 * filled with opaque (255).
 */
2440 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2441 int srcSliceH, uint8_t* dst[], int dstStride[])
2443 copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
2444 dst[0], dstStride[0]);
2446 planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
2447 srcSliceH >> 2, srcStride[1], dstStride[1]);
2448 planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
2449 srcSliceH >> 2, srcStride[2], dstStride[2]);
2451 fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
2455 /* unscaled copy like stuff (assumes nearly identical formats) */
/*
 * Copy between nearly identical packed formats: one bulk memcpy when the
 * strides match, otherwise per-line copies of the largest line length
 * that is safe for both strides.
 */
2456 static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2457 int srcSliceH, uint8_t* dst[], int dstStride[])
2459 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
2460 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
2463 const uint8_t *srcPtr= src[0];
2464 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2467 /* universal length finder */
2468 while(length+c->srcW <= FFABS(dstStride[0])
2469 && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
2472 for (i=0; i<srcSliceH; i++) {
2473 memcpy(dstPtr, srcPtr, length);
2474 srcPtr+= srcStride[0];
2475 dstPtr+= dstStride[0];
/*
 * Plane-by-plane copy with bit-depth and endianness adaptation.  For
 * each of up to four planes: a missing source plane is filled with a
 * neutral value (255 for alpha, 128 for chroma); 8/9/10/16-bit sample
 * depths are converted per sample with explicit big/little-endian reads
 * and writes; identical layouts fall back to plain memcpy.
 */
2481 static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2482 int srcSliceH, uint8_t* dst[], int dstStride[])
2485 for (plane=0; plane<4; plane++) {
2486 int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
2487 int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
2488 int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
2489 const uint8_t *srcPtr= src[plane];
2490 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
2492 if (!dst[plane]) continue;
2493 // ignore palette for GRAY8
2494 if (plane == 1 && !dst[2]) continue;
2495 if (!src[plane] || (plane == 1 && !src[2])) {
2496 if(is16BPS(c->dstFormat))
2498 fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
/* 9/10-bit source samples: widen or narrow each sample, replicating the
 * top bits into the low bits so full-range white stays full range */
2500 if(is9_OR_10BPS(c->srcFormat)) {
2501 const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
2502 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
2503 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
2505 if (is16BPS(c->dstFormat)) {
2506 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
2507 #define COPY9_OR_10TO16(rfunc, wfunc) \
2508     for (i = 0; i < height; i++) { \
2509         for (j = 0; j < length; j++) { \
2510             int srcpx = rfunc(&srcPtr2[j]); \
2511             wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
2513         dstPtr2 += dstStride[plane]/2; \
2514         srcPtr2 += srcStride[plane]/2; \
2516 if (isBE(c->dstFormat)) {
2517 if (isBE(c->srcFormat)) {
2518 COPY9_OR_10TO16(AV_RB16, AV_WB16);
2520 COPY9_OR_10TO16(AV_RL16, AV_WB16);
2523 if (isBE(c->srcFormat)) {
2524 COPY9_OR_10TO16(AV_RB16, AV_WL16);
2526 COPY9_OR_10TO16(AV_RL16, AV_WL16);
2529 } else if (is9_OR_10BPS(c->dstFormat)) {
2530 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
2531 #define COPY9_OR_10TO9_OR_10(loop) \
2532     for (i = 0; i < height; i++) { \
2533         for (j = 0; j < length; j++) { \
2536         dstPtr2 += dstStride[plane]/2; \
2537         srcPtr2 += srcStride[plane]/2; \
2539 #define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
2540     if (dst_depth > src_depth) { \
2541         COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
2542             wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
2543     } else if (dst_depth < src_depth) { \
2544         COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
2546         COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
2548 if (isBE(c->dstFormat)) {
2549 if (isBE(c->srcFormat)) {
2550 COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
2552 COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
2555 if (isBE(c->srcFormat)) {
2556 COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
2558 COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
2562 // FIXME Maybe dither instead.
2563 #define COPY9_OR_10TO8(rfunc) \
2564     for (i = 0; i < height; i++) { \
2565         for (j = 0; j < length; j++) { \
2566             dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
2568         dstPtr += dstStride[plane]; \
2569         srcPtr2 += srcStride[plane]/2; \
2571 if (isBE(c->srcFormat)) {
2572 COPY9_OR_10TO8(AV_RB16);
2574 COPY9_OR_10TO8(AV_RL16);
2577 } else if(is9_OR_10BPS(c->dstFormat)) {
2578 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
2579 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
2581 if (is16BPS(c->srcFormat)) {
2582 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
2583 #define COPY16TO9_OR_10(rfunc, wfunc) \
2584     for (i = 0; i < height; i++) { \
2585         for (j = 0; j < length; j++) { \
2586             wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
2588         dstPtr2 += dstStride[plane]/2; \
2589         srcPtr2 += srcStride[plane]/2; \
2591 if (isBE(c->dstFormat)) {
2592 if (isBE(c->srcFormat)) {
2593 COPY16TO9_OR_10(AV_RB16, AV_WB16);
2595 COPY16TO9_OR_10(AV_RL16, AV_WB16);
2598 if (isBE(c->srcFormat)) {
2599 COPY16TO9_OR_10(AV_RB16, AV_WL16);
2601 COPY16TO9_OR_10(AV_RL16, AV_WL16);
2605 #define COPY8TO9_OR_10(wfunc) \
2606     for (i = 0; i < height; i++) { \
2607         for (j = 0; j < length; j++) { \
2608             const int srcpx = srcPtr[j]; \
2609             wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
2611         dstPtr2 += dstStride[plane]/2; \
2612         srcPtr += srcStride[plane]; \
2614 if (isBE(c->dstFormat)) {
2615 COPY8TO9_OR_10(AV_WB16);
2617 COPY8TO9_OR_10(AV_WL16);
2620 } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
2621 if (!isBE(c->srcFormat)) srcPtr++;
2622 for (i=0; i<height; i++) {
2623 for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
2624 srcPtr+= srcStride[plane];
2625 dstPtr+= dstStride[plane];
2627 } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
2628 for (i=0; i<height; i++) {
2629 for (j=0; j<length; j++) {
2630 dstPtr[ j<<1 ] = srcPtr[j];
2631 dstPtr[(j<<1)+1] = srcPtr[j];
2633 srcPtr+= srcStride[plane];
2634 dstPtr+= dstStride[plane];
2636 } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
2637 && isBE(c->srcFormat) != isBE(c->dstFormat)) {
2639 for (i=0; i<height; i++) {
2640 for (j=0; j<length; j++)
2641 ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]);
2642 srcPtr+= srcStride[plane];
2643 dstPtr+= dstStride[plane];
2645 } else if (dstStride[plane] == srcStride[plane] &&
2646 srcStride[plane] > 0 && srcStride[plane] == length) {
2647 memcpy(dst[plane] + dstStride[plane]*y, src[plane],
2648 height*dstStride[plane]);
2650 if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
2652 for (i=0; i<height; i++) {
2653 memcpy(dstPtr, srcPtr, length);
2654 srcPtr+= srcStride[plane];
2655 dstPtr+= dstStride[plane];
/*
 * Probe for a special-cased unscaled converter matching the current
 * source/destination format pair and install it in c->swScale.  The
 * arch-specific probes at the end (BFIN, AltiVec) may override the
 * generic choice.
 */
2663 void ff_get_unscaled_swscale(SwsContext *c)
2665 const enum PixelFormat srcFormat = c->srcFormat;
2666 const enum PixelFormat dstFormat = c->dstFormat;
2667 const int flags = c->flags;
2668 const int dstH = c->dstH;
/* dithering is required when packing down to RGB formats below 24 bpp */
2671 needsDither= isAnyRGB(dstFormat)
2672 && c->dstFormatBpp < 24
2673 && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat)));
2676 if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
2677 c->swScale= planarToNv12Wrapper;
2680 if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat)
2681 && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
2682 c->swScale= ff_yuv2rgb_get_func_ptr(c);
2685 if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
2686 c->swScale= yvu9ToYv12Wrapper;
2690 if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
2691 c->swScale= bgr24ToYv12Wrapper;
2693 /* RGB/BGR -> RGB/BGR (no dither needed forms) */
2694 if ( isAnyRGB(srcFormat)
2695 && isAnyRGB(dstFormat)
2696 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
2697 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8
2698 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4
2699 && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4
2700 && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2701 && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2702 && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2703 && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2704 && srcFormat != PIX_FMT_RGB48LE && dstFormat != PIX_FMT_RGB48LE
2705 && srcFormat != PIX_FMT_RGB48BE && dstFormat != PIX_FMT_RGB48BE
2706 && srcFormat != PIX_FMT_BGR48LE && dstFormat != PIX_FMT_BGR48LE
2707 && srcFormat != PIX_FMT_BGR48BE && dstFormat != PIX_FMT_BGR48BE
2708 && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2709 c->swScale= rgbToRgbWrapper;
2711 if ((usePal(srcFormat) && (
2712 dstFormat == PIX_FMT_RGB32 ||
2713 dstFormat == PIX_FMT_RGB32_1 ||
2714 dstFormat == PIX_FMT_RGB24 ||
2715 dstFormat == PIX_FMT_BGR32 ||
2716 dstFormat == PIX_FMT_BGR32_1 ||
2717 dstFormat == PIX_FMT_BGR24)))
2718 c->swScale= palToRgbWrapper;
2720 if (srcFormat == PIX_FMT_YUV422P) {
2721 if (dstFormat == PIX_FMT_YUYV422)
2722 c->swScale= yuv422pToYuy2Wrapper;
2723 else if (dstFormat == PIX_FMT_UYVY422)
2724 c->swScale= yuv422pToUyvyWrapper;
2727 /* LQ converters if -sws 0 or -sws 4*/
2728 if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
2730 if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) {
2731 if (dstFormat == PIX_FMT_YUYV422)
2732 c->swScale= planarToYuy2Wrapper;
2733 else if (dstFormat == PIX_FMT_UYVY422)
2734 c->swScale= planarToUyvyWrapper;
2737 if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
2738 c->swScale= yuyvToYuv420Wrapper;
2739 if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
2740 c->swScale= uyvyToYuv420Wrapper;
2741 if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
2742 c->swScale= yuyvToYuv422Wrapper;
2743 if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
2744 c->swScale= uyvyToYuv422Wrapper;
2747 if ( srcFormat == dstFormat
2748 || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
2749 || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
2750 || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2751 || (isPlanarYUV(dstFormat) && isGray(srcFormat))
2752 || (isGray(dstFormat) && isGray(srcFormat))
2753 || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
2754 && c->chrDstHSubSample == c->chrSrcHSubSample
2755 && c->chrDstVSubSample == c->chrSrcVSubSample
2756 && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21
2757 && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21))
2759 if (isPacked(c->srcFormat))
2760 c->swScale= packedCopyWrapper;
2761 else /* Planar YUV or gray */
2762 c->swScale= planarCopyWrapper;
2766 ff_bfin_get_unscaled_swscale(c);
2768 ff_swscale_get_unscaled_altivec(c);
/*
 * Null out the plane pointers that the given format does not actually
 * use (alpha, chroma, palette); the NULL assignments themselves are
 * elided in this excerpt.
 */
2771 static void reset_ptr(const uint8_t* src[], int format)
2773 if(!isALPHA(format))
2775 if(!isPlanarYUV(format)) {
2778 if (!usePal(format))
/*
 * Verify that every plane required by pix_fmt has both a data pointer
 * and a nonzero linesize; fails (returns 0, elided here) on the first
 * missing plane.
 */
2783 static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt,
2784 const int linesizes[4])
2786 const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
2789 for (i = 0; i < 4; i++) {
2790 int plane = desc->comp[i].plane;
2791 if (!data[plane] || !linesizes[plane])
2799 * swscale wrapper, so we don't need to export the SwsContext.
2800 * Assumes planar YUV to be in YUV order instead of YVU.
2802 int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY,
2803 int srcSliceH, uint8_t* const dst[], const int dstStride[])
2806 const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
2807 uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
2809 // do not mess up sliceDir if we have a "trailing" 0-size slice
2813 if (!check_image_pointers(src, c->srcFormat, srcStride)) {
2814 av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
2817 if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
2818 av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
/* slices must arrive strictly top-to-bottom or bottom-to-top */
2822 if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2823 av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
2826 if (c->sliceDir == 0) {
2827 if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
/* (re)build the YUV and RGB palette lookup tables from the source palette */
2830 if (usePal(c->srcFormat)) {
2831 for (i=0; i<256; i++) {
2832 int p, r, g, b,y,u,v;
2833 if(c->srcFormat == PIX_FMT_PAL8) {
2834 p=((const uint32_t*)(src[1]))[i];
2838 } else if(c->srcFormat == PIX_FMT_RGB8) {
2842 } else if(c->srcFormat == PIX_FMT_BGR8) {
2846 } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) {
2850 } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) {
2853 assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
2858 y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2859 u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2860 v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2861 c->pal_yuv[i]= y + (u<<8) + (v<<16);
2863 switch(c->dstFormat) {
2868 c->pal_rgb[i]= r + (g<<8) + (b<<16);
2870 case PIX_FMT_BGR32_1:
2874 c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
2876 case PIX_FMT_RGB32_1:
2880 c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
2887 c->pal_rgb[i]= b + (g<<8) + (r<<16);
2892 // copy strides, so they can safely be modified
2893 if (c->sliceDir == 1) {
2894 // slices go from top to bottom
2895 int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
2896 int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
2898 reset_ptr(src2, c->srcFormat);
2899 reset_ptr((const uint8_t**)dst2, c->dstFormat);
2901 /* reset slice direction at end of frame */
2902 if (srcSliceY + srcSliceH == c->srcH)
2905 return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
2907 // slices go from bottom to top => we flip the image internally
2908 int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
2909 int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
/* point at the last line of each plane so the negated strides walk upwards */
2911 src2[0] += (srcSliceH-1)*srcStride[0];
2912 if (!usePal(c->srcFormat))
2913 src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2914 src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2915 src2[3] += (srcSliceH-1)*srcStride[3];
2916 dst2[0] += ( c->dstH -1)*dstStride[0];
2917 dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
2918 dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
2919 dst2[3] += ( c->dstH -1)*dstStride[3];
2921 reset_ptr(src2, c->srcFormat);
2922 reset_ptr((const uint8_t**)dst2, c->dstFormat);
2924 /* reset slice direction at end of frame */
2928 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2932 /* Expand an 8-bit paletted image to the same packed 32-bit format as the palette entries */
/**
 * Expand an 8-bit paletted line into packed 32-bit pixels by direct
 * palette lookup; each output pixel is the 32-bit palette entry for the
 * corresponding source index.
 */
void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    int n;
    uint32_t *out = (uint32_t *) dst;
    const uint32_t *pal = (const uint32_t *) palette;

    for (n = 0; n < num_pixels; n++)
        out[n] = pal[src[n]];
}
2941 /* Palette format: ABCD -> dst format: ABC */
2942 void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
2946 for (i=0; i<num_pixels; i++) {
2948 dst[0]= palette[src[i]*4+0];
2949 dst[1]= palette[src[i]*4+1];
2950 dst[2]= palette[src[i]*4+2];