2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* True if x is a packed (non-planar) pixel format handled here.
 * NOTE(review): the first operand and closing paren were truncated;
 * restored per the supported-input list above (PAL8 + packed YUV + RGB). */
#define isPacked(x) ( \
           (x)==PIX_FMT_PAL8 \
        || (x)==PIX_FMT_YUYV422 \
        || (x)==PIX_FMT_UYVY422 \
        || (x)==PIX_FMT_Y400A \
        || isAnyRGB(x) \
    )
/* Fixed-point RGB -> YUV conversion coefficients with RGB2YUV_SHIFT
 * fractional bits.  Luma weights (BY/GY/RY, 0.114/0.587/0.299 = BT.601)
 * are scaled by 219/255 and chroma weights by 224/255, producing
 * limited-range (16..235 luma, 16..240 chroma) output. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB -> YUV coefficient sets (presumably indexed by the
 * SWS_CS_* constants — verify against sws_setColorspaceDetails).
 * Fixed truncation: the table's closing brace was missing. */
static const double rgb2yuv_table[8][9]={
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
104 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
107 more intelligent misalignment avoidance for the horizontal scaler
108 write special vertical cubic upscale version
109 optimize C code (YV12 / minmax)
110 add support for packed pixel YUV input & output
111 add support for Y8 output
112 optimize BGR24 & BGR32
113 add BGR4 output support
114 write special BGR->BGR scaler
117 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
118 { 1, 3, 1, 3, 1, 3, 1, 3, },
119 { 2, 0, 2, 0, 2, 0, 2, 0, },
122 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
123 { 6, 2, 6, 2, 6, 2, 6, 2, },
124 { 0, 4, 0, 4, 0, 4, 0, 4, },
127 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
128 { 8, 4, 11, 7, 8, 4, 11, 7, },
129 { 2, 14, 1, 13, 2, 14, 1, 13, },
130 { 10, 6, 9, 5, 10, 6, 9, 5, },
131 { 0, 12, 3, 15, 0, 12, 3, 15, },
134 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
135 { 17, 9, 23, 15, 16, 8, 22, 14, },
136 { 5, 29, 3, 27, 4, 28, 2, 26, },
137 { 21, 13, 19, 11, 20, 12, 18, 10, },
138 { 0, 24, 6, 30, 1, 25, 7, 31, },
139 { 16, 8, 22, 14, 17, 9, 23, 15, },
140 { 4, 28, 2, 26, 5, 29, 3, 27, },
141 { 20, 12, 18, 10, 21, 13, 19, 11, },
142 { 1, 25, 7, 31, 0, 24, 6, 30, },
145 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
146 { 0, 55, 14, 68, 3, 58, 17, 72, },
147 { 37, 18, 50, 32, 40, 22, 54, 35, },
148 { 9, 64, 5, 59, 13, 67, 8, 63, },
149 { 46, 27, 41, 23, 49, 31, 44, 26, },
150 { 2, 57, 16, 71, 1, 56, 15, 70, },
151 { 39, 21, 52, 34, 38, 19, 51, 33, },
152 { 11, 66, 7, 62, 10, 65, 6, 60, },
153 { 48, 30, 43, 25, 47, 29, 42, 24, },
157 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
158 {117, 62, 158, 103, 113, 58, 155, 100, },
159 { 34, 199, 21, 186, 31, 196, 17, 182, },
160 {144, 89, 131, 76, 141, 86, 127, 72, },
161 { 0, 165, 41, 206, 10, 175, 52, 217, },
162 {110, 55, 151, 96, 120, 65, 162, 107, },
163 { 28, 193, 14, 179, 38, 203, 24, 189, },
164 {138, 83, 124, 69, 148, 93, 134, 79, },
165 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative dither_8x8_220 table; redefines an identifier already
 * defined above (the original selected one variant via #if/#elif, lost
 * here), so it is disabled to keep the file compilable. */
#if 0
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 0, 143, 18, 200, 2, 156, 25, 215, },
{ 78, 28, 125, 64, 89, 36, 138, 74, },
{ 10, 180, 3, 161, 16, 195, 8, 175, },
{109, 51, 93, 38, 121, 60, 105, 47, },
{ 1, 152, 23, 210, 0, 147, 20, 205, },
{ 85, 33, 134, 71, 81, 30, 130, 67, },
{ 14, 190, 6, 171, 12, 185, 5, 166, },
{117, 57, 101, 44, 113, 54, 97, 41, },
};
#endif
/* Alternative dither_8x8_220 table (gamma 2.0); disabled — duplicate
 * definition, originally guarded by a now-missing #if/#elif selector. */
#if 0
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 0, 124, 8, 193, 0, 140, 12, 213, },
{ 55, 14, 104, 42, 66, 19, 119, 52, },
{ 3, 168, 1, 145, 6, 187, 3, 162, },
{ 86, 31, 70, 21, 99, 39, 82, 28, },
{ 0, 134, 11, 206, 0, 129, 9, 200, },
{ 62, 17, 114, 48, 58, 16, 109, 45, },
{ 5, 181, 2, 157, 4, 175, 1, 151, },
{ 95, 36, 78, 26, 90, 34, 74, 24, },
};
#endif
/* Alternative dither_8x8_220 table (gamma 2.5); disabled — duplicate
 * definition, originally guarded by a now-missing #if/#elif selector. */
#if 0
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 0, 107, 3, 187, 0, 125, 6, 212, },
{ 39, 7, 86, 28, 49, 11, 102, 36, },
{ 1, 158, 0, 131, 3, 180, 1, 151, },
{ 68, 19, 52, 12, 81, 25, 64, 17, },
{ 0, 119, 5, 203, 0, 113, 4, 195, },
{ 45, 9, 96, 33, 42, 8, 91, 30, },
{ 2, 172, 1, 144, 2, 165, 0, 137, },
{ 77, 23, 60, 15, 72, 21, 56, 14, },
};
#endif
205 static av_always_inline void
206 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
207 int lumFilterSize, const int16_t *chrFilter,
208 const int16_t **chrUSrc, const int16_t **chrVSrc,
209 int chrFilterSize, const int16_t **alpSrc,
210 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
211 uint16_t *aDest, int dstW, int chrDstW,
212 int big_endian, int output_bits)
214 //FIXME Optimize (just quickly written not optimized..)
216 int shift = 11 + 16 - output_bits;
218 #define output_pixel(pos, val) \
220 if (output_bits == 16) { \
221 AV_WB16(pos, av_clip_uint16(val >> shift)); \
223 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
226 if (output_bits == 16) { \
227 AV_WL16(pos, av_clip_uint16(val >> shift)); \
229 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
232 for (i = 0; i < dstW; i++) {
233 int val = 1 << (26-output_bits);
236 for (j = 0; j < lumFilterSize; j++)
237 val += lumSrc[j][i] * lumFilter[j];
239 output_pixel(&dest[i], val);
243 for (i = 0; i < chrDstW; i++) {
244 int u = 1 << (26-output_bits);
245 int v = 1 << (26-output_bits);
248 for (j = 0; j < chrFilterSize; j++) {
249 u += chrUSrc[j][i] * chrFilter[j];
250 v += chrVSrc[j][i] * chrFilter[j];
253 output_pixel(&uDest[i], u);
254 output_pixel(&vDest[i], v);
258 if (CONFIG_SWSCALE_ALPHA && aDest) {
259 for (i = 0; i < dstW; i++) {
260 int val = 1 << (26-output_bits);
263 for (j = 0; j < lumFilterSize; j++)
264 val += alpSrc[j][i] * lumFilter[j];
266 output_pixel(&aDest[i], val);
/* Template generating per-depth / per-endianness wrappers around
 * yuv2yuvX16_c_template; BE_LE is the name suffix, is_be the endian flag.
 * Restored the elided function braces, the alpSrc argument line, and the
 * instantiations the dispatcher below requires. */
#define yuv2NBPS(bits, BE_LE, is_be) \
static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
                              const int16_t **lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **chrUSrc, \
                              const int16_t **chrVSrc, \
                              int chrFilterSize, const int16_t **alpSrc, \
                              uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
                              uint16_t *aDest, int dstW, int chrDstW) \
{ \
    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, \
                          dest, uDest, vDest, aDest, \
                          dstW, chrDstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1);
yuv2NBPS( 9, LE, 0);
yuv2NBPS(10, BE, 1);
yuv2NBPS(10, LE, 0);
yuv2NBPS(16, BE, 1);
yuv2NBPS(16, LE, 0);
293 static inline void yuv2yuvX16_c(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
294 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
295 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
296 enum PixelFormat dstFormat)
298 #define conv16(bits) \
299 if (isBE(dstFormat)) { \
300 yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
301 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
303 dest, uDest, vDest, aDest, \
306 yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
307 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
309 dest, uDest, vDest, aDest, \
312 if (is16BPS(dstFormat)) {
314 } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
322 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
323 const int16_t **lumSrc, int lumFilterSize,
324 const int16_t *chrFilter, const int16_t **chrUSrc,
325 const int16_t **chrVSrc,
326 int chrFilterSize, const int16_t **alpSrc,
327 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
328 uint8_t *aDest, int dstW, int chrDstW)
330 //FIXME Optimize (just quickly written not optimized..)
332 for (i=0; i<dstW; i++) {
335 for (j=0; j<lumFilterSize; j++)
336 val += lumSrc[j][i] * lumFilter[j];
338 dest[i]= av_clip_uint8(val>>19);
342 for (i=0; i<chrDstW; i++) {
346 for (j=0; j<chrFilterSize; j++) {
347 u += chrUSrc[j][i] * chrFilter[j];
348 v += chrVSrc[j][i] * chrFilter[j];
351 uDest[i]= av_clip_uint8(u>>19);
352 vDest[i]= av_clip_uint8(v>>19);
355 if (CONFIG_SWSCALE_ALPHA && aDest)
356 for (i=0; i<dstW; i++) {
359 for (j=0; j<lumFilterSize; j++)
360 val += alpSrc[j][i] * lumFilter[j];
362 aDest[i]= av_clip_uint8(val>>19);
367 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
368 const int16_t **lumSrc, int lumFilterSize,
369 const int16_t *chrFilter, const int16_t **chrUSrc,
370 const int16_t **chrVSrc,
371 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
372 int dstW, int chrDstW, enum PixelFormat dstFormat)
374 //FIXME Optimize (just quickly written not optimized..)
376 for (i=0; i<dstW; i++) {
379 for (j=0; j<lumFilterSize; j++)
380 val += lumSrc[j][i] * lumFilter[j];
382 dest[i]= av_clip_uint8(val>>19);
388 if (dstFormat == PIX_FMT_NV12)
389 for (i=0; i<chrDstW; i++) {
393 for (j=0; j<chrFilterSize; j++) {
394 u += chrUSrc[j][i] * chrFilter[j];
395 v += chrVSrc[j][i] * chrFilter[j];
398 uDest[2*i]= av_clip_uint8(u>>19);
399 uDest[2*i+1]= av_clip_uint8(v>>19);
402 for (i=0; i<chrDstW; i++) {
406 for (j=0; j<chrFilterSize; j++) {
407 u += chrUSrc[j][i] * chrFilter[j];
408 v += chrVSrc[j][i] * chrFilter[j];
411 uDest[2*i]= av_clip_uint8(v>>19);
412 uDest[2*i+1]= av_clip_uint8(u>>19);
416 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
417 for (i=0; i<(dstW>>1); i++) {\
423 int av_unused A1, A2;\
424 type av_unused *r, *b, *g;\
427 for (j=0; j<lumFilterSize; j++) {\
428 Y1 += lumSrc[j][i2] * lumFilter[j];\
429 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
431 for (j=0; j<chrFilterSize; j++) {\
432 U += chrUSrc[j][i] * chrFilter[j];\
433 V += chrVSrc[j][i] * chrFilter[j];\
442 for (j=0; j<lumFilterSize; j++) {\
443 A1 += alpSrc[j][i2 ] * lumFilter[j];\
444 A2 += alpSrc[j][i2+1] * lumFilter[j];\
450 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
451 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
452 if ((Y1|Y2|U|V)&256) {\
453 if (Y1>255) Y1=255; \
454 else if (Y1<0)Y1=0; \
455 if (Y2>255) Y2=255; \
456 else if (Y2<0)Y2=0; \
462 if (alpha && ((A1|A2)&256)) {\
463 A1=av_clip_uint8(A1);\
464 A2=av_clip_uint8(A2);\
467 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
468 for (i=0; i<dstW; i++) {\
476 for (j=0; j<lumFilterSize; j++) {\
477 Y += lumSrc[j][i ] * lumFilter[j];\
479 for (j=0; j<chrFilterSize; j++) {\
480 U += chrUSrc[j][i] * chrFilter[j];\
481 V += chrVSrc[j][i] * chrFilter[j];\
488 for (j=0; j<lumFilterSize; j++)\
489 A += alpSrc[j][i ] * lumFilter[j];\
492 A = av_clip_uint8(A);\
495 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
496 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
497 Y-= c->yuv2rgb_y_offset;\
498 Y*= c->yuv2rgb_y_coeff;\
500 R= Y + V*c->yuv2rgb_v2r_coeff;\
501 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
502 B= Y + U*c->yuv2rgb_u2b_coeff;\
503 if ((R|G|B)&(0xC0000000)) {\
504 if (R>=(256<<22)) R=(256<<22)-1; \
506 if (G>=(256<<22)) G=(256<<22)-1; \
508 if (B>=(256<<22)) B=(256<<22)-1; \
512 #define YSCALE_YUV_2_GRAY16_C \
513 for (i=0; i<(dstW>>1); i++) {\
522 for (j=0; j<lumFilterSize; j++) {\
523 Y1 += lumSrc[j][i2] * lumFilter[j];\
524 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
528 if ((Y1|Y2|U|V)&65536) {\
529 if (Y1>65535) Y1=65535; \
530 else if (Y1<0)Y1=0; \
531 if (Y2>65535) Y2=65535; \
532 else if (Y2<0)Y2=0; \
535 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
536 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
537 r = (type *)c->table_rV[V]; \
538 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
539 b = (type *)c->table_bU[U];
541 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
542 for (i=0; i<(dstW>>1); i++) { \
544 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
545 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
546 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
547 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
548 type av_unused *r, *b, *g; \
549 int av_unused A1, A2; \
551 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
552 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
555 #define YSCALE_YUV_2_GRAY16_2_C \
556 for (i=0; i<(dstW>>1); i++) { \
558 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
559 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
561 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
562 YSCALE_YUV_2_PACKED2_C(type,alpha)\
563 r = (type *)c->table_rV[V];\
564 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
565 b = (type *)c->table_bU[U];
567 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
568 for (i=0; i<(dstW>>1); i++) {\
570 int Y1= buf0[i2 ]>>7;\
571 int Y2= buf0[i2+1]>>7;\
572 int U= (ubuf1[i])>>7;\
573 int V= (vbuf1[i])>>7;\
574 type av_unused *r, *b, *g;\
575 int av_unused A1, A2;\
581 #define YSCALE_YUV_2_GRAY16_1_C \
582 for (i=0; i<(dstW>>1); i++) {\
584 int Y1= buf0[i2 ]<<1;\
585 int Y2= buf0[i2+1]<<1;
587 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
588 YSCALE_YUV_2_PACKED1_C(type,alpha)\
589 r = (type *)c->table_rV[V];\
590 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
591 b = (type *)c->table_bU[U];
593 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
594 for (i=0; i<(dstW>>1); i++) {\
596 int Y1= buf0[i2 ]>>7;\
597 int Y2= buf0[i2+1]>>7;\
598 int U= (ubuf0[i] + ubuf1[i])>>8;\
599 int V= (vbuf0[i] + vbuf1[i])>>8;\
600 type av_unused *r, *b, *g;\
601 int av_unused A1, A2;\
607 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
608 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
609 r = (type *)c->table_rV[V];\
610 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
611 b = (type *)c->table_bU[U];
613 #define YSCALE_YUV_2_MONO2_C \
614 const uint8_t * const d128=dither_8x8_220[y&7];\
615 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
616 for (i=0; i<dstW-7; i+=8) {\
618 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
619 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
620 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
621 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
622 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
623 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
624 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
625 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
626 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
630 #define YSCALE_YUV_2_MONOX_C \
631 const uint8_t * const d128=dither_8x8_220[y&7];\
632 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
634 for (i=0; i<dstW-1; i+=2) {\
639 for (j=0; j<lumFilterSize; j++) {\
640 Y1 += lumSrc[j][i] * lumFilter[j];\
641 Y2 += lumSrc[j][i+1] * lumFilter[j];\
651 acc+= acc + g[Y1+d128[(i+0)&7]];\
652 acc+= acc + g[Y2+d128[(i+1)&7]];\
654 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
659 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
660 switch(c->dstFormat) {\
661 case PIX_FMT_RGB48BE:\
662 case PIX_FMT_RGB48LE:\
664 ((uint8_t*)dest)[ 0]= r[Y1];\
665 ((uint8_t*)dest)[ 1]= r[Y1];\
666 ((uint8_t*)dest)[ 2]= g[Y1];\
667 ((uint8_t*)dest)[ 3]= g[Y1];\
668 ((uint8_t*)dest)[ 4]= b[Y1];\
669 ((uint8_t*)dest)[ 5]= b[Y1];\
670 ((uint8_t*)dest)[ 6]= r[Y2];\
671 ((uint8_t*)dest)[ 7]= r[Y2];\
672 ((uint8_t*)dest)[ 8]= g[Y2];\
673 ((uint8_t*)dest)[ 9]= g[Y2];\
674 ((uint8_t*)dest)[10]= b[Y2];\
675 ((uint8_t*)dest)[11]= b[Y2];\
679 case PIX_FMT_BGR48BE:\
680 case PIX_FMT_BGR48LE:\
682 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
683 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
684 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
685 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
686 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
687 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
694 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
695 func(uint32_t,needAlpha)\
696 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
697 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
700 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
702 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
703 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
707 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
708 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
716 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
717 func(uint32_t,needAlpha)\
718 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
719 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
722 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
724 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
725 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
729 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
730 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
737 ((uint8_t*)dest)[0]= r[Y1];\
738 ((uint8_t*)dest)[1]= g[Y1];\
739 ((uint8_t*)dest)[2]= b[Y1];\
740 ((uint8_t*)dest)[3]= r[Y2];\
741 ((uint8_t*)dest)[4]= g[Y2];\
742 ((uint8_t*)dest)[5]= b[Y2];\
748 ((uint8_t*)dest)[0]= b[Y1];\
749 ((uint8_t*)dest)[1]= g[Y1];\
750 ((uint8_t*)dest)[2]= r[Y1];\
751 ((uint8_t*)dest)[3]= b[Y2];\
752 ((uint8_t*)dest)[4]= g[Y2];\
753 ((uint8_t*)dest)[5]= r[Y2];\
757 case PIX_FMT_RGB565BE:\
758 case PIX_FMT_RGB565LE:\
759 case PIX_FMT_BGR565BE:\
760 case PIX_FMT_BGR565LE:\
762 const int dr1= dither_2x2_8[y&1 ][0];\
763 const int dg1= dither_2x2_4[y&1 ][0];\
764 const int db1= dither_2x2_8[(y&1)^1][0];\
765 const int dr2= dither_2x2_8[y&1 ][1];\
766 const int dg2= dither_2x2_4[y&1 ][1];\
767 const int db2= dither_2x2_8[(y&1)^1][1];\
769 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
770 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
774 case PIX_FMT_RGB555BE:\
775 case PIX_FMT_RGB555LE:\
776 case PIX_FMT_BGR555BE:\
777 case PIX_FMT_BGR555LE:\
779 const int dr1= dither_2x2_8[y&1 ][0];\
780 const int dg1= dither_2x2_8[y&1 ][1];\
781 const int db1= dither_2x2_8[(y&1)^1][0];\
782 const int dr2= dither_2x2_8[y&1 ][1];\
783 const int dg2= dither_2x2_8[y&1 ][0];\
784 const int db2= dither_2x2_8[(y&1)^1][1];\
786 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
787 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
791 case PIX_FMT_RGB444BE:\
792 case PIX_FMT_RGB444LE:\
793 case PIX_FMT_BGR444BE:\
794 case PIX_FMT_BGR444LE:\
796 const int dr1= dither_4x4_16[y&3 ][0];\
797 const int dg1= dither_4x4_16[y&3 ][1];\
798 const int db1= dither_4x4_16[(y&3)^3][0];\
799 const int dr2= dither_4x4_16[y&3 ][1];\
800 const int dg2= dither_4x4_16[y&3 ][0];\
801 const int db2= dither_4x4_16[(y&3)^3][1];\
803 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
804 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
811 const uint8_t * const d64= dither_8x8_73[y&7];\
812 const uint8_t * const d32= dither_8x8_32[y&7];\
814 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
815 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
822 const uint8_t * const d64= dither_8x8_73 [y&7];\
823 const uint8_t * const d128=dither_8x8_220[y&7];\
825 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
826 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
830 case PIX_FMT_RGB4_BYTE:\
831 case PIX_FMT_BGR4_BYTE:\
833 const uint8_t * const d64= dither_8x8_73 [y&7];\
834 const uint8_t * const d128=dither_8x8_220[y&7];\
836 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
837 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
841 case PIX_FMT_MONOBLACK:\
842 case PIX_FMT_MONOWHITE:\
847 case PIX_FMT_YUYV422:\
849 ((uint8_t*)dest)[2*i2+0]= Y1;\
850 ((uint8_t*)dest)[2*i2+1]= U;\
851 ((uint8_t*)dest)[2*i2+2]= Y2;\
852 ((uint8_t*)dest)[2*i2+3]= V;\
855 case PIX_FMT_UYVY422:\
857 ((uint8_t*)dest)[2*i2+0]= U;\
858 ((uint8_t*)dest)[2*i2+1]= Y1;\
859 ((uint8_t*)dest)[2*i2+2]= V;\
860 ((uint8_t*)dest)[2*i2+3]= Y2;\
863 case PIX_FMT_GRAY16BE:\
865 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
866 ((uint8_t*)dest)[2*i2+1]= Y1;\
867 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
868 ((uint8_t*)dest)[2*i2+3]= Y2;\
871 case PIX_FMT_GRAY16LE:\
873 ((uint8_t*)dest)[2*i2+0]= Y1;\
874 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
875 ((uint8_t*)dest)[2*i2+2]= Y2;\
876 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
881 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
882 const int16_t **lumSrc, int lumFilterSize,
883 const int16_t *chrFilter, const int16_t **chrUSrc,
884 const int16_t **chrVSrc, int chrFilterSize,
885 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
888 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
891 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
892 const int16_t **lumSrc, int lumFilterSize,
893 const int16_t *chrFilter, const int16_t **chrUSrc,
894 const int16_t **chrVSrc, int chrFilterSize,
895 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
898 int step= c->dstFormatBpp/8;
901 switch(c->dstFormat) {
909 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
910 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
911 dest[aidx]= needAlpha ? A : 255;
918 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
919 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
927 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
944 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
945 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
946 dest[aidx]= needAlpha ? A : 255;
953 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
954 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
962 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill `height` rows of a plane with `val`, starting at row `y`.
 * `stride` is the distance in bytes between rows; only `width` bytes of
 * each row are written.  Restored elided braces and row advance. */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
        ptr += stride;
    }
}
987 static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
991 for (i = 0; i < width; i++) {
996 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1000 static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
1001 const uint8_t *src1, const uint8_t *src2,
1002 int width, uint32_t *unused)
1006 for (i = 0; i < width; i++) {
1007 int r = src1[6*i + 0];
1008 int g = src1[6*i + 2];
1009 int b = src1[6*i + 4];
1011 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1012 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1016 static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1017 const uint8_t *src1, const uint8_t *src2,
1018 int width, uint32_t *unused)
1022 for (i = 0; i < width; i++) {
1023 int r= src1[12*i + 0] + src1[12*i + 6];
1024 int g= src1[12*i + 2] + src1[12*i + 8];
1025 int b= src1[12*i + 4] + src1[12*i + 10];
1027 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1028 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1032 static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
1036 for (i = 0; i < width; i++) {
1041 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1045 static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
1046 const uint8_t *src1, const uint8_t *src2,
1047 int width, uint32_t *unused)
1050 for (i = 0; i < width; i++) {
1051 int b = src1[6*i + 0];
1052 int g = src1[6*i + 2];
1053 int r = src1[6*i + 4];
1055 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1056 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1060 static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1061 const uint8_t *src1, const uint8_t *src2,
1062 int width, uint32_t *unused)
1065 for (i = 0; i < width; i++) {
1066 int b= src1[12*i + 0] + src1[12*i + 6];
1067 int g= src1[12*i + 2] + src1[12*i + 8];
1068 int r= src1[12*i + 4] + src1[12*i + 10];
1070 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1071 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Template: packed 15/16/32-bit RGB/BGR -> 8-bit luma converter.
 * shr/shg/shb shift each component into place, mask* isolate it, and the
 * (shifted) RY/GY/BY weights with S fractional bits produce limited-range
 * luma.  Restored the elided function braces and loop-index line. */
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
static void name ## _c(uint8_t *dst, const uint8_t *src, \
                       int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= (((const type*)src)[i]>>shb)&maskb;\
        int g= (((const type*)src)[i]>>shg)&maskg;\
        int r= (((const type*)src)[i]>>shr)&maskr;\
\
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
    }\
}
/* Instantiate the packed-RGB/BGR -> luma converters.  Template arguments:
 * (type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S) — the
 * "1" variants read 32-bit pixels with the components in the high bytes,
 * the 16/15-bit variants pre-shift the weights instead of the pixel. */
BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha plane from packed ABGR (alpha is the first byte of
 * each 4-byte pixel — reconstruction of an elided body; TODO confirm
 * against the matching rgbaToA counterpart). */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i];
    }
}
/* Template: packed RGB/BGR -> 8-bit chroma converters.  Generates two
 * functions: name_c (one UV pair per pixel) and name_half_c (averages
 * each horizontal pixel pair; the mask arithmetic sums two components
 * without overflow before the final shift).  Restored the elided braces,
 * loop-index lines and the `g >>= shg` step of the half variant. */
#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                       const uint8_t *src, const uint8_t *dummy, \
                       int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
        int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
        int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
    }\
}\
static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
                            const uint8_t *src, const uint8_t *dummy, \
                            int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int pix0= ((const type*)src)[2*i+0]>>shp;\
        int pix1= ((const type*)src)[2*i+1]>>shp;\
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
        g&= maskg|(2*maskg);\
\
        g>>=shg;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
    }\
}
/* Instantiate the packed-RGB/BGR -> chroma converters (normal and _half
 * variants each).  Template arguments: (type, name, shr, shg, shb, shp,
 * maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S); shp pre-shifts the
 * whole pixel for the byte-swapped "1" 32-bit layouts. */
BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 -> 8-bit luma: look each index up in the palette and take the low
 * byte of the 32-bit entry.  Restored elided braces and the index load. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/* PAL8 -> 8-bit chroma: U is byte 1, V is byte 2 of each 32-bit palette
 * entry.  Both source pointers must alias the same data (asserted).
 * Restored elided braces and the per-pixel stores. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/* 1 bpp mono-white -> 8-bit luma: bits are inverted (0 = white), each
 * bit expands to 0 or 255, MSB first.  Restored elided braces, the bit
 * inversion and the inner bit loop. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/* 1 bpp mono-black -> 8-bit luma: each bit expands to 0 or 255, MSB
 * first (no inversion, unlike monowhite2Y_c).  Restored elided braces
 * and the inner bit loop. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1196 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1197 const int16_t *chrUSrc, const int16_t *chrVSrc,
1198 const int16_t *alpSrc,
1199 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1200 uint8_t *aDest, int dstW, int chrDstW)
1203 for (i=0; i<dstW; i++) {
1204 int val= (lumSrc[i]+64)>>7;
1205 dest[i]= av_clip_uint8(val);
1209 for (i=0; i<chrDstW; i++) {
1210 int u=(chrUSrc[i]+64)>>7;
1211 int v=(chrVSrc[i]+64)>>7;
1212 uDest[i]= av_clip_uint8(u);
1213 vDest[i]= av_clip_uint8(v);
1216 if (CONFIG_SWSCALE_ALPHA && aDest)
1217 for (i=0; i<dstW; i++) {
1218 int val= (alpSrc[i]+64)>>7;
1219 aDest[i]= av_clip_uint8(val);
1224 * vertical bilinear scale YV12 to RGB
1226 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1227 const uint16_t *buf1, const uint16_t *ubuf0,
1228 const uint16_t *ubuf1, const uint16_t *vbuf0,
1229 const uint16_t *vbuf1, const uint16_t *abuf0,
1230 const uint16_t *abuf1, uint8_t *dest, int dstW,
1231 int yalpha, int uvalpha, int y)
1233 int yalpha1=4095- yalpha;
1234 int uvalpha1=4095-uvalpha;
1237 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1241 * YV12 to RGB without scaling or interpolating
/* Single-source-line packed output.  uvalpha < 2048 means the chroma weight
 * favours the first chroma line, so the non-blending (1-tap) macro variant
 * is used; otherwise the averaging (1B) variant blends the two chroma lines.
 * NOTE(review): fragmentary paste — trailing parameters of the signature and
 * the else keyword between the two macro invocations are elided. */
1243 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1244 const uint16_t *ubuf0, const uint16_t *ubuf1,
1245 const uint16_t *vbuf0, const uint16_t *vbuf1,
1246 const uint16_t *abuf0, uint8_t *dest, int dstW,
1247 int uvalpha, enum PixelFormat dstFormat,
1250 const int yalpha1=0;
1253 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1254 const int yalpha= 4096; //FIXME ...
1256 if (uvalpha < 2048) {
1257 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1259 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1263 //FIXME yuy2* can read up to 7 samples too much
/* Extract luma (every even byte) from packed YUYV input.
 * NOTE(review): fragmentary paste — the `dst[i]= src[2*i];` store inside the
 * loop is elided here. */
1265 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1269 for (i=0; i<width; i++)
/* Extract the U and V components from a packed YUYV (Y0 U Y1 V) line.
 * Both chroma pointers refer to the same packed line, hence the assert. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    const uint8_t *in = src1;
    for (n = 0; n < width; n++, in += 4) {
        dstU[n] = in[1];
        dstV[n] = in[3];
    }
    assert(src1 == src2);
}
/* 16-bit little-endian chroma samples -> 8-bit planes: keep the most
 * significant byte, which for LE layout is at offset 2*i + 1. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n = 0;
    while (n < width) {
        dstU[n] = src1[2*n + 1];
        dstV[n] = src2[2*n + 1];
        n++;
    }
}
1294 /* This is almost identical to the previous, and exists only because
1295  * yuy2To(Y/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma (every odd byte) from packed UYVY input.
 * NOTE(review): fragmentary paste — the `dst[i]= src[2*i+1];` store inside
 * the loop is elided here. */
1296 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1300 for (i=0; i<width; i++)
/* Extract the U and V components from a packed UYVY (U Y0 V Y1) line.
 * Both chroma pointers refer to the same packed line, hence the assert. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4*n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/* 16-bit big-endian chroma samples -> 8-bit planes: keep the most
 * significant byte (offset 2*i for BE layout).
 * NOTE(review): fragmentary paste — the two store lines inside the loop
 * are elided here. */
1315 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1316 const uint8_t *src2, int width, uint32_t *unused)
1319 for (i=0; i<width; i++) {
/* Deinterleave one packed semi-planar chroma line (byte pairs) into two
 * separate planes; shared helper for both NV12 (U,V) and NV21 (V,U).
 * Note: plain `static inline` here instead of av_always_inline — behavior
 * is identical, only the (non-portable) inlining attribute is dropped. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        dst1[n] = src[2*n];
        dst2[n] = src[2*n + 1];
    }
}
/* NV12 chroma: interleaved pairs are (U,V) — forward planes in that order. */
1335 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1336 const uint8_t *src1, const uint8_t *src2,
1337 int width, uint32_t *unused)
1339 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma: interleaved pairs are (V,U) — note the swapped destinations. */
1342 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1343 const uint8_t *src1, const uint8_t *src2,
1344 int width, uint32_t *unused)
1346 nvXXtoUV_c(dstV, dstU, src1, width);
1349 // FIXME Maybe dither instead.
/* Template generating <endianness><depth>ToY_c / ...ToUV_c readers for 9-
 * and 10-bit planar YUV: read each 16-bit sample with rfunc (AV_RL16 or
 * AV_RB16) and truncate to 8 bits by dropping the low (depth-8) bits.
 * NOTE(review): fragmentary paste — `int i;` declarations and closing braces
 * of the macro bodies are elided between the numbered lines. */
1350 #define YUV_NBPS(depth, endianness, rfunc) \
1351 static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1352 const uint8_t *_srcU, const uint8_t *_srcV, \
1353 int width, uint32_t *unused) \
1356 const uint16_t *srcU = (const uint16_t*)_srcU; \
1357 const uint16_t *srcV = (const uint16_t*)_srcV; \
1358 for (i = 0; i < width; i++) { \
1359 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
1360 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
1364 static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
1365 int width, uint32_t *unused) \
1368 const uint16_t *srcY = (const uint16_t*)_srcY; \
1369 for (i = 0; i < width; i++) \
1370 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
1373 YUV_NBPS( 9, LE, AV_RL16)
1374 YUV_NBPS( 9, BE, AV_RB16)
1375 YUV_NBPS(10, LE, AV_RL16)
1376 YUV_NBPS(10, BE, AV_RB16)
/* Packed BGR24 -> 8-bit luma.  Bias 33<<(RGB2YUV_SHIFT-1) == 16.5<<SHIFT:
 * the +16 MPEG-range offset plus 0.5 for rounding.
 * NOTE(review): fragmentary paste — the b/g/r byte loads are elided. */
1378 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1379 int width, uint32_t *unused)
1382 for (i=0; i<width; i++) {
1387 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> 8-bit U/V.  Bias 257<<(RGB2YUV_SHIFT-1) == 128.5<<SHIFT:
 * the +128 chroma offset plus 0.5 for rounding.  RU/GU/BU/RV/GV/BV are the
 * fixed-point BT.601 coefficient macros defined elsewhere in this file. */
1391 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1392 const uint8_t *src2, int width, uint32_t *unused)
1395 for (i=0; i<width; i++) {
1396 int b= src1[3*i + 0];
1397 int g= src1[3*i + 1];
1398 int r= src1[3*i + 2];
1400 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1401 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1403 assert(src1 == src2);
/* Packed BGR24 -> U/V with 2:1 horizontal chroma decimation: each output
 * averages two adjacent pixels, so the sums are doubled and the final shift
 * is SHIFT+1.  Bias (257<<SHIFT) keeps the same 128.5 offset per output. */
1406 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1407 const uint8_t *src2, int width, uint32_t *unused)
1410 for (i=0; i<width; i++) {
1411 int b= src1[6*i + 0] + src1[6*i + 3];
1412 int g= src1[6*i + 1] + src1[6*i + 4];
1413 int r= src1[6*i + 2] + src1[6*i + 5];
1415 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1416 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1418 assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma (mirror of bgr24ToY_c with R/B byte order
 * swapped).  NOTE(review): fragmentary paste — the r/g/b loads are elided. */
1421 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1425 for (i=0; i<width; i++) {
1430 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> 8-bit U/V (mirror of bgr24ToUV_c with R/B byte order
 * swapped; same 128.5<<SHIFT rounding bias). */
1434 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1435 const uint8_t *src2, int width, uint32_t *unused)
1439 for (i=0; i<width; i++) {
1440 int r= src1[3*i + 0];
1441 int g= src1[3*i + 1];
1442 int b= src1[3*i + 2];
1444 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1445 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> U/V with 2:1 horizontal chroma decimation (mirror of
 * bgr24ToUV_half_c): sums two pixels, shifts by SHIFT+1. */
1449 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1450 const uint8_t *src2, int width, uint32_t *unused)
1454 for (i=0; i<width; i++) {
1455 int r= src1[6*i + 0] + src1[6*i + 3];
1456 int g= src1[6*i + 1] + src1[6*i + 4];
1457 int b= src1[6*i + 2] + src1[6*i + 5];
1459 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1460 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1465 // bilinear / bicubic scaling
/* Generic horizontal scaler: each output sample is an FIR filter applied at
 * filterPos[i], with >>7 scaling the 8-bit*Q14 products into the 15-bit
 * intermediate range, clipped to 0x7FFF.
 * NOTE(review): fragmentary paste — the filterSize parameter in the
 * signature and the `int val = 0;` initialisation are elided. */
1466 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1468 const int16_t *filter, const int16_t *filterPos,
1472 for (i=0; i<dstW; i++) {
1474 int srcPos= filterPos[i];
1476 for (j=0; j<filterSize; j++) {
1477 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1479 //filter += hFilterSize;
1480 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/* Expand MPEG-range chroma (15-bit fixed point, in place) to full JPEG
 * range.  Inputs are clamped to 30775 so the multiply cannot overflow. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u*4663 - 9289992) >> 12;  // -264
        dstV[i] = (v*4663 - 9289992) >> 12;  // -264
    }
}
/* Compress full JPEG-range chroma (15-bit fixed point, in place) to MPEG
 * range; the inverse of chrRangeToJpeg_c. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i = 0;
    while (i < width) {
        dstU[i] = (dstU[i]*1799 + 4081085) >> 11;  // 1469
        dstV[i] = (dstV[i]*1799 + 4081085) >> 11;  // 1469
        i++;
    }
}
/* Expand MPEG-range luma (15-bit fixed point, in place) to full JPEG range.
 * Inputs are clamped to 30189 so the multiply cannot overflow. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int y = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (y*19077 - 39057361) >> 14;
    }
}
/* Compress full JPEG-range luma (15-bit fixed point, in place) to MPEG
 * range; the inverse of lumRangeToJpeg_c. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i = 0;
    while (i < width) {
        dst[i] = (dst[i]*14071 + 33561947) >> 14;
        i++;
    }
}
/* Fast bilinear horizontal luma scale: 16.16 fixed-point source position,
 * 7-bit interpolation weight (xalpha), producing 15-bit intermediates.
 * NOTE(review): fragmentary paste — the `xpos+=xInc;` advance at the end of
 * the loop is elided between the numbered lines. */
1516 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1517 const uint8_t *src, int srcW, int xInc)
1520 unsigned int xpos=0;
1521 for (i=0;i<dstWidth;i++) {
1522 register unsigned int xx=xpos>>16;   // integer part: source index
1523 register unsigned int xalpha=(xpos&0xFFFF)>>9;   // fraction -> 7-bit weight
1524 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1529 // *** horizontal scale Y line to temp buffer
/* Scale one luma (or alpha, when isAlpha) source line into `dst`:
 * optionally convert the input format to 8-bit via toYV12 into
 * formatConvBuffer, then run either the generic hScale or the fast
 * bilinear path, then optionally apply MPEG<->JPEG range conversion.
 * NOTE(review): fragmentary paste — the NULL checks guarding the toYV12
 * call and the convertRange call are elided between the numbered lines. */
1530 static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1531 const uint8_t *src, int srcW, int xInc,
1532 const int16_t *hLumFilter,
1533 const int16_t *hLumFilterPos, int hLumFilterSize,
1534 uint8_t *formatConvBuffer,
1535 uint32_t *pal, int isAlpha)
1537 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1538 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1540 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
1543 toYV12(formatConvBuffer, src, srcW, pal);
1544 src= formatConvBuffer;
1547 if (!c->hyscale_fast) {
1548 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1549 } else { // fast bilinear upscale / crap downscale
1550 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1554 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale for both planes at once.
 * (xalpha^127) approximates (128 - xalpha) as the complementary weight.
 * NOTE(review): fragmentary paste — the `xpos+=xInc;` advance at the end of
 * the loop is elided between the numbered lines. */
1557 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1558 int dstWidth, const uint8_t *src1,
1559 const uint8_t *src2, int srcW, int xInc)
1562 unsigned int xpos=0;
1563 for (i=0;i<dstWidth;i++) {
1564 register unsigned int xx=xpos>>16;
1565 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1566 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1567 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Scale one pair of chroma source lines into dst1/dst2: optionally convert
 * the input format via c->chrToYV12 into formatConvBuffer (U in the first
 * half, V after the 16-byte-aligned offset), then run the generic or fast
 * bilinear horizontal scaler and the optional range conversion.
 * NOTE(review): fragmentary paste — the guard around the chrToYV12 call and
 * the `src2 = buf2;` reassignment are elided between the numbered lines. */
1572 static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1573 const uint8_t *src1, const uint8_t *src2,
1574 int srcW, int xInc, const int16_t *hChrFilter,
1575 const int16_t *hChrFilterPos, int hChrFilterSize,
1576 uint8_t *formatConvBuffer, uint32_t *pal)
1579 src1 += c->chrSrcOffset;
1580 src2 += c->chrSrcOffset;
1583 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1584 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1585 src1= formatConvBuffer;
1589 if (!c->hcscale_fast) {
1590 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1591 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1592 } else { // fast bilinear upscale / crap downscale
1593 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1596 if (c->chrConvertRange)
1597 c->chrConvertRange(dst1, dst2, dstWidth);
1600 #define DEBUG_SWSCALE_BUFFERS 0
1601 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Core scaling entry point: for each destination line, horizontally scale
 * the needed source lines into per-plane ring buffers, then vertically
 * scale / convert them to the destination format.  Returns the number of
 * destination lines produced for this slice.
 * NOTE(review): fragmentary paste — local declarations (dstY, lastDstY,
 * enough_lines, ...), several braces and guard lines are elided between the
 * numbered lines throughout this function. */
1603 static int swScale(SwsContext *c, const uint8_t* src[],
1604 int srcStride[], int srcSliceY,
1605 int srcSliceH, uint8_t* dst[], int dstStride[])
1607 /* load a few things into local vars to make the code more readable? and faster */
1608 const int srcW= c->srcW;
1609 const int dstW= c->dstW;
1610 const int dstH= c->dstH;
1611 const int chrDstW= c->chrDstW;
1612 const int chrSrcW= c->chrSrcW;
1613 const int lumXInc= c->lumXInc;
1614 const int chrXInc= c->chrXInc;
1615 const enum PixelFormat dstFormat= c->dstFormat;
1616 const int flags= c->flags;
1617 int16_t *vLumFilterPos= c->vLumFilterPos;
1618 int16_t *vChrFilterPos= c->vChrFilterPos;
1619 int16_t *hLumFilterPos= c->hLumFilterPos;
1620 int16_t *hChrFilterPos= c->hChrFilterPos;
1621 int16_t *vLumFilter= c->vLumFilter;
1622 int16_t *vChrFilter= c->vChrFilter;
1623 int16_t *hLumFilter= c->hLumFilter;
1624 int16_t *hChrFilter= c->hChrFilter;
1625 int32_t *lumMmxFilter= c->lumMmxFilter;
1626 int32_t *chrMmxFilter= c->chrMmxFilter;
1627 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1628 const int vLumFilterSize= c->vLumFilterSize;
1629 const int vChrFilterSize= c->vChrFilterSize;
1630 const int hLumFilterSize= c->hLumFilterSize;
1631 const int hChrFilterSize= c->hChrFilterSize;
1632 int16_t **lumPixBuf= c->lumPixBuf;
1633 int16_t **chrUPixBuf= c->chrUPixBuf;
1634 int16_t **chrVPixBuf= c->chrVPixBuf;
1635 int16_t **alpPixBuf= c->alpPixBuf;
1636 const int vLumBufSize= c->vLumBufSize;
1637 const int vChrBufSize= c->vChrBufSize;
1638 uint8_t *formatConvBuffer= c->formatConvBuffer;
1639 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1640 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1642 uint32_t *pal=c->pal_yuv;
1644 /* vars which will change and which we need to store back in the context */
1646 int lumBufIndex= c->lumBufIndex;
1647 int chrBufIndex= c->chrBufIndex;
1648 int lastInLumBuf= c->lastInLumBuf;
1649 int lastInChrBuf= c->lastInChrBuf;
1651 if (isPacked(c->srcFormat)) {
1659 srcStride[3]= srcStride[0];
1661 srcStride[1]<<= c->vChrDrop;
1662 srcStride[2]<<= c->vChrDrop;
1664 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1665 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1666 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1667 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1668 srcSliceY, srcSliceH, dstY, dstH);
1669 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1670 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
1672 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1673 static int warnedAlready=0; //FIXME move this into the context perhaps
1674 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1675 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1676 " ->cannot do aligned memory accesses anymore\n");
1681 /* Note the user might start scaling the picture in the middle so this
1682 will not get executed. This is not really intended but works
1683 currently, so people might do it. */
1684 if (srcSliceY ==0) {
// Main per-destination-line loop: buffer source lines until enough are
// available to produce line dstY, otherwise break and wait for more slices.
1694 for (;dstY < dstH; dstY++) {
1695 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1696 const int chrDstY= dstY>>c->chrDstVSubSample;
1697 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1698 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1699 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1701 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1702 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1703 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1704 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1705 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1706 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1709 //handle holes (FAST_BILINEAR & weird filters)
1710 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1711 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1712 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1713 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1715 DEBUG_BUFFERS("dstY: %d\n", dstY);
1716 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1717 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1718 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1719 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1721 // Do we have enough lines in this slice to output the dstY line
1722 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
1724 if (!enough_lines) {
1725 lastLumSrcY = srcSliceY + srcSliceH - 1;
1726 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1727 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1728 lastLumSrcY, lastChrSrcY);
1731 //Do horizontal scaling
1732 while(lastInLumBuf < lastLumSrcY) {
1733 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1734 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1736 assert(lumBufIndex < 2*vLumBufSize);
1737 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1738 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1739 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1740 hLumFilter, hLumFilterPos, hLumFilterSize,
1743 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1744 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1745 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1749 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1750 lumBufIndex, lastInLumBuf);
1752 while(lastInChrBuf < lastChrSrcY) {
1753 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1754 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1756 assert(chrBufIndex < 2*vChrBufSize);
1757 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1758 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1759 //FIXME replace parameters through context struct (some at least)
1761 if (c->needs_hcscale)
1762 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1763 chrDstW, src1, src2, chrSrcW, chrXInc,
1764 hChrFilter, hChrFilterPos, hChrFilterSize,
1765 formatConvBuffer, pal);
1767 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1768 chrBufIndex, lastInChrBuf);
1770 //wrap buf index around to stay inside the ring buffer
1771 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1772 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1774 break; //we can't output a dstY line so let's try with the next slice
1777 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
// Vertical scaling + output conversion.  The dstY < dstH-2 branch may use
// MMX paths; near the picture bottom the plain C variants are used so the
// ring buffer's tail is not overwritten (see comment at the else below).
1779 if (dstY < dstH-2) {
1780 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1781 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1782 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1783 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1784 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1785 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1786 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1788 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1789 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1790 dest, uDest, dstW, chrDstW, dstFormat);
1791 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1792 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1793 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1794 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1795 yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1796 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1797 chrVSrcPtr, vChrFilterSize,
1798 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
1799 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1801 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1802 const int16_t *lumBuf = lumSrcPtr[0];
1803 const int16_t *chrUBuf= chrUSrcPtr[0];
1804 const int16_t *chrVBuf= chrVSrcPtr[0];
1805 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1806 c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1807 uDest, vDest, aDest, dstW, chrDstW);
1808 } else { //General YV12
1810 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1811 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1812 chrVSrcPtr, vChrFilterSize,
1813 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1816 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1817 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1818 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1819 int chrAlpha= vChrFilter[2*dstY+1];
1820 if(flags & SWS_FULL_CHR_H_INT) {
1821 yuv2rgbX_c_full(c, //FIXME write a packed1_full function
1822 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1823 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
1824 chrVSrcPtr, vChrFilterSize,
1825 alpSrcPtr, dest, dstW, dstY);
1827 c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1828 *chrVSrcPtr, *(chrVSrcPtr+1),
1829 alpPixBuf ? *alpSrcPtr : NULL,
1830 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1832 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1833 int lumAlpha= vLumFilter[2*dstY+1];
1834 int chrAlpha= vChrFilter[2*dstY+1];
1836 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1838 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1839 if(flags & SWS_FULL_CHR_H_INT) {
1840 yuv2rgbX_c_full(c, //FIXME write a packed2_full function
1841 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1842 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1843 alpSrcPtr, dest, dstW, dstY);
1845 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1846 *chrVSrcPtr, *(chrVSrcPtr+1),
1847 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1848 dest, dstW, lumAlpha, chrAlpha, dstY);
1850 } else { //general RGB
1851 if(flags & SWS_FULL_CHR_H_INT) {
1853 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1854 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1855 alpSrcPtr, dest, dstW, dstY);
1858 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1859 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1860 alpSrcPtr, dest, dstW, dstY);
1864 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
1865 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1866 const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1867 const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1868 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1869 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1870 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1871 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1872 yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
1873 lumSrcPtr, vLumFilterSize,
1874 vChrFilter+chrDstY*vChrFilterSize,
1875 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1876 dest, uDest, dstW, chrDstW, dstFormat);
1877 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
1878 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1879 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1880 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1881 yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1882 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1883 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
1886 yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
1887 lumSrcPtr, vLumFilterSize,
1888 vChrFilter+chrDstY*vChrFilterSize,
1889 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1890 alpSrcPtr, dest, uDest, vDest, aDest,
1894 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1895 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1896 if(flags & SWS_FULL_CHR_H_INT) {
1898 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1899 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1900 alpSrcPtr, dest, dstW, dstY);
1903 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1904 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1905 alpSrcPtr, dest, dstW, dstY);
1911 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1912 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
1915 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1916 __asm__ volatile("sfence":::"memory");
1920 /* store changed local vars back in the context */
1922 c->lumBufIndex= lumBufIndex;
1923 c->chrBufIndex= chrBufIndex;
1924 c->lastInLumBuf= lastInLumBuf;
1925 c->lastInChrBuf= lastInChrBuf;
1927 return dstY - lastDstY;
/* Populate the SwsContext function pointers with the plain-C
 * implementations, selected by source/destination format and flags.
 * Arch-specific init functions may later override these.
 * NOTE(review): fragmentary paste — `switch (...)` headers, many case
 * labels, braces and `break`s are elided between the numbered lines. */
1930 static void sws_init_swScale_c(SwsContext *c)
1932 enum PixelFormat srcFormat = c->srcFormat;
1934 c->yuv2nv12X = yuv2nv12X_c;
1935 c->yuv2yuv1 = yuv2yuv1_c;
1936 c->yuv2yuvX = yuv2yuvX_c;
1937 c->yuv2packed1 = yuv2packed1_c;
1938 c->yuv2packed2 = yuv2packed2_c;
1939 c->yuv2packedX = yuv2packedX_c;
1941 c->hScale = hScale_c;
1943 if (c->flags & SWS_FAST_BILINEAR)
1945 c->hyscale_fast = hyscale_fast_c;
1946 c->hcscale_fast = hcscale_fast_c;
// Chroma input converter: packed / semi-planar / high-bit-depth -> 8-bit planes.
1949 c->chrToYV12 = NULL;
1951 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1952 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1953 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1954 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1958 case PIX_FMT_BGR4_BYTE:
1959 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1960 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1961 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1962 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1963 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1964 case PIX_FMT_YUV420P16BE:
1965 case PIX_FMT_YUV422P16BE:
1966 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1967 case PIX_FMT_YUV420P16LE:
1968 case PIX_FMT_YUV422P16LE:
1969 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
// RGB inputs: the *_half_c variants decimate chroma 2:1 horizontally when
// the scaler subsamples chroma anyway; otherwise full-resolution readers.
1971 if (c->chrSrcHSubSample) {
1973 case PIX_FMT_RGB48BE:
1974 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
1975 case PIX_FMT_BGR48BE:
1976 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
1977 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1978 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1979 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1980 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1981 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1982 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1983 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1984 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1985 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1986 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1990 case PIX_FMT_RGB48BE:
1991 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
1992 case PIX_FMT_BGR48BE:
1993 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
1994 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
1995 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1996 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
1997 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1998 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1999 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2000 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
2001 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2002 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
2003 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
// Luma (and alpha) input converters.
2007 c->lumToYV12 = NULL;
2008 c->alpToYV12 = NULL;
2009 switch (srcFormat) {
2010 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2011 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2012 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2013 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2014 case PIX_FMT_YUYV422 :
2015 case PIX_FMT_YUV420P16BE:
2016 case PIX_FMT_YUV422P16BE:
2017 case PIX_FMT_YUV444P16BE:
2018 case PIX_FMT_Y400A :
2019 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2020 case PIX_FMT_UYVY422 :
2021 case PIX_FMT_YUV420P16LE:
2022 case PIX_FMT_YUV422P16LE:
2023 case PIX_FMT_YUV444P16LE:
2024 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2025 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2026 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
2027 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
2028 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2029 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
2030 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
2034 case PIX_FMT_BGR4_BYTE:
2035 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2036 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2037 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2038 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2039 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2040 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2041 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2042 case PIX_FMT_RGB48BE:
2043 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
2044 case PIX_FMT_BGR48BE:
2045 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
2048 switch (srcFormat) {
2049 case PIX_FMT_RGB32 :
2050 case PIX_FMT_RGB32_1:
2051 case PIX_FMT_BGR32 :
2052 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
2053 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
// Source byte offsets so the generic readers land on the right component.
2057 switch (srcFormat) {
2058 case PIX_FMT_Y400A :
2059 c->alpSrcOffset = 1;
2061 case PIX_FMT_RGB32 :
2062 case PIX_FMT_BGR32 :
2063 c->alpSrcOffset = 3;
2065 case PIX_FMT_RGB48LE:
2066 case PIX_FMT_BGR48LE:
2067 c->lumSrcOffset = 1;
2068 c->chrSrcOffset = 1;
2069 c->alpSrcOffset = 1;
// MPEG <-> JPEG range conversion only applies to YUV outputs.
2073 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2075 c->lumConvertRange = lumRangeFromJpeg_c;
2076 c->chrConvertRange = chrRangeFromJpeg_c;
2078 c->lumConvertRange = lumRangeToJpeg_c;
2079 c->chrConvertRange = chrRangeToJpeg_c;
2083 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2084 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2085 c->needs_hcscale = 1;
/* Select the scaling entry point: install the C defaults, then let the
 * arch-specific initialisers (MMX, AltiVec) override them where available.
 * NOTE(review): fragmentary paste — the #if/HAVE_* guards around the
 * arch-specific calls and the return statement are elided. */
2088 SwsFunc ff_getSwsFunc(SwsContext *c)
2090 sws_init_swScale_c(c);
2093 ff_sws_init_swScale_mmx(c);
2095 ff_sws_init_swScale_altivec(c);
/* Copy a horizontal slice of one plane into the destination at srcSliceY.
 * When strides match (and are positive, i.e. not a flipped image) the whole
 * slice is one memcpy; otherwise it copies row by row.
 * NOTE(review): fragmentary paste — the `else` branch structure and the
 * per-row src/dst pointer advances are elided. */
2100 static void copyPlane(const uint8_t *src, int srcStride,
2101 int srcSliceY, int srcSliceH, int width,
2102 uint8_t *dst, int dstStride)
2104 dst += dstStride * srcSliceY;
2105 if (dstStride == srcStride && srcStride > 0) {
2106 memcpy(dst, src, srcSliceH * dstStride);
2109 for (i=0; i<srcSliceH; i++) {
2110 memcpy(dst, src, width);
/* YV12/I420 -> NV12/NV21: copy luma as-is, interleave the two chroma planes
 * (order swapped for NV21).  NOTE(review): fragmentary paste — the return
 * statement is elided. */
2117 static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2118 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2120 uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2;
2122 copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
2123 dstParam[0], dstStride[0]);
2125 if (c->dstFormat == PIX_FMT_NV12)
2126 interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
2128 interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
/* YV12 -> packed YUYV, unscaled.  NOTE(review): fragmentary paste — the
 * return statement is elided. */
2133 static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2134 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2136 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2138 yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
/* YV12 -> packed UYVY, unscaled.  NOTE(review): fragmentary paste — the
 * return statement is elided. */
2143 static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2144 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2146 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2148 yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
/* YUV422P -> packed YUYV, unscaled.  NOTE(review): fragmentary paste — the
 * return statement is elided. */
2153 static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2154 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2156 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2158 yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
/* YUV422P -> packed UYVY, unscaled.  NOTE(review): fragmentary paste — the
 * return statement is elided. */
2163 static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2164 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2166 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
2168 yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
/* Packed YUYV -> planar YUV420; fills the alpha plane with 255 when the
 * destination has one.  NOTE(review): fragmentary paste — the guard around
 * fillPlane and the return statement are elided. */
2173 static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2174 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2176 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2177 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
2178 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
2180 yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
2183 fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
/* Packed YUYV -> planar YUV422.  NOTE(review): fragmentary paste — the
 * return statement is elided. */
2188 static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2189 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2191 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2192 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
2193 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
2195 yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
// Unscaled converter: packed UYVY422 -> planar YUV 4:2:0.
2200 static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2201 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2203 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
// Chroma planes are vertically subsampled by 2, hence the /2 on the row offset.
2204 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
2205 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
2207 uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
// Fill the alpha plane (plane 3) with opaque when the destination has one.
2210 fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
// Unscaled converter: packed UYVY422 -> planar YUV 4:2:2.
2215 static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2216 int srcSliceH, uint8_t* dstParam[], int dstStride[])
2218 uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
2219 uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
2220 uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
2222 uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
/* Expand gray+alpha byte pairs into packed 32-bit pixels: the color comes
 * from the 32-bit palette entry selected by the gray byte, and the alpha
 * byte is placed into the top byte (bits 24..31) of the output pixel. */
static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    const uint32_t *pal = (const uint32_t *) palette;
    uint32_t *out = (uint32_t *) dst;
    int n;

    for (n = 0; n < num_pixels; n++) {
        uint32_t color = pal[src[2 * n]];
        uint32_t alpha = (uint32_t) src[2 * n + 1] << 24;
        out[n] = color | alpha;
    }
}
/* Like gray8aToPacked32, but the alpha byte goes into the LOW byte of the
 * output pixel; used for the "_1" 32-bit layouts where the palette entries
 * are pre-shifted so color occupies the upper three bytes. */
static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    const uint32_t *pal = (const uint32_t *) palette;
    uint32_t *out = (uint32_t *) dst;
    int n;

    for (n = 0; n < num_pixels; n++)
        out[n] = pal[src[2 * n]] | src[2 * n + 1];
}
// Expand gray+alpha pairs to packed 24-bit RGB: copy the first three bytes of
// the 4-byte palette entry selected by the gray byte; the alpha byte (odd
// source bytes) is dropped since 24-bit formats carry no alpha.
2242 static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
2246 for (i=0; i<num_pixels; i++) {
// NOTE(review): the per-pixel advance of dst (by 3 bytes) is not visible in
// this view — confirm against the loop tail before relying on dst indexing.
2248 dst[0]= palette[src[i<<1]*4+0];
2249 dst[1]= palette[src[i<<1]*4+1];
2250 dst[2]= palette[src[i<<1]*4+2];
// Unscaled converter: paletted (PAL8 & friends) or gray+alpha input to packed
// RGB/BGR. Selects a per-row conversion helper based on src/dst formats and
// applies it row by row using the palette cached in c->pal_rgb.
2255 static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2256 int srcSliceH, uint8_t* dst[], int dstStride[])
2258 const enum PixelFormat srcFormat= c->srcFormat;
2259 const enum PixelFormat dstFormat= c->dstFormat;
// Row converter; stays NULL if no suitable src/dst combination matches.
2260 void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
2261 const uint8_t *palette)=NULL;
2263 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2264 const uint8_t *srcPtr= src[0];
// Gray-with-alpha source: alpha must be merged per pixel, so it gets its
// own family of helpers rather than the plain palette-lookup ones.
2266 if (srcFormat == PIX_FMT_Y400A) {
2267 switch (dstFormat) {
2268 case PIX_FMT_RGB32 : conv = gray8aToPacked32; break;
2269 case PIX_FMT_BGR32 : conv = gray8aToPacked32; break;
2270 case PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break;
2271 case PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break;
2272 case PIX_FMT_RGB24 : conv = gray8aToPacked24; break;
2273 case PIX_FMT_BGR24 : conv = gray8aToPacked24; break;
// True palette source: pal_rgb already encodes the target byte order, so the
// same lookup helper serves both RGB and BGR destinations.
2275 } else if (usePal(srcFormat)) {
2276 switch (dstFormat) {
2277 case PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break;
2278 case PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break;
2279 case PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break;
2280 case PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break;
2281 case PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break;
2282 case PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break;
// Reaching here with conv==NULL means the format pair should never have been
// routed to this wrapper — report it as an internal error.
2287 av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2288 sws_format_name(srcFormat), sws_format_name(dstFormat));
// Convert one row at a time so arbitrary strides are honored.
2290 for (i=0; i<srcSliceH; i++) {
2291 conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
2292 srcPtr+= srcStride[0];
2293 dstPtr+= dstStride[0];
// True for any 32-bit 4-component RGB layout (alpha in any position).
2300 #define isRGBA32(x) ( \
2301 (x) == PIX_FMT_ARGB \
2302 || (x) == PIX_FMT_RGBA \
2303 || (x) == PIX_FMT_BGRA \
2304 || (x) == PIX_FMT_ABGR \
2307 /* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
// Unscaled packed-RGB converter. Picks a rgb2rgb helper from the (srcBpp,
// dstBpp, RGB<->BGR) combination, then runs it over the slice; can do the
// whole slice in one call when the strides are "dense".
2308 static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2309 int srcSliceH, uint8_t* dst[], int dstStride[])
2311 const enum PixelFormat srcFormat= c->srcFormat;
2312 const enum PixelFormat dstFormat= c->dstFormat;
// Bytes per pixel, rounded up from bits per pixel.
2313 const int srcBpp= (c->srcFormatBpp + 7) >> 3;
2314 const int dstBpp= (c->dstFormatBpp + 7) >> 3;
2315 const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
2316 const int dstId= c->dstFormatBpp >> 2;
2317 void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
2319 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
// 32-bit <-> 32-bit with alpha: pure byte shuffles, selected by which
// permutation of the 4 bytes maps src layout onto dst layout.
2321 if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
2322 if ( CONV_IS(ABGR, RGBA)
2323 || CONV_IS(ARGB, BGRA)
2324 || CONV_IS(BGRA, ARGB)
2325 || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210;
2326 else if (CONV_IS(ABGR, ARGB)
2327 || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321;
2328 else if (CONV_IS(ABGR, BGRA)
2329 || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230;
2330 else if (CONV_IS(BGRA, RGBA)
2331 || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
2332 else if (CONV_IS(BGRA, ABGR)
2333 || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
// Same channel order (RGB->RGB or BGR->BGR): only the depth changes.
2336 if ( (isBGRinInt(srcFormat) && isBGRinInt(dstFormat))
2337 || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
// Key = srcId in the low nibble, dstId in the high nibble.
2338 switch(srcId | (dstId<<4)) {
2339 case 0x34: conv= rgb16to15; break;
2340 case 0x36: conv= rgb24to15; break;
2341 case 0x38: conv= rgb32to15; break;
2342 case 0x43: conv= rgb15to16; break;
2343 case 0x46: conv= rgb24to16; break;
2344 case 0x48: conv= rgb32to16; break;
2345 case 0x63: conv= rgb15to24; break;
2346 case 0x64: conv= rgb16to24; break;
2347 case 0x68: conv= rgb32to24; break;
2348 case 0x83: conv= rgb15to32; break;
2349 case 0x84: conv= rgb16to32; break;
2350 case 0x86: conv= rgb24to32; break;
// Crossed channel order (RGB<->BGR): depth change plus channel swap.
2352 } else if ( (isBGRinInt(srcFormat) && isRGBinInt(dstFormat))
2353 || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
2354 switch(srcId | (dstId<<4)) {
2355 case 0x33: conv= rgb15tobgr15; break;
2356 case 0x34: conv= rgb16tobgr15; break;
2357 case 0x36: conv= rgb24tobgr15; break;
2358 case 0x38: conv= rgb32tobgr15; break;
2359 case 0x43: conv= rgb15tobgr16; break;
2360 case 0x44: conv= rgb16tobgr16; break;
2361 case 0x46: conv= rgb24tobgr16; break;
2362 case 0x48: conv= rgb32tobgr16; break;
2363 case 0x63: conv= rgb15tobgr24; break;
2364 case 0x64: conv= rgb16tobgr24; break;
2365 case 0x66: conv= rgb24tobgr24; break;
2366 case 0x68: conv= rgb32tobgr24; break;
2367 case 0x83: conv= rgb15tobgr32; break;
2368 case 0x84: conv= rgb16tobgr32; break;
2369 case 0x86: conv= rgb24tobgr32; break;
// No helper matched: this format pair should not have been routed here.
2374 av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2375 sws_format_name(srcFormat), sws_format_name(dstFormat));
2377 const uint8_t *srcPtr= src[0];
2378 uint8_t *dstPtr= dst[0];
// The "_1" 32-bit variants store the pixel shifted by one byte; ALT32_CORR
// compensates when the other side is not a 4-component 32-bit format.
2379 if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat))
2380 srcPtr += ALT32_CORR;
2382 if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat))
2383 dstPtr += ALT32_CORR;
// Fast path: strides match pixel-for-pixel, so the slice is one contiguous run.
2385 if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
2386 conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
// Slow path: convert row by row, honoring the independent strides.
2389 dstPtr += dstStride[0]*srcSliceY;
2391 for (i=0; i<srcSliceH; i++) {
2392 conv(srcPtr, dstPtr, c->srcW*srcBpp);
2393 srcPtr+= srcStride[0];
2394 dstPtr+= dstStride[0];
// Unscaled converter: packed BGR24 -> planar YUV 4:2:0, delegating to the
// rgb2rgb helper; chroma rows are offset by srcSliceY>>1 (4:2:0 subsampling).
2401 static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2402 int srcSliceH, uint8_t* dst[], int dstStride[])
2406 dst[0]+ srcSliceY *dstStride[0],
2407 dst[1]+(srcSliceY>>1)*dstStride[1],
2408 dst[2]+(srcSliceY>>1)*dstStride[2],
2410 dstStride[0], dstStride[1], srcStride[0]);
// Fill the alpha plane with opaque when the destination has one.
2412 fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
// Unscaled converter: YUV 4:1:0 (YVU9) -> YUV 4:2:0. Luma is copied as-is;
// each chroma plane is upsampled 2x in both directions by planar2x.
2416 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2417 int srcSliceH, uint8_t* dst[], int dstStride[])
2419 copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
2420 dst[0], dstStride[0]);
// Source chroma is subsampled 4x vertically (srcSliceH >> 2); destination
// chroma rows start at srcSliceY >> 1 (4:2:0).
2422 planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
2423 srcSliceH >> 2, srcStride[1], dstStride[1]);
2424 planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
2425 srcSliceH >> 2, srcStride[2], dstStride[2]);
// Fill the alpha plane with opaque when the destination has one.
2427 fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
2431 /* unscaled copy like stuff (assumes nearly identical formats) */
// Copy a packed-format slice unchanged. One big memcpy when src and dst
// strides agree; otherwise a per-row copy of the common usable width.
2432 static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2433 int srcSliceH, uint8_t* dst[], int dstStride[])
2435 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
2436 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
2439 const uint8_t *srcPtr= src[0];
2440 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2443 /* universal length finder */
// Grow the per-row byte count in srcW steps while it still fits both
// strides; works without knowing the exact bytes-per-pixel.
2444 while(length+c->srcW <= FFABS(dstStride[0])
2445 && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
2448 for (i=0; i<srcSliceH; i++) {
2449 memcpy(dstPtr, srcPtr, length);
2450 srcPtr+= srcStride[0];
2451 dstPtr+= dstStride[0];
// Copy a planar slice between "nearly identical" formats, per plane. Handles
// bit-depth changes (8 <-> 9/10 <-> 16), endianness swaps, missing planes
// (filled with neutral gray / opaque alpha), and the plain memcpy case.
2457 static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
2458 int srcSliceH, uint8_t* dst[], int dstStride[])
// Planes 0 (luma) and 3 (alpha) are full resolution; 1 and 2 (chroma) are
// scaled down by the destination subsampling factors (negate/shift rounds up).
2461 for (plane=0; plane<4; plane++) {
2462 int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
2463 int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
2464 int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
2465 const uint8_t *srcPtr= src[plane];
2466 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
// Skip planes the destination doesn't have.
2468 if (!dst[plane]) continue;
2469 // ignore palette for GRAY8
2470 if (plane == 1 && !dst[2]) continue;
// Source plane missing: fill with a neutral value (128 gray, 255 alpha).
2471 if (!src[plane] || (plane == 1 && !src[2])) {
2472 if(is16BPS(c->dstFormat))
2474 fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
// 9/10-bit source: expand or narrow per sample depending on dst depth.
2476 if(is9_OR_10BPS(c->srcFormat)) {
2477 const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
2478 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
2479 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
2481 if (is16BPS(c->dstFormat)) {
2482 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
// Scale 9/10-bit up to 16 bits by bit replication (shift up, OR top bits back in).
2483 #define COPY9_OR_10TO16(rfunc, wfunc) \
2484 for (i = 0; i < height; i++) { \
2485 for (j = 0; j < length; j++) { \
2486 int srcpx = rfunc(&srcPtr2[j]); \
2487 wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
2489 dstPtr2 += dstStride[plane]/2; \
2490 srcPtr2 += srcStride[plane]/2; \
2492 if (isBE(c->dstFormat)) {
2493 if (isBE(c->srcFormat)) {
2494 COPY9_OR_10TO16(AV_RB16, AV_WB16);
2496 COPY9_OR_10TO16(AV_RL16, AV_WB16);
2499 if (isBE(c->srcFormat)) {
2500 COPY9_OR_10TO16(AV_RB16, AV_WL16);
2502 COPY9_OR_10TO16(AV_RL16, AV_WL16);
2505 } else if (is9_OR_10BPS(c->dstFormat)) {
2506 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
// 9<->10 bit copy: per-sample loop body is passed in as the `loop` argument.
2507 #define COPY9_OR_10TO9_OR_10(loop) \
2508 for (i = 0; i < height; i++) { \
2509 for (j = 0; j < length; j++) { \
2512 dstPtr2 += dstStride[plane]/2; \
2513 srcPtr2 += srcStride[plane]/2; \
2515 #define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
2516 if (dst_depth > src_depth) { \
2517 COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
2518 wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
2519 } else if (dst_depth < src_depth) { \
2520 COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
2522 COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
2524 if (isBE(c->dstFormat)) {
2525 if (isBE(c->srcFormat)) {
2526 COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
2528 COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
2531 if (isBE(c->srcFormat)) {
2532 COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
2534 COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
2538 // FIXME Maybe dither instead.
// 9/10-bit down to 8-bit: simple truncation of the low bits.
2539 #define COPY9_OR_10TO8(rfunc) \
2540 for (i = 0; i < height; i++) { \
2541 for (j = 0; j < length; j++) { \
2542 dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
2544 dstPtr += dstStride[plane]; \
2545 srcPtr2 += srcStride[plane]/2; \
2547 if (isBE(c->srcFormat)) {
2548 COPY9_OR_10TO8(AV_RB16);
2550 COPY9_OR_10TO8(AV_RL16);
// Destination is 9/10-bit but source is not (8-bit or 16-bit).
2553 } else if(is9_OR_10BPS(c->dstFormat)) {
2554 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
2555 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
2557 if (is16BPS(c->srcFormat)) {
2558 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
// 16-bit down to 9/10-bit: truncate the low bits.
2559 #define COPY16TO9_OR_10(rfunc, wfunc) \
2560 for (i = 0; i < height; i++) { \
2561 for (j = 0; j < length; j++) { \
2562 wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
2564 dstPtr2 += dstStride[plane]/2; \
2565 srcPtr2 += srcStride[plane]/2; \
2567 if (isBE(c->dstFormat)) {
2568 if (isBE(c->srcFormat)) {
2569 COPY16TO9_OR_10(AV_RB16, AV_WB16);
2571 COPY16TO9_OR_10(AV_RL16, AV_WB16);
2574 if (isBE(c->srcFormat)) {
2575 COPY16TO9_OR_10(AV_RB16, AV_WL16);
2577 COPY16TO9_OR_10(AV_RL16, AV_WL16);
// 8-bit up to 9/10-bit: shift up and replicate the top bits into the bottom.
2581 #define COPY8TO9_OR_10(wfunc) \
2582 for (i = 0; i < height; i++) { \
2583 for (j = 0; j < length; j++) { \
2584 const int srcpx = srcPtr[j]; \
2585 wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
2587 dstPtr2 += dstStride[plane]/2; \
2588 srcPtr += srcStride[plane]; \
2590 if (isBE(c->dstFormat)) {
2591 COPY8TO9_OR_10(AV_WB16);
2593 COPY8TO9_OR_10(AV_WL16);
// 16-bit -> 8-bit: keep the most significant byte of each sample.
2596 } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
2597 if (!isBE(c->srcFormat)) srcPtr++;
2598 for (i=0; i<height; i++) {
2599 for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
2600 srcPtr+= srcStride[plane];
2601 dstPtr+= dstStride[plane];
// 8-bit -> 16-bit: duplicate each byte into both output bytes (x * 257).
2603 } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
2604 for (i=0; i<height; i++) {
2605 for (j=0; j<length; j++) {
2606 dstPtr[ j<<1 ] = srcPtr[j];
2607 dstPtr[(j<<1)+1] = srcPtr[j];
2609 srcPtr+= srcStride[plane];
2610 dstPtr+= dstStride[plane];
// Same 16-bit depth but opposite endianness: byte-swap every sample.
2612 } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
2613 && isBE(c->srcFormat) != isBE(c->dstFormat)) {
2615 for (i=0; i<height; i++) {
2616 for (j=0; j<length; j++)
2617 ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]);
2618 srcPtr+= srcStride[plane];
2619 dstPtr+= dstStride[plane];
// Identical layout and dense strides: copy the whole plane in one memcpy.
2621 } else if (dstStride[plane] == srcStride[plane] &&
2622 srcStride[plane] > 0 && srcStride[plane] == length) {
2623 memcpy(dst[plane] + dstStride[plane]*y, src[plane],
2624 height*dstStride[plane]);
// Fallback: per-row memcpy (length doubled for 16-bit formats).
2626 if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
2628 for (i=0; i<height; i++) {
2629 memcpy(dstPtr, srcPtr, length);
2630 srcPtr+= srcStride[plane];
2631 dstPtr+= dstStride[plane];
// Select a fast special-case converter for unscaled (1:1) conversions and
// install it in c->swScale. Later matches override earlier ones; finally the
// Blackfin/AltiVec hooks get a chance to substitute platform-optimized code.
2639 void ff_get_unscaled_swscale(SwsContext *c)
2641 const enum PixelFormat srcFormat = c->srcFormat;
2642 const enum PixelFormat dstFormat = c->dstFormat;
2643 const int flags = c->flags;
2644 const int dstH = c->dstH;
// Dithering is needed when writing <24bpp RGB from a deeper or non-RGB source.
2647 needsDither= isAnyRGB(dstFormat)
2648 && c->dstFormatBpp < 24
2649 && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat)));
// yv12 -> nv12/nv21
2652 if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
2653 c->swScale= planarToNv12Wrapper;
// Planar YUV -> RGB via the yuv2rgb tables (only without accurate rounding
// and with an even destination height).
2656 if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat)
2657 && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
2658 c->swScale= ff_yuv2rgb_get_func_ptr(c);
// YVU9 -> YV12 chroma upsampler (not bit-exact, hence the SWS_BITEXACT guard).
2661 if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
2662 c->swScale= yvu9ToYv12Wrapper;
// bgr24 -> yv12 direct converter.
2666 if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
2667 c->swScale= bgr24ToYv12Wrapper;
2669 /* RGB/BGR -> RGB/BGR (no dither needed forms) */
// Exclude every RGB format rgbToRgbWrapper cannot handle (palettized, 4-bit,
// monochrome, 48-bit) and the dithered cases unless fast/point scaling asked for it.
2670 if ( isAnyRGB(srcFormat)
2671 && isAnyRGB(dstFormat)
2672 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
2673 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8
2674 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4
2675 && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4
2676 && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2677 && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2678 && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2679 && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2680 && srcFormat != PIX_FMT_RGB48LE && dstFormat != PIX_FMT_RGB48LE
2681 && srcFormat != PIX_FMT_RGB48BE && dstFormat != PIX_FMT_RGB48BE
2682 && srcFormat != PIX_FMT_BGR48LE && dstFormat != PIX_FMT_BGR48LE
2683 && srcFormat != PIX_FMT_BGR48BE && dstFormat != PIX_FMT_BGR48BE
2684 && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2685 c->swScale= rgbToRgbWrapper;
// Palettized source -> packed RGB/BGR destinations.
2687 if ((usePal(srcFormat) && (
2688 dstFormat == PIX_FMT_RGB32 ||
2689 dstFormat == PIX_FMT_RGB32_1 ||
2690 dstFormat == PIX_FMT_RGB24 ||
2691 dstFormat == PIX_FMT_BGR32 ||
2692 dstFormat == PIX_FMT_BGR32_1 ||
2693 dstFormat == PIX_FMT_BGR24)))
2694 c->swScale= palToRgbWrapper;
// Planar 4:2:2 -> packed 4:2:2 (lossless repacking).
2696 if (srcFormat == PIX_FMT_YUV422P) {
2697 if (dstFormat == PIX_FMT_YUYV422)
2698 c->swScale= yuv422pToYuy2Wrapper;
2699 else if (dstFormat == PIX_FMT_UYVY422)
2700 c->swScale= yuv422pToUyvyWrapper;
2703 /* LQ converters if -sws 0 or -sws 4*/
2704 if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
// Planar 4:2:0 -> packed 4:2:2 (low quality: chroma rows are duplicated).
2706 if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) {
2707 if (dstFormat == PIX_FMT_YUYV422)
2708 c->swScale= planarToYuy2Wrapper;
2709 else if (dstFormat == PIX_FMT_UYVY422)
2710 c->swScale= planarToUyvyWrapper;
// Packed 4:2:2 -> planar 4:2:0 / 4:2:2.
2713 if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
2714 c->swScale= yuyvToYuv420Wrapper;
2715 if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
2716 c->swScale= uyvyToYuv420Wrapper;
2717 if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
2718 c->swScale= yuyvToYuv422Wrapper;
2719 if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
2720 c->swScale= uyvyToYuv422Wrapper;
// Simple copies: identical formats, alpha add/drop, gray extraction, or
// planar YUV pairs with matching subsampling (NV12/NV21 interleaving excluded).
2723 if ( srcFormat == dstFormat
2724 || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
2725 || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
2726 || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2727 || (isPlanarYUV(dstFormat) && isGray(srcFormat))
2728 || (isGray(dstFormat) && isGray(srcFormat))
2729 || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
2730 && c->chrDstHSubSample == c->chrSrcHSubSample
2731 && c->chrDstVSubSample == c->chrSrcVSubSample
2732 && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21
2733 && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21))
2735 if (isPacked(c->srcFormat))
2736 c->swScale= packedCopyWrapper;
2737 else /* Planar YUV or gray */
2738 c->swScale= planarCopyWrapper;
// Platform-specific overrides (guards presumably conditionally compiled — the
// #if lines are not visible in this view).
2742 ff_bfin_get_unscaled_swscale(c);
2744 ff_swscale_get_unscaled_altivec(c);
// Null out the plane pointers that `format` does not use (alpha plane, chroma
// planes of non-planar formats, the palette plane of non-palettized formats).
// NOTE(review): the assignments themselves are not visible in this view —
// only the guarding conditions are; confirm against the full source.
2747 static void reset_ptr(const uint8_t* src[], int format)
2749 if(!isALPHA(format))
2751 if(!isPlanarYUV(format)) {
2754 if (!usePal(format))
// Validate that every plane required by pix_fmt has both a data pointer and a
// nonzero linesize; the return statements are outside this view, but the loop
// fails a plane when either is missing.
2759 static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt,
2760 const int linesizes[4])
2762 const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
// Each component maps to a plane index; check that plane's pointer/linesize.
2765 for (i = 0; i < 4; i++) {
2766 int plane = desc->comp[i].plane;
2767 if (!data[plane] || !linesizes[plane])
2775 * swscale wrapper, so we don't need to export the SwsContext.
2776 * Assumes planar YUV to be in YUV order instead of YVU.
// Public slice-scaling entry point: validates pointers, tracks slice
// direction, refreshes the palette caches, then dispatches to c->swScale.
2778 int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY,
2779 int srcSliceH, uint8_t* const dst[], const int dstStride[])
// Local copies so the bottom-to-top path can flip pointers without touching
// the caller's arrays.
2782 const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
2783 uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
2785 // do not mess up sliceDir if we have a "trailing" 0-size slice
2789 if (!check_image_pointers(src, c->srcFormat, srcStride)) {
2790 av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
2793 if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
2794 av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
// A frame's first slice must touch either the top or the bottom edge.
2798 if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2799 av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
// First slice of the frame decides the direction: top-down (+1) or bottom-up (-1).
2802 if (c->sliceDir == 0) {
2803 if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
// Rebuild the cached YUV and RGB palettes (pal_yuv / pal_rgb) from the
// source palette or implicit per-format palette, per call.
2806 if (usePal(c->srcFormat)) {
2807 for (i=0; i<256; i++) {
2808 int p, r, g, b,y,u,v;
2809 if(c->srcFormat == PIX_FMT_PAL8) {
// PAL8 carries an explicit 256-entry ARGB palette in src[1].
2810 p=((const uint32_t*)(src[1]))[i];
2814 } else if(c->srcFormat == PIX_FMT_RGB8) {
2818 } else if(c->srcFormat == PIX_FMT_BGR8) {
2822 } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) {
2826 } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) {
2829 assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
// RGB -> YUV with rounding offsets (33/257 half-units for luma/chroma bias).
2834 y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2835 u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2836 v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2837 c->pal_yuv[i]= y + (u<<8) + (v<<16);
// pal_rgb is pre-arranged in the destination's byte order ("_1" variants shifted).
2839 switch(c->dstFormat) {
2844 c->pal_rgb[i]= r + (g<<8) + (b<<16);
2846 case PIX_FMT_BGR32_1:
2850 c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
2852 case PIX_FMT_RGB32_1:
2856 c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
2863 c->pal_rgb[i]= b + (g<<8) + (r<<16);
2868 // copy strides, so they can safely be modified
2869 if (c->sliceDir == 1) {
2870 // slices go from top to bottom
2871 int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
2872 int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
2874 reset_ptr(src2, c->srcFormat);
2875 reset_ptr((const uint8_t**)dst2, c->dstFormat);
2877 /* reset slice direction at end of frame */
2878 if (srcSliceY + srcSliceH == c->srcH)
2881 return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
2883 // slices go from bottom to top => we flip the image internally
// Negated strides plus pointers moved to the last row make the scaler walk
// the image upwards while writing it the right way up.
2884 int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
2885 int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
2887 src2[0] += (srcSliceH-1)*srcStride[0];
// For palettized input src[1] is the palette, not a chroma plane: leave it alone.
2888 if (!usePal(c->srcFormat))
2889 src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2890 src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2891 src2[3] += (srcSliceH-1)*srcStride[3];
2892 dst2[0] += ( c->dstH -1)*dstStride[0];
2893 dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
2894 dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
2895 dst2[3] += ( c->dstH -1)*dstStride[3];
2897 reset_ptr(src2, c->srcFormat);
2898 reset_ptr((const uint8_t**)dst2, c->dstFormat);
2900 /* reset slice direction at end of frame */
// Slice Y is re-expressed from the top edge for the internal scaler.
2904 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
/* Convert 8-bit palette indices into packed 32-bit pixels by direct palette
 * lookup; the 256-entry palette already holds the target byte order, so each
 * output pixel is simply the entry selected by the source byte. */
void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
{
    const uint32_t *pal = (const uint32_t *) palette;
    uint32_t *out = (uint32_t *) dst;
    int n;

    for (n = 0; n < num_pixels; n++)
        out[n] = pal[src[n]];
}
2917 /* Palette format: ABCD -> dst format: ABC */
2918 void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
2922 for (i=0; i<num_pixels; i++) {
2924 dst[0]= palette[src[i]*4+0];
2925 dst[1]= palette[src[i]*4+1];
2926 dst[2]= palette[src[i]*4+2];