2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB -> YUV conversion coefficients (ITU-R BT.601, studio range).
 * Luma (Y) contributions are scaled by 219/255, chroma (U/V) by 224/255,
 * all expressed in Q15 fixed point (RGB2YUV_SHIFT bits of fraction). */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB -> YUV coefficient sets indexed by colorspace id; each row holds
 * { GY, BY, RY,  GU, BU, RU,  GV, BV, RV } as floating-point fractions. */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
288 static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
289 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
290 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
291 enum PixelFormat dstFormat)
293 #define conv16(bits) \
294 if (isBE(dstFormat)) { \
295 yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
296 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
298 dest, uDest, vDest, aDest, \
301 yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
302 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
304 dest, uDest, vDest, aDest, \
307 if (is16BPS(dstFormat)) {
309 } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
317 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
318 const int16_t **lumSrc, int lumFilterSize,
319 const int16_t *chrFilter, const int16_t **chrUSrc,
320 const int16_t **chrVSrc,
321 int chrFilterSize, const int16_t **alpSrc,
322 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
323 uint8_t *aDest, int dstW, int chrDstW)
325 //FIXME Optimize (just quickly written not optimized..)
327 for (i=0; i<dstW; i++) {
330 for (j=0; j<lumFilterSize; j++)
331 val += lumSrc[j][i] * lumFilter[j];
333 dest[i]= av_clip_uint8(val>>19);
337 for (i=0; i<chrDstW; i++) {
341 for (j=0; j<chrFilterSize; j++) {
342 u += chrUSrc[j][i] * chrFilter[j];
343 v += chrVSrc[j][i] * chrFilter[j];
346 uDest[i]= av_clip_uint8(u>>19);
347 vDest[i]= av_clip_uint8(v>>19);
350 if (CONFIG_SWSCALE_ALPHA && aDest)
351 for (i=0; i<dstW; i++) {
354 for (j=0; j<lumFilterSize; j++)
355 val += alpSrc[j][i] * lumFilter[j];
357 aDest[i]= av_clip_uint8(val>>19);
362 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
363 const int16_t **lumSrc, int lumFilterSize,
364 const int16_t *chrFilter, const int16_t **chrUSrc,
365 const int16_t **chrVSrc,
366 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
367 int dstW, int chrDstW, enum PixelFormat dstFormat)
369 //FIXME Optimize (just quickly written not optimized..)
371 for (i=0; i<dstW; i++) {
374 for (j=0; j<lumFilterSize; j++)
375 val += lumSrc[j][i] * lumFilter[j];
377 dest[i]= av_clip_uint8(val>>19);
383 if (dstFormat == PIX_FMT_NV12)
384 for (i=0; i<chrDstW; i++) {
388 for (j=0; j<chrFilterSize; j++) {
389 u += chrUSrc[j][i] * chrFilter[j];
390 v += chrVSrc[j][i] * chrFilter[j];
393 uDest[2*i]= av_clip_uint8(u>>19);
394 uDest[2*i+1]= av_clip_uint8(v>>19);
397 for (i=0; i<chrDstW; i++) {
401 for (j=0; j<chrFilterSize; j++) {
402 u += chrUSrc[j][i] * chrFilter[j];
403 v += chrVSrc[j][i] * chrFilter[j];
406 uDest[2*i]= av_clip_uint8(v>>19);
407 uDest[2*i+1]= av_clip_uint8(u>>19);
/* NOTE(review): this chunk is truncated — several brace/initializer lines of
 * these macros are missing. Per pixel pair: accumulate two luma (Y1/Y2), one
 * chroma (U/V) and optionally alpha (A1/A2) filter sums; the loop is left
 * open for the enclosing YSCALE_YUV_2_ANYRGB_C case to finish. */
411 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
412 for (i=0; i<(dstW>>1); i++) {\
418 int av_unused A1, A2;\
419 type av_unused *r, *b, *g;\
422 for (j=0; j<lumFilterSize; j++) {\
423 Y1 += lumSrc[j][i2] * lumFilter[j];\
424 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
426 for (j=0; j<chrFilterSize; j++) {\
427 U += chrUSrc[j][i] * chrFilter[j];\
428 V += chrVSrc[j][i] * chrFilter[j];\
437 for (j=0; j<lumFilterSize; j++) {\
438 A1 += alpSrc[j][i2 ] * lumFilter[j];\
439 A2 += alpSrc[j][i2+1] * lumFilter[j];\
/* Same as the NOCLIP variant, plus clipping of Y1/Y2/U/V (and A1/A2 when
 * alpha is enabled) into the 8-bit range when any value overflowed. */
445 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
446 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
447 if ((Y1|Y2|U|V)&256) {\
448 if (Y1>255) Y1=255; \
449 else if (Y1<0)Y1=0; \
450 if (Y2>255) Y2=255; \
451 else if (Y2<0)Y2=0; \
457 if (alpha && ((A1|A2)&256)) {\
458 A1=av_clip_uint8(A1);\
459 A2=av_clip_uint8(A2);\
/* Full-chroma per-pixel variant: one Y/U/V (+A) filter accumulation per
 * output pixel instead of per pixel pair. NOTE(review): truncated chunk —
 * initializer/closing lines are missing here. */
462 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
463 for (i=0; i<dstW; i++) {\
471 for (j=0; j<lumFilterSize; j++) {\
472 Y += lumSrc[j][i ] * lumFilter[j];\
474 for (j=0; j<chrFilterSize; j++) {\
475 U += chrUSrc[j][i] * chrFilter[j];\
476 V += chrVSrc[j][i] * chrFilter[j];\
483 for (j=0; j<lumFilterSize; j++)\
484 A += alpSrc[j][i ] * lumFilter[j];\
487 A = av_clip_uint8(A);\
/* Converts the filtered Y/U/V to R/G/B with the context's yuv2rgb
 * coefficients, then clamps each channel to the 22-bit intermediate range. */
490 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
491 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
492 Y-= c->yuv2rgb_y_offset;\
493 Y*= c->yuv2rgb_y_coeff;\
495 R= Y + V*c->yuv2rgb_v2r_coeff;\
496 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
497 B= Y + U*c->yuv2rgb_u2b_coeff;\
498 if ((R|G|B)&(0xC0000000)) {\
499 if (R>=(256<<22)) R=(256<<22)-1; \
501 if (G>=(256<<22)) G=(256<<22)-1; \
503 if (B>=(256<<22)) B=(256<<22)-1; \
/* 16-bit grayscale path: accumulate Y1/Y2 filter sums and clip to 0..65535.
 * NOTE(review): truncated chunk — some lines of this macro are missing. */
507 #define YSCALE_YUV_2_GRAY16_C \
508 for (i=0; i<(dstW>>1); i++) {\
517 for (j=0; j<lumFilterSize; j++) {\
518 Y1 += lumSrc[j][i2] * lumFilter[j];\
519 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
523 if ((Y1|Y2|U|V)&65536) {\
524 if (Y1>65535) Y1=65535; \
525 else if (Y1<0)Y1=0; \
526 if (Y2>65535) Y2=65535; \
527 else if (Y2<0)Y2=0; \
/* After filtering+clipping, look up the per-channel RGB tables for U/V. */
530 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
531 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
532 r = (type *)c->table_rV[V]; \
533 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
534 b = (type *)c->table_bU[U];
/* Two-row vertical bilinear blend: mixes buf0/buf1 with yalpha and
 * ubuf/vbuf with uvalpha (weights sum to 4096), >>19 to 8 bits. */
536 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
537 for (i=0; i<(dstW>>1); i++) { \
539 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
540 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
541 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
542 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
543 type av_unused *r, *b, *g; \
544 int av_unused A1, A2; \
546 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
547 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
/* Grayscale-16 two-row blend: only >>11 so 16 bits of luma are kept. */
550 #define YSCALE_YUV_2_GRAY16_2_C \
551 for (i=0; i<(dstW>>1); i++) { \
553 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
554 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
/* Two-row blend followed by RGB table lookups for U/V. */
556 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
557 YSCALE_YUV_2_PACKED2_C(type,alpha)\
558 r = (type *)c->table_rV[V];\
559 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
560 b = (type *)c->table_bU[U];
/* Single-row fast path (no vertical interpolation): luma from buf0,
 * chroma from ubuf1/vbuf1, >>7 to 8 bits. */
562 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
563 for (i=0; i<(dstW>>1); i++) {\
565 int Y1= buf0[i2 ]>>7;\
566 int Y2= buf0[i2+1]>>7;\
567 int U= (ubuf1[i])>>7;\
568 int V= (vbuf1[i])>>7;\
569 type av_unused *r, *b, *g;\
570 int av_unused A1, A2;\
/* Single-row grayscale-16: <<1 keeps the full 16-bit luma range. */
576 #define YSCALE_YUV_2_GRAY16_1_C \
577 for (i=0; i<(dstW>>1); i++) {\
579 int Y1= buf0[i2 ]<<1;\
580 int Y2= buf0[i2+1]<<1;
/* Single-row path followed by RGB table lookups for U/V. */
582 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
583 YSCALE_YUV_2_PACKED1_C(type,alpha)\
584 r = (type *)c->table_rV[V];\
585 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
586 b = (type *)c->table_bU[U];
/* Single-row path that averages the two available chroma rows:
 * (ubuf0+ubuf1)>>8 ~= mean of both rows scaled to 8 bits. */
588 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
589 for (i=0; i<(dstW>>1); i++) {\
591 int Y1= buf0[i2 ]>>7;\
592 int Y2= buf0[i2+1]>>7;\
593 int U= (ubuf0[i] + ubuf1[i])>>8;\
594 int V= (vbuf0[i] + vbuf1[i])>>8;\
595 type av_unused *r, *b, *g;\
596 int av_unused A1, A2;\
/* Chroma-averaging single-row path plus RGB table lookups. */
602 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
603 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
604 r = (type *)c->table_rV[V];\
605 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
606 b = (type *)c->table_bU[U];
/* 1bpp output (two-row blend variant): dithers 8 luma samples through
 * dither_8x8_220 into one byte; the byte is inverted for MONOWHITE. */
608 #define YSCALE_YUV_2_MONO2_C \
609 const uint8_t * const d128=dither_8x8_220[y&7];\
610 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
611 for (i=0; i<dstW-7; i+=8) {\
613 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
614 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
615 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
616 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
617 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
618 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
619 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
620 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
621 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* 1bpp output (vertical-filter variant): same packing, luma taken from the
 * full filter accumulation two pixels at a time. NOTE(review): truncated —
 * intermediate lines of this macro are missing in this chunk. */
625 #define YSCALE_YUV_2_MONOX_C \
626 const uint8_t * const d128=dither_8x8_220[y&7];\
627 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
629 for (i=0; i<dstW-1; i+=2) {\
634 for (j=0; j<lumFilterSize; j++) {\
635 Y1 += lumSrc[j][i] * lumFilter[j];\
636 Y2 += lumSrc[j][i+1] * lumFilter[j];\
646 acc+= acc + g[Y1+d128[(i+0)&7]];\
647 acc+= acc + g[Y2+d128[(i+1)&7]];\
649 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Central packed-output writer: expands one of the YSCALE_* pixel loops
 * (func/func2/func_g16/func_monoblack) and stores the result in the layout
 * required by c->dstFormat (RGB48/BGR48, 32/24-bit RGB/BGR with optional
 * alpha, dithered 16/15/12/8/4/1-bit, YUYV/UYVY, GRAY16).
 * NOTE(review): this chunk is truncated — many case labels, braces and
 * `break` lines of this macro are missing. */
654 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
655 switch(c->dstFormat) {\
656 case PIX_FMT_RGB48BE:\
657 case PIX_FMT_RGB48LE:\
659 ((uint8_t*)dest)[ 0]= r[Y1];\
660 ((uint8_t*)dest)[ 1]= r[Y1];\
661 ((uint8_t*)dest)[ 2]= g[Y1];\
662 ((uint8_t*)dest)[ 3]= g[Y1];\
663 ((uint8_t*)dest)[ 4]= b[Y1];\
664 ((uint8_t*)dest)[ 5]= b[Y1];\
665 ((uint8_t*)dest)[ 6]= r[Y2];\
666 ((uint8_t*)dest)[ 7]= r[Y2];\
667 ((uint8_t*)dest)[ 8]= g[Y2];\
668 ((uint8_t*)dest)[ 9]= g[Y2];\
669 ((uint8_t*)dest)[10]= b[Y2];\
670 ((uint8_t*)dest)[11]= b[Y2];\
674 case PIX_FMT_BGR48BE:\
675 case PIX_FMT_BGR48LE:\
677 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
678 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
679 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
680 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
681 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
682 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
/* 32-bit output with alpha in the high byte (A<<24). */
689 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
690 func(uint32_t,needAlpha)\
691 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
692 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
695 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
697 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
698 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
702 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
703 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
/* 32-bit output with alpha in the low byte (no shift). */
711 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
712 func(uint32_t,needAlpha)\
713 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
714 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
717 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
719 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
720 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
724 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
725 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
/* 24-bit RGB / BGR byte-wise stores. */
732 ((uint8_t*)dest)[0]= r[Y1];\
733 ((uint8_t*)dest)[1]= g[Y1];\
734 ((uint8_t*)dest)[2]= b[Y1];\
735 ((uint8_t*)dest)[3]= r[Y2];\
736 ((uint8_t*)dest)[4]= g[Y2];\
737 ((uint8_t*)dest)[5]= b[Y2];\
743 ((uint8_t*)dest)[0]= b[Y1];\
744 ((uint8_t*)dest)[1]= g[Y1];\
745 ((uint8_t*)dest)[2]= r[Y1];\
746 ((uint8_t*)dest)[3]= b[Y2];\
747 ((uint8_t*)dest)[4]= g[Y2];\
748 ((uint8_t*)dest)[5]= r[Y2];\
/* Dithered 16-bit (565), 15-bit (555) and 12-bit (444) outputs. */
752 case PIX_FMT_RGB565BE:\
753 case PIX_FMT_RGB565LE:\
754 case PIX_FMT_BGR565BE:\
755 case PIX_FMT_BGR565LE:\
757 const int dr1= dither_2x2_8[y&1 ][0];\
758 const int dg1= dither_2x2_4[y&1 ][0];\
759 const int db1= dither_2x2_8[(y&1)^1][0];\
760 const int dr2= dither_2x2_8[y&1 ][1];\
761 const int dg2= dither_2x2_4[y&1 ][1];\
762 const int db2= dither_2x2_8[(y&1)^1][1];\
764 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
765 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
769 case PIX_FMT_RGB555BE:\
770 case PIX_FMT_RGB555LE:\
771 case PIX_FMT_BGR555BE:\
772 case PIX_FMT_BGR555LE:\
774 const int dr1= dither_2x2_8[y&1 ][0];\
775 const int dg1= dither_2x2_8[y&1 ][1];\
776 const int db1= dither_2x2_8[(y&1)^1][0];\
777 const int dr2= dither_2x2_8[y&1 ][1];\
778 const int dg2= dither_2x2_8[y&1 ][0];\
779 const int db2= dither_2x2_8[(y&1)^1][1];\
781 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
782 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
786 case PIX_FMT_RGB444BE:\
787 case PIX_FMT_RGB444LE:\
788 case PIX_FMT_BGR444BE:\
789 case PIX_FMT_BGR444LE:\
791 const int dr1= dither_4x4_16[y&3 ][0];\
792 const int dg1= dither_4x4_16[y&3 ][1];\
793 const int db1= dither_4x4_16[(y&3)^3][0];\
794 const int dr2= dither_4x4_16[y&3 ][1];\
795 const int dg2= dither_4x4_16[y&3 ][0];\
796 const int db2= dither_4x4_16[(y&3)^3][1];\
798 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
799 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
/* Dithered 8-bit, 4-bit (two pixels per byte) and 4-byte outputs. */
806 const uint8_t * const d64= dither_8x8_73[y&7];\
807 const uint8_t * const d32= dither_8x8_32[y&7];\
809 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
810 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
817 const uint8_t * const d64= dither_8x8_73 [y&7];\
818 const uint8_t * const d128=dither_8x8_220[y&7];\
820 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
821 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
825 case PIX_FMT_RGB4_BYTE:\
826 case PIX_FMT_BGR4_BYTE:\
828 const uint8_t * const d64= dither_8x8_73 [y&7];\
829 const uint8_t * const d128=dither_8x8_220[y&7];\
831 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
832 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
836 case PIX_FMT_MONOBLACK:\
837 case PIX_FMT_MONOWHITE:\
/* Packed 4:2:2 and 16-bit grayscale outputs. */
842 case PIX_FMT_YUYV422:\
844 ((uint8_t*)dest)[2*i2+0]= Y1;\
845 ((uint8_t*)dest)[2*i2+1]= U;\
846 ((uint8_t*)dest)[2*i2+2]= Y2;\
847 ((uint8_t*)dest)[2*i2+3]= V;\
850 case PIX_FMT_UYVY422:\
852 ((uint8_t*)dest)[2*i2+0]= U;\
853 ((uint8_t*)dest)[2*i2+1]= Y1;\
854 ((uint8_t*)dest)[2*i2+2]= V;\
855 ((uint8_t*)dest)[2*i2+3]= Y2;\
858 case PIX_FMT_GRAY16BE:\
860 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
861 ((uint8_t*)dest)[2*i2+1]= Y1;\
862 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
863 ((uint8_t*)dest)[2*i2+3]= Y2;\
866 case PIX_FMT_GRAY16LE:\
868 ((uint8_t*)dest)[2*i2+0]= Y1;\
869 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
870 ((uint8_t*)dest)[2*i2+2]= Y2;\
871 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
876 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
877 const int16_t **lumSrc, int lumFilterSize,
878 const int16_t *chrFilter, const int16_t **chrUSrc,
879 const int16_t **chrVSrc, int chrFilterSize,
880 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
883 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution packed RGB output: per-pixel YUV->RGB conversion
 * via YSCALE_YUV_2_RGBX_FULL_C, with optional alpha written at dest[aidx]
 * (255 when no alpha plane is present). Advances dest by `step` bytes per
 * pixel (dstFormatBpp/8). NOTE(review): this chunk is truncated — the
 * switch case labels and closing braces are missing here. */
886 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
887 const int16_t **lumSrc, int lumFilterSize,
888 const int16_t *chrFilter, const int16_t **chrUSrc,
889 const int16_t **chrVSrc, int chrFilterSize,
890 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
893 int step= c->dstFormatBpp/8;
896 switch(c->dstFormat) {
904 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
905 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
906 dest[aidx]= needAlpha ? A : 255;
913 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
914 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
922 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
939 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
940 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
941 dest[aidx]= needAlpha ? A : 255;
948 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
949 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
957 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill `height` rows of `width` bytes with `val`, starting at row `y` of a
 * plane with the given stride. */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
        ptr += stride;
    }
}
982 static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
986 for (i = 0; i < width; i++) {
991 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
995 static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
996 const uint8_t *src1, const uint8_t *src2,
997 int width, uint32_t *unused)
1001 for (i = 0; i < width; i++) {
1002 int r = src1[6*i + 0];
1003 int g = src1[6*i + 2];
1004 int b = src1[6*i + 4];
1006 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1007 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1011 static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1012 const uint8_t *src1, const uint8_t *src2,
1013 int width, uint32_t *unused)
1017 for (i = 0; i < width; i++) {
1018 int r= src1[12*i + 0] + src1[12*i + 6];
1019 int g= src1[12*i + 2] + src1[12*i + 8];
1020 int b= src1[12*i + 4] + src1[12*i + 10];
1022 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1023 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1027 static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
1031 for (i = 0; i < width; i++) {
1036 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1040 static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
1041 const uint8_t *src1, const uint8_t *src2,
1042 int width, uint32_t *unused)
1045 for (i = 0; i < width; i++) {
1046 int b = src1[6*i + 0];
1047 int g = src1[6*i + 2];
1048 int r = src1[6*i + 4];
1050 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1051 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1055 static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
1056 const uint8_t *src1, const uint8_t *src2,
1057 int width, uint32_t *unused)
1060 for (i = 0; i < width; i++) {
1061 int b= src1[12*i + 0] + src1[12*i + 6];
1062 int g= src1[12*i + 2] + src1[12*i + 8];
1063 int r= src1[12*i + 4] + src1[12*i + 10];
1065 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1066 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Generates a packed-RGB/BGR -> 8-bit luma converter. Each component is
 * extracted with its shift/mask, weighted by the (pre-shifted) RY/GY/BY
 * coefficients and rounded/normalized with shift S. */
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
static void name ## _c(uint8_t *dst, const uint8_t *src, \
                       int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= (((const type*)src)[i]>>shb)&maskb;\
        int g= (((const type*)src)[i]>>shg)&maskg;\
        int r= (((const type*)src)[i]>>shr)&maskr;\
\
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
    }\
}
1084 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1085 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1086 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1087 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1088 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1089 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1090 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1091 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extracts the alpha channel of packed 4-byte ABGR pixels into dst.
 * NOTE(review): the loop body is not visible in this truncated chunk —
 * confirm which byte offset within each 4-byte pixel is read. */
1093 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1096 for (i=0; i<width; i++) {
/* Generates a pair of packed-RGB/BGR -> chroma converters: name##_c with
 * full horizontal resolution and name##_half_c averaging two adjacent
 * pixels. The half variant sums pairs with masked arithmetic (components
 * may carry into the 2*mask range) before applying the coefficients. */
#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                       const uint8_t *src, const uint8_t *dummy, \
                       int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
        int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
        int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
    }\
}\
static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
                            const uint8_t *src, const uint8_t *dummy, \
                            int width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int pix0= ((const type*)src)[2*i+0]>>shp;\
        int pix1= ((const type*)src)[2*i+1]>>shp;\
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
        g&= maskg|(2*maskg);\
\
        g>>=shg;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
    }\
}
1136 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1137 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1138 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1139 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1140 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1141 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1142 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1143 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* Palette -> luma: dst[i] is the low byte of the palette entry indexed by
 * the source pixel. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/* Palette -> chroma: U is byte 1 and V is byte 2 of the palette entry.
 * src1 and src2 must alias the same buffer. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/* 1bpp white-is-zero -> 8-bit luma: each input byte is inverted and its
 * bits (MSB first) expanded to 0 or 255. Only full bytes are handled
 * (width/8 iterations). */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/* 1bpp black-is-zero -> 8-bit luma: each input bit (MSB first) is expanded
 * to 0 or 255. Only full bytes are handled (width/8 iterations). */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1191 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1192 const int16_t *chrUSrc, const int16_t *chrVSrc,
1193 const int16_t *alpSrc,
1194 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1195 uint8_t *aDest, int dstW, int chrDstW)
1198 for (i=0; i<dstW; i++) {
1199 int val= (lumSrc[i]+64)>>7;
1200 dest[i]= av_clip_uint8(val);
1204 for (i=0; i<chrDstW; i++) {
1205 int u=(chrUSrc[i]+64)>>7;
1206 int v=(chrVSrc[i]+64)>>7;
1207 uDest[i]= av_clip_uint8(u);
1208 vDest[i]= av_clip_uint8(v);
1211 if (CONFIG_SWSCALE_ALPHA && aDest)
1212 for (i=0; i<dstW; i++) {
1213 int val= (alpSrc[i]+64)>>7;
1214 aDest[i]= av_clip_uint8(val);
1219 * vertical bilinear scale YV12 to RGB
1221 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1222 const uint16_t *buf1, const uint16_t *ubuf0,
1223 const uint16_t *ubuf1, const uint16_t *vbuf0,
1224 const uint16_t *vbuf1, const uint16_t *abuf0,
1225 const uint16_t *abuf1, uint8_t *dest, int dstW,
1226 int yalpha, int uvalpha, int y)
1228 int yalpha1=4095- yalpha;
1229 int uvalpha1=4095-uvalpha;
1232 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1236 * YV12 to RGB without scaling or interpolating
/* Packed output without vertical scaling/interpolation of luma; chroma may
 * still be averaged between two lines. uvalpha < 2048 selects the
 * nearest-line chroma path, otherwise the two chroma lines are blended
 * (PACKED1B variant). */
1238 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1239 const uint16_t *ubuf0, const uint16_t *ubuf1,
1240 const uint16_t *vbuf0, const uint16_t *vbuf1,
1241 const uint16_t *abuf0, uint8_t *dest, int dstW,
1242 int uvalpha, enum PixelFormat dstFormat,
/* luma is taken from buf0 unblended (yalpha1 == 0) */
1245 const int yalpha1=0;
1248 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1249 const int yalpha= 4096; //FIXME ...
1251 if (uvalpha < 2048) {
1252 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1254 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1258 //FIXME yuy2* can read up to 7 samples too much
/* Extract luma from YUYV (Y at even byte offsets). Also reused for
 * big-endian 16-bit gray/planar inputs where the high byte sits first.
 * Fragment restored to the complete upstream definition. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/* Extract chroma from YUYV: U at byte 1 and V at byte 3 of each 4-byte
 * macropixel. Both source pointers must reference the same line.
 * Fragment restored to the complete upstream definition. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/* Reduce little-endian 16-bit chroma planes to 8 bits by taking the high
 * byte (offset 1) of each sample; U and V come from separate planes.
 * Fragment restored to the complete upstream definition. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1];
        dstV[i]= src2[2*i + 1];
    }
}
1289 /* This is almost identical to the previous, and exists only because
1290  * (yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma from UYVY (Y at odd byte offsets). Also reused for
 * little-endian 16-bit gray/planar inputs where the high byte sits second.
 * Fragment restored to the complete upstream definition. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i+1];
}
/* Extract chroma from UYVY: U at byte 0 and V at byte 2 of each 4-byte
 * macropixel. Both source pointers must reference the same line.
 * Fragment restored to the complete upstream definition. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
/* Reduce big-endian 16-bit chroma planes to 8 bits by taking the high byte
 * (offset 0) of each sample; U and V come from separate planes.
 * Fragment restored to the complete upstream definition (the two stores
 * were missing from the extract). */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i];
        dstV[i]= src2[2*i];
    }
}
1320 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1321 const uint8_t *src, int width)
1324 for (i = 0; i < width; i++) {
1325 dst1[i] = src[2*i+0];
1326 dst2[i] = src[2*i+1];
/* NV12 chroma input: interleaved line is UVUV..., so even bytes go to the
 * U plane and odd bytes to the V plane. src2 is unused (single chroma plane).
 * Fragment restored to the complete upstream definition. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/* NV21 chroma input: interleaved line is VUVU..., so the destination planes
 * are swapped relative to NV12. src2 is unused (single chroma plane).
 * Fragment restored to the complete upstream definition. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1344 // FIXME Maybe dither instead.
// FIXME Maybe dither instead.
/* Generate Y and UV input converters for 9- and 10-bit planar YUV: each
 * 16-bit sample is read with rfunc (AV_RL16/AV_RB16) and shifted down to
 * 8 bits. The macro body was truncated in extraction (braces and `int i;`
 * lines missing); restored here to the complete upstream definition. */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
{ \
    int i; \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
    } \
} \
\
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
{ \
    int i; \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
} \

YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
1373 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1374 int width, uint32_t *unused)
1377 for (i=0; i<width; i++) {
1382 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1386 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1387 const uint8_t *src2, int width, uint32_t *unused)
1390 for (i=0; i<width; i++) {
1391 int b= src1[3*i + 0];
1392 int g= src1[3*i + 1];
1393 int r= src1[3*i + 2];
1395 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1396 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1398 assert(src1 == src2);
1401 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1402 const uint8_t *src2, int width, uint32_t *unused)
1405 for (i=0; i<width; i++) {
1406 int b= src1[6*i + 0] + src1[6*i + 3];
1407 int g= src1[6*i + 1] + src1[6*i + 4];
1408 int r= src1[6*i + 2] + src1[6*i + 5];
1410 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1411 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1413 assert(src1 == src2);
1416 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1420 for (i=0; i<width; i++) {
1425 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1429 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1430 const uint8_t *src2, int width, uint32_t *unused)
1434 for (i=0; i<width; i++) {
1435 int r= src1[3*i + 0];
1436 int g= src1[3*i + 1];
1437 int b= src1[3*i + 2];
1439 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1440 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1444 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1445 const uint8_t *src2, int width, uint32_t *unused)
1449 for (i=0; i<width; i++) {
1450 int r= src1[6*i + 0] + src1[6*i + 3];
1451 int g= src1[6*i + 1] + src1[6*i + 4];
1452 int b= src1[6*i + 2] + src1[6*i + 5];
1454 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1455 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1460 // bilinear / bicubic scaling
/* Generic horizontal scaler (bilinear/bicubic/...): for each destination
 * sample, convolve filterSize 8-bit source samples starting at filterPos[i]
 * with 16-bit filter coefficients, producing a 15-bit result clamped to
 * 0x7FFF. srcW and xInc are unused in the C version (kept for the
 * function-pointer signature shared with asm implementations).
 * Fragment restored to the complete upstream definition. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;
    for (i=0; i<dstW; i++) {
        int j;
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
    }
}
1480 //FIXME all pal and rgb srcFormats could do this convertion as well
1481 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain; input is clamped to 30775 to avoid overflow.
 * Fragment restored to the complete upstream definition. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/* Compress full-range (JPEG) chroma to limited-range (MPEG) in the 15-bit
 * intermediate domain: x' = (x*1799 + 4081085)>>11 (i.e. scale by 224/256
 * and add the +16-equivalent offset).
 * Fragment restored to the complete upstream definition. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG) in the 15-bit
 * intermediate domain; input is clamped to 30189 to avoid overflow.
 * Fragment restored to the complete upstream definition. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/* Compress full-range (JPEG) luma to limited-range (MPEG) in the 15-bit
 * intermediate domain: x' = (x*14071 + 33561947)>>14 (scale by 219/255 and
 * add the +16-equivalent offset).
 * Fragment restored to the complete upstream definition. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
1511 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1512 const uint8_t *src, int srcW, int xInc)
1515 unsigned int xpos=0;
1516 for (i=0;i<dstWidth;i++) {
1517 register unsigned int xx=xpos>>16;
1518 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1519 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1524 // *** horizontal scale Y line to temp buffer
/* Horizontal scaling of one luma (or alpha, when isAlpha) input line:
 * optionally converts the input to 8-bit Y via the per-format toYV12
 * callback, then scales with either the generic hScale or the fast bilinear
 * path, and finally applies the luma range conversion if configured.
 * NOTE(review): this extract is missing interior lines — presumably the
 * `if (toYV12) {` and `if (convertRange)` guards and closing braces; confirm
 * against upstream before editing. */
1525 static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1526 const uint8_t *src, int srcW, int xInc,
1527 const int16_t *hLumFilter,
1528 const int16_t *hLumFilterPos, int hLumFilterSize,
1529 uint8_t *formatConvBuffer,
1530 uint32_t *pal, int isAlpha)
/* pick the alpha or luma converter / range function from the context */
1532 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1533 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1535 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
/* convert the input line into formatConvBuffer and scale from there */
1538 toYV12(formatConvBuffer, src, srcW, pal);
1539 src= formatConvBuffer;
1542 if (!c->hyscale_fast) {
1543 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1544 } else { // fast bilinear upscale / crap downscale
1545 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
/* MPEG<->JPEG range conversion on the scaled 15-bit samples */
1549 convertRange(dst, dstWidth);
1552 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1553 int dstWidth, const uint8_t *src1,
1554 const uint8_t *src2, int srcW, int xInc)
1557 unsigned int xpos=0;
1558 for (i=0;i<dstWidth;i++) {
1559 register unsigned int xx=xpos>>16;
1560 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1561 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1562 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontal scaling of one chroma line pair (U and V): optionally converts
 * the input via the per-format chrToYV12 callback into formatConvBuffer
 * (U half) and buf2 (V half), then scales each plane with the generic or
 * fast path, and applies chroma range conversion if configured.
 * NOTE(review): this extract is missing interior lines — presumably the
 * `if (c->chrToYV12) {` guard, the `src2= buf2;` assignment and closing
 * braces; confirm against upstream before editing. */
1567 static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1568 const uint8_t *src1, const uint8_t *src2,
1569 int srcW, int xInc, const int16_t *hChrFilter,
1570 const int16_t *hChrFilterPos, int hChrFilterSize,
1571 uint8_t *formatConvBuffer, uint32_t *pal)
1574 src1 += c->chrSrcOffset;
1575 src2 += c->chrSrcOffset;
/* second half of the conversion buffer holds the V plane, 16-byte aligned */
1578 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1579 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1580 src1= formatConvBuffer;
1584 if (!c->hcscale_fast) {
1585 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1586 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1587 } else { // fast bilinear upscale / crap downscale
1588 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1591 if (c->chrConvertRange)
1592 c->chrConvertRange(dst1, dst2, dstWidth);
/* Compile-time switch for verbose ring-buffer tracing inside swScale();
 * set DEBUG_SWSCALE_BUFFERS to 1 to enable the av_log() output. */
1595 #define DEBUG_SWSCALE_BUFFERS 0
1596 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Core scaling entry point: converts one input slice (srcSliceY..+srcSliceH)
 * into the destination picture. Horizontally scaled lines are kept in ring
 * buffers (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf) and consumed by the
 * vertical scalers / packed-output writers. Returns the number of output
 * lines written (dstY - lastDstY).
 * NOTE(review): this extract is missing many interior lines (braces, the
 * dstY/lastDstY/enough_lines declarations, several statements); the comments
 * below only describe what the visible code shows — reconcile against the
 * upstream file before editing. */
1598 static int swScale(SwsContext *c, const uint8_t* src[],
1599 int srcStride[], int srcSliceY,
1600 int srcSliceH, uint8_t* dst[], int dstStride[])
1602 /* load a few things into local vars to make the code more readable? and faster */
1603 const int srcW= c->srcW;
1604 const int dstW= c->dstW;
1605 const int dstH= c->dstH;
1606 const int chrDstW= c->chrDstW;
1607 const int chrSrcW= c->chrSrcW;
1608 const int lumXInc= c->lumXInc;
1609 const int chrXInc= c->chrXInc;
1610 const enum PixelFormat dstFormat= c->dstFormat;
1611 const int flags= c->flags;
1612 int16_t *vLumFilterPos= c->vLumFilterPos;
1613 int16_t *vChrFilterPos= c->vChrFilterPos;
1614 int16_t *hLumFilterPos= c->hLumFilterPos;
1615 int16_t *hChrFilterPos= c->hChrFilterPos;
1616 int16_t *vLumFilter= c->vLumFilter;
1617 int16_t *vChrFilter= c->vChrFilter;
1618 int16_t *hLumFilter= c->hLumFilter;
1619 int16_t *hChrFilter= c->hChrFilter;
1620 int32_t *lumMmxFilter= c->lumMmxFilter;
1621 int32_t *chrMmxFilter= c->chrMmxFilter;
1622 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1623 const int vLumFilterSize= c->vLumFilterSize;
1624 const int vChrFilterSize= c->vChrFilterSize;
1625 const int hLumFilterSize= c->hLumFilterSize;
1626 const int hChrFilterSize= c->hChrFilterSize;
1627 int16_t **lumPixBuf= c->lumPixBuf;
1628 int16_t **chrUPixBuf= c->chrUPixBuf;
1629 int16_t **chrVPixBuf= c->chrVPixBuf;
1630 int16_t **alpPixBuf= c->alpPixBuf;
1631 const int vLumBufSize= c->vLumBufSize;
1632 const int vChrBufSize= c->vChrBufSize;
1633 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* chroma slice geometry after vertical subsampling (rounded up) */
1634 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1635 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1637 uint32_t *pal=c->pal_yuv;
1639 /* vars which will change and which we need to store back in the context */
1641 int lumBufIndex= c->lumBufIndex;
1642 int chrBufIndex= c->chrBufIndex;
1643 int lastInLumBuf= c->lastInLumBuf;
1644 int lastInChrBuf= c->lastInChrBuf;
/* packed input: make all 4 src/stride entries refer to plane 0; apply
 * vertical chroma drop to planar strides */
1646 if (isPacked(c->srcFormat)) {
1654 srcStride[3]= srcStride[0];
1656 srcStride[1]<<= c->vChrDrop;
1657 srcStride[2]<<= c->vChrDrop;
1659 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1660 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1661 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1662 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1663 srcSliceY, srcSliceH, dstY, dstH);
1664 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1665 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn once when destination strides defeat aligned SIMD stores */
1667 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1668 static int warnedAlready=0; //FIXME move this into the context perhaps
1669 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1670 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1671 " ->cannot do aligned memory accesses anymore\n");
1676 /* Note the user might start scaling the picture in the middle so this
1677 will not get executed. This is not really intended but works
1678 currently, so people might do it. */
1679 if (srcSliceY ==0) {
/* main loop: produce one destination line per iteration until the slice
 * runs out of input lines */
1689 for (;dstY < dstH; dstY++) {
1690 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1691 const int chrDstY= dstY>>c->chrDstVSubSample;
1692 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1693 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1694 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
/* input line ranges required by the vertical filters for this output line */
1696 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1697 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1698 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1699 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1700 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1701 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1704 //handle holes (FAST_BILINEAR & weird filters)
1705 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1706 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1707 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1708 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1710 DEBUG_BUFFERS("dstY: %d\n", dstY);
1711 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1712 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1713 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1714 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1716 // Do we have enough lines in this slice to output the dstY line
1717 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input: horizontally scale what the slice has, then break */
1719 if (!enough_lines) {
1720 lastLumSrcY = srcSliceY + srcSliceH - 1;
1721 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1722 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1723 lastLumSrcY, lastChrSrcY);
1726 //Do horizontal scaling
1727 while(lastInLumBuf < lastLumSrcY) {
1728 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1729 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1731 assert(lumBufIndex < 2*vLumBufSize);
1732 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1733 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1734 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1735 hLumFilter, hLumFilterPos, hLumFilterSize,
/* alpha uses the same luma scaler with isAlpha set */
1738 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1739 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1740 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1744 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1745 lumBufIndex, lastInLumBuf);
1747 while(lastInChrBuf < lastChrSrcY) {
1748 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1749 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1751 assert(chrBufIndex < 2*vChrBufSize);
1752 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1753 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1754 //FIXME replace parameters through context struct (some at least)
1756 if (c->needs_hcscale)
1757 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1758 chrDstW, src1, src2, chrSrcW, chrXInc,
1759 hChrFilter, hChrFilterPos, hChrFilterSize,
1760 formatConvBuffer, pal);
1762 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1763 chrBufIndex, lastInChrBuf);
1765 //wrap buf index around to stay inside the ring buffer
1766 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1767 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1769 break; //we can't output a dstY line so let's try with the next slice
1772 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* normal case: MMX output paths may overread, so they are only used away
 * from the last two lines (see the comment on the else branch below) */
1774 if (dstY < dstH-2) {
1775 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1776 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1777 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1778 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1779 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1780 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1781 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1783 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1784 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1785 dest, uDest, dstW, chrDstW, dstFormat);
1786 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1787 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1788 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1789 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1790 const int16_t *lumBuf = lumSrcPtr[0];
1791 const int16_t *chrUBuf= chrUSrcPtr[0];
1792 const int16_t *chrVBuf= chrVSrcPtr[0];
1793 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1794 c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1795 uDest, vDest, aDest, dstW, chrDstW);
1796 } else { //General YV12
1798 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1799 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1800 chrVSrcPtr, vChrFilterSize,
1801 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1804 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1805 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1806 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1807 int chrAlpha= vChrFilter[2*dstY+1];
1808 if(flags & SWS_FULL_CHR_H_INT) {
1809 yuv2rgbX_c_full(c, //FIXME write a packed1_full function
1810 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1811 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
1812 chrVSrcPtr, vChrFilterSize,
1813 alpSrcPtr, dest, dstW, dstY);
1815 c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1816 *chrVSrcPtr, *(chrVSrcPtr+1),
1817 alpPixBuf ? *alpSrcPtr : NULL,
1818 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1820 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1821 int lumAlpha= vLumFilter[2*dstY+1];
1822 int chrAlpha= vChrFilter[2*dstY+1];
1824 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1826 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1827 if(flags & SWS_FULL_CHR_H_INT) {
1828 yuv2rgbX_c_full(c, //FIXME write a packed2_full function
1829 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1830 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1831 alpSrcPtr, dest, dstW, dstY);
1833 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1834 *chrVSrcPtr, *(chrVSrcPtr+1),
1835 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1836 dest, dstW, lumAlpha, chrAlpha, dstY);
1838 } else { //general RGB
1839 if(flags & SWS_FULL_CHR_H_INT) {
1841 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1842 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1843 alpSrcPtr, dest, dstW, dstY);
1846 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1847 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1848 alpSrcPtr, dest, dstW, dstY);
1852 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
/* last two output lines: force the plain C paths, same dispatch as above */
1853 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1854 const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1855 const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1856 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1857 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1858 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1859 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
1860 yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
1861 lumSrcPtr, vLumFilterSize,
1862 vChrFilter+chrDstY*vChrFilterSize,
1863 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1864 dest, uDest, dstW, chrDstW, dstFormat);
1865 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
1866 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1867 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1868 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
1869 yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1870 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1871 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
1874 yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
1875 lumSrcPtr, vLumFilterSize,
1876 vChrFilter+chrDstY*vChrFilterSize,
1877 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1878 alpSrcPtr, dest, uDest, vDest, aDest,
1882 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1883 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1884 if(flags & SWS_FULL_CHR_H_INT) {
1886 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1887 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1888 alpSrcPtr, dest, dstW, dstY);
1891 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1892 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1893 alpSrcPtr, dest, dstW, dstY);
/* fill the alpha plane with opaque when the source provided none */
1899 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1900 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* flush non-temporal stores issued by MMX2 code paths */
1903 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1904 __asm__ volatile("sfence":::"memory");
1908 /* store changed local vars back in the context */
1910 c->lumBufIndex= lumBufIndex;
1911 c->chrBufIndex= chrBufIndex;
1912 c->lastInLumBuf= lastInLumBuf;
1913 c->lastInChrBuf= lastInChrBuf;
1915 return dstY - lastDstY;
/* Populate the SwsContext function pointers with the plain-C
 * implementations: vertical output writers (yuv2*), the horizontal scaler,
 * the per-source-format input converters (lumToYV12/chrToYV12/alpToYV12),
 * input byte offsets, and the optional MPEG<->JPEG range converters.
 * CPU-specific overrides are installed afterwards by ff_getSwsFunc().
 * NOTE(review): this extract is missing interior lines (braces, some case
 * labels and switch statements); reconcile against upstream before editing. */
1918 static void sws_init_swScale_c(SwsContext *c)
1920 enum PixelFormat srcFormat = c->srcFormat,
1921 dstFormat = c->dstFormat;
/* vertical scaling / output writers, chosen by destination bit depth */
1923 c->yuv2nv12X = yuv2nv12X_c;
1924 if (is16BPS(dstFormat)) {
1925 c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1926 } else if (is9_OR_10BPS(dstFormat)) {
1927 if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
1928 c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
1930 c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1933 c->yuv2yuv1 = yuv2yuv1_c;
1934 c->yuv2yuvX = yuv2yuvX_c;
1936 c->yuv2packed1 = yuv2packed1_c;
1937 c->yuv2packed2 = yuv2packed2_c;
1938 c->yuv2packedX = yuv2packedX_c;
1940 c->hScale = hScale_c;
/* fast bilinear horizontal paths only when requested via flags */
1942 if (c->flags & SWS_FAST_BILINEAR)
1944 c->hyscale_fast = hyscale_fast_c;
1945 c->hcscale_fast = hcscale_fast_c;
/* per-source-format chroma input converter */
1948 c->chrToYV12 = NULL;
1950 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1951 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1952 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1953 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1957 case PIX_FMT_BGR4_BYTE:
1958 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1959 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1960 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1961 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1962 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1963 case PIX_FMT_YUV420P16BE:
1964 case PIX_FMT_YUV422P16BE:
1965 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1966 case PIX_FMT_YUV420P16LE:
1967 case PIX_FMT_YUV422P16LE:
1968 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* subsampled chroma uses the _half variants that average 2 pixels */
1970 if (c->chrSrcHSubSample) {
1972 case PIX_FMT_RGB48BE:
1973 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
1974 case PIX_FMT_BGR48BE:
1975 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
1976 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1977 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1978 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1979 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1980 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1981 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1982 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1983 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1984 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1985 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1989 case PIX_FMT_RGB48BE:
1990 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
1991 case PIX_FMT_BGR48BE:
1992 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
1993 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
1994 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1995 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
1996 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1997 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1998 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
1999 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
2000 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2001 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
2002 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
/* per-source-format luma and alpha input converters */
2006 c->lumToYV12 = NULL;
2007 c->alpToYV12 = NULL;
2008 switch (srcFormat) {
2009 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2010 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2011 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2012 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2013 case PIX_FMT_YUYV422 :
2014 case PIX_FMT_YUV420P16BE:
2015 case PIX_FMT_YUV422P16BE:
2016 case PIX_FMT_YUV444P16BE:
2017 case PIX_FMT_Y400A :
2018 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2019 case PIX_FMT_UYVY422 :
2020 case PIX_FMT_YUV420P16LE:
2021 case PIX_FMT_YUV422P16LE:
2022 case PIX_FMT_YUV444P16LE:
2023 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2024 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2025 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
2026 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
2027 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2028 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
2029 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
2033 case PIX_FMT_BGR4_BYTE:
2034 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2035 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2036 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2037 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2038 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2039 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2040 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2041 case PIX_FMT_RGB48BE:
2042 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
2043 case PIX_FMT_BGR48BE:
2044 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
2047 switch (srcFormat) {
2048 case PIX_FMT_RGB32 :
2049 case PIX_FMT_RGB32_1:
2050 case PIX_FMT_BGR32 :
2051 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
2052 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
/* byte offsets into each input pixel for the luma/chroma/alpha readers */
2056 switch (srcFormat) {
2057 case PIX_FMT_Y400A :
2058 c->alpSrcOffset = 1;
2060 case PIX_FMT_RGB32 :
2061 case PIX_FMT_BGR32 :
2062 c->alpSrcOffset = 3;
2064 case PIX_FMT_RGB48LE:
2065 case PIX_FMT_BGR48LE:
2066 c->lumSrcOffset = 1;
2067 c->chrSrcOffset = 1;
2068 c->alpSrcOffset = 1;
/* range conversion only applies to YUV outputs (RGB handles it in the
 * yuv2rgb tables) */
2072 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2074 c->lumConvertRange = lumRangeFromJpeg_c;
2075 c->chrConvertRange = chrRangeFromJpeg_c;
2077 c->lumConvertRange = lumRangeToJpeg_c;
2078 c->chrConvertRange = chrRangeToJpeg_c;
/* gray/mono sources have no chroma to scale horizontally */
2082 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2083 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2084 c->needs_hcscale = 1;
2087 SwsFunc ff_getSwsFunc(SwsContext *c)
2089 sws_init_swScale_c(c);
2092 ff_sws_init_swScale_mmx(c);
2094 ff_sws_init_swScale_altivec(c);