2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients (BT.601), scaled by 2^RGB2YUV_SHIFT
 * and rounded.  The 219/255 factor maps luma and the 224/255 factor maps
 * chroma from full-range RGB into limited-range YCbCr. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Floating-point RGB->YUV coefficient sets, one row per colorspace
 * (first three entries are the G/B/R luma weights, the rest chroma).
 * NOTE(review): row index presumably matches the SWS_CS_* identifiers —
 * confirm against swscale.h. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither matrices used when quantizing to low-depth RGB output.
 * The row selected is (output line) & (height-1); values are added to the
 * pixel before the table lookup. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 dither with amplitude 8 (for 5/6-bit components). */
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 dither with 16 levels (for 4-bit components, RGB444). */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 dither with 32 levels (RGB8 red/green components). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 dither with amplitude 73 (RGB4/RGB8 components). */
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 dither with amplitude 220 (monochrome / 1-bit output).
 * NOTE(review): dither_8x8_220 appears defined several times below with
 * different gamma-correction variants; in the full file these alternates
 * are selected by preprocessor conditionals elided from this extraction —
 * confirm against the original source before editing. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/**
 * Vertical filtering pass writing planar YUV(A) with more than 8 bits per
 * sample.  For each output pixel it accumulates lumFilterSize (luma/alpha)
 * or chrFilterSize (chroma) filter taps, then shifts by `shift` and clips
 * to `output_bits`, storing 16-bit samples in the requested endianness.
 * Alpha is written only when CONFIG_SWSCALE_ALPHA and aDest are set.
 */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
/* Store one sample: 16-bit output gets a full-range clip, narrower depths
 * clip to output_bits; big_endian selects AV_WB16 vs AV_WL16. */
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
/* Chroma planes, at the (possibly subsampled) chrDstW width. */
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
/* Optional alpha plane, filtered with the luma coefficients. */
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* Instantiate a depth- and endianness-specific wrapper (e.g.
 * yuv2yuvX9BE_c) around yuv2yuvX16_c_template; the uint8_t* plane
 * pointers are reinterpreted as uint16_t*. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/**
 * Dispatch to the bit-depth- and endianness-specific yuv2yuvX##bits##BE/LE_c
 * writer selected from dstFormat.
 */
288 static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
289 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
290 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
291 enum PixelFormat dstFormat)
/* Call the BE or LE variant for the given bit depth. */
293 #define conv16(bits) \
294 if (isBE(dstFormat)) { \
295 yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
296 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
298 dest, uDest, vDest, aDest, \
301 yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
302 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
304 dest, uDest, vDest, aDest, \
307 if (is16BPS(dstFormat)) {
309 } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
/**
 * Vertical filtering pass writing 8-bit planar YUV(A): accumulates the
 * filter taps per output pixel and clips (val>>19) into 0..255.
 */
317 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
318 const int16_t **lumSrc, int lumFilterSize,
319 const int16_t *chrFilter, const int16_t **chrUSrc,
320 const int16_t **chrVSrc,
321 int chrFilterSize, const int16_t **alpSrc,
322 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
323 uint8_t *aDest, int dstW, int chrDstW)
325 //FIXME Optimize (just quickly written not optimized..)
327 for (i=0; i<dstW; i++) {
330 for (j=0; j<lumFilterSize; j++)
331 val += lumSrc[j][i] * lumFilter[j];
333 dest[i]= av_clip_uint8(val>>19);
/* Chroma planes at chrDstW width. */
337 for (i=0; i<chrDstW; i++) {
341 for (j=0; j<chrFilterSize; j++) {
342 u += chrUSrc[j][i] * chrFilter[j];
343 v += chrVSrc[j][i] * chrFilter[j];
346 uDest[i]= av_clip_uint8(u>>19);
347 vDest[i]= av_clip_uint8(v>>19);
/* Optional alpha plane, filtered with the luma coefficients. */
350 if (CONFIG_SWSCALE_ALPHA && aDest)
351 for (i=0; i<dstW; i++) {
354 for (j=0; j<lumFilterSize; j++)
355 val += alpSrc[j][i] * lumFilter[j];
357 aDest[i]= av_clip_uint8(val>>19);
/**
 * Vertical filtering pass writing NV12/NV21: planar luma plus a single
 * interleaved chroma plane.  NV12 stores U in the even and V in the odd
 * bytes; the other branch writes the swapped (NV21) order.
 */
362 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
363 const int16_t **lumSrc, int lumFilterSize,
364 const int16_t *chrFilter, const int16_t **chrUSrc,
365 const int16_t **chrVSrc,
366 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
367 int dstW, int chrDstW, enum PixelFormat dstFormat)
369 //FIXME Optimize (just quickly written not optimized..)
371 for (i=0; i<dstW; i++) {
374 for (j=0; j<lumFilterSize; j++)
375 val += lumSrc[j][i] * lumFilter[j];
377 dest[i]= av_clip_uint8(val>>19);
/* NV12: U,V interleaved. */
383 if (dstFormat == PIX_FMT_NV12)
384 for (i=0; i<chrDstW; i++) {
388 for (j=0; j<chrFilterSize; j++) {
389 u += chrUSrc[j][i] * chrFilter[j];
390 v += chrVSrc[j][i] * chrFilter[j];
393 uDest[2*i]= av_clip_uint8(u>>19);
394 uDest[2*i+1]= av_clip_uint8(v>>19);
/* NV21: V,U interleaved. */
397 for (i=0; i<chrDstW; i++) {
401 for (j=0; j<chrFilterSize; j++) {
402 u += chrUSrc[j][i] * chrFilter[j];
403 v += chrVSrc[j][i] * chrFilter[j];
406 uDest[2*i]= av_clip_uint8(v>>19);
407 uDest[2*i+1]= av_clip_uint8(u>>19);
/* The YSCALE_YUV_2_* macro family below provides the inner loops of the
 * packed-output writers.  _PACKEDX_* apply full vertical filters (two luma
 * samples Y1/Y2 per shared U/V pair), _PACKED2_* blend two source rows with
 * yalpha/uvalpha weights, _PACKED1_* read a single row (_PACKED1B_* also
 * averages two chroma rows), _GRAY16_* produce 16-bit luma, _MONO* produce
 * dithered 1bpp output, and the _RGB* wrappers additionally look up the
 * per-component r/g/b tables from the computed Y/U/V.  They expand inside
 * the format switch of YSCALE_YUV_2_ANYRGB_C, which supplies i, i2, dest
 * and the source buffers. */
411 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
412 for (i=0; i<(dstW>>1); i++) {\
418 int av_unused A1, A2;\
419 type av_unused *r, *b, *g;\
422 for (j=0; j<lumFilterSize; j++) {\
423 Y1 += lumSrc[j][i2] * lumFilter[j];\
424 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
426 for (j=0; j<chrFilterSize; j++) {\
427 U += chrUSrc[j][i] * chrFilter[j];\
428 V += chrVSrc[j][i] * chrFilter[j];\
437 for (j=0; j<lumFilterSize; j++) {\
438 A1 += alpSrc[j][i2 ] * lumFilter[j];\
439 A2 += alpSrc[j][i2+1] * lumFilter[j];\
445 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
446 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
447 if ((Y1|Y2|U|V)&256) {\
448 if (Y1>255) Y1=255; \
449 else if (Y1<0)Y1=0; \
450 if (Y2>255) Y2=255; \
451 else if (Y2<0)Y2=0; \
457 if (alpha && ((A1|A2)&256)) {\
458 A1=av_clip_uint8(A1);\
459 A2=av_clip_uint8(A2);\
462 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
463 for (i=0; i<dstW; i++) {\
471 for (j=0; j<lumFilterSize; j++) {\
472 Y += lumSrc[j][i ] * lumFilter[j];\
474 for (j=0; j<chrFilterSize; j++) {\
475 U += chrUSrc[j][i] * chrFilter[j];\
476 V += chrVSrc[j][i] * chrFilter[j];\
483 for (j=0; j<lumFilterSize; j++)\
484 A += alpSrc[j][i ] * lumFilter[j];\
487 A = av_clip_uint8(A);\
490 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
491 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
492 Y-= c->yuv2rgb_y_offset;\
493 Y*= c->yuv2rgb_y_coeff;\
495 R= Y + V*c->yuv2rgb_v2r_coeff;\
496 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
497 B= Y + U*c->yuv2rgb_u2b_coeff;\
498 if ((R|G|B)&(0xC0000000)) {\
499 if (R>=(256<<22)) R=(256<<22)-1; \
501 if (G>=(256<<22)) G=(256<<22)-1; \
503 if (B>=(256<<22)) B=(256<<22)-1; \
507 #define YSCALE_YUV_2_GRAY16_C \
508 for (i=0; i<(dstW>>1); i++) {\
517 for (j=0; j<lumFilterSize; j++) {\
518 Y1 += lumSrc[j][i2] * lumFilter[j];\
519 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
523 if ((Y1|Y2|U|V)&65536) {\
524 if (Y1>65535) Y1=65535; \
525 else if (Y1<0)Y1=0; \
526 if (Y2>65535) Y2=65535; \
527 else if (Y2<0)Y2=0; \
530 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
531 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
532 r = (type *)c->table_rV[V]; \
533 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
534 b = (type *)c->table_bU[U];
536 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
537 for (i=0; i<(dstW>>1); i++) { \
539 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
540 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
541 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
542 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
543 type av_unused *r, *b, *g; \
544 int av_unused A1, A2; \
546 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
547 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
550 #define YSCALE_YUV_2_GRAY16_2_C \
551 for (i=0; i<(dstW>>1); i++) { \
553 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
554 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
556 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
557 YSCALE_YUV_2_PACKED2_C(type,alpha)\
558 r = (type *)c->table_rV[V];\
559 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
560 b = (type *)c->table_bU[U];
562 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
563 for (i=0; i<(dstW>>1); i++) {\
565 int Y1= buf0[i2 ]>>7;\
566 int Y2= buf0[i2+1]>>7;\
567 int U= (ubuf1[i])>>7;\
568 int V= (vbuf1[i])>>7;\
569 type av_unused *r, *b, *g;\
570 int av_unused A1, A2;\
576 #define YSCALE_YUV_2_GRAY16_1_C \
577 for (i=0; i<(dstW>>1); i++) {\
579 int Y1= buf0[i2 ]<<1;\
580 int Y2= buf0[i2+1]<<1;
582 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
583 YSCALE_YUV_2_PACKED1_C(type,alpha)\
584 r = (type *)c->table_rV[V];\
585 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
586 b = (type *)c->table_bU[U];
588 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
589 for (i=0; i<(dstW>>1); i++) {\
591 int Y1= buf0[i2 ]>>7;\
592 int Y2= buf0[i2+1]>>7;\
593 int U= (ubuf0[i] + ubuf1[i])>>8;\
594 int V= (vbuf0[i] + vbuf1[i])>>8;\
595 type av_unused *r, *b, *g;\
596 int av_unused A1, A2;\
602 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
603 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
604 r = (type *)c->table_rV[V];\
605 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
606 b = (type *)c->table_bU[U];
608 #define YSCALE_YUV_2_MONO2_C \
609 const uint8_t * const d128=dither_8x8_220[y&7];\
610 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
611 for (i=0; i<dstW-7; i+=8) {\
613 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
614 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
615 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
616 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
617 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
618 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
619 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
620 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
621 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
625 #define YSCALE_YUV_2_MONOX_C \
626 const uint8_t * const d128=dither_8x8_220[y&7];\
627 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
629 for (i=0; i<dstW-1; i+=2) {\
634 for (j=0; j<lumFilterSize; j++) {\
635 Y1 += lumSrc[j][i] * lumFilter[j];\
636 Y2 += lumSrc[j][i+1] * lumFilter[j];\
646 acc+= acc + g[Y1+d128[(i+0)&7]];\
647 acc+= acc + g[Y2+d128[(i+1)&7]];\
649 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Per-dstFormat dispatch: expands the supplied per-pixel loop macro once \
 * for each supported packed output format (48-bit RGB/BGR, 32/24-bit, \
 * dithered 16/15/12/8/4/1-bit, YUYV/UYVY, GRAY16BE/LE). */ \
654 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
655 switch(c->dstFormat) {\
656 case PIX_FMT_RGB48BE:\
657 case PIX_FMT_RGB48LE:\
659 ((uint8_t*)dest)[ 0]= r[Y1];\
660 ((uint8_t*)dest)[ 1]= r[Y1];\
661 ((uint8_t*)dest)[ 2]= g[Y1];\
662 ((uint8_t*)dest)[ 3]= g[Y1];\
663 ((uint8_t*)dest)[ 4]= b[Y1];\
664 ((uint8_t*)dest)[ 5]= b[Y1];\
665 ((uint8_t*)dest)[ 6]= r[Y2];\
666 ((uint8_t*)dest)[ 7]= r[Y2];\
667 ((uint8_t*)dest)[ 8]= g[Y2];\
668 ((uint8_t*)dest)[ 9]= g[Y2];\
669 ((uint8_t*)dest)[10]= b[Y2];\
670 ((uint8_t*)dest)[11]= b[Y2];\
674 case PIX_FMT_BGR48BE:\
675 case PIX_FMT_BGR48LE:\
677 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
678 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
679 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
680 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
681 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
682 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
689 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
690 func(uint32_t,needAlpha)\
691 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
692 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
695 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
697 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
698 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
702 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
703 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
711 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
712 func(uint32_t,needAlpha)\
713 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
714 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
717 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
719 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
720 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
724 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
725 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
732 ((uint8_t*)dest)[0]= r[Y1];\
733 ((uint8_t*)dest)[1]= g[Y1];\
734 ((uint8_t*)dest)[2]= b[Y1];\
735 ((uint8_t*)dest)[3]= r[Y2];\
736 ((uint8_t*)dest)[4]= g[Y2];\
737 ((uint8_t*)dest)[5]= b[Y2];\
743 ((uint8_t*)dest)[0]= b[Y1];\
744 ((uint8_t*)dest)[1]= g[Y1];\
745 ((uint8_t*)dest)[2]= r[Y1];\
746 ((uint8_t*)dest)[3]= b[Y2];\
747 ((uint8_t*)dest)[4]= g[Y2];\
748 ((uint8_t*)dest)[5]= r[Y2];\
752 case PIX_FMT_RGB565BE:\
753 case PIX_FMT_RGB565LE:\
754 case PIX_FMT_BGR565BE:\
755 case PIX_FMT_BGR565LE:\
757 const int dr1= dither_2x2_8[y&1 ][0];\
758 const int dg1= dither_2x2_4[y&1 ][0];\
759 const int db1= dither_2x2_8[(y&1)^1][0];\
760 const int dr2= dither_2x2_8[y&1 ][1];\
761 const int dg2= dither_2x2_4[y&1 ][1];\
762 const int db2= dither_2x2_8[(y&1)^1][1];\
764 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
765 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
769 case PIX_FMT_RGB555BE:\
770 case PIX_FMT_RGB555LE:\
771 case PIX_FMT_BGR555BE:\
772 case PIX_FMT_BGR555LE:\
774 const int dr1= dither_2x2_8[y&1 ][0];\
775 const int dg1= dither_2x2_8[y&1 ][1];\
776 const int db1= dither_2x2_8[(y&1)^1][0];\
777 const int dr2= dither_2x2_8[y&1 ][1];\
778 const int dg2= dither_2x2_8[y&1 ][0];\
779 const int db2= dither_2x2_8[(y&1)^1][1];\
781 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
782 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
786 case PIX_FMT_RGB444BE:\
787 case PIX_FMT_RGB444LE:\
788 case PIX_FMT_BGR444BE:\
789 case PIX_FMT_BGR444LE:\
791 const int dr1= dither_4x4_16[y&3 ][0];\
792 const int dg1= dither_4x4_16[y&3 ][1];\
793 const int db1= dither_4x4_16[(y&3)^3][0];\
794 const int dr2= dither_4x4_16[y&3 ][1];\
795 const int dg2= dither_4x4_16[y&3 ][0];\
796 const int db2= dither_4x4_16[(y&3)^3][1];\
798 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
799 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
806 const uint8_t * const d64= dither_8x8_73[y&7];\
807 const uint8_t * const d32= dither_8x8_32[y&7];\
809 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
810 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
817 const uint8_t * const d64= dither_8x8_73 [y&7];\
818 const uint8_t * const d128=dither_8x8_220[y&7];\
820 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
821 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
825 case PIX_FMT_RGB4_BYTE:\
826 case PIX_FMT_BGR4_BYTE:\
828 const uint8_t * const d64= dither_8x8_73 [y&7];\
829 const uint8_t * const d128=dither_8x8_220[y&7];\
831 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
832 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
836 case PIX_FMT_MONOBLACK:\
837 case PIX_FMT_MONOWHITE:\
842 case PIX_FMT_YUYV422:\
844 ((uint8_t*)dest)[2*i2+0]= Y1;\
845 ((uint8_t*)dest)[2*i2+1]= U;\
846 ((uint8_t*)dest)[2*i2+2]= Y2;\
847 ((uint8_t*)dest)[2*i2+3]= V;\
850 case PIX_FMT_UYVY422:\
852 ((uint8_t*)dest)[2*i2+0]= U;\
853 ((uint8_t*)dest)[2*i2+1]= Y1;\
854 ((uint8_t*)dest)[2*i2+2]= V;\
855 ((uint8_t*)dest)[2*i2+3]= Y2;\
858 case PIX_FMT_GRAY16BE:\
860 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
861 ((uint8_t*)dest)[2*i2+1]= Y1;\
862 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
863 ((uint8_t*)dest)[2*i2+3]= Y2;\
866 case PIX_FMT_GRAY16LE:\
868 ((uint8_t*)dest)[2*i2+0]= Y1;\
869 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
870 ((uint8_t*)dest)[2*i2+2]= Y2;\
871 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/**
 * Vertical multi-tap filter straight into a packed pixel format; the
 * per-format store loop is expanded by YSCALE_YUV_2_ANYRGB_C using the
 * full-filter (_RGBX/_PACKEDX) inner loops.
 */
876 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
877 const int16_t **lumSrc, int lumFilterSize,
878 const int16_t *chrFilter, const int16_t **chrUSrc,
879 const int16_t **chrVSrc, int chrFilterSize,
880 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
883 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/**
 * Vertical multi-tap filter to packed RGB using per-pixel (non-subsampled)
 * chroma, via YSCALE_YUV_2_RGBX_FULL_C; writes `step` bytes per pixel and
 * fills the alpha byte with the filtered alpha or 255.
 */
886 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
887 const int16_t **lumSrc, int lumFilterSize,
888 const int16_t *chrFilter, const int16_t **chrUSrc,
889 const int16_t **chrVSrc, int chrFilterSize,
890 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
893 int step= c->dstFormatBpp/8;
896 switch(c->dstFormat) {
904 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
905 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
906 dest[aidx]= needAlpha ? A : 255;
913 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
914 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
922 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
939 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
940 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
941 dest[aidx]= needAlpha ? A : 255;
948 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
949 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
957 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/**
 * Fill `height` rows of `plane`, starting at row `y`, with the byte
 * value `val` (rows are `stride` bytes apart, `width` bytes each).
 */
972 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
975 uint8_t *ptr = plane + stride*y;
976 for (i=0; i<height; i++) {
977 memset(ptr, val, width);
/* Generate the 48-bit RGB/BGR input readers (e.g. rgb48LEToY_c,
 * bgr48BEToUV_half_c): each 16-bit component is read with rfunc and
 * reduced to 8 bits (>>8); the compA/compB/compC token arguments bind
 * the component order (r,g,b or b,g,r).  The ToY_c variant computes luma,
 * ToUV_c chroma, and ToUV_half_c chroma from horizontally averaged pixel
 * pairs. */
982 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
983 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
984 uint8_t *dst, const uint8_t *src, int width, \
988 for (i = 0; i < width; i++) { \
989 int compA = rfunc(&src[i*6+0]) >> 8; \
990 int compB = rfunc(&src[i*6+2]) >> 8; \
991 int compC = rfunc(&src[i*6+4]) >> 8; \
993 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
997 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
998 uint8_t *dstU, uint8_t *dstV, \
999 const uint8_t *src1, const uint8_t *src2, \
1000 int width, uint32_t *unused) \
1003 assert(src1==src2); \
1004 for (i = 0; i < width; i++) { \
1005 int compA = rfunc(&src1[6*i + 0]) >> 8; \
1006 int compB = rfunc(&src1[6*i + 2]) >> 8; \
1007 int compC = rfunc(&src1[6*i + 4]) >> 8; \
1009 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1010 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1014 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
1015 uint8_t *dstU, uint8_t *dstV, \
1016 const uint8_t *src1, const uint8_t *src2, \
1017 int width, uint32_t *unused) \
1020 assert(src1==src2); \
1021 for (i = 0; i < width; i++) { \
1022 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
1023 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
1024 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1026 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1027 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1030 rgb48funcs(LE, AV_RL16, r, g, b);
1031 rgb48funcs(BE, AV_RB16, r, g, b);
1032 rgb48funcs(LE, AV_RL16, b, g, r);
1033 rgb48funcs(BE, AV_RB16, b, g, r);
/* Generate packed-RGB/BGR -> luma readers: each component is extracted by
 * shift (shr/shg/shb) and mask, then weighted with the (pre-shifted)
 * coefficients and rounded down from S fractional bits.  Instantiated
 * below for 32/16/15-bit layouts in both channel orders. */
1035 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1036 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1037 int width, uint32_t *unused)\
1040 for (i=0; i<width; i++) {\
1041 int b= (((const type*)src)[i]>>shb)&maskb;\
1042 int g= (((const type*)src)[i]>>shg)&maskg;\
1043 int r= (((const type*)src)[i]>>shr)&maskr;\
1045 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1049 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1050 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1051 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1052 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1053 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1054 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1055 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1056 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel of packed ABGR pixels into dst.
 * NOTE(review): the loop body is elided in this extraction; presumably it
 * copies the alpha byte of each 32-bit pixel — confirm against the full
 * file. */
1058 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1061 for (i=0; i<width; i++) {
/* Extract the alpha channel of packed RGBA pixels into dst (body elided
 * here as above). */
1066 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1069 for (i=0; i<width; i++) {
/* Generate packed-RGB/BGR -> chroma readers.  The plain variant converts
 * one pixel per output sample; the _half variant sums two horizontally
 * adjacent pixels (masking tricks keep the packed components separated)
 * and averages in the final shift.  Instantiated below for 32/16/15-bit
 * layouts in both channel orders. */
1074 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1075 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1076 const uint8_t *src, const uint8_t *dummy, \
1077 int width, uint32_t *unused)\
1080 for (i=0; i<width; i++) {\
1081 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1082 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1083 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1085 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1086 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1089 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1090 const uint8_t *src, const uint8_t *dummy, \
1091 int width, uint32_t *unused)\
1094 for (i=0; i<width; i++) {\
1095 int pix0= ((const type*)src)[2*i+0]>>shp;\
1096 int pix1= ((const type*)src)[2*i+1]>>shp;\
1097 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1098 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1099 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1100 g&= maskg|(2*maskg);\
1104 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1105 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1109 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1110 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1111 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1112 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1113 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1114 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1115 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1116 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 input: look up each index in the 32-bit palette and take the low
 * byte as luma. */
1118 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1121 for (i=0; i<width; i++) {
1124 dst[i]= pal[d] & 0xFF;
/* PAL8 input: look up each index in the shared palette (both sources must
 * be the same plane) and split out the chroma bytes. */
1128 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1129 const uint8_t *src1, const uint8_t *src2,
1130 int width, uint32_t *pal)
1133 assert(src1 == src2);
1134 for (i=0; i<width; i++) {
1135 int p= pal[src1[i]];
/* Expand 1bpp bitmap input to 8-bit luma, MSB first: each bit becomes a
 * full byte (0 or 255). */
1142 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1143 int width, uint32_t *unused)
1146 for (i=0; i<width/8; i++) {
1149 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* Same bit-to-byte expansion for the black-is-zero variant. */
1153 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1154 int width, uint32_t *unused)
1157 for (i=0; i<width/8; i++) {
1160 dst[8*i+j]= ((d>>(7-j))&1)*255;
/**
 * Unscaled (1:1) vertical pass: convert the 16-bit intermediate samples
 * to 8-bit planar YUV(A) with rounding ((x+64)>>7) and clipping.
 */
1164 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1165 const int16_t *chrUSrc, const int16_t *chrVSrc,
1166 const int16_t *alpSrc,
1167 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1168 uint8_t *aDest, int dstW, int chrDstW)
1171 for (i=0; i<dstW; i++) {
1172 int val= (lumSrc[i]+64)>>7;
1173 dest[i]= av_clip_uint8(val);
/* Chroma planes at chrDstW width. */
1177 for (i=0; i<chrDstW; i++) {
1178 int u=(chrUSrc[i]+64)>>7;
1179 int v=(chrVSrc[i]+64)>>7;
1180 uDest[i]= av_clip_uint8(u);
1181 vDest[i]= av_clip_uint8(v);
/* Optional alpha plane. */
1184 if (CONFIG_SWSCALE_ALPHA && aDest)
1185 for (i=0; i<dstW; i++) {
1186 int val= (alpSrc[i]+64)>>7;
1187 aDest[i]= av_clip_uint8(val);
1192 * vertical bilinear scale YV12 to RGB
1194 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1195 const uint16_t *buf1, const uint16_t *ubuf0,
1196 const uint16_t *ubuf1, const uint16_t *vbuf0,
1197 const uint16_t *vbuf1, const uint16_t *abuf0,
1198 const uint16_t *abuf1, uint8_t *dest, int dstW,
1199 int yalpha, int uvalpha, int y)
/* Blend weights: yalpha/uvalpha weight buf1, the 4095-complement weights
 * buf0 (12-bit fixed point). */
1201 int yalpha1=4095- yalpha;
1202 int uvalpha1=4095-uvalpha;
1205 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1209 * YV12 to RGB without scaling or interpolating
1211 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1212 const uint16_t *ubuf0, const uint16_t *ubuf1,
1213 const uint16_t *vbuf0, const uint16_t *vbuf1,
1214 const uint16_t *abuf0, uint8_t *dest, int dstW,
1215 int uvalpha, enum PixelFormat dstFormat,
1218 const int yalpha1=0;
1221 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1222 const int yalpha= 4096; //FIXME ...
/* uvalpha < 2048: chroma taken from a single row (_PACKED1_C); otherwise
 * the two chroma rows are averaged (_PACKED1B_C). */
1224 if (uvalpha < 2048) {
1225 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1227 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1231 //FIXME yuy2* can read up to 7 samples too much
/* Extract luma from packed YUYV: the Y samples are the even bytes. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i = 0;
    while (i < width) {
        dst[i] = src[2 * i];
        i++;
    }
}
/* Deinterleave chroma from packed YUYV: U at offset 1, V at offset 3 of
 * every 4-byte group. Both source pointers must refer to the same line. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
/* Take the high byte of little-endian 16-bit samples from two separate
 * planes (U from src1, V from src2). */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[2 * n + 1];
        dstV[n] = src2[2 * n + 1];
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n = 0;
    while (n < width) {
        dst[n] = src[2 * n + 1];   /* Y samples are the odd bytes in UYVY */
        n++;
    }
}
/* Deinterleave chroma from packed UYVY: U at offset 0, V at offset 2 of
 * every 4-byte group. Both source pointers must refer to the same line. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/* Take the high byte of big-endian 16-bit samples from two separate
 * planes (U from src1, V from src2). */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[2 * n];
        dstV[n] = src2[2 * n];
    }
}
1293 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1294 const uint8_t *src, int width)
1297 for (i = 0; i < width; i++) {
1298 dst1[i] = src[2*i+0];
1299 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is ordered U,V — deinterleave accordingly. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* src2 is unused; NV formats carry both chroma channels in src1 */
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/* NV21: interleaved chroma is ordered V,U — swap the destinations. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* src2 is unused; NV formats carry both chroma channels in src1 */
    nvXXtoUV_c(dstV, dstU, src1, width);
}
// FIXME Maybe dither instead.
/* Depth-reduction reader generator: for each (depth, endianness) pair this
 * expands to one ToUV and one ToY function that read 16-bit samples with
 * rfunc (AV_RL16/AV_RB16) and shift them down to 8 bits (depth-8). */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \

/* Instantiate the 9- and 10-bit little/big-endian readers
 * (LE9ToY_c, BE10ToUV_c, ...). */
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
1346 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1347 int width, uint32_t *unused)
1350 for (i=0; i<width; i++) {
1355 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1359 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1360 const uint8_t *src2, int width, uint32_t *unused)
1363 for (i=0; i<width; i++) {
1364 int b= src1[3*i + 0];
1365 int g= src1[3*i + 1];
1366 int r= src1[3*i + 2];
1368 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1369 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1371 assert(src1 == src2);
1374 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1375 const uint8_t *src2, int width, uint32_t *unused)
1378 for (i=0; i<width; i++) {
1379 int b= src1[6*i + 0] + src1[6*i + 3];
1380 int g= src1[6*i + 1] + src1[6*i + 4];
1381 int r= src1[6*i + 2] + src1[6*i + 5];
1383 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1384 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1386 assert(src1 == src2);
1389 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1393 for (i=0; i<width; i++) {
1398 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1402 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1403 const uint8_t *src2, int width, uint32_t *unused)
1407 for (i=0; i<width; i++) {
1408 int r= src1[3*i + 0];
1409 int g= src1[3*i + 1];
1410 int b= src1[3*i + 2];
1412 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1413 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1417 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1418 const uint8_t *src2, int width, uint32_t *unused)
1422 for (i=0; i<width; i++) {
1423 int r= src1[6*i + 0] + src1[6*i + 3];
1424 int g= src1[6*i + 1] + src1[6*i + 4];
1425 int b= src1[6*i + 2] + src1[6*i + 5];
1427 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1428 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
// bilinear / bicubic scaling
/* Generic horizontal scaler: for each destination sample, apply an
 * filterSize-tap FIR filter to 8-bit source samples starting at
 * filterPos[i]. Filter coefficients carry 7 fractional bits; the result
 * is clipped to INT16_MAX because the cubic filter can overflow. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        const int srcPos = filterPos[i];
        int acc = 0;
        int j;
        for (j = 0; j < filterSize; j++)
            acc += ((int)src[srcPos + j]) * filter[filterSize * i + j];
        acc >>= 7;
        /* clip: the cubic equation does overflow */
        dst[i] = acc < (1 << 15) - 1 ? acc : (1 << 15) - 1;
    }
}
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma to full (JPEG) range in the 16-bit
 * intermediate domain; inputs are clamped to 30775 first so the
 * fixed-point multiply cannot overflow. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        const int u = dstU[i] < 30775 ? dstU[i] : 30775;
        const int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u * 4663 - 9289992) >> 12;   //-264
        dstV[i] = (v * 4663 - 9289992) >> 12;   //-264
    }
}
/* Compress full (JPEG) range chroma to limited range in the 16-bit
 * intermediate domain (scale by 1799/2048 and re-center). */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i = 0;
    while (i < width) {
        dstU[i] = (dstU[i] * 1799 + 4081085) >> 11;   //1469
        dstV[i] = (dstV[i] * 1799 + 4081085) >> 11;   //1469
        i++;
    }
}
/* Expand limited-range luma to full (JPEG) range in the 16-bit
 * intermediate domain; inputs are clamped to 30189 first so the
 * fixed-point multiply cannot overflow. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        const int y = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full (JPEG) range luma to limited range in the 16-bit
 * intermediate domain (scale by 14071/16384 and add the black offset). */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i = 0;
    while (i < width) {
        dst[i] = (dst[i] * 14071 + 33561947) >> 14;
        i++;
    }
}
1484 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1485 const uint8_t *src, int srcW, int xInc)
1488 unsigned int xpos=0;
1489 for (i=0;i<dstWidth;i++) {
1490 register unsigned int xx=xpos>>16;
1491 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1492 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) input line into dst.
 * If a per-format converter is installed the line is first converted to
 * planar 8-bit in formatConvBuffer; then either the generic hScale FIR
 * path or the fast bilinear path runs, and finally the luma range
 * conversion (full <-> limited) is applied when configured. */
static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc,
                           const int16_t *hLumFilter,
                           const int16_t *hLumFilterPos, int hLumFilterSize,
                           uint8_t *formatConvBuffer,
                           uint32_t *pal, int isAlpha)
    /* select per-plane converters; alpha has no range conversion */
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
        /* convert the input line to planar 8-bit first */
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        /* optional full<->limited range conversion */
        convertRange(dst, dstWidth);
1523 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1524 int dstWidth, const uint8_t *src1,
1525 const uint8_t *src2, int srcW, int xInc)
1528 unsigned int xpos=0;
1529 for (i=0;i<dstWidth;i++) {
1530 register unsigned int xx=xpos>>16;
1531 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1532 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1533 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one pair of chroma input lines into dst1/dst2.
 * If a chrToYV12 converter is installed, both planes are first converted
 * into formatConvBuffer (U) and buf2 (V); then either the generic hScale
 * FIR path or the fast bilinear path runs, followed by the optional
 * chroma range conversion. */
static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                           const uint8_t *src1, const uint8_t *src2,
                           int srcW, int xInc, const int16_t *hChrFilter,
                           const int16_t *hChrFilterPos, int hChrFilterSize,
                           uint8_t *formatConvBuffer, uint32_t *pal)
        /* V goes into the 16-byte-aligned second half of the scratch buffer */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    /* optional full<->limited range conversion */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Set DEBUG_SWSCALE_BUFFERS to 1 for verbose ring-buffer tracing;
 * with 0 the DEBUG_BUFFERS() calls compile away to nothing.
 * NOTE: the macro expects a variable `c` (the SwsContext) in scope. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Core scaling loop.
 * Horizontally scales the input lines required for each output line into
 * the luma/chroma ring buffers, then runs the vertical scale/convert
 * stage into the destination planes. Operates on one slice of srcSliceH
 * input lines starting at srcSliceY; ring-buffer state is kept in the
 * context between calls so slices may arrive incrementally.
 * @return the number of destination lines written (dstY - lastDstY).
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    /* chroma slice coordinates, rounded toward the containing lines */
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;
    /* packed input: make all plane pointers/strides refer to plane 0 */
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
    /* warn once when output strides defeat aligned stores */
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");
    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
        /* input-line window needed to produce this output line */
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        if (!enough_lines) {
            /* clamp to the end of the slice and just buffer what we have */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            /* scale one luma line (and the alpha line, if present) into
             * the ring buffer */
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        /* vertical scale/convert stage; the last two lines use the C-only
         * path below so MMX code cannot overrun the buffers */
        if (dstY < dstH-2) {
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                dest, uDest, dstW, chrDstW, dstFormat);
            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                                uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                    vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                    vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                    chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    if(flags & SWS_FULL_CHR_H_INT) {
                        yuv2rgbX_c_full(c, //FIXME write a packed1_full function
                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
                                        chrVSrcPtr, vChrFilterSize,
                                        alpSrcPtr, dest, dstW, dstY);
                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                       alpPixBuf ? *alpSrcPtr : NULL,
                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    if(flags & SWS_FULL_CHR_H_INT) {
                        yuv2rgbX_c_full(c, //FIXME write a packed2_full function
                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                        alpSrcPtr, dest, dstW, dstY);
                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                       dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    if(flags & SWS_FULL_CHR_H_INT) {
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);
        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
                            lumSrcPtr, vLumFilterSize,
                            vChrFilter+chrDstY*vChrFilterSize,
                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                            dest, uDest, dstW, chrDstW, dstFormat);
            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
                    yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
                               lumSrcPtr, vLumFilterSize,
                               vChrFilter+chrDstY*vChrFilterSize,
                               chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                               alpSrcPtr, dest, uDest, vDest, aDest,
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if(flags & SWS_FULL_CHR_H_INT) {
                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, dstW, dstY);
                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                    alpSrcPtr, dest, dstW, dstY);
    /* YUVA output without an alpha source: fill the alpha plane opaque */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
    /* store fence so all (possibly non-temporal) MMX2 stores are visible */
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;
    return dstY - lastDstY;
/* Populate the C (reference) implementations of all function pointers in
 * the context: vertical output stages, horizontal scalers, per-format
 * input converters for luma/chroma/alpha, and the optional range
 * conversions. Arch-specific init (MMX/Altivec) may override these later
 * in ff_getSwsFunc(). */
static void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat,
                     dstFormat = c->dstFormat;
    /* vertical output stage, picked by destination bit depth */
    c->yuv2nv12X = yuv2nv12X_c;
    if (is16BPS(dstFormat)) {
        c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
    c->yuv2yuv1 = yuv2yuv1_c;
    c->yuv2yuvX = yuv2yuvX_c;
    c->yuv2packed1 = yuv2packed1_c;
    c->yuv2packed2 = yuv2packed2_c;
    c->yuv2packedX = yuv2packedX_c;
    /* horizontal scalers */
    c->hScale = hScale_c;
    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;
    /* per-source-format chroma input converters */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
    case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
    case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
    case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    /* RGB sources: the _half_ variants decimate chroma horizontally while
     * converting, used when the source is horizontally subsampled */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
    /* per-source-format luma and alpha input converters */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    /* yuy2ToY/uyvyToY double as generic "every other byte" readers for
     * 16-bit gray and planar 16-bit formats */
    case PIX_FMT_YUYV422 :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    /* Y400A: alpha is the odd byte, so the UYVY luma reader fits */
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    /* full <-> limited range conversion, only for YUV outputs */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        c->lumConvertRange = lumRangeFromJpeg_c;
        c->chrConvertRange = chrRangeFromJpeg_c;
        c->lumConvertRange = lumRangeToJpeg_c;
        c->chrConvertRange = chrRangeToJpeg_c;
    /* gray/mono sources have no chroma to scale */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2037 SwsFunc ff_getSwsFunc(SwsContext *c)
2039 sws_init_swScale_c(c);
2042 ff_sws_init_swScale_mmx(c);
2044 ff_sws_init_swScale_altivec(c);