2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in fixed point with RGB2YUV_SHIFT
 * fractional bits.  Luma weights (BY/GY/RY) are scaled to the 219-level
 * studio luma range, chroma weights (BU/GU/RU, BV/GV/RV) to the
 * 224-level chroma range; the base constants are the ITU-R BT.601
 * weights (0.299/0.587/0.114 etc.). */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))

/* Per-colorspace RGB->YUV coefficients, one row per colorspace id.
 * Judging by the BT.601 rows the column order is
 * {GY,BY,RY, GU,BU,RU, GV,BV,RV} -- NOTE(review): confirm against the
 * consumer of this table, which is not visible in this chunk. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither matrices used by the low-bit-depth RGB/BGR output
 * writers below: the 2x2 tables feed the 15/16 bpp writers, the 4x4
 * table the 12 bpp (444) writer, and the 8x8 tables the 8 bpp, 4 bpp
 * and monochrome writers.
 * NOTE(review): several definitions named dither_8x8_220 appear below;
 * in the complete file they are mutually exclusive alternatives chosen
 * by preprocessor conditionals that are missing from this chunk. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },

114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },

119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },

126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },

137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },

/* Linear (no gamma correction) 220-level 8x8 dither matrix. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },

160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },

172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },

184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* yuv2yuvX16_c_template(): vertical scaling/filtering of planar YUV
 * (plus optional alpha when CONFIG_SWSCALE_ALPHA and aDest are set)
 * into planar output with more than 8 bits per component.  Each output
 * sample is the sum of src[j][i]*filter[j], shifted down by
 * (11 + 16 - output_bits) and clipped.  big_endian and output_bits are
 * compile-time constants, so each instantiation via yuv2NBPS() below
 * collapses into a specialised function.
 * NOTE(review): several lines of the original body (braces, #else/
 * #undef of output_pixel) are missing from this chunk. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
/* output_pixel(): store one sample, byte order chosen by big_endian,
 * clipping to 16 bits or to output_bits as appropriate. */
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* yuv2NBPS(): stamps out a yuv2yuvX<bits><BE/LE>_c wrapper that casts
 * the byte pointers to uint16_t* and calls the template above with the
 * matching endianness/bit-depth constants. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/* yuv2yuvX_c(): reference C vertical filter for 8-bit planar YUV(A)
 * output; each sample is the filtered sum shifted down by 19 and
 * clipped to 8 bits. */
288 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/* yuv2nv12X_c(): same vertical filter, but chroma is written
 * interleaved into uDest: U,V pairs for NV12, V,U pairs otherwise
 * (NV21). */
333 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
334 const int16_t **lumSrc, int lumFilterSize,
335 const int16_t *chrFilter, const int16_t **chrUSrc,
336 const int16_t **chrVSrc, int chrFilterSize,
337 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
338 uint8_t *vDest, uint8_t *aDest,
339 int dstW, int chrDstW)
341 enum PixelFormat dstFormat = c->dstFormat;
343 //FIXME Optimize (just quickly written not optimized..)
345 for (i=0; i<dstW; i++) {
348 for (j=0; j<lumFilterSize; j++)
349 val += lumSrc[j][i] * lumFilter[j];
351 dest[i]= av_clip_uint8(val>>19);
357 if (dstFormat == PIX_FMT_NV12)
358 for (i=0; i<chrDstW; i++) {
362 for (j=0; j<chrFilterSize; j++) {
363 u += chrUSrc[j][i] * chrFilter[j];
364 v += chrVSrc[j][i] * chrFilter[j];
367 uDest[2*i]= av_clip_uint8(u>>19);
368 uDest[2*i+1]= av_clip_uint8(v>>19);
371 for (i=0; i<chrDstW; i++) {
375 for (j=0; j<chrFilterSize; j++) {
376 u += chrUSrc[j][i] * chrFilter[j];
377 v += chrVSrc[j][i] * chrFilter[j];
380 uDest[2*i]= av_clip_uint8(v>>19);
381 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Inner-loop helper macros for the packed-pixel output writers.
 * Naming scheme:
 *   PACKEDX  - run the full horizontal filter bank (X-tap) per pixel
 *   PACKED2  - blend two source lines with 12-bit weights
 *              (vertical bilinear)
 *   PACKED1  - read a single source line, PACKED1B additionally
 *              averages the chroma of two lines
 *   RGB*     - the matching PACKED* plus lookups into the per-
 *              component tables c->table_rV/gU/gV/bU
 *   GRAY16_* - luma-only variants with 16-bit headroom
 *   MONO*    - 1 bpp output with 8x8 ordered dither
 * NOTE(review): many continuation lines of these macros are missing
 * from this chunk, so several bodies below are visibly truncated. */
385 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
386 for (i=0; i<(dstW>>1); i++) {\
392 int av_unused A1, A2;\
393 type av_unused *r, *b, *g;\
396 for (j=0; j<lumFilterSize; j++) {\
397 Y1 += lumSrc[j][i2] * lumFilter[j];\
398 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
400 for (j=0; j<chrFilterSize; j++) {\
401 U += chrUSrc[j][i] * chrFilter[j];\
402 V += chrVSrc[j][i] * chrFilter[j];\
411 for (j=0; j<lumFilterSize; j++) {\
412 A1 += alpSrc[j][i2 ] * lumFilter[j];\
413 A2 += alpSrc[j][i2+1] * lumFilter[j];\
419 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
420 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
421 if ((Y1|Y2|U|V)&256) {\
422 if (Y1>255) Y1=255; \
423 else if (Y1<0)Y1=0; \
424 if (Y2>255) Y2=255; \
425 else if (Y2<0)Y2=0; \
431 if (alpha && ((A1|A2)&256)) {\
432 A1=av_clip_uint8(A1);\
433 A2=av_clip_uint8(A2);\
436 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
437 for (i=0; i<dstW; i++) {\
445 for (j=0; j<lumFilterSize; j++) {\
446 Y += lumSrc[j][i ] * lumFilter[j];\
448 for (j=0; j<chrFilterSize; j++) {\
449 U += chrUSrc[j][i] * chrFilter[j];\
450 V += chrVSrc[j][i] * chrFilter[j];\
457 for (j=0; j<lumFilterSize; j++)\
458 A += alpSrc[j][i ] * lumFilter[j];\
461 A = av_clip_uint8(A);\
464 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
465 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
466 Y-= c->yuv2rgb_y_offset;\
467 Y*= c->yuv2rgb_y_coeff;\
469 R= Y + V*c->yuv2rgb_v2r_coeff;\
470 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
471 B= Y + U*c->yuv2rgb_u2b_coeff;\
472 if ((R|G|B)&(0xC0000000)) {\
473 if (R>=(256<<22)) R=(256<<22)-1; \
475 if (G>=(256<<22)) G=(256<<22)-1; \
477 if (B>=(256<<22)) B=(256<<22)-1; \
481 #define YSCALE_YUV_2_GRAY16_C \
482 for (i=0; i<(dstW>>1); i++) {\
491 for (j=0; j<lumFilterSize; j++) {\
492 Y1 += lumSrc[j][i2] * lumFilter[j];\
493 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
497 if ((Y1|Y2|U|V)&65536) {\
498 if (Y1>65535) Y1=65535; \
499 else if (Y1<0)Y1=0; \
500 if (Y2>65535) Y2=65535; \
501 else if (Y2<0)Y2=0; \
504 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
505 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
506 r = (type *)c->table_rV[V]; \
507 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
508 b = (type *)c->table_bU[U];
/* Vertical bilinear: blend buf0/buf1 (luma, weights yalpha1/yalpha)
 * and ubuf0..vbuf1 (chroma, weights uvalpha1/uvalpha). */
510 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
511 for (i=0; i<(dstW>>1); i++) { \
513 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
514 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
515 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
516 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
517 type av_unused *r, *b, *g; \
518 int av_unused A1, A2; \
520 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
521 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
524 #define YSCALE_YUV_2_GRAY16_2_C \
525 for (i=0; i<(dstW>>1); i++) { \
527 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
528 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
/* PACKED2 plus RGB table lookups. */
530 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
531 YSCALE_YUV_2_PACKED2_C(type,alpha)\
532 r = (type *)c->table_rV[V];\
533 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
534 b = (type *)c->table_bU[U];
/* Single-line input, no vertical blend (chroma from ubuf1/vbuf1). */
536 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
537 for (i=0; i<(dstW>>1); i++) {\
539 int Y1= buf0[i2 ]>>7;\
540 int Y2= buf0[i2+1]>>7;\
541 int U= (ubuf1[i])>>7;\
542 int V= (vbuf1[i])>>7;\
543 type av_unused *r, *b, *g;\
544 int av_unused A1, A2;\
550 #define YSCALE_YUV_2_GRAY16_1_C \
551 for (i=0; i<(dstW>>1); i++) {\
553 int Y1= buf0[i2 ]<<1;\
554 int Y2= buf0[i2+1]<<1;
/* PACKED1 plus RGB table lookups. */
556 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
557 YSCALE_YUV_2_PACKED1_C(type,alpha)\
558 r = (type *)c->table_rV[V];\
559 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
560 b = (type *)c->table_bU[U];
/* Single luma line; chroma averaged from ubuf0/ubuf1, vbuf0/vbuf1. */
562 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
563 for (i=0; i<(dstW>>1); i++) {\
565 int Y1= buf0[i2 ]>>7;\
566 int Y2= buf0[i2+1]>>7;\
567 int U= (ubuf0[i] + ubuf1[i])>>8;\
568 int V= (vbuf0[i] + vbuf1[i])>>8;\
569 type av_unused *r, *b, *g;\
570 int av_unused A1, A2;\
576 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
577 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
578 r = (type *)c->table_rV[V];\
579 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
580 b = (type *)c->table_bU[U];
/* 1 bpp output: dither each blended luma sample against the mid-grey
 * g table row and pack 8 pixels per output byte, MSB first; MONOWHITE
 * output is the bit-inverse of MONOBLACK. */
582 #define YSCALE_YUV_2_MONO2_C \
583 const uint8_t * const d128=dither_8x8_220[y&7];\
584 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
585 for (i=0; i<dstW-7; i+=8) {\
587 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
588 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
589 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
590 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
591 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
592 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
593 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
594 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
595 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
599 #define YSCALE_YUV_2_MONOX_C \
600 const uint8_t * const d128=dither_8x8_220[y&7];\
601 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
603 for (i=0; i<dstW-1; i+=2) {\
608 for (j=0; j<lumFilterSize; j++) {\
609 Y1 += lumSrc[j][i] * lumFilter[j];\
610 Y2 += lumSrc[j][i+1] * lumFilter[j];\
620 acc+= acc + g[Y1+d128[(i+0)&7]];\
621 acc+= acc + g[Y2+d128[(i+1)&7]];\
623 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack):
 * switches on c->dstFormat and expands the per-format store loop,
 * driving it with one of the four loop macros passed in: 'func' for
 * table-driven RGB/BGR paths, 'func2' for raw packed YUV, 'func_g16'
 * for 16-bit grayscale and 'func_monoblack' for 1 bpp output.
 * NOTE(review): many interleaving lines of the original switch (case
 * labels, braces, break statements) are missing from this chunk. */
628 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
629 switch(c->dstFormat) {\
630 case PIX_FMT_RGB48BE:\
631 case PIX_FMT_RGB48LE:\
633 ((uint8_t*)dest)[ 0]= r[Y1];\
634 ((uint8_t*)dest)[ 1]= r[Y1];\
635 ((uint8_t*)dest)[ 2]= g[Y1];\
636 ((uint8_t*)dest)[ 3]= g[Y1];\
637 ((uint8_t*)dest)[ 4]= b[Y1];\
638 ((uint8_t*)dest)[ 5]= b[Y1];\
639 ((uint8_t*)dest)[ 6]= r[Y2];\
640 ((uint8_t*)dest)[ 7]= r[Y2];\
641 ((uint8_t*)dest)[ 8]= g[Y2];\
642 ((uint8_t*)dest)[ 9]= g[Y2];\
643 ((uint8_t*)dest)[10]= b[Y2];\
644 ((uint8_t*)dest)[11]= b[Y2];\
648 case PIX_FMT_BGR48BE:\
649 case PIX_FMT_BGR48LE:\
651 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
652 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
653 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
654 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
655 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
656 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
663 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
664 func(uint32_t,needAlpha)\
665 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
666 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
669 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
671 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
672 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
676 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
677 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
685 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
686 func(uint32_t,needAlpha)\
687 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
688 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
691 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
693 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
694 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
698 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
699 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
706 ((uint8_t*)dest)[0]= r[Y1];\
707 ((uint8_t*)dest)[1]= g[Y1];\
708 ((uint8_t*)dest)[2]= b[Y1];\
709 ((uint8_t*)dest)[3]= r[Y2];\
710 ((uint8_t*)dest)[4]= g[Y2];\
711 ((uint8_t*)dest)[5]= b[Y2];\
717 ((uint8_t*)dest)[0]= b[Y1];\
718 ((uint8_t*)dest)[1]= g[Y1];\
719 ((uint8_t*)dest)[2]= r[Y1];\
720 ((uint8_t*)dest)[3]= b[Y2];\
721 ((uint8_t*)dest)[4]= g[Y2];\
722 ((uint8_t*)dest)[5]= r[Y2];\
726 case PIX_FMT_RGB565BE:\
727 case PIX_FMT_RGB565LE:\
728 case PIX_FMT_BGR565BE:\
729 case PIX_FMT_BGR565LE:\
731 const int dr1= dither_2x2_8[y&1 ][0];\
732 const int dg1= dither_2x2_4[y&1 ][0];\
733 const int db1= dither_2x2_8[(y&1)^1][0];\
734 const int dr2= dither_2x2_8[y&1 ][1];\
735 const int dg2= dither_2x2_4[y&1 ][1];\
736 const int db2= dither_2x2_8[(y&1)^1][1];\
738 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
739 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
743 case PIX_FMT_RGB555BE:\
744 case PIX_FMT_RGB555LE:\
745 case PIX_FMT_BGR555BE:\
746 case PIX_FMT_BGR555LE:\
748 const int dr1= dither_2x2_8[y&1 ][0];\
749 const int dg1= dither_2x2_8[y&1 ][1];\
750 const int db1= dither_2x2_8[(y&1)^1][0];\
751 const int dr2= dither_2x2_8[y&1 ][1];\
752 const int dg2= dither_2x2_8[y&1 ][0];\
753 const int db2= dither_2x2_8[(y&1)^1][1];\
755 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
756 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
760 case PIX_FMT_RGB444BE:\
761 case PIX_FMT_RGB444LE:\
762 case PIX_FMT_BGR444BE:\
763 case PIX_FMT_BGR444LE:\
765 const int dr1= dither_4x4_16[y&3 ][0];\
766 const int dg1= dither_4x4_16[y&3 ][1];\
767 const int db1= dither_4x4_16[(y&3)^3][0];\
768 const int dr2= dither_4x4_16[y&3 ][1];\
769 const int dg2= dither_4x4_16[y&3 ][0];\
770 const int db2= dither_4x4_16[(y&3)^3][1];\
772 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
773 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
780 const uint8_t * const d64= dither_8x8_73[y&7];\
781 const uint8_t * const d32= dither_8x8_32[y&7];\
783 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
784 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
791 const uint8_t * const d64= dither_8x8_73 [y&7];\
792 const uint8_t * const d128=dither_8x8_220[y&7];\
794 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
795 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
799 case PIX_FMT_RGB4_BYTE:\
800 case PIX_FMT_BGR4_BYTE:\
802 const uint8_t * const d64= dither_8x8_73 [y&7];\
803 const uint8_t * const d128=dither_8x8_220[y&7];\
805 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
806 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
810 case PIX_FMT_MONOBLACK:\
811 case PIX_FMT_MONOWHITE:\
816 case PIX_FMT_YUYV422:\
818 ((uint8_t*)dest)[2*i2+0]= Y1;\
819 ((uint8_t*)dest)[2*i2+1]= U;\
820 ((uint8_t*)dest)[2*i2+2]= Y2;\
821 ((uint8_t*)dest)[2*i2+3]= V;\
824 case PIX_FMT_UYVY422:\
826 ((uint8_t*)dest)[2*i2+0]= U;\
827 ((uint8_t*)dest)[2*i2+1]= Y1;\
828 ((uint8_t*)dest)[2*i2+2]= V;\
829 ((uint8_t*)dest)[2*i2+3]= Y2;\
832 case PIX_FMT_GRAY16BE:\
834 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
835 ((uint8_t*)dest)[2*i2+1]= Y1;\
836 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
837 ((uint8_t*)dest)[2*i2+3]= Y2;\
840 case PIX_FMT_GRAY16LE:\
842 ((uint8_t*)dest)[2*i2+0]= Y1;\
843 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
844 ((uint8_t*)dest)[2*i2+2]= Y2;\
845 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/* yuv2packedX_c(): vertical filtering straight into a packed pixel
 * format (RGB/BGR/YUYV/UYVY/gray16/mono), one output line per call;
 * format dispatch is done by YSCALE_YUV_2_ANYRGB_C. */
850 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
851 const int16_t **lumSrc, int lumFilterSize,
852 const int16_t *chrFilter, const int16_t **chrUSrc,
853 const int16_t **chrVSrc, int chrFilterSize,
854 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
857 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* yuv2rgbX_c_full(): full-chroma-resolution RGB output (no 4:2:2
 * chroma subsampling of the result); 'step' is the destination's
 * bytes per pixel.  Only fragments of the per-format switch are
 * visible in this chunk. */
860 static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
861 const int16_t **lumSrc, int lumFilterSize,
862 const int16_t *chrFilter, const int16_t **chrUSrc,
863 const int16_t **chrVSrc, int chrFilterSize,
864 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
867 int step= c->dstFormatBpp/8;
870 switch(c->dstFormat) {
878 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
879 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
880 dest[aidx]= needAlpha ? A : 255;
887 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
888 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
896 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
913 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
914 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
915 dest[aidx]= needAlpha ? A : 255;
922 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
923 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
931 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* fillPlane(): fill 'height' rows of a plane, starting at row y, with
 * the constant byte 'val'.  (The per-row pointer advance is on a line
 * missing from this chunk.) */
946 static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
949 uint8_t *ptr = plane + stride*y;
950 for (i=0; i<height; i++) {
951 memset(ptr, val, width);
/* rgb48funcs(): generates ToY/ToUV/ToUV_half input converters for
 * 16-bit-per-component RGB48/BGR48 in either endianness; rfunc reads
 * one 16-bit component and only its top 8 bits are used.  The _half
 * variant averages two horizontally adjacent pixels for subsampled
 * chroma.  compA/compB/compC name the components in memory order, so
 * the same macro serves both RGB and BGR layouts. */
956 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
957 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
958 uint8_t *dst, const uint8_t *src, int width, \
962 for (i = 0; i < width; i++) { \
963 int compA = rfunc(&src[i*6+0]) >> 8; \
964 int compB = rfunc(&src[i*6+2]) >> 8; \
965 int compC = rfunc(&src[i*6+4]) >> 8; \
967 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
971 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
972 uint8_t *dstU, uint8_t *dstV, \
973 const uint8_t *src1, const uint8_t *src2, \
974 int width, uint32_t *unused) \
977 assert(src1==src2); \
978 for (i = 0; i < width; i++) { \
979 int compA = rfunc(&src1[6*i + 0]) >> 8; \
980 int compB = rfunc(&src1[6*i + 2]) >> 8; \
981 int compC = rfunc(&src1[6*i + 4]) >> 8; \
983 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
984 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
988 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
989 uint8_t *dstU, uint8_t *dstV, \
990 const uint8_t *src1, const uint8_t *src2, \
991 int width, uint32_t *unused) \
994 assert(src1==src2); \
995 for (i = 0; i < width; i++) { \
996 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
997 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
998 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1000 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1001 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1004 rgb48funcs(LE, AV_RL16, r, g, b);
1005 rgb48funcs(BE, AV_RB16, r, g, b);
1006 rgb48funcs(LE, AV_RL16, b, g, r);
1007 rgb48funcs(BE, AV_RB16, b, g, r);
/* BGR2Y(): generates a packed-RGB/BGR -> luma converter for a given
 * pixel word type; shr/shg/shb and the masks extract each channel,
 * and the pre-shifted coefficients plus scale S fold the channel
 * position into the multiply. */
1009 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1010 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1011 int width, uint32_t *unused)\
1014 for (i=0; i<width; i++) {\
1015 int b= (((const type*)src)[i]>>shb)&maskb;\
1016 int g= (((const type*)src)[i]>>shg)&maskg;\
1017 int r= (((const type*)src)[i]>>shr)&maskr;\
1019 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1023 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1024 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1025 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1026 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1027 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1028 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1029 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1030 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* abgrToA_c()/rgbaToA_c(): extract the alpha channel from packed
 * 32-bit pixels.  NOTE(review): the loop bodies (the actual byte
 * offsets read) are on lines missing from this chunk. */
1032 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1035 for (i=0; i<width; i++) {
1040 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1043 for (i=0; i<width; i++) {
/* BGR2UV(): generates packed-RGB/BGR -> chroma (U,V) converters.
 * shp pre-shifts the pixel word (for 32-bit formats with the colour in
 * the high bytes), then the masks/shifts extract each channel.  The
 * _half variant averages two horizontally adjacent pixels using the
 * mask trick: summing two masked words cannot carry across fields when
 * masked with mask|(2*mask). */
1048 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1049 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1050 const uint8_t *src, const uint8_t *dummy, \
1051 int width, uint32_t *unused)\
1054 for (i=0; i<width; i++) {\
1055 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1056 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1057 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1059 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1060 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1063 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1064 const uint8_t *src, const uint8_t *dummy, \
1065 int width, uint32_t *unused)\
1068 for (i=0; i<width; i++) {\
1069 int pix0= ((const type*)src)[2*i+0]>>shp;\
1070 int pix1= ((const type*)src)[2*i+1]>>shp;\
1071 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1072 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1073 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1074 g&= maskg|(2*maskg);\
1078 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1079 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1083 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1084 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1085 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1086 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1087 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1088 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1089 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1090 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* palToY_c(): look up 8-bit palette indices; luma is the low byte of
 * the 32-bit palette entry. */
1092 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1095 for (i=0; i<width; i++) {
1098 dst[i]= pal[d] & 0xFF;
/* palToUV_c(): chroma from the palette entries; both source pointers
 * must alias the same palettized plane (asserted). */
1102 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1103 const uint8_t *src1, const uint8_t *src2,
1104 int width, uint32_t *pal)
1107 assert(src1 == src2);
1108 for (i=0; i<width; i++) {
1109 int p= pal[src1[i]];
/* monowhite2Y_c()/monoblack2Y_c(): expand 1 bpp bitmaps to 8-bit luma,
 * MSB first, 8 pixels per input byte.  NOTE(review): the visible inner
 * lines are identical; the differing per-byte load (inversion for the
 * white-is-zero variant) is on lines missing from this chunk. */
1116 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1117 int width, uint32_t *unused)
1120 for (i=0; i<width/8; i++) {
1123 dst[8*i+j]= ((d>>(7-j))&1)*255;
1127 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1128 int width, uint32_t *unused)
1131 for (i=0; i<width/8; i++) {
1134 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* yuv2yuv1_c(): unscaled 1:1 vertical pass -- rounds away the 7 bits
 * of fixed-point headroom on each sample ((x+64)>>7) and clips to
 * 8 bits, for luma, chroma and (optionally) alpha planes. */
1138 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1139 const int16_t *chrUSrc, const int16_t *chrVSrc,
1140 const int16_t *alpSrc,
1141 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1142 uint8_t *aDest, int dstW, int chrDstW)
1145 for (i=0; i<dstW; i++) {
1146 int val= (lumSrc[i]+64)>>7;
1147 dest[i]= av_clip_uint8(val);
1151 for (i=0; i<chrDstW; i++) {
1152 int u=(chrUSrc[i]+64)>>7;
1153 int v=(chrVSrc[i]+64)>>7;
1154 uDest[i]= av_clip_uint8(u);
1155 vDest[i]= av_clip_uint8(v);
1158 if (CONFIG_SWSCALE_ALPHA && aDest)
1159 for (i=0; i<dstW; i++) {
1160 int val= (alpSrc[i]+64)>>7;
1161 aDest[i]= av_clip_uint8(val);
1166 * vertical bilinear scale YV12 to RGB
1168 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1169 const uint16_t *buf1, const uint16_t *ubuf0,
1170 const uint16_t *ubuf1, const uint16_t *vbuf0,
1171 const uint16_t *vbuf1, const uint16_t *abuf0,
1172 const uint16_t *abuf1, uint8_t *dest, int dstW,
1173 int yalpha, int uvalpha, int y)
1175 int yalpha1=4095- yalpha;
1176 int uvalpha1=4095-uvalpha;
1179 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1183 * YV12 to RGB without scaling or interpolating
1185 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1186 const uint16_t *ubuf0, const uint16_t *ubuf1,
1187 const uint16_t *vbuf0, const uint16_t *vbuf1,
1188 const uint16_t *abuf0, uint8_t *dest, int dstW,
1189 int uvalpha, enum PixelFormat dstFormat,
1192 const int yalpha1=0;
1195 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1196 const int yalpha= 4096; //FIXME ...
1198 if (uvalpha < 2048) {
1199 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1201 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1205 //FIXME yuy2* can read up to 7 samples too much
1207 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1211 for (i=0; i<width; i++)
1215 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1216 const uint8_t *src2, int width, uint32_t *unused)
1219 for (i=0; i<width; i++) {
1220 dstU[i]= src1[4*i + 1];
1221 dstV[i]= src1[4*i + 3];
1223 assert(src1 == src2);
/* 16-bit little-endian planar chroma -> 8-bit: take the most significant
 * byte (offset 1) of each LE sample from the two separate planes. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1];
        dstV[i]= src2[2*i + 1];
/* This is almost identical to the previous, and exists only because
 * yuy2To(Y|UV)(dst, src + 1, ...) would have 100% unaligned accesses. */
/* Extract luma from packed UYVY (U Y0 V Y1).
 * NOTE(review): the remaining parameter and loop body are not visible in
 * this fragment. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* Extract U and V from packed UYVY (U Y0 V Y1): chroma sits at even byte
 * offsets. Only src1 is read (see assert). */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    assert(src1 == src2);
/* 16-bit big-endian planar chroma -> 8-bit: counterpart of LEToUV_c.
 * NOTE(review): the loop body is not visible in this fragment. */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
1267 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1268 const uint8_t *src, int width)
1271 for (i = 0; i < width; i++) {
1272 dst1[i] = src[2*i+0];
1273 dst2[i] = src[2*i+1];
/** NV12 chroma input: interleaved U,V pairs -> planar U and V.
 *  src2 and unused are ignored; restores the truncated braces. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/** NV21 chroma input: interleaved V,U pairs -> planar U and V (note the
 *  swapped destination order versus nv12ToUV_c); restores truncated braces. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1291 // FIXME Maybe dither instead.
/* Template for 9/10-bit planar YUV input: reads each 16-bit sample with
 * rfunc (AV_RL16 or AV_RB16, matching the endianness tag) and reduces it
 * to 8 bits by dropping the depth-8 least significant bits.
 * Instantiated below for 9- and 10-bit little- and big-endian layouts. */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
/* Packed BGR24 -> 8-bit luma using the fixed-point RGB->YUV coefficients
 * (RGB2YUV_SHIFT = 15). The bias 33<<(RGB2YUV_SHIFT-1) equals
 * 16.5<<RGB2YUV_SHIFT: the +16 luma offset plus 0.5 for rounding.
 * NOTE(review): the b/g/r byte loads are not visible in this fragment. */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> 8-bit U/V. The bias 257<<(RGB2YUV_SHIFT-1) equals
 * 128.5<<RGB2YUV_SHIFT: the +128 chroma offset plus rounding.
 * Only src1 is read; the assert documents that src1 == src2. */
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
    assert(src1 == src2);
/* As bgr24ToUV_c, but for horizontally subsampled chroma: sums each pair
 * of adjacent pixels and shifts by RGB2YUV_SHIFT+1, i.e. averages two
 * source pixels per output sample. */
static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
    assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma; same arithmetic as bgr24ToY_c with the
 * opposite component order.
 * NOTE(review): the r/g/b byte loads are not visible in this fragment. */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> 8-bit U/V; mirror of bgr24ToUV_c with r and b read from
 * the opposite byte positions. */
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* As rgb24ToUV_c, but averages each pair of adjacent pixels for
 * horizontally subsampled chroma (sum of two pixels, >> RGB2YUV_SHIFT+1). */
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
// bilinear / bicubic scaling
/* Generic horizontal FIR scaler: each output pixel is the dot product of
 * filterSize coefficients with the 8-bit source samples starting at
 * filterPos[i]; the result is >>7 and clipped to 2^15-1.
 * NOTE(review): parts of the signature and the 'val' declaration are not
 * visible in this fragment. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
1428 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Convert a line of 15-bit chroma samples from MPEG (limited) to JPEG
 * (full) range in place: approximately (x - (128<<7)) * 255/224 + (128<<7)
 * in fixed point (4663/4096 ~= 255/224). The FFMIN clamp on the input
 * keeps the result within 15 bits.
 * (Restores the brace/loop-variable lines truncated in this fragment.)
 */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/**
 * Convert a line of 15-bit chroma samples from JPEG (full) to MPEG
 * (limited) range in place: x * 224/255 plus the limited-range offset, in
 * fixed point (1799/2048 ~= 224/255). A centered sample (128<<7 = 16384)
 * maps to itself.
 * (Restores the brace/loop-variable lines truncated in this fragment.)
 */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * Convert a line of 15-bit luma samples from MPEG (limited) to JPEG (full)
 * range in place: approximately (x - (16<<7)) * 255/219 in fixed point
 * (19077/16384 ~= 255/219). The FFMIN clamp on the input keeps the result
 * within 15 bits.
 * (Restores the brace/loop-variable lines truncated in this fragment.)
 */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/**
 * Convert a line of 15-bit luma samples from JPEG (full) to MPEG (limited)
 * range in place: x * 219/255 + (16<<7) in fixed point (14071/16384 ~=
 * 219/255), so 0 maps to 2048 (16<<7) and 32640 (255<<7) to 30080 (235<<7).
 * (Restores the brace/loop-variable lines truncated in this fragment.)
 */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
1458 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1459 const uint8_t *src, int srcW, int xInc)
1462 unsigned int xpos=0;
1463 for (i=0;i<dstWidth;i++) {
1464 register unsigned int xx=xpos>>16;
1465 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1466 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// *** horizontal scale Y line to temp buffer
/* Scale one luma (or alpha, when isAlpha) input line to dstWidth 15-bit
 * samples: first convert the pixel format to 8-bit if needed (toYV12 into
 * formatConvBuffer), then run either the generic FIR scaler or the fast
 * bilinear path, and finally apply the MPEG<->JPEG range conversion
 * (alpha gets convertRange == NULL and skips it). */
static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc,
                           const int16_t *hLumFilter,
                           const int16_t *hLumFilterPos, int hLumFilterSize,
                           uint8_t *formatConvBuffer,
                           uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;

        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;

    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);

        convertRange(dst, dstWidth);
1497 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1498 int dstWidth, const uint8_t *src1,
1499 const uint8_t *src2, int srcW, int xInc)
1502 unsigned int xpos=0;
1503 for (i=0;i<dstWidth;i++) {
1504 register unsigned int xx=xpos>>16;
1505 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1506 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1507 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Scale one pair of chroma input lines to dstWidth samples each: first
 * convert the pixel format to two planar 8-bit lines if needed (chrToYV12
 * into formatConvBuffer and buf2), then run the generic FIR scaler or the
 * fast bilinear path on both planes, then the chroma range conversion. */
static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                           const uint8_t *src1, const uint8_t *src2,
                           int srcW, int xInc, const int16_t *hChrFilter,
                           const int16_t *hChrFilterPos, int hChrFilterSize,
                           uint8_t *formatConvBuffer, uint32_t *pal)
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16); // second plane, 16-byte aligned
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;

    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C implementations of the vertical-scale/output stages for the
 * destination format: planar writers (yuv2yuv1/yuv2yuvX, with NV12/NV21,
 * 16-bit and 9/10-bit variants) and packed writers (yuv2packed1/2/X).
 * The full-chroma-interpolation RGB path only provides the X variant. */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;
    if(c->flags & SWS_FULL_CHR_H_INT) {
        *yuv2packedX = yuv2rgbX_c_full;
        *yuv2packed1 = yuv2packed1_c;
        *yuv2packed2 = yuv2packed2_c;
        *yuv2packedX = yuv2packedX_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 for verbose tracing of the line ring
 * buffers in swScale(). */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Generic C scaling loop: horizontally scales every input line that is
 * needed into the luma/chroma/alpha ring buffers, then, once enough lines
 * are buffered for a destination line, vertically scales and writes it out
 * through the yuv2yuv*/yuv2packed* function pointers.
 * Returns the number of destination lines written for this slice.
 *
 * NOTE(review): several lines (braces and locals such as dstY, lastDstY
 * and enough_lines) are elided in this fragment.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;     // luma line ring buffer
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;     // NULL when there is no alpha plane
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); // ceiling division
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;   // last source line present in the luma ring buffer
    int lastInChrBuf= c->lastInChrBuf;

    // Packed sources keep everything in plane 0; mirror its stride.
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            // only buffer what the slice provides; output resumes with a later slice
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,

            // Point into the ring buffers at the first line each vertical filter needs.
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                             uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                        chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);

    // Destination wants an alpha plane but the source has none: fill with opaque.
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/**
 * Fill in the C (reference) function pointers of the context: output
 * packers/planar writers, the horizontal scaler, the input unpackers
 * (chrToYV12 / lumToYV12 / alpToYV12 selected by source pixel format),
 * and the optional MPEG<->JPEG range converters.
 */
static void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,

    c->hScale = hScale_c;

    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;

    // Input chroma unpacker: packed or high-bit-depth -> planar 8-bit U/V.
    c->chrToYV12 = NULL;
        case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
        case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
        case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
        case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
        case PIX_FMT_BGR4_BYTE:
        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
        case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
        case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
        case PIX_FMT_YUV420P16BE:
        case PIX_FMT_YUV422P16BE:
        case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
        case PIX_FMT_YUV420P16LE:
        case PIX_FMT_YUV422P16LE:
        case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    if (c->chrSrcHSubSample) {
        // Horizontally subsampled chroma: the _half variants average two
        // adjacent source pixels while converting.
            case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
            case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
            case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
            case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
            case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
            case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
            case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
            case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
            case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
            case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
            case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
            case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
            case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
            case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
            case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
            case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
            case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
            case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
            case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
            case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
            case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
            case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
            case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
            case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
            case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
            case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
            case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
            case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;

    // Input luma and alpha unpackers.
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    case PIX_FMT_YUYV422 :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; // alpha is the odd byte, same layout as UYVY luma

    // Range converters are only needed for YUV output with mismatched ranges.
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        c->lumConvertRange = lumRangeFromJpeg_c;
        c->chrConvertRange = chrRangeFromJpeg_c;
        c->lumConvertRange = lumRangeToJpeg_c;
        c->chrConvertRange = chrRangeToJpeg_c;

    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
1967 SwsFunc ff_getSwsFunc(SwsContext *c)
1969 sws_init_swScale_c(c);
1972 ff_sws_init_swScale_mmx(c);
1974 ff_sws_init_swScale_altivec(c);