2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients as fixed-point integers with
 * RGB2YUV_SHIFT fractional bits.  The 219/255 factor (luma) and 224/255
 * factor (chroma) scale full-range RGB into the limited ITU-R BT.601
 * ranges (Y: 16..235, Cb/Cr: 16..240); the +0.5 rounds to nearest.
 * Naming: <channel><plane>, e.g. BY = contribution of Blue to Y. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB->YUV coefficient rows (doubles, not yet fixed-point).
 * From the row values, the first three entries are the G/B/R luma weights
 * (e.g. 0.7152/0.0722/0.2126 for BT.709); the remaining six feed the U/V
 * mixes.  Row index presumably follows the SWS_CS_* colorspace constants
 * -- TODO confirm against swscale_internal.h.
 * NOTE(review): the closing "};" of this array is missing from this
 * extraction (original line numbering jumps from 91 to 96). */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* 2x2 ordered-dither offset tables, 8-byte aligned for SIMD loads.
 * dither_2x2_4 (values 0..3) is used for the 6-bit green channel and
 * dither_2x2_8 (values 0..6) for the 5-bit channels of 15/16 bpp RGB
 * output -- see the RGB565/RGB555 cases in YSCALE_YUV_2_ANYRGB_C. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither table (values 0..15) used for the 4-bit channels of
 * RGB444/BGR444 output (see YSCALE_YUV_2_ANYRGB_C).  Non-static: it is
 * also referenced by the platform-specific (asm) code. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither table (values 0..31) used for the 3-bit r/g channels
 * of 8 bpp RGB/BGR output (see the RGB8/BGR8 case in YSCALE_YUV_2_ANYRGB_C). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither table (values 0..72) used for the 2-bit blue channel
 * of 8 bpp output and for the green channel of 4 bpp output
 * (see YSCALE_YUV_2_ANYRGB_C). */
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither table (values 0..217) used for 1-bit monochrome and
 * the 1-bit channels of 4 bpp output (indexed as dither_8x8_220[y&7] in
 * the YSCALE_YUV_2_MONO*_C and RGB4/BGR4 paths). */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
/* NOTE(review): this table carries the same name as the plain
 * dither_8x8_220 above; upstream the alternatives are selected by
 * #if/#elif preprocessor guards which appear to have been dropped by
 * this extraction -- restore them before compiling, otherwise this is
 * a duplicate definition. */
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
/* NOTE(review): duplicate name -- see the gamma-1.5 table above; the
 * original #elif guard selecting between these variants is missing here. */
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
/* NOTE(review): duplicate name -- see the gamma-1.5 table above; the
 * original #else/#endif guards selecting between variants are missing. */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/**
 * Vertical scaling to >8-bit-per-component planar YUV: each output sample
 * is the weighted sum of lumFilterSize (resp. chrFilterSize) 16-bit input
 * lines, shifted down and clipped to output_bits, then stored as a 16-bit
 * word -- big- or little-endian as selected by big_endian via the
 * output_pixel() helper macro.  big_endian and output_bits are expected to
 * be compile-time constants so each yuv2NBPS instantiation folds to one
 * branch-free variant.
 * NOTE(review): several structural lines (braces, the "else" halves of
 * output_pixel, loop-variable declarations) are missing from this
 * extraction -- compare against upstream before editing the code itself.
 */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* Instantiates one yuv2yuvX<bits><BE/LE>_c wrapper: casts the byte
 * pointers to uint16_t* and forwards to yuv2yuvX16_c_template with the
 * endianness and bit depth as constants, so the template specializes. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest, \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/**
 * Vertical scaling to 8-bit planar YUV(A): each output byte is the
 * filter-weighted sum of the 16-bit input lines, shifted down by 19
 * (filter coefficients and samples together carry 19 fractional bits)
 * and clipped to 0..255.  Alpha is processed only when swscale was built
 * with alpha support and an alpha destination is given.
 */
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
// Luma plane: accumulate lumFilterSize taps per pixel.
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
// Chroma planes: U and V accumulated in one pass over chrDstW pixels.
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
// Optional alpha plane, reusing the luma filter.
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/**
 * Vertical scaling to NV12/NV21: luma is written as a plain 8-bit plane,
 * chroma is written interleaved into uDest -- U,V order for NV12 and
 * V,U order for NV21 (vDest is unused for the interleaved plane).
 * Same 19-bit fixed-point accumulate/shift/clip scheme as yuv2yuvX_c.
 */
332 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
333 const int16_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int16_t **chrUSrc,
335 const int16_t **chrVSrc, int chrFilterSize,
336 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
337 uint8_t *vDest, uint8_t *aDest,
338 int dstW, int chrDstW)
340 enum PixelFormat dstFormat = c->dstFormat;
342 //FIXME Optimize (just quickly written not optimized..)
// Luma plane.
344 for (i=0; i<dstW; i++) {
347 for (j=0; j<lumFilterSize; j++)
348 val += lumSrc[j][i] * lumFilter[j];
350 dest[i]= av_clip_uint8(val>>19);
// Interleaved chroma: NV12 stores U then V per pair ...
356 if (dstFormat == PIX_FMT_NV12)
357 for (i=0; i<chrDstW; i++) {
361 for (j=0; j<chrFilterSize; j++) {
362 u += chrUSrc[j][i] * chrFilter[j];
363 v += chrVSrc[j][i] * chrFilter[j];
366 uDest[2*i]= av_clip_uint8(u>>19);
367 uDest[2*i+1]= av_clip_uint8(v>>19);
// ... otherwise (NV21) V then U.
370 for (i=0; i<chrDstW; i++) {
374 for (j=0; j<chrFilterSize; j++) {
375 u += chrUSrc[j][i] * chrFilter[j];
376 v += chrVSrc[j][i] * chrFilter[j];
379 uDest[2*i]= av_clip_uint8(v>>19);
380 uDest[2*i+1]= av_clip_uint8(u>>19);
/* YSCALE_YUV_2_PACKEDX_NOCLIP_C: per pair of output pixels, run the
 * vertical luma filter for Y1/Y2 and the chroma filter for one shared
 * U/V, plus optional alpha (A1/A2); declares the r/g/b lookup pointers
 * used by the RGB variants.  No range clipping is performed here. */
384 #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
385 for (i=0; i<(dstW>>1); i++) {\
391 int av_unused A1, A2;\
392 type av_unused *r, *b, *g;\
395 for (j=0; j<lumFilterSize; j++) {\
396 Y1 += lumSrc[j][i2] * lumFilter[j];\
397 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
399 for (j=0; j<chrFilterSize; j++) {\
400 U += chrUSrc[j][i] * chrFilter[j];\
401 V += chrVSrc[j][i] * chrFilter[j];\
410 for (j=0; j<lumFilterSize; j++) {\
411 A1 += alpSrc[j][i2 ] * lumFilter[j];\
412 A2 += alpSrc[j][i2+1] * lumFilter[j];\
/* YSCALE_YUV_2_PACKEDX_C: same as above, but clamps Y1/Y2 (and U/V on the
 * missing lines, plus A1/A2 when alpha is enabled) into 0..255. */
418 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
419 YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
420 if ((Y1|Y2|U|V)&256) {\
421 if (Y1>255) Y1=255; \
422 else if (Y1<0)Y1=0; \
423 if (Y2>255) Y2=255; \
424 else if (Y2<0)Y2=0; \
430 if (alpha && ((A1|A2)&256)) {\
431 A1=av_clip_uint8(A1);\
432 A2=av_clip_uint8(A2);\
/* YSCALE_YUV_2_PACKEDX_FULL_C: full-chroma-resolution variant -- one
 * Y/U/V (and optional A) per output pixel instead of per pixel pair;
 * rnd is the rounding bias added to the accumulators. */
435 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
436 for (i=0; i<dstW; i++) {\
444 for (j=0; j<lumFilterSize; j++) {\
445 Y += lumSrc[j][i ] * lumFilter[j];\
447 for (j=0; j<chrFilterSize; j++) {\
448 U += chrUSrc[j][i] * chrFilter[j];\
449 V += chrVSrc[j][i] * chrFilter[j];\
456 for (j=0; j<lumFilterSize; j++)\
457 A += alpSrc[j][i ] * lumFilter[j];\
460 A = av_clip_uint8(A);\
/* YSCALE_YUV_2_RGBX_FULL_C: converts the full-resolution Y/U/V to R/G/B
 * with the per-context yuv2rgb coefficients, then saturates each channel
 * to the 22-bit fixed-point range [0, 256<<22). */
463 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
464 YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
465 Y-= c->yuv2rgb_y_offset;\
466 Y*= c->yuv2rgb_y_coeff;\
468 R= Y + V*c->yuv2rgb_v2r_coeff;\
469 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
470 B= Y + U*c->yuv2rgb_u2b_coeff;\
471 if ((R|G|B)&(0xC0000000)) {\
472 if (R>=(256<<22)) R=(256<<22)-1; \
474 if (G>=(256<<22)) G=(256<<22)-1; \
476 if (B>=(256<<22)) B=(256<<22)-1; \
/* YSCALE_YUV_2_GRAY16_C: 16-bit grayscale output -- vertical luma filter
 * per pixel pair with clipping to 0..65535 (no chroma used in output). */
480 #define YSCALE_YUV_2_GRAY16_C \
481 for (i=0; i<(dstW>>1); i++) {\
490 for (j=0; j<lumFilterSize; j++) {\
491 Y1 += lumSrc[j][i2] * lumFilter[j];\
492 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
496 if ((Y1|Y2|U|V)&65536) {\
497 if (Y1>65535) Y1=65535; \
498 else if (Y1<0)Y1=0; \
499 if (Y2>65535) Y2=65535; \
500 else if (Y2<0)Y2=0; \
/* YSCALE_YUV_2_RGBX_C: packed-pixel filter step plus table lookups --
 * r/g/b point into the per-context yuv2rgb tables indexed by U/V. */
503 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
504 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
505 r = (type *)c->table_rV[V]; \
506 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
507 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_PACKED2_C: two-line vertical *bilinear* blend (no filter
 * array): buf0/buf1 are mixed with yalpha/yalpha1 weights (ubuf/vbuf with
 * uvalpha/uvalpha1), >>19 brings the result back to 8 bits. */
509 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
510 for (i=0; i<(dstW>>1); i++) { \
512 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
513 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
514 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
515 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
516 type av_unused *r, *b, *g; \
517 int av_unused A1, A2; \
519 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
520 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
/* YSCALE_YUV_2_GRAY16_2_C: same blend but only >>11, keeping 16 bits. */
523 #define YSCALE_YUV_2_GRAY16_2_C \
524 for (i=0; i<(dstW>>1); i++) { \
526 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
527 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
/* YSCALE_YUV_2_RGB2_C: bilinear blend plus yuv2rgb table lookups. */
529 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
530 YSCALE_YUV_2_PACKED2_C(type,alpha)\
531 r = (type *)c->table_rV[V];\
532 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
533 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_PACKED1_C: 1:1 vertical scale (no interpolation): take
 * buf0 luma and ubuf1/vbuf1 chroma directly, >>7 from 15-bit to 8-bit. */
535 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
536 for (i=0; i<(dstW>>1); i++) {\
538 int Y1= buf0[i2 ]>>7;\
539 int Y2= buf0[i2+1]>>7;\
540 int U= (ubuf1[i])>>7;\
541 int V= (vbuf1[i])>>7;\
542 type av_unused *r, *b, *g;\
543 int av_unused A1, A2;\
/* YSCALE_YUV_2_GRAY16_1_C: 1:1 scale to 16-bit gray (<<1 from 15 bits). */
549 #define YSCALE_YUV_2_GRAY16_1_C \
550 for (i=0; i<(dstW>>1); i++) {\
552 int Y1= buf0[i2 ]<<1;\
553 int Y2= buf0[i2+1]<<1;
/* YSCALE_YUV_2_RGB1_C: 1:1 scale plus yuv2rgb table lookups. */
555 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
556 YSCALE_YUV_2_PACKED1_C(type,alpha)\
557 r = (type *)c->table_rV[V];\
558 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
559 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_PACKED1B_C: 1:1 luma but chroma averaged from the two
 * neighbouring chroma lines ((ubuf0+ubuf1)>>8) -- used when uvalpha is
 * close enough to the midpoint that a plain average suffices. */
561 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
562 for (i=0; i<(dstW>>1); i++) {\
564 int Y1= buf0[i2 ]>>7;\
565 int Y2= buf0[i2+1]>>7;\
566 int U= (ubuf0[i] + ubuf1[i])>>8;\
567 int V= (vbuf0[i] + vbuf1[i])>>8;\
568 type av_unused *r, *b, *g;\
569 int av_unused A1, A2;\
/* YSCALE_YUV_2_RGB1B_C: chroma-averaged 1:1 scale plus table lookups. */
575 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
576 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
577 r = (type *)c->table_rV[V];\
578 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
579 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_MONO2_C: bilinear-blend path to 1 bpp monochrome.  Eight
 * dithered gray lookups (dither_8x8_220 row selected by y&7) are packed
 * MSB-first into one output byte; bits are inverted for MONOWHITE. */
581 #define YSCALE_YUV_2_MONO2_C \
582 const uint8_t * const d128=dither_8x8_220[y&7];\
583 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
584 for (i=0; i<dstW-7; i+=8) {\
586 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
587 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
588 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
589 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
590 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
591 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
592 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
593 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
594 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* YSCALE_YUV_2_MONOX_C: full vertical-filter path to 1 bpp monochrome,
 * two pixels per iteration shifted into the accumulator byte. */
598 #define YSCALE_YUV_2_MONOX_C \
599 const uint8_t * const d128=dither_8x8_220[y&7];\
600 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
602 for (i=0; i<dstW-1; i+=2) {\
607 for (j=0; j<lumFilterSize; j++) {\
608 Y1 += lumSrc[j][i] * lumFilter[j];\
609 Y2 += lumSrc[j][i+1] * lumFilter[j];\
619 acc+= acc + g[Y1+d128[(i+0)&7]];\
620 acc+= acc + g[Y2+d128[(i+1)&7]];\
622 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* YSCALE_YUV_2_ANYRGB_C: the packed-output dispatcher.  Switches on
 * c->dstFormat and emits the per-pixel-pair store code for every packed
 * destination, using the pixel-producing macro passed in:
 *   func           - RGB paths (provides Y1/Y2, r/g/b lookup tables)
 *   func2          - raw packed YUV paths (YUYV/UYVY)
 *   func_g16       - 16-bit grayscale path
 *   func_monoblack - 1 bpp monochrome path
 * 15/16/12-bit RGB adds ordered dithering via the dither_2x2_*/4x4
 * tables, 8/4/1-bit output via the 8x8 tables; RGBA/ARGB paths branch on
 * CONFIG_SWSCALE_ALPHA && c->alpPixBuf to merge the alpha byte.
 * NOTE(review): loop headers, some case labels and closing braces fall on
 * lines missing from this extraction. */
627 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
628 switch(c->dstFormat) {\
629 case PIX_FMT_RGB48BE:\
630 case PIX_FMT_RGB48LE:\
632 ((uint8_t*)dest)[ 0]= r[Y1];\
633 ((uint8_t*)dest)[ 1]= r[Y1];\
634 ((uint8_t*)dest)[ 2]= g[Y1];\
635 ((uint8_t*)dest)[ 3]= g[Y1];\
636 ((uint8_t*)dest)[ 4]= b[Y1];\
637 ((uint8_t*)dest)[ 5]= b[Y1];\
638 ((uint8_t*)dest)[ 6]= r[Y2];\
639 ((uint8_t*)dest)[ 7]= r[Y2];\
640 ((uint8_t*)dest)[ 8]= g[Y2];\
641 ((uint8_t*)dest)[ 9]= g[Y2];\
642 ((uint8_t*)dest)[10]= b[Y2];\
643 ((uint8_t*)dest)[11]= b[Y2];\
647 case PIX_FMT_BGR48BE:\
648 case PIX_FMT_BGR48LE:\
650 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
651 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
652 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
653 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
654 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
655 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
662 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
663 func(uint32_t,needAlpha)\
664 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
665 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
668 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
670 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
671 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
675 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
676 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
684 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
685 func(uint32_t,needAlpha)\
686 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
687 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
690 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
692 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
693 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
697 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
698 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
705 ((uint8_t*)dest)[0]= r[Y1];\
706 ((uint8_t*)dest)[1]= g[Y1];\
707 ((uint8_t*)dest)[2]= b[Y1];\
708 ((uint8_t*)dest)[3]= r[Y2];\
709 ((uint8_t*)dest)[4]= g[Y2];\
710 ((uint8_t*)dest)[5]= b[Y2];\
716 ((uint8_t*)dest)[0]= b[Y1];\
717 ((uint8_t*)dest)[1]= g[Y1];\
718 ((uint8_t*)dest)[2]= r[Y1];\
719 ((uint8_t*)dest)[3]= b[Y2];\
720 ((uint8_t*)dest)[4]= g[Y2];\
721 ((uint8_t*)dest)[5]= r[Y2];\
725 case PIX_FMT_RGB565BE:\
726 case PIX_FMT_RGB565LE:\
727 case PIX_FMT_BGR565BE:\
728 case PIX_FMT_BGR565LE:\
730 const int dr1= dither_2x2_8[y&1 ][0];\
731 const int dg1= dither_2x2_4[y&1 ][0];\
732 const int db1= dither_2x2_8[(y&1)^1][0];\
733 const int dr2= dither_2x2_8[y&1 ][1];\
734 const int dg2= dither_2x2_4[y&1 ][1];\
735 const int db2= dither_2x2_8[(y&1)^1][1];\
737 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
738 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
742 case PIX_FMT_RGB555BE:\
743 case PIX_FMT_RGB555LE:\
744 case PIX_FMT_BGR555BE:\
745 case PIX_FMT_BGR555LE:\
747 const int dr1= dither_2x2_8[y&1 ][0];\
748 const int dg1= dither_2x2_8[y&1 ][1];\
749 const int db1= dither_2x2_8[(y&1)^1][0];\
750 const int dr2= dither_2x2_8[y&1 ][1];\
751 const int dg2= dither_2x2_8[y&1 ][0];\
752 const int db2= dither_2x2_8[(y&1)^1][1];\
754 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
755 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
759 case PIX_FMT_RGB444BE:\
760 case PIX_FMT_RGB444LE:\
761 case PIX_FMT_BGR444BE:\
762 case PIX_FMT_BGR444LE:\
764 const int dr1= dither_4x4_16[y&3 ][0];\
765 const int dg1= dither_4x4_16[y&3 ][1];\
766 const int db1= dither_4x4_16[(y&3)^3][0];\
767 const int dr2= dither_4x4_16[y&3 ][1];\
768 const int dg2= dither_4x4_16[y&3 ][0];\
769 const int db2= dither_4x4_16[(y&3)^3][1];\
771 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
772 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
779 const uint8_t * const d64= dither_8x8_73[y&7];\
780 const uint8_t * const d32= dither_8x8_32[y&7];\
782 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
783 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
790 const uint8_t * const d64= dither_8x8_73 [y&7];\
791 const uint8_t * const d128=dither_8x8_220[y&7];\
793 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
794 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
798 case PIX_FMT_RGB4_BYTE:\
799 case PIX_FMT_BGR4_BYTE:\
801 const uint8_t * const d64= dither_8x8_73 [y&7];\
802 const uint8_t * const d128=dither_8x8_220[y&7];\
804 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
805 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
809 case PIX_FMT_MONOBLACK:\
810 case PIX_FMT_MONOWHITE:\
815 case PIX_FMT_YUYV422:\
817 ((uint8_t*)dest)[2*i2+0]= Y1;\
818 ((uint8_t*)dest)[2*i2+1]= U;\
819 ((uint8_t*)dest)[2*i2+2]= Y2;\
820 ((uint8_t*)dest)[2*i2+3]= V;\
823 case PIX_FMT_UYVY422:\
825 ((uint8_t*)dest)[2*i2+0]= U;\
826 ((uint8_t*)dest)[2*i2+1]= Y1;\
827 ((uint8_t*)dest)[2*i2+2]= V;\
828 ((uint8_t*)dest)[2*i2+3]= Y2;\
831 case PIX_FMT_GRAY16BE:\
833 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
834 ((uint8_t*)dest)[2*i2+1]= Y1;\
835 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
836 ((uint8_t*)dest)[2*i2+3]= Y2;\
839 case PIX_FMT_GRAY16LE:\
841 ((uint8_t*)dest)[2*i2+0]= Y1;\
842 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
843 ((uint8_t*)dest)[2*i2+2]= Y2;\
844 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/**
 * Vertical multi-tap filter to any packed destination format: delegates
 * all per-format store logic to YSCALE_YUV_2_ANYRGB_C, with the filtered
 * (clipped) RGB/packed-YUV/gray16/mono pixel producers.  y selects the
 * dither row for formats that dither.
 */
849 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
850 const int16_t **lumSrc, int lumFilterSize,
851 const int16_t *chrFilter, const int16_t **chrUSrc,
852 const int16_t **chrVSrc, int chrFilterSize,
853 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
856 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/**
 * Full-chroma-resolution vertical filter to packed RGB(A): computes one
 * U/V per output pixel (no chroma subsampling on output) via
 * YSCALE_YUV_2_RGBX_FULL_C, stepping dest by dstFormatBpp/8 bytes.
 * Alpha handling mirrors the 32-bit paths in YSCALE_YUV_2_ANYRGB_C:
 * compile-time alpha support plus a runtime alpPixBuf check, writing
 * either the filtered alpha or opaque 255 at aidx.
 */
859 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
860 const int16_t **lumSrc, int lumFilterSize,
861 const int16_t *chrFilter, const int16_t **chrUSrc,
862 const int16_t **chrVSrc, int chrFilterSize,
863 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
866 int step= c->dstFormatBpp/8;
869 switch(c->dstFormat) {
877 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
878 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
879 dest[aidx]= needAlpha ? A : 255;
886 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
887 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
895 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
912 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
913 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
914 dest[aidx]= needAlpha ? A : 255;
921 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
922 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
930 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fill a rectangular region of one plane with a constant byte value,
 * one memset of `width` bytes per row, stepping by `stride`.
 * NOTE(review): the `int y, uint8_t val` tail of the parameter list and
 * the stride advance are on lines missing from this extraction. */
945 static av_always_inline void fillPlane(uint8_t* plane, int stride,
946 int width, int height,
950 uint8_t *ptr = plane + stride*y;
951 for (i=0; i<height; i++) {
952 memset(ptr, val, width);
/* Generates the 48-bit RGB/BGR (16 bits per component) input converters:
 *   <comps>48<LE/BE>ToY_c       - one luma byte per pixel
 *   <comps>48<LE/BE>ToUV_c      - chroma at full horizontal resolution
 *   <comps>48<LE/BE>ToUV_half_c - chroma averaged over pixel pairs
 * rfunc (AV_RL16/AV_RB16) reads each 16-bit component, >>8 reduces it to
 * 8 bits before the fixed-point RGB->YUV dot products.  The rounding
 * terms encode the range offsets: 33<<(S-1) == (16 + 0.5)<<S for luma,
 * 257<<(S-1) == (128 + 0.5)<<S for chroma. */
957 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
958 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
959 uint8_t *dst, const uint8_t *src, int width, \
963 for (i = 0; i < width; i++) { \
964 int compA = rfunc(&src[i*6+0]) >> 8; \
965 int compB = rfunc(&src[i*6+2]) >> 8; \
966 int compC = rfunc(&src[i*6+4]) >> 8; \
968 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
972 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
973 uint8_t *dstU, uint8_t *dstV, \
974 const uint8_t *src1, const uint8_t *src2, \
975 int width, uint32_t *unused) \
978 assert(src1==src2); \
979 for (i = 0; i < width; i++) { \
980 int compA = rfunc(&src1[6*i + 0]) >> 8; \
981 int compB = rfunc(&src1[6*i + 2]) >> 8; \
982 int compC = rfunc(&src1[6*i + 4]) >> 8; \
984 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
985 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
989 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
990 uint8_t *dstU, uint8_t *dstV, \
991 const uint8_t *src1, const uint8_t *src2, \
992 int width, uint32_t *unused) \
995 assert(src1==src2); \
996 for (i = 0; i < width; i++) { \
997 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
998 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
999 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1001 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1002 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
/* Instantiate for both component orders and both endiannesses. */
1005 rgb48funcs(LE, AV_RL16, r, g, b);
1006 rgb48funcs(BE, AV_RB16, r, g, b);
1007 rgb48funcs(LE, AV_RL16, b, g, r);
1008 rgb48funcs(BE, AV_RB16, b, g, r);
/* Generates a packed-RGB/BGR -> luma converter: each pixel is read as one
 * `type` word, channels are extracted with shift+mask, and the RY/GY/BY
 * weighted sum (pre-shifted to line up sub-byte channels) is rounded with
 * 33<<(S-1) == (16 + 0.5)<<S and shifted down by S. */
1010 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1011 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1012 int width, uint32_t *unused)\
1015 for (i=0; i<width; i++) {\
1016 int b= (((const type*)src)[i]>>shb)&maskb;\
1017 int g= (((const type*)src)[i]>>shg)&maskg;\
1018 int r= (((const type*)src)[i]>>shr)&maskr;\
1020 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
/* One instantiation per supported 32/16/15-bit packed RGB layout; the
 * coefficient shifts compensate for each channel's bit position. */
1024 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1025 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1026 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1027 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1028 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1029 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1030 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1031 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel from packed ABGR (alpha first) into a plane.
 * NOTE(review): the loop bodies fall on lines missing from this
 * extraction -- presumably dst[i]= src[4*i]; confirm against upstream. */
1033 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1036 for (i=0; i<width; i++) {
/* Extract the alpha channel from packed RGBA (alpha last) into a plane. */
1041 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1044 for (i=0; i<width; i++) {
/* Generates two packed-RGB/BGR -> chroma converters:
 *   name_c      - one U/V sample per input pixel
 *   name_half_c - one U/V sample per *pair* of pixels, channel sums taken
 *                 with carry-aware masking ((maskX|2*maskX) keeps the
 *                 extra carry bit of the two-pixel sum in place)
 * Rounding: 257<<(S-1) == (128 + 0.5)<<S for the per-pixel variant and
 * 257<<S for the averaged variant (extra bit absorbed by >>(S+1)). */
1049 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1050 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1051 const uint8_t *src, const uint8_t *dummy, \
1052 int width, uint32_t *unused)\
1055 for (i=0; i<width; i++) {\
1056 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1057 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1058 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1060 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1061 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1064 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1065 const uint8_t *src, const uint8_t *dummy, \
1066 int width, uint32_t *unused)\
1069 for (i=0; i<width; i++) {\
1070 int pix0= ((const type*)src)[2*i+0]>>shp;\
1071 int pix1= ((const type*)src)[2*i+1]>>shp;\
1072 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1073 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1074 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1075 g&= maskg|(2*maskg);\
1079 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1080 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
/* One instantiation per packed layout; shp pre-shifts formats that carry
 * the RGB payload in the high bytes (the "_1" 32-bit variants). */
1084 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1085 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1086 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1087 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1088 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1089 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1090 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1091 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 input: look each palette index up in `pal` (preconverted to YUV)
 * and take the low byte as luma. */
1093 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1096 for (i=0; i<width; i++) {
1099 dst[i]= pal[d] & 0xFF;
/* PAL8 input: chroma from the same preconverted palette (U/V extraction
 * of `p` falls on lines missing from this extraction). */
1103 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1104 const uint8_t *src1, const uint8_t *src2,
1105 int width, uint32_t *pal)
1108 assert(src1 == src2);
1109 for (i=0; i<width; i++) {
1110 int p= pal[src1[i]];
/* Unpack 1 bpp monochrome to 8-bit luma, MSB first: each source bit
 * expands to 0 or 255.  The white/black variants differ only in the bit
 * inversion applied to `d` (on lines missing from this extraction). */
1117 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1118 int width, uint32_t *unused)
1121 for (i=0; i<width/8; i++) {
1124 dst[8*i+j]= ((d>>(7-j))&1)*255;
1128 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1129 int width, uint32_t *unused)
1132 for (i=0; i<width/8; i++) {
1135 dst[8*i+j]= ((d>>(7-j))&1)*255;
/**
 * Unscaled 1:1 vertical output to 8-bit planar YUV(A): single source line
 * per plane, (x+64)>>7 rounds the 15-bit intermediate back to 8 bits with
 * clipping.  Alpha only when built with alpha support and aDest is set.
 */
1139 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1140 const int16_t *chrUSrc, const int16_t *chrVSrc,
1141 const int16_t *alpSrc,
1142 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1143 uint8_t *aDest, int dstW, int chrDstW)
1146 for (i=0; i<dstW; i++) {
1147 int val= (lumSrc[i]+64)>>7;
1148 dest[i]= av_clip_uint8(val);
1152 for (i=0; i<chrDstW; i++) {
1153 int u=(chrUSrc[i]+64)>>7;
1154 int v=(chrVSrc[i]+64)>>7;
1155 uDest[i]= av_clip_uint8(u);
1156 vDest[i]= av_clip_uint8(v);
1159 if (CONFIG_SWSCALE_ALPHA && aDest)
1160 for (i=0; i<dstW; i++) {
1161 int val= (alpSrc[i]+64)>>7;
1162 aDest[i]= av_clip_uint8(val);
1167 * vertical bilinear scale YV12 to RGB
/**
 * Two-line vertical bilinear blend to any packed output: yalpha/uvalpha
 * are 12-bit blend weights between buf0/buf1 (and the chroma/alpha
 * buffers); dispatch per destination format goes through
 * YSCALE_YUV_2_ANYRGB_C with the *_2_C pixel producers.
 */
1169 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1170 const uint16_t *buf1, const uint16_t *ubuf0,
1171 const uint16_t *ubuf1, const uint16_t *vbuf0,
1172 const uint16_t *vbuf1, const uint16_t *abuf0,
1173 const uint16_t *abuf1, uint8_t *dest, int dstW,
1174 int yalpha, int uvalpha, int y)
1176 int yalpha1=4095- yalpha;
1177 int uvalpha1=4095-uvalpha;
1180 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1184 * YV12 to RGB without scaling or interpolating
/**
 * 1:1 vertical output to any packed format.  uvalpha < 2048 means the
 * chroma phase is close enough to use ubuf1/vbuf1 directly (*_1_C
 * producers); otherwise the two chroma lines are averaged (*_1B_C).
 */
1186 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1187 const uint16_t *ubuf0, const uint16_t *ubuf1,
1188 const uint16_t *vbuf0, const uint16_t *vbuf1,
1189 const uint16_t *abuf0, uint8_t *dest, int dstW,
1190 int uvalpha, enum PixelFormat dstFormat,
1193 const int yalpha1=0;
1196 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1197 const int yalpha= 4096; //FIXME ...
1199 if (uvalpha < 2048) {
1200 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1202 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1206 //FIXME yuy2* can read up to 7 samples too much
/* YUYV input: luma sits at even bytes (extraction line missing). */
1208 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1212 for (i=0; i<width; i++)
/* YUYV input: U at byte 1, V at byte 3 of each 4-byte macropixel.
 * src2 must alias src1 (interleaved source). */
1216 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1217 const uint8_t *src2, int width, uint32_t *unused)
1220 for (i=0; i<width; i++) {
1221 dstU[i]= src1[4*i + 1];
1222 dstV[i]= src1[4*i + 3];
1224 assert(src1 == src2);
/* 16-bit little-endian gray/chroma input: take the high byte of each
 * 16-bit sample from two separate planes. */
1227 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1228 const uint8_t *src2, int width, uint32_t *unused)
1231 for (i=0; i<width; i++) {
1232 dstU[i]= src1[2*i + 1];
1233 dstV[i]= src2[2*i + 1];
/* This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2ToY/UV(dst, src+1, ...) would make every access unaligned.
 * In packed UYVY the luma occupies the odd bytes. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *luma = src + 1;
    int n;

    for (n = 0; n < width; n++)
        dst[n] = luma[2 * n];
}
/* De-interleave the chroma of a packed UYVY line: each 4-byte group is
 * U Y0 V Y1, so U sits at offset 0 and V at offset 2.  Both source
 * pointers must reference the same line. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *group = src1 + 4 * n;

        dstU[n] = group[0];
        dstV[n] = group[2];
    }
    assert(src1 == src2);
}
/* Reduce big-endian 16-bit chroma planes to 8 bits by keeping the
 * high (first) byte of every sample. */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n = 0;

    while (n < width) {
        dstU[n] = src1[2 * n];
        dstV[n] = src2[2 * n];
        n++;
    }
}
1268 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1269 const uint8_t *src, int width)
1272 for (i = 0; i < width; i++) {
1273 dst1[i] = src[2*i+0];
1274 dst2[i] = src[2*i+1];
/* NV12: the chroma plane interleaves U,V byte pairs — split U first, V second. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: like NV12 but with V,U byte order — split V first, U second. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
// FIXME Maybe dither instead.
/*
 * Generate the 9-/10-bit planar YUV input readers: samples are fetched
 * with rfunc (AV_RL16 / AV_RB16 depending on endianness) and reduced to
 * 8 bits by dropping the low (depth-8) bits.
 */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
\
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8); \

/* instantiate readers for 9- and 10-bit, both endiannesses */
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
1321 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1322 int width, uint32_t *unused)
1325 for (i=0; i<width; i++) {
1330 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1334 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1335 const uint8_t *src2, int width, uint32_t *unused)
1338 for (i=0; i<width; i++) {
1339 int b= src1[3*i + 0];
1340 int g= src1[3*i + 1];
1341 int r= src1[3*i + 2];
1343 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1344 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1346 assert(src1 == src2);
1349 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1350 const uint8_t *src2, int width, uint32_t *unused)
1353 for (i=0; i<width; i++) {
1354 int b= src1[6*i + 0] + src1[6*i + 3];
1355 int g= src1[6*i + 1] + src1[6*i + 4];
1356 int r= src1[6*i + 2] + src1[6*i + 5];
1358 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1359 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1361 assert(src1 == src2);
1364 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1368 for (i=0; i<width; i++) {
1373 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1377 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1378 const uint8_t *src2, int width, uint32_t *unused)
1382 for (i=0; i<width; i++) {
1383 int r= src1[3*i + 0];
1384 int g= src1[3*i + 1];
1385 int b= src1[3*i + 2];
1387 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1388 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1392 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1393 const uint8_t *src2, int width, uint32_t *unused)
1397 for (i=0; i<width; i++) {
1398 int r= src1[6*i + 0] + src1[6*i + 3];
1399 int g= src1[6*i + 1] + src1[6*i + 4];
1400 int b= src1[6*i + 2] + src1[6*i + 5];
1402 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1403 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
// bilinear / bicubic scaling

/* Generic horizontal scaler: for each destination sample, apply an
 * filterSize-tap FIR filter starting at filterPos[i] in the 8-bit source
 * and store the 15-bit-clipped result (intermediate is 7-bit shifted). */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;

    for (i = 0; i < dstW; i++) {
        const int16_t *taps = filter + filterSize * i;
        int base = filterPos[i];
        int acc = 0;
        int j;

        for (j = 0; j < filterSize; j++)
            acc += (int)src[base + j] * taps[j];

        dst[i] = FFMIN(acc >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
    }
}
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform

/* Expand MPEG-range chroma to full (JPEG) range in the 15-bit
 * intermediate domain; inputs are clamped at 30775 so the fixed-point
 * multiply cannot overflow. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;

        dstU[i] = (u*4663 - 9289992) >> 12; //-264
        dstV[i] = (v*4663 - 9289992) >> 12; //-264
    }
}
/* Compress full (JPEG) range chroma to MPEG range in the 15-bit
 * intermediate domain. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i = 0;

    while (i < width) {
        dstU[i] = (dstU[i]*1799 + 4081085) >> 11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085) >> 11; //1469
        i++;
    }
}
/* Expand MPEG-range luma to full (JPEG) range in the 15-bit intermediate
 * domain; inputs are clamped at 30189 so the fixed-point multiply cannot
 * overflow. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int y = dst[i] < 30189 ? dst[i] : 30189;

        dst[i] = (y*19077 - 39057361) >> 14;
    }
}
/* Compress full (JPEG) range luma to MPEG range in the 15-bit
 * intermediate domain. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i = 0;

    while (i < width) {
        dst[i] = (dst[i]*14071 + 33561947) >> 14;
        i++;
    }
}
1459 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1460 const uint8_t *src, int srcW, int xInc)
1463 unsigned int xpos=0;
1464 for (i=0;i<dstWidth;i++) {
1465 register unsigned int xx=xpos>>16;
1466 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1467 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// *** horizontal scale Y line to temp buffer
/*
 * Prepare one luma (or alpha, when isAlpha) line: run the per-format
 * input converter into formatConvBuffer, horizontally scale the result
 * into dst, then apply the luma range conversion (never applied to the
 * alpha channel — convertRange is NULL there).
 * NOTE(review): toYV12 and convertRange are context function pointers;
 * the full source guards these calls on non-NULL — confirm against the
 * complete file.
 */
static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;

    /* convert the input line into the planar 8-bit intermediate */
    toYV12(formatConvBuffer, src, srcW, pal);
    src= formatConvBuffer;

    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);

    convertRange(dst, dstWidth);
1498 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1499 int dstWidth, const uint8_t *src1,
1500 const uint8_t *src2, int srcW, int xInc)
1503 unsigned int xpos=0;
1504 for (i=0;i<dstWidth;i++) {
1505 register unsigned int xx=xpos>>16;
1506 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1507 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1508 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Prepare one pair of chroma lines: run the per-format input converter
 * (U into formatConvBuffer, V into the buf2 region placed behind it at a
 * 16-aligned offset), horizontally scale both planes into dst1/dst2, then
 * apply the chroma range conversion if one is configured.
 */
static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    /* second half of the conversion buffer, 16-byte aligned, holds V */
    uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
    c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
    src1= formatConvBuffer;

    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Select the C output writers for the destination format and store them
 * through the out-parameters: planar writers (yuv2yuv1/yuv2yuvX) are
 * chosen by bit depth and endianness, packed writers (yuv2packed1/2/X)
 * by whether SWS_FULL_CHR_H_INT full-chroma interpolation is requested.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        /* default 8-bit planar path */
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;
    if(c->flags & SWS_FULL_CHR_H_INT) {
        *yuv2packedX = yuv2rgbX_c_full;
        *yuv2packed1 = yuv2packed1_c;
        *yuv2packed2 = yuv2packed2_c;
        *yuv2packedX = yuv2packedX_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 to log the ring-buffer state during
 * scaling; DEBUG_BUFFERS() expects a SwsContext named 'c' in scope. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main C scaling loop.
 *
 * Converts and scales one input slice [srcSliceY, srcSliceY+srcSliceH)
 * into the destination picture: input lines are horizontally scaled into
 * the luma/chroma ring buffers, then vertically filtered and written in
 * the output format one destination line at a time.  The ring-buffer
 * cursors are persisted in the context so the caller may feed the image
 * in several slices.
 *
 * @return the number of destination lines written by this call
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    /* slice geometry in chroma lines, rounded per the vertical subsampling */
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); /* = ceil division */
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* not enough input yet: buffer what we have and bail out below */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
            /* pointers into the ring buffers at the first line the vertical
               filter for this output line needs */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                             uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                        chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);

    /* destination wants an alpha plane but the source provided none: fill opaque */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/**
 * Fill the context's function-pointer tables with the C reference
 * implementations: output writers, the horizontal scaler, the per-format
 * input converters (to planar YV12-style intermediates) and the
 * MPEG/JPEG range-conversion helpers.
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,

    c->hScale = hScale_c;

    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;

    /* input chroma converter, keyed on the source pixel format */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
    case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
    case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
    case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    /* with horizontal chroma subsampling the *_half converters average
       adjacent pixel pairs while converting */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;

    /* input luma and (optional) alpha converters */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    case PIX_FMT_YUYV422 :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; /* alpha is the odd byte */

    /* range converters, only for YUV output when src and dst ranges differ */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        c->lumConvertRange = lumRangeFromJpeg_c;
        c->chrConvertRange = chrRangeFromJpeg_c;
        c->lumConvertRange = lumRangeToJpeg_c;
        c->chrConvertRange = chrRangeToJpeg_c;

    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
1968 SwsFunc ff_getSwsFunc(SwsContext *c)
1970 sws_init_swScale_c(c);
1973 ff_sws_init_swScale_mmx(c);
1975 ff_sws_init_swScale_altivec(c);