2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB -> YUV conversion coefficients (ITU-R BT.601, studio
 * swing: luma scaled by 219/255, chroma by 224/255), in Q15
 * (RGB2YUV_SHIFT) fixed point with round-to-nearest.
 * NOTE(review): extraction had fused line-number prefixes on every
 * line; prefixes stripped, values unchanged. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB -> YUV coefficient rows (Kg, Kb, Kr, then the U and
 * V mixing rows), indexed by the SWS_CS_* colorspace identifiers.
 * All 8 rows were visible in the damaged extraction; only the closing
 * brace and the stray line-number prefixes were repaired. */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/* Vertical scale to 8-bit planar YUV(A): accumulate filter taps per pixel,
 * then >>19 and clip to 0..255 for each plane.
 * NOTE(review): fragment — the extraction dropped braces, declarations
 * (int i, j; int val/u/v inits) and the uDest guard; left byte-identical,
 * restore from upstream before compiling. */
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/* Vertical scale to NV12/NV21: luma as in yuv2yuvX_c; chroma written
 * interleaved into uDest, U-first for NV12, V-first otherwise (NV21).
 * NOTE(review): fragment — braces, declarations and the else branch
 * header were dropped by the extraction; left byte-identical. */
332 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
333 const int16_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int16_t **chrUSrc,
335 const int16_t **chrVSrc, int chrFilterSize,
336 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
337 uint8_t *vDest, uint8_t *aDest,
338 int dstW, int chrDstW)
340 enum PixelFormat dstFormat = c->dstFormat;
342 //FIXME Optimize (just quickly written not optimized..)
344 for (i=0; i<dstW; i++) {
347 for (j=0; j<lumFilterSize; j++)
348 val += lumSrc[j][i] * lumFilter[j];
350 dest[i]= av_clip_uint8(val>>19);
356 if (dstFormat == PIX_FMT_NV12)
357 for (i=0; i<chrDstW; i++) {
361 for (j=0; j<chrFilterSize; j++) {
362 u += chrUSrc[j][i] * chrFilter[j];
363 v += chrVSrc[j][i] * chrFilter[j];
366 uDest[2*i]= av_clip_uint8(u>>19);
367 uDest[2*i+1]= av_clip_uint8(v>>19);
370 for (i=0; i<chrDstW; i++) {
374 for (j=0; j<chrFilterSize; j++) {
375 u += chrUSrc[j][i] * chrFilter[j];
376 v += chrVSrc[j][i] * chrFilter[j];
379 uDest[2*i]= av_clip_uint8(v>>19);
380 uDest[2*i+1]= av_clip_uint8(u>>19);
/* YSCALE_* macro family: per-pixel-pair loop headers shared by the packed
 * output writers. The X variants run the full filter, the 2 variants blend
 * two lines (bilinear), the 1/1B variants use one line (or averaged
 * chroma). RGBn variants additionally look up the yuv2rgb tables.
 * NOTE(review): heavily damaged fragment — init lines (Y1/Y2/U/V
 * accumulators), closing braces and several continuation lines were
 * dropped by the extraction. Left byte-identical; too intricate to
 * reconstruct safely here — restore from upstream. */
384 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
385 for (i=0; i<(dstW>>1); i++) {\
391 int av_unused A1, A2;\
392 type av_unused *r, *b, *g;\
395 for (j=0; j<lumFilterSize; j++) {\
396 Y1 += lumSrc[j][i2] * lumFilter[j];\
397 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
399 for (j=0; j<chrFilterSize; j++) {\
400 U += chrUSrc[j][i] * chrFilter[j];\
401 V += chrVSrc[j][i] * chrFilter[j];\
410 for (j=0; j<lumFilterSize; j++) {\
411 A1 += alpSrc[j][i2 ] * lumFilter[j];\
412 A2 += alpSrc[j][i2+1] * lumFilter[j];\
417 if ((Y1|Y2|U|V)&256) {\
418 if (Y1>255) Y1=255; \
419 else if (Y1<0)Y1=0; \
420 if (Y2>255) Y2=255; \
421 else if (Y2<0)Y2=0; \
427 if (alpha && ((A1|A2)&256)) {\
428 A1=av_clip_uint8(A1);\
429 A2=av_clip_uint8(A2);\
432 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
433 for (i=0; i<dstW; i++) {\
441 for (j=0; j<lumFilterSize; j++) {\
442 Y += lumSrc[j][i ] * lumFilter[j];\
444 for (j=0; j<chrFilterSize; j++) {\
445 U += chrUSrc[j][i] * chrFilter[j];\
446 V += chrVSrc[j][i] * chrFilter[j];\
453 for (j=0; j<lumFilterSize; j++)\
454 A += alpSrc[j][i ] * lumFilter[j];\
457 A = av_clip_uint8(A);\
459 Y-= c->yuv2rgb_y_offset;\
460 Y*= c->yuv2rgb_y_coeff;\
462 R= Y + V*c->yuv2rgb_v2r_coeff;\
463 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
464 B= Y + U*c->yuv2rgb_u2b_coeff;\
465 if ((R|G|B)&(0xC0000000)) {\
466 if (R>=(256<<22)) R=(256<<22)-1; \
468 if (G>=(256<<22)) G=(256<<22)-1; \
470 if (B>=(256<<22)) B=(256<<22)-1; \
474 #define YSCALE_YUV_2_GRAY16_C \
475 for (i=0; i<(dstW>>1); i++) {\
484 for (j=0; j<lumFilterSize; j++) {\
485 Y1 += lumSrc[j][i2] * lumFilter[j];\
486 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
490 if ((Y1|Y2|U|V)&65536) {\
491 if (Y1>65535) Y1=65535; \
492 else if (Y1<0)Y1=0; \
493 if (Y2>65535) Y2=65535; \
494 else if (Y2<0)Y2=0; \
497 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
498 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
499 r = (type *)c->table_rV[V]; \
500 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
501 b = (type *)c->table_bU[U];
503 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
504 for (i=0; i<(dstW>>1); i++) { \
506 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
507 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
508 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
509 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
510 type av_unused *r, *b, *g; \
511 int av_unused A1, A2; \
513 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
514 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
517 #define YSCALE_YUV_2_GRAY16_2_C \
518 for (i=0; i<(dstW>>1); i++) { \
520 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
521 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
523 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
524 YSCALE_YUV_2_PACKED2_C(type,alpha)\
525 r = (type *)c->table_rV[V];\
526 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
527 b = (type *)c->table_bU[U];
529 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
530 for (i=0; i<(dstW>>1); i++) {\
532 int Y1= buf0[i2 ]>>7;\
533 int Y2= buf0[i2+1]>>7;\
534 int U= (ubuf1[i])>>7;\
535 int V= (vbuf1[i])>>7;\
536 type av_unused *r, *b, *g;\
537 int av_unused A1, A2;\
543 #define YSCALE_YUV_2_GRAY16_1_C \
544 for (i=0; i<(dstW>>1); i++) {\
546 int Y1= buf0[i2 ]<<1;\
547 int Y2= buf0[i2+1]<<1;
549 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
550 YSCALE_YUV_2_PACKED1_C(type,alpha)\
551 r = (type *)c->table_rV[V];\
552 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
553 b = (type *)c->table_bU[U];
555 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
556 for (i=0; i<(dstW>>1); i++) {\
558 int Y1= buf0[i2 ]>>7;\
559 int Y2= buf0[i2+1]>>7;\
560 int U= (ubuf0[i] + ubuf1[i])>>8;\
561 int V= (vbuf0[i] + vbuf1[i])>>8;\
562 type av_unused *r, *b, *g;\
563 int av_unused A1, A2;\
569 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
570 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
571 r = (type *)c->table_rV[V];\
572 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
573 b = (type *)c->table_bU[U];
575 #define YSCALE_YUV_2_MONO2_C \
576 const uint8_t * const d128=dither_8x8_220[y&7];\
577 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
578 for (i=0; i<dstW-7; i+=8) {\
580 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
581 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
582 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
583 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
584 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
585 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
586 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
587 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
588 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
592 #define YSCALE_YUV_2_MONOX_C \
593 const uint8_t * const d128=dither_8x8_220[y&7];\
594 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
596 for (i=0; i<dstW-1; i+=2) {\
601 for (j=0; j<lumFilterSize; j++) {\
602 Y1 += lumSrc[j][i] * lumFilter[j];\
603 Y2 += lumSrc[j][i+1] * lumFilter[j];\
613 acc+= acc + g[Y1+d128[(i+0)&7]];\
614 acc+= acc + g[Y2+d128[(i+1)&7]];\
616 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Dispatch on c->dstFormat and write one output line in the requested
 * packed format, using one of the four loop-header macros passed in
 * (full filter / bilinear / gray16 / mono). The 16/15/12/8/4-bit RGB
 * branches apply the dither tables declared above.
 * NOTE(review): heavily damaged fragment — case labels, break statements,
 * braces and some comment lines were dropped by the extraction. Left
 * byte-identical; restore from upstream before compiling. */
621 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
622 switch(c->dstFormat) {\
623 case PIX_FMT_RGB48BE:\
624 case PIX_FMT_RGB48LE:\
626 ((uint8_t*)dest)[ 0]= r[Y1];\
627 ((uint8_t*)dest)[ 1]= r[Y1];\
628 ((uint8_t*)dest)[ 2]= g[Y1];\
629 ((uint8_t*)dest)[ 3]= g[Y1];\
630 ((uint8_t*)dest)[ 4]= b[Y1];\
631 ((uint8_t*)dest)[ 5]= b[Y1];\
632 ((uint8_t*)dest)[ 6]= r[Y2];\
633 ((uint8_t*)dest)[ 7]= r[Y2];\
634 ((uint8_t*)dest)[ 8]= g[Y2];\
635 ((uint8_t*)dest)[ 9]= g[Y2];\
636 ((uint8_t*)dest)[10]= b[Y2];\
637 ((uint8_t*)dest)[11]= b[Y2];\
641 case PIX_FMT_BGR48BE:\
642 case PIX_FMT_BGR48LE:\
644 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
645 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
646 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
647 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
648 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
649 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
656 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
657 func(uint32_t,needAlpha)\
658 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
659 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
662 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
664 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
665 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
669 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
670 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
678 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
679 func(uint32_t,needAlpha)\
680 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
681 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
684 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
686 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
687 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
691 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
692 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
699 ((uint8_t*)dest)[0]= r[Y1];\
700 ((uint8_t*)dest)[1]= g[Y1];\
701 ((uint8_t*)dest)[2]= b[Y1];\
702 ((uint8_t*)dest)[3]= r[Y2];\
703 ((uint8_t*)dest)[4]= g[Y2];\
704 ((uint8_t*)dest)[5]= b[Y2];\
710 ((uint8_t*)dest)[0]= b[Y1];\
711 ((uint8_t*)dest)[1]= g[Y1];\
712 ((uint8_t*)dest)[2]= r[Y1];\
713 ((uint8_t*)dest)[3]= b[Y2];\
714 ((uint8_t*)dest)[4]= g[Y2];\
715 ((uint8_t*)dest)[5]= r[Y2];\
719 case PIX_FMT_RGB565BE:\
720 case PIX_FMT_RGB565LE:\
721 case PIX_FMT_BGR565BE:\
722 case PIX_FMT_BGR565LE:\
724 const int dr1= dither_2x2_8[y&1 ][0];\
725 const int dg1= dither_2x2_4[y&1 ][0];\
726 const int db1= dither_2x2_8[(y&1)^1][0];\
727 const int dr2= dither_2x2_8[y&1 ][1];\
728 const int dg2= dither_2x2_4[y&1 ][1];\
729 const int db2= dither_2x2_8[(y&1)^1][1];\
731 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
732 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
736 case PIX_FMT_RGB555BE:\
737 case PIX_FMT_RGB555LE:\
738 case PIX_FMT_BGR555BE:\
739 case PIX_FMT_BGR555LE:\
741 const int dr1= dither_2x2_8[y&1 ][0];\
742 const int dg1= dither_2x2_8[y&1 ][1];\
743 const int db1= dither_2x2_8[(y&1)^1][0];\
744 const int dr2= dither_2x2_8[y&1 ][1];\
745 const int dg2= dither_2x2_8[y&1 ][0];\
746 const int db2= dither_2x2_8[(y&1)^1][1];\
748 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
749 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
753 case PIX_FMT_RGB444BE:\
754 case PIX_FMT_RGB444LE:\
755 case PIX_FMT_BGR444BE:\
756 case PIX_FMT_BGR444LE:\
758 const int dr1= dither_4x4_16[y&3 ][0];\
759 const int dg1= dither_4x4_16[y&3 ][1];\
760 const int db1= dither_4x4_16[(y&3)^3][0];\
761 const int dr2= dither_4x4_16[y&3 ][1];\
762 const int dg2= dither_4x4_16[y&3 ][0];\
763 const int db2= dither_4x4_16[(y&3)^3][1];\
765 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
766 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
773 const uint8_t * const d64= dither_8x8_73[y&7];\
774 const uint8_t * const d32= dither_8x8_32[y&7];\
776 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
777 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
784 const uint8_t * const d64= dither_8x8_73 [y&7];\
785 const uint8_t * const d128=dither_8x8_220[y&7];\
787 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
788 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
792 case PIX_FMT_RGB4_BYTE:\
793 case PIX_FMT_BGR4_BYTE:\
795 const uint8_t * const d64= dither_8x8_73 [y&7];\
796 const uint8_t * const d128=dither_8x8_220[y&7];\
798 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
799 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
803 case PIX_FMT_MONOBLACK:\
804 case PIX_FMT_MONOWHITE:\
809 case PIX_FMT_YUYV422:\
811 ((uint8_t*)dest)[2*i2+0]= Y1;\
812 ((uint8_t*)dest)[2*i2+1]= U;\
813 ((uint8_t*)dest)[2*i2+2]= Y2;\
814 ((uint8_t*)dest)[2*i2+3]= V;\
817 case PIX_FMT_UYVY422:\
819 ((uint8_t*)dest)[2*i2+0]= U;\
820 ((uint8_t*)dest)[2*i2+1]= Y1;\
821 ((uint8_t*)dest)[2*i2+2]= V;\
822 ((uint8_t*)dest)[2*i2+3]= Y2;\
825 case PIX_FMT_GRAY16BE:\
827 ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
828 ((uint8_t*)dest)[2*i2+1]= Y1;\
829 ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
830 ((uint8_t*)dest)[2*i2+3]= Y2;\
833 case PIX_FMT_GRAY16LE:\
835 ((uint8_t*)dest)[2*i2+0]= Y1;\
836 ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
837 ((uint8_t*)dest)[2*i2+2]= Y2;\
838 ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
/* Full-filter vertical scale to any packed output format: delegates to
 * YSCALE_YUV_2_ANYRGB_C with the X-variant loop macros.
 * NOTE(review): fragment — braces and local declarations dropped by the
 * extraction; left byte-identical. */
843 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
844 const int16_t **lumSrc, int lumFilterSize,
845 const int16_t *chrFilter, const int16_t **chrUSrc,
846 const int16_t **chrVSrc, int chrFilterSize,
847 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
850 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution vertical scale to packed RGB(A)/BGR(A), computed
 * per pixel via the c->yuv2rgb_* coefficients instead of lookup tables.
 * Branches on dstFormat to fix component order and alpha position.
 * NOTE(review): fragment — case labels, R/G/B store statements and braces
 * dropped by the extraction; left byte-identical. */
853 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
854 const int16_t **lumSrc, int lumFilterSize,
855 const int16_t *chrFilter, const int16_t **chrUSrc,
856 const int16_t **chrVSrc, int chrFilterSize,
857 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
860 int step= c->dstFormatBpp/8;
863 switch(c->dstFormat) {
871 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
872 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
873 dest[aidx]= needAlpha ? A : 255;
880 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
881 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
889 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
906 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
907 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
908 dest[aidx]= needAlpha ? A : 255;
915 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
916 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
924 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
939 static av_always_inline void fillPlane(uint8_t* plane, int stride,
940 int width, int height,
944 uint8_t *ptr = plane + stride*y;
945 for (i=0; i<height; i++) {
946 memset(ptr, val, width);
/* Generate 48-bit RGB/BGR -> Y, -> UV and -> half-width UV converters for
 * both endiannesses (rfunc = AV_RL16/AV_RB16). compA/compB/compC name the
 * first/second/third 16-bit component so one macro serves RGB and BGR.
 * NOTE(review): fragment — braces, blank continuation lines and parts of
 * the parameter lists were dropped by the extraction; left byte-identical. */
951 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
952 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
953 uint8_t *dst, const uint8_t *src, int width, \
957 for (i = 0; i < width; i++) { \
958 int compA = rfunc(&src[i*6+0]) >> 8; \
959 int compB = rfunc(&src[i*6+2]) >> 8; \
960 int compC = rfunc(&src[i*6+4]) >> 8; \
962 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
966 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
967 uint8_t *dstU, uint8_t *dstV, \
968 const uint8_t *src1, const uint8_t *src2, \
969 int width, uint32_t *unused) \
972 assert(src1==src2); \
973 for (i = 0; i < width; i++) { \
974 int compA = rfunc(&src1[6*i + 0]) >> 8; \
975 int compB = rfunc(&src1[6*i + 2]) >> 8; \
976 int compC = rfunc(&src1[6*i + 4]) >> 8; \
978 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
979 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
983 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
984 uint8_t *dstU, uint8_t *dstV, \
985 const uint8_t *src1, const uint8_t *src2, \
986 int width, uint32_t *unused) \
989 assert(src1==src2); \
990 for (i = 0; i < width; i++) { \
991 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
992 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
993 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
995 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
996 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
999 rgb48funcs(LE, AV_RL16, r, g, b);
1000 rgb48funcs(BE, AV_RB16, r, g, b);
1001 rgb48funcs(LE, AV_RL16, b, g, r);
1002 rgb48funcs(BE, AV_RB16, b, g, r);
1004 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1005 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1006 int width, uint32_t *unused)\
1009 for (i=0; i<width; i++) {\
1010 int b= (((const type*)src)[i]>>shb)&maskb;\
1011 int g= (((const type*)src)[i]>>shg)&maskg;\
1012 int r= (((const type*)src)[i]>>shr)&maskr;\
1014 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1018 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1019 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1020 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1021 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1022 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1023 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1024 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1025 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel (byte 0 of each 4-byte ABGR pixel) into dst.
 * NOTE(review): loop body was dropped by the extraction; restored from
 * upstream libswscale — verify against repository history. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i];
    }
}
/* Extract the alpha channel (byte 3 of each 4-byte RGBA pixel) into dst.
 * NOTE(review): loop body was dropped by the extraction; restored from
 * upstream libswscale — verify against repository history. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i+3];
    }
}
/* Generate packed-RGB/BGR -> chroma (U,V) converters, plus a _half
 * variant that averages two horizontally adjacent pixels; shp pre-shifts
 * the whole pixel (used by the *321 layouts), the mask arithmetic in the
 * half variant sums components of both pixels without unpacking them.
 * NOTE(review): fragment — braces, the g>>=shg statement and blank
 * continuation lines were dropped by the extraction; left byte-identical. */
1043 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1044 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1045 const uint8_t *src, const uint8_t *dummy, \
1046 int width, uint32_t *unused)\
1049 for (i=0; i<width; i++) {\
1050 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1051 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1052 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1054 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1055 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1058 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1059 const uint8_t *src, const uint8_t *dummy, \
1060 int width, uint32_t *unused)\
1063 for (i=0; i<width; i++) {\
1064 int pix0= ((const type*)src)[2*i+0]>>shp;\
1065 int pix1= ((const type*)src)[2*i+1]>>shp;\
1066 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1067 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1068 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1069 g&= maskg|(2*maskg);\
1073 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1074 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1078 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1079 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1080 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1081 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1082 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1083 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1084 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1085 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 -> luma: look each index up in the 32-bit palette and keep the low
 * byte (the palette stores Y there for this path).
 * NOTE(review): braces and the "int d" declaration line were dropped by
 * the extraction; restored from upstream libswscale. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/* PAL8 -> chroma: U is byte 1 and V is byte 2 of each palette entry.
 * src1 and src2 must alias (asserted), matching the other ToUV readers.
 * NOTE(review): the dstU/dstV store lines were dropped by the extraction;
 * restored from upstream libswscale — verify against repository history. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/* 1 bpp mono-white -> 8-bit luma: invert each source byte (white is 0),
 * then expand its 8 bits MSB-first to 0/255 bytes. Processes whole bytes
 * only (width/8).
 * NOTE(review): the "int d = ~src[i]" line, inner loop header and braces
 * were dropped by the extraction; restored from upstream libswscale. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/* 1 bpp mono-black -> 8-bit luma: expand each source byte's bits
 * MSB-first to 0/255 bytes (no inversion; black is 0). Whole bytes only.
 * NOTE(review): the "int d = src[i]" line, inner loop header and braces
 * were dropped by the extraction; restored from upstream libswscale. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1133 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1134 const int16_t *chrUSrc, const int16_t *chrVSrc,
1135 const int16_t *alpSrc,
1136 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1137 uint8_t *aDest, int dstW, int chrDstW)
1140 for (i=0; i<dstW; i++) {
1141 int val= (lumSrc[i]+64)>>7;
1142 dest[i]= av_clip_uint8(val);
1146 for (i=0; i<chrDstW; i++) {
1147 int u=(chrUSrc[i]+64)>>7;
1148 int v=(chrVSrc[i]+64)>>7;
1149 uDest[i]= av_clip_uint8(u);
1150 vDest[i]= av_clip_uint8(v);
1153 if (CONFIG_SWSCALE_ALPHA && aDest)
1154 for (i=0; i<dstW; i++) {
1155 int val= (alpSrc[i]+64)>>7;
1156 aDest[i]= av_clip_uint8(val);
/* Vertical bilinear scale YV12 -> packed RGB: blends two source lines
 * with yalpha/uvalpha (12-bit weights) via the *2_C macro variants.
 * NOTE(review): fragment — the opening comment delimiter, braces and the
 * "int i" declaration were dropped by the extraction; left byte-identical. */
1161 * vertical bilinear scale YV12 to RGB
1163 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1164 const uint16_t *buf1, const uint16_t *ubuf0,
1165 const uint16_t *ubuf1, const uint16_t *vbuf0,
1166 const uint16_t *vbuf1, const uint16_t *abuf0,
1167 const uint16_t *abuf1, uint8_t *dest, int dstW,
1168 int yalpha, int uvalpha, int y)
1170 int yalpha1=4095- yalpha;
1171 int uvalpha1=4095-uvalpha;
1174 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
/* YV12 -> packed RGB without vertical scaling/interpolation; chroma is
 * taken from one line or averaged from two depending on uvalpha phase.
 * NOTE(review): fragment — the opening comment delimiter, braces and the
 * "int i" declaration were dropped by the extraction; left byte-identical. */
1178 * YV12 to RGB without scaling or interpolating
1180 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1181 const uint16_t *ubuf0, const uint16_t *ubuf1,
1182 const uint16_t *vbuf0, const uint16_t *vbuf1,
1183 const uint16_t *abuf0, uint8_t *dest, int dstW,
1184 int uvalpha, enum PixelFormat dstFormat,
1187 const int yalpha1=0;
1190 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1191 const int yalpha= 4096; //FIXME ...
1193 if (uvalpha < 2048) {
1194 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1196 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1200 //FIXME yuy2* can read up to 7 samples too much
/* YUY2 -> luma: luma bytes sit at even offsets (Y U Y V ...).
 * NOTE(review): the "uint32_t *unused)" parameter line, braces and the
 * store statement were dropped by the extraction; restored from upstream. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/* YUY2 -> chroma: U at offset 1, V at offset 3 of each 4-byte pair.
 * src1 and src2 must alias (asserted). NOTE(review): braces dropped by
 * the extraction were restored; statements are as in the fragment. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/* 16-bit little-endian planar chroma -> 8 bit: keep the high (odd-offset)
 * byte of each sample; U and V come from separate planes.
 * NOTE(review): braces dropped by the extraction were restored. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1];
        dstV[i]= src2[2*i + 1];
    }
}
1231 /* This is almost identical to the previous, and exists only because
1232 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract the luma plane from packed UYVY: Y samples sit at odd byte
 * offsets (U Y V Y ...). */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = src[2 * i + 1];
}
/* Extract chroma from packed UYVY: in each 4-byte group U is at byte 0
 * and V at byte 2.  src1 and src2 must alias (asserted). */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++) {
        const uint8_t *grp = src1 + 4 * i;
        dstU[i] = grp[0];
        dstV[i] = grp[2];
    }
    assert(src1 == src2);
}
/* Reduce 16-bit big-endian chroma samples to 8 bits by taking the high
 * byte (offset 0 of each BE 16-bit sample). */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = src1[2 * i];
        dstV[i] = src2[2 * i];
    }
}
1262 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1263 const uint8_t *src, int width)
1266 for (i = 0; i < width; i++) {
1267 dst1[i] = src[2*i+0];
1268 dst2[i] = src[2*i+1];
/* NV12 chroma: interleaved samples are U,V — even bytes feed dstU,
 * odd bytes feed dstV (see nvXXtoUV_c).  src2 is unused: NV12 carries
 * both chroma components in one plane. */
1272 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1273 const uint8_t *src1, const uint8_t *src2,
1274 int width, uint32_t *unused)
1276 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma: interleaved samples are V,U — same de-interleave as
 * NV12 but with swapped destinations. */
1279 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1280 const uint8_t *src1, const uint8_t *src2,
1281 int width, uint32_t *unused)
1283 nvXXtoUV_c(dstV, dstU, src1, width);
1286 // FIXME Maybe dither instead.
/* Generate input converters for 9- and 10-bit planar YUV: read each
 * 16-bit sample with the endianness-specific reader `rfunc` and reduce
 * it to 8 bits by dropping the (depth-8) low bits — plain truncation,
 * no dithering (see FIXME above).  Instantiated below for 9/10 bit,
 * LE/BE, producing e.g. LE9ToUV_c, BE10ToY_c, ... */
1287 #define YUV_NBPS(depth, endianness, rfunc) \
1288 static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1289 const uint8_t *_srcU, const uint8_t *_srcV, \
1290 int width, uint32_t *unused) \
1293 const uint16_t *srcU = (const uint16_t*)_srcU; \
1294 const uint16_t *srcV = (const uint16_t*)_srcV; \
1295 for (i = 0; i < width; i++) { \
1296 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
1297 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
1301 static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
1302 int width, uint32_t *unused) \
1305 const uint16_t *srcY = (const uint16_t*)_srcY; \
1306 for (i = 0; i < width; i++) \
1307 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
1310 YUV_NBPS( 9, LE, AV_RL16)
1311 YUV_NBPS( 9, BE, AV_RB16)
1312 YUV_NBPS(10, LE, AV_RL16)
1313 YUV_NBPS(10, BE, AV_RB16)
1315 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1316 int width, uint32_t *unused)
1319 for (i=0; i<width; i++) {
1324 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1328 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1329 const uint8_t *src2, int width, uint32_t *unused)
1332 for (i=0; i<width; i++) {
1333 int b= src1[3*i + 0];
1334 int g= src1[3*i + 1];
1335 int r= src1[3*i + 2];
1337 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1338 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1340 assert(src1 == src2);
1343 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1344 const uint8_t *src2, int width, uint32_t *unused)
1347 for (i=0; i<width; i++) {
1348 int b= src1[6*i + 0] + src1[6*i + 3];
1349 int g= src1[6*i + 1] + src1[6*i + 4];
1350 int r= src1[6*i + 2] + src1[6*i + 5];
1352 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1353 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1355 assert(src1 == src2);
1358 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1362 for (i=0; i<width; i++) {
1367 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1371 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1372 const uint8_t *src2, int width, uint32_t *unused)
1376 for (i=0; i<width; i++) {
1377 int r= src1[3*i + 0];
1378 int g= src1[3*i + 1];
1379 int b= src1[3*i + 2];
1381 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1382 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1386 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1387 const uint8_t *src2, int width, uint32_t *unused)
1391 for (i=0; i<width; i++) {
1392 int r= src1[6*i + 0] + src1[6*i + 3];
1393 int g= src1[6*i + 1] + src1[6*i + 4];
1394 int b= src1[6*i + 2] + src1[6*i + 5];
1396 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1397 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1402 // bilinear / bicubic scaling
/* Generic horizontal FIR scaler (bilinear/bicubic/...): for each output
 * pixel, dot-product filterSize source samples starting at filterPos[i]
 * with the i-th filter row, drop 7 fraction bits, and clamp to the
 * int16 maximum (the cubic filter's coefficients can overflow it).
 * srcW and xInc are unused in the C version (kept for the asm-
 * compatible function-pointer signature). */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        const int16_t *coef = filter + filterSize * i;
        int srcPos = filterPos[i];
        int acc = 0;
        int j;
        for (j = 0; j < filterSize; j++)
            acc += (int)src[srcPos + j] * coef[j];
        acc >>= 7;
        dst[i] = acc < (1 << 15) - 1 ? acc : (1 << 15) - 1;
    }
}
1422 //FIXME all pal and rgb srcFormats could do this conversion as well
1423 //FIXME all scalers more complex than bilinear could do half of this transform
/* In-place limited(MPEG)->full(JPEG) range expansion of horizontally
 * scaled 15-bit chroma: clamp the input, then apply the fixed-point
 * affine remap (x*4663 - 9289992) >> 12. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    uint16_t *planes[2] = { dstU, dstV };
    int p;
    for (p = 0; p < 2; p++) {
        uint16_t *d = planes[p];
        int i;
        for (i = 0; i < width; i++) {
            int x = d[i] < 30775 ? d[i] : 30775;
            d[i] = (x * 4663 - 9289992) >> 12; // -264
        }
    }
}
/* In-place full(JPEG)->limited(MPEG) range compression of horizontally
 * scaled 15-bit chroma: (x*1799 + 4081085) >> 11. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    uint16_t *planes[2] = { dstU, dstV };
    int p;
    for (p = 0; p < 2; p++) {
        uint16_t *d = planes[p];
        int i;
        for (i = 0; i < width; i++)
            d[i] = (d[i] * 1799 + 4081085) >> 11; // 1469
    }
}
/* In-place limited->full range expansion of 15-bit luma: clamp, then
 * (x*19077 - 39057361) >> 14. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    uint16_t *end = dst + width;
    for (; dst < end; dst++) {
        int y = *dst < 30189 ? *dst : 30189;
        *dst = (y * 19077 - 39057361) >> 14;
    }
}
/* In-place full->limited range compression of 15-bit luma:
 * (x*14071 + 33561947) >> 14. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    uint16_t *end = dst + width;
    for (; dst < end; dst++)
        *dst = (*dst * 14071 + 33561947) >> 14;
}
1453 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1454 const uint8_t *src, int srcW, int xInc)
1457 unsigned int xpos=0;
1458 for (i=0;i<dstWidth;i++) {
1459 register unsigned int xx=xpos>>16;
1460 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1461 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1466 // *** horizontal scale Y line to temp buffer
/**
 * Horizontally scale one luma (or alpha, when isAlpha) input line into
 * the 16-bit temp buffer dst:
 *   1. optionally convert the input line to 8-bit via
 *      c->lumToYV12 / c->alpToYV12 (into formatConvBuffer),
 *   2. scale with the generic FIR filter (c->hScale) or the fast
 *      bilinear path (c->hyscale_fast),
 *   3. optionally apply luma range conversion (never for alpha).
 */
1467 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1468 const uint8_t *src, int srcW, int xInc,
1469 const int16_t *hLumFilter,
1470 const int16_t *hLumFilterPos, int hLumFilterSize,
1471 uint8_t *formatConvBuffer,
1472 uint32_t *pal, int isAlpha)
1474 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1475 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* NOTE(review): the guard lines (if (toYV12) { ... } / if (convertRange))
 * are not visible in this excerpt — confirm against the full file. */
1478 toYV12(formatConvBuffer, src, srcW, pal);
1479 src= formatConvBuffer;
1482 if (!c->hyscale_fast) {
1483 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1484 } else { // fast bilinear upscale / crap downscale
1485 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1489 convertRange(dst, dstWidth);
1492 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1493 int dstWidth, const uint8_t *src1,
1494 const uint8_t *src2, int srcW, int xInc)
1497 unsigned int xpos=0;
1498 for (i=0;i<dstWidth;i++) {
1499 register unsigned int xx=xpos>>16;
1500 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1501 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1502 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Horizontally scale one pair of chroma lines (U into dst1, V into
 * dst2), mirroring hyscale(): optional input conversion via
 * c->chrToYV12 using formatConvBuffer (U) and a second region past an
 * aligned srcW offset (V), then FIR or fast-bilinear scaling, then
 * optional chroma range conversion.
 */
1507 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1508 const uint8_t *src1, const uint8_t *src2,
1509 int srcW, int xInc, const int16_t *hChrFilter,
1510 const int16_t *hChrFilterPos, int hChrFilterSize,
1511 uint8_t *formatConvBuffer, uint32_t *pal)
/* second, 16-byte-aligned half of the conversion buffer receives V */
1514 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1515 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1516 src1= formatConvBuffer;
/* NOTE(review): the matching "src2 = buf2;" and the surrounding
 * if (c->chrToYV12) guard are not visible in this excerpt — confirm. */
1520 if (!c->hcscale_fast) {
1521 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1522 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1523 } else { // fast bilinear upscale / crap downscale
1524 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1527 if (c->chrConvertRange)
1528 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the C output-conversion functions for c->dstFormat: planar
 * 1-tap (yuv2yuv1), planar X-tap (yuv2yuvX) and the packed 1-/2-/X-tap
 * variants.  Special planar cases (NV12/21, 16-bit, 9/10-bit) only
 * provide an X-tap function.
 */
1531 static av_always_inline void
1532 find_c_packed_planar_out_funcs(SwsContext *c,
1533 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1534 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1535 yuv2packedX_fn *yuv2packedX)
1537 enum PixelFormat dstFormat = c->dstFormat;
1539 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1540 *yuv2yuvX = yuv2nv12X_c;
1541 } else if (is16BPS(dstFormat)) {
1542 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1543 } else if (is9_OR_10BPS(dstFormat)) {
1544 if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
1545 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
/* remaining 9/10-bit formats here are the 10-bit ones */
1547 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1550 *yuv2yuv1 = yuv2yuv1_c;
1551 *yuv2yuvX = yuv2yuvX_c;
/* packed output: full horizontal chroma interpolation only has an
 * X-tap implementation */
1553 if(c->flags & SWS_FULL_CHR_H_INT) {
1554 *yuv2packedX = yuv2rgbX_c_full;
1556 *yuv2packed1 = yuv2packed1_c;
1557 *yuv2packed2 = yuv2packed2_c;
1558 *yuv2packedX = yuv2packedX_c;
1562 #define DEBUG_SWSCALE_BUFFERS 0
1563 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main C scaling loop: for each destination line, horizontally scale
 * any newly needed source lines of the incoming slice into the
 * luma/chroma ring buffers, then vertically scale + convert the
 * buffered lines into the destination picture (planar or packed path).
 * Returns the number of destination lines written (dstY - lastDstY).
 */
1565 static int swScale(SwsContext *c, const uint8_t* src[],
1566 int srcStride[], int srcSliceY,
1567 int srcSliceH, uint8_t* dst[], int dstStride[])
1569 /* load a few things into local vars to make the code more readable? and faster */
1570 const int srcW= c->srcW;
1571 const int dstW= c->dstW;
1572 const int dstH= c->dstH;
1573 const int chrDstW= c->chrDstW;
1574 const int chrSrcW= c->chrSrcW;
1575 const int lumXInc= c->lumXInc;
1576 const int chrXInc= c->chrXInc;
1577 const enum PixelFormat dstFormat= c->dstFormat;
1578 const int flags= c->flags;
1579 int16_t *vLumFilterPos= c->vLumFilterPos;
1580 int16_t *vChrFilterPos= c->vChrFilterPos;
1581 int16_t *hLumFilterPos= c->hLumFilterPos;
1582 int16_t *hChrFilterPos= c->hChrFilterPos;
1583 int16_t *vLumFilter= c->vLumFilter;
1584 int16_t *vChrFilter= c->vChrFilter;
1585 int16_t *hLumFilter= c->hLumFilter;
1586 int16_t *hChrFilter= c->hChrFilter;
1587 int32_t *lumMmxFilter= c->lumMmxFilter;
1588 int32_t *chrMmxFilter= c->chrMmxFilter;
1589 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1590 const int vLumFilterSize= c->vLumFilterSize;
1591 const int vChrFilterSize= c->vChrFilterSize;
1592 const int hLumFilterSize= c->hLumFilterSize;
1593 const int hChrFilterSize= c->hChrFilterSize;
1594 int16_t **lumPixBuf= c->lumPixBuf;
1595 int16_t **chrUPixBuf= c->chrUPixBuf;
1596 int16_t **chrVPixBuf= c->chrVPixBuf;
1597 int16_t **alpPixBuf= c->alpPixBuf;
1598 const int vLumBufSize= c->vLumBufSize;
1599 const int vChrBufSize= c->vChrBufSize;
1600 uint8_t *formatConvBuffer= c->formatConvBuffer;
1601 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1602 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1604 uint32_t *pal=c->pal_yuv;
1605 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
1606 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
1607 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
1608 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
1609 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
1611 /* vars which will change and which we need to store back in the context */
1613 int lumBufIndex= c->lumBufIndex;
1614 int chrBufIndex= c->chrBufIndex;
1615 int lastInLumBuf= c->lastInLumBuf;
1616 int lastInChrBuf= c->lastInChrBuf;
/* packed input carries everything in plane 0; mirror its stride */
1618 if (isPacked(c->srcFormat)) {
1626 srcStride[3]= srcStride[0];
1628 srcStride[1]<<= c->vChrDrop;
1629 srcStride[2]<<= c->vChrDrop;
1631 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1632 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1633 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1634 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1635 srcSliceY, srcSliceH, dstY, dstH);
1636 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1637 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
1639 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1640 static int warnedAlready=0; //FIXME move this into the context perhaps
1641 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1642 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1643 " ->cannot do aligned memory accesses anymore\n");
1648 /* Note the user might start scaling the picture in the middle so this
1649 will not get executed. This is not really intended but works
1650 currently, so people might do it. */
1651 if (srcSliceY ==0) {
/* main loop: one destination line per iteration */
1661 for (;dstY < dstH; dstY++) {
1662 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1663 const int chrDstY= dstY>>c->chrDstVSubSample;
1664 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1665 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1666 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1668 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1669 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1670 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1671 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1672 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1673 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1676 //handle holes (FAST_BILINEAR & weird filters)
1677 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1678 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1679 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1680 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1682 DEBUG_BUFFERS("dstY: %d\n", dstY);
1683 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1684 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1685 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1686 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1688 // Do we have enough lines in this slice to output the dstY line
1689 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
1691 if (!enough_lines) {
1692 lastLumSrcY = srcSliceY + srcSliceH - 1;
1693 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1694 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1695 lastLumSrcY, lastChrSrcY);
1698 //Do horizontal scaling
/* pull new source lines into the luma (and alpha) ring buffer */
1699 while(lastInLumBuf < lastLumSrcY) {
1700 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1701 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1703 assert(lumBufIndex < 2*vLumBufSize);
1704 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1705 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1706 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1707 hLumFilter, hLumFilterPos, hLumFilterSize,
1710 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1711 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1712 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1716 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1717 lumBufIndex, lastInLumBuf);
/* pull new source lines into the chroma ring buffers */
1719 while(lastInChrBuf < lastChrSrcY) {
1720 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1721 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1723 assert(chrBufIndex < 2*vChrBufSize);
1724 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1725 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1726 //FIXME replace parameters through context struct (some at least)
1728 if (c->needs_hcscale)
1729 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1730 chrDstW, src1, src2, chrSrcW, chrXInc,
1731 hChrFilter, hChrFilterPos, hChrFilterSize,
1732 formatConvBuffer, pal);
1734 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1735 chrBufIndex, lastInChrBuf);
1737 //wrap buf index around to stay inside the ring buffer
1738 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1739 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1741 break; //we can't output a dstY line so let's try with the next slice
1744 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
1746 if (dstY >= dstH-2) {
1747 // hmm looks like we can't use MMX here without overwriting this array's tail
1748 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
1749 &yuv2packed1, &yuv2packed2,
/* vertical scaling + output conversion: compute the window of ring-
 * buffer lines feeding this output line */
1754 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1755 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1756 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1757 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1758 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1759 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1760 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1761 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1762 const int16_t *lumBuf = lumSrcPtr[0];
1763 const int16_t *chrUBuf= chrUSrcPtr[0];
1764 const int16_t *chrVBuf= chrVSrcPtr[0];
1765 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1766 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1767 uDest, vDest, aDest, dstW, chrDstW);
1768 } else { //General YV12
1770 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1771 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1772 chrVSrcPtr, vChrFilterSize,
1773 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1776 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1777 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1778 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1779 int chrAlpha= vChrFilter[2*dstY+1];
1780 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1781 *chrVSrcPtr, *(chrVSrcPtr+1),
1782 alpPixBuf ? *alpSrcPtr : NULL,
1783 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1784 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1785 int lumAlpha= vLumFilter[2*dstY+1];
1786 int chrAlpha= vChrFilter[2*dstY+1];
1788 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1790 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1791 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1792 *chrVSrcPtr, *(chrVSrcPtr+1),
1793 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1794 dest, dstW, lumAlpha, chrAlpha, dstY);
1795 } else { //general RGB
1797 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1798 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1799 alpSrcPtr, dest, dstW, dstY);
/* YUVA output without alpha input: fill the alpha plane opaque */
1805 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1806 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
1809 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1810 __asm__ volatile("sfence":::"memory");
1814 /* store changed local vars back in the context */
1816 c->lumBufIndex= lumBufIndex;
1817 c->chrBufIndex= chrBufIndex;
1818 c->lastInLumBuf= lastInLumBuf;
1819 c->lastInChrBuf= lastInChrBuf;
1821 return dstY - lastDstY;
/**
 * One-time C-path initialization: select the output converters, the
 * horizontal scalers, the per-format input converters
 * (chrToYV12 / lumToYV12 / alpToYV12), the range converters and the
 * needs_hcscale flag for this context.
 */
1824 static av_cold void sws_init_swScale_c(SwsContext *c)
1826 enum PixelFormat srcFormat = c->srcFormat;
1828 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
1829 &c->yuv2packed1, &c->yuv2packed2,
1832 c->hScale = hScale_c;
1834 if (c->flags & SWS_FAST_BILINEAR) {
1835 c->hyscale_fast = hyscale_fast_c;
1836 c->hcscale_fast = hcscale_fast_c;
/* input chroma converter, keyed on the source pixel format */
1839 c->chrToYV12 = NULL;
1841 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1842 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1843 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1844 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1848 case PIX_FMT_BGR4_BYTE:
1849 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1850 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1851 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1852 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1853 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1854 case PIX_FMT_YUV420P16BE:
1855 case PIX_FMT_YUV422P16BE:
1856 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1857 case PIX_FMT_YUV420P16LE:
1858 case PIX_FMT_YUV422P16LE:
1859 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB sources: the *_half_c variants average horizontal pixel pairs
 * when the destination chroma is horizontally subsampled */
1861 if (c->chrSrcHSubSample) {
1863 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
1864 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
1865 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
1866 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
1867 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1868 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1869 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1870 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1871 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1872 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1873 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1874 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1875 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1876 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1880 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
1881 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
1882 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
1883 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
1884 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
1885 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1886 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
1887 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1888 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1889 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
1890 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
1891 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
1892 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
1893 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
/* input luma and alpha converters */
1897 c->lumToYV12 = NULL;
1898 c->alpToYV12 = NULL;
1899 switch (srcFormat) {
1900 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
1901 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
1902 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
1903 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
1904 case PIX_FMT_YUYV422 :
1905 case PIX_FMT_YUV420P16BE:
1906 case PIX_FMT_YUV422P16BE:
1907 case PIX_FMT_YUV444P16BE:
1908 case PIX_FMT_Y400A :
/* yuy2ToY_c / uyvyToY_c double as generic even-/odd-byte extractors
 * for 16-bit gray and 16-bit planar input */
1909 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
1910 case PIX_FMT_UYVY422 :
1911 case PIX_FMT_YUV420P16LE:
1912 case PIX_FMT_YUV422P16LE:
1913 case PIX_FMT_YUV444P16LE:
1914 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
1915 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
1916 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
1917 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
1918 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
1919 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
1920 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
1924 case PIX_FMT_BGR4_BYTE:
1925 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
1926 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
1927 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
1928 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
1929 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
1930 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
1931 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
1932 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
1933 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
1934 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
1935 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
1938 switch (srcFormat) {
1940 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
1942 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A stores alpha at odd bytes; uyvyToY_c extracts exactly those */
1943 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* range conversion only applies to YUV output */
1947 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
1949 c->lumConvertRange = lumRangeFromJpeg_c;
1950 c->chrConvertRange = chrRangeFromJpeg_c;
1952 c->lumConvertRange = lumRangeToJpeg_c;
1953 c->chrConvertRange = chrRangeToJpeg_c;
1957 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
1958 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
1959 c->needs_hcscale = 1;
1962 SwsFunc ff_getSwsFunc(SwsContext *c)
1964 sws_init_swScale_c(c);
1967 ff_sws_init_swScale_mmx(c);
1969 ff_sws_init_swScale_altivec(c);