2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most have been tested, though not every result was recorded ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion constants, scaled by 1<<RGB2YUV_SHIFT.
 * The 219/255 (luma) and 224/255 (chroma) factors map full-range RGB to
 * limited-range YCbCr; [BGR][YUV] is the blue/green/red contribution to
 * the Y, U or V component (BT.601 coefficients). */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* RGB->YUV coefficient sets per colorspace.  Each row appears to hold the
 * green/blue/red weights for Y, then for U, then for V (the values match
 * BT.709, BT.601, FCC and SMPTE 240M respectively).
 * NOTE(review): row order presumably follows the SWS_CS_* indices --
 * confirm against swscale.h before relying on it. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither matrices for the low-bit-depth RGB/BGR output paths.
 * The numeric suffix is roughly the range of values in each table, i.e.
 * the quantization step of the target component depth. */
/* 2x2 matrices used by the 15/16-bit (555/565) writers. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 matrix used by the 12-bit (444) writers. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 matrices used together by the 8-bit RGB/BGR writers (d32 for the
 * 3-bit, d64 for the 2-bit component). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* Used by the 4-bit and monochrome writers.
 * NOTE(review): dither_8x8_220 is defined four times below (plain and
 * gamma-1.5/2.0/2.5 variants); in the complete file these are presumably
 * mutually exclusive preprocessor alternatives -- confirm before editing. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* Vertical filter pass producing planar YUV with more than 8 bits per
 * component: each output sample is the sum of lumFilterSize (resp.
 * chrFilterSize) input samples weighted by 16-bit coefficients, shifted
 * down by 'shift' and clipped to output_bits, then stored as a 16-bit
 * big- or little-endian word.  Being always_inline and called with
 * constant big_endian/output_bits from yuv2NBPS(), all branches on those
 * arguments fold away at compile time. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
/* NOTE(review): the AV_WB16 and AV_WL16 branches below are presumably
 * selected on big_endian in the complete file; the selecting condition is
 * not visible in this excerpt. */
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
/* Chroma: same weighted accumulation for the U and V planes; the initial
 * value is the fixed-point rounding bias. */
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
/* Optional alpha plane, filtered with the luma coefficients. */
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* Expands to one yuv2yuvX##bits##BE/LE##_c wrapper: reinterprets the byte
 * destination pointers as uint16_t and delegates to yuv2yuvX16_c_template
 * with compile-time bit depth and endianness. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/* Vertical filter pass for 8-bit planar YUV output: accumulate the
 * weighted input lines in a 32-bit int, shift down by 19 and clip the
 * result to [0,255]. */
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
/* Chroma planes. */
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
/* Optional alpha plane, filtered with the luma coefficients. */
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/* Same vertical filter pass as yuv2yuvX_c, but for semi-planar output:
 * U and V are interleaved into the single chroma plane addressed through
 * uDest -- U first for NV12, V first otherwise. */
332 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
333 const int16_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int16_t **chrUSrc,
335 const int16_t **chrVSrc, int chrFilterSize,
336 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
337 uint8_t *vDest, uint8_t *aDest,
338 int dstW, int chrDstW)
340 enum PixelFormat dstFormat = c->dstFormat;
342 //FIXME Optimize (just quickly written not optimized..)
344 for (i=0; i<dstW; i++) {
347 for (j=0; j<lumFilterSize; j++)
348 val += lumSrc[j][i] * lumFilter[j];
350 dest[i]= av_clip_uint8(val>>19);
/* NV12: U at even, V at odd offsets of the interleaved chroma plane. */
356 if (dstFormat == PIX_FMT_NV12)
357 for (i=0; i<chrDstW; i++) {
361 for (j=0; j<chrFilterSize; j++) {
362 u += chrUSrc[j][i] * chrFilter[j];
363 v += chrVSrc[j][i] * chrFilter[j];
366 uDest[2*i]= av_clip_uint8(u>>19);
367 uDest[2*i+1]= av_clip_uint8(v>>19);
/* Other layout (presumably NV21): V at even, U at odd offsets. */
370 for (i=0; i<chrDstW; i++) {
374 for (j=0; j<chrFilterSize; j++) {
375 u += chrUSrc[j][i] * chrFilter[j];
376 v += chrVSrc[j][i] * chrFilter[j];
379 uDest[2*i]= av_clip_uint8(v>>19);
380 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Full vertical filter pass for 16-bit grayscale output.  Accumulates
 * lumFilterSize weighted luma lines for a pair of pixels and stores them
 * via output_pixel, whose endianness is selected by 'target' (always
 * inlined, so the format test is compile-time). */
384 static av_always_inline void
385 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
386 const int16_t **lumSrc, int lumFilterSize,
387 const int16_t *chrFilter, const int16_t **chrUSrc,
388 const int16_t **chrVSrc, int chrFilterSize,
389 const int16_t **alpSrc, uint8_t *dest, int dstW,
390 int y, enum PixelFormat target)
394 #define output_pixel(pos, val) \
395 if (target == PIX_FMT_GRAY16BE) { \
400 for (i = 0; i < (dstW >> 1); i++) {
404 const int i2 = 2 * i;
406 for (j = 0; j < lumFilterSize; j++) {
407 Y1 += lumSrc[j][i2] * lumFilter[j];
408 Y2 += lumSrc[j][i2+1] * lumFilter[j];
/* Clip only when a value overflowed the 16-bit range. */
412 if ((Y1 | Y2) & 0x10000) {
413 Y1 = av_clip_uint16(Y1);
414 Y2 = av_clip_uint16(Y2);
416 output_pixel(&dest[2 * i2 + 0], Y1);
417 output_pixel(&dest[2 * i2 + 2], Y2);
/* Two-line bilinear version: blend buf0/buf1 with yalpha (0..4095). */
421 static av_always_inline void
422 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
423 const uint16_t *buf1, const uint16_t *ubuf0,
424 const uint16_t *ubuf1, const uint16_t *vbuf0,
425 const uint16_t *vbuf1, const uint16_t *abuf0,
426 const uint16_t *abuf1, uint8_t *dest, int dstW,
427 int yalpha, int uvalpha, int y,
428 enum PixelFormat target)
430 int yalpha1 = 4095 - yalpha; \
433 for (i = 0; i < (dstW >> 1); i++) {
434 const int i2 = 2 * i;
435 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
436 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
438 output_pixel(&dest[2 * i2 + 0], Y1);
439 output_pixel(&dest[2 * i2 + 2], Y2);
/* Single-line version: no vertical interpolation; the <<1 scales the
 * intermediate samples up to 16 bits (input precision presumably 15 bits
 * -- TODO confirm against the horizontal scaler). */
443 static av_always_inline void
444 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
445 const uint16_t *ubuf0, const uint16_t *ubuf1,
446 const uint16_t *vbuf0, const uint16_t *vbuf1,
447 const uint16_t *abuf0, uint8_t *dest, int dstW,
448 int uvalpha, enum PixelFormat dstFormat,
449 int flags, int y, enum PixelFormat target)
453 for (i = 0; i < (dstW >> 1); i++) {
454 const int i2 = 2 * i;
455 int Y1 = buf0[i2 ] << 1;
456 int Y2 = buf0[i2+1] << 1;
458 output_pixel(&dest[2 * i2 + 0], Y1);
459 output_pixel(&dest[2 * i2 + 2], Y2);
/* Generates the _X (full vertical filter), _2 (two-line bilinear) and _1
 * (single-line) packed-output entry points for one pixel format by
 * delegating to the matching name##_*_c_template with 'fmt' passed as a
 * compile-time constant.  Instantiated below for GRAY16LE/GRAY16BE. */
464 #define YUV2PACKEDWRAPPER(name, ext, fmt) \
465 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
466 const int16_t **lumSrc, int lumFilterSize, \
467 const int16_t *chrFilter, const int16_t **chrUSrc, \
468 const int16_t **chrVSrc, int chrFilterSize, \
469 const int16_t **alpSrc, uint8_t *dest, int dstW, \
472 name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
473 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
474 alpSrc, dest, dstW, y, fmt); \
477 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
478 const uint16_t *buf1, const uint16_t *ubuf0, \
479 const uint16_t *ubuf1, const uint16_t *vbuf0, \
480 const uint16_t *vbuf1, const uint16_t *abuf0, \
481 const uint16_t *abuf1, uint8_t *dest, int dstW, \
482 int yalpha, int uvalpha, int y) \
484 name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
485 vbuf0, vbuf1, abuf0, abuf1, \
486 dest, dstW, yalpha, uvalpha, y, fmt); \
489 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
490 const uint16_t *ubuf0, const uint16_t *ubuf1, \
491 const uint16_t *vbuf0, const uint16_t *vbuf1, \
492 const uint16_t *abuf0, uint8_t *dest, int dstW, \
493 int uvalpha, enum PixelFormat dstFormat, \
496 name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
497 vbuf1, abuf0, dest, dstW, uvalpha, \
498 dstFormat, flags, y, fmt); \
501 YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
502 YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
/* Core per-pixel loops shared by the packed output writers.  Each variant
 * computes two luma samples (Y1, Y2) plus one chroma pair (U, V) per
 * iteration (2x1 chroma subsampling); _PACKEDX applies the full vertical
 * filter, and the alpha accumulation is compiled in only when the 'alpha'
 * macro argument is set. */
504 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
505 for (i=0; i<(dstW>>1); i++) {\
511 int av_unused A1, A2;\
512 type av_unused *r, *b, *g;\
515 for (j=0; j<lumFilterSize; j++) {\
516 Y1 += lumSrc[j][i2] * lumFilter[j];\
517 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
519 for (j=0; j<chrFilterSize; j++) {\
520 U += chrUSrc[j][i] * chrFilter[j];\
521 V += chrVSrc[j][i] * chrFilter[j];\
527 if ((Y1|Y2|U|V)&0x100) {\
528 Y1 = av_clip_uint8(Y1); \
529 Y2 = av_clip_uint8(Y2); \
530 U = av_clip_uint8(U); \
531 V = av_clip_uint8(V); \
536 for (j=0; j<lumFilterSize; j++) {\
537 A1 += alpSrc[j][i2 ] * lumFilter[j];\
538 A2 += alpSrc[j][i2+1] * lumFilter[j];\
542 if ((A1|A2)&0x100) {\
543 A1 = av_clip_uint8(A1); \
544 A2 = av_clip_uint8(A2); \
548 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
549 for (i=0; i<dstW; i++) {\
557 for (j=0; j<lumFilterSize; j++) {\
558 Y += lumSrc[j][i ] * lumFilter[j];\
560 for (j=0; j<chrFilterSize; j++) {\
561 U += chrUSrc[j][i] * chrFilter[j];\
562 V += chrVSrc[j][i] * chrFilter[j];\
569 for (j=0; j<lumFilterSize; j++)\
570 A += alpSrc[j][i ] * lumFilter[j];\
573 A = av_clip_uint8(A);\
575 Y-= c->yuv2rgb_y_offset;\
576 Y*= c->yuv2rgb_y_coeff;\
578 R= Y + V*c->yuv2rgb_v2r_coeff;\
579 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
580 B= Y + U*c->yuv2rgb_u2b_coeff;\
581 if ((R|G|B)&(0xC0000000)) {\
582 R = av_clip_uintp2(R, 30); \
583 G = av_clip_uintp2(G, 30); \
584 B = av_clip_uintp2(B, 30); \
587 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
588 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
589 r = (type *)c->table_rV[V]; \
590 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
591 b = (type *)c->table_bU[U];
/* _2 variants: vertical bilinear interpolation between two source lines
 * using the yalpha/uvalpha weights (0..4095 fixed point). */
593 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
594 for (i=0; i<(dstW>>1); i++) { \
596 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
597 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
598 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
599 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
600 type av_unused *r, *b, *g; \
601 int av_unused A1, A2; \
603 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
604 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
607 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
608 YSCALE_YUV_2_PACKED2_C(type,alpha)\
609 r = (type *)c->table_rV[V];\
610 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
611 b = (type *)c->table_bU[U];
/* _1 variants: single source line, no vertical interpolation (used on
 * the uvalpha < 2048 path of yuv2packed1_c). */
613 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
614 for (i=0; i<(dstW>>1); i++) {\
616 int Y1= buf0[i2 ]>>7;\
617 int Y2= buf0[i2+1]>>7;\
618 int U= (ubuf1[i])>>7;\
619 int V= (vbuf1[i])>>7;\
620 type av_unused *r, *b, *g;\
621 int av_unused A1, A2;\
627 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
628 YSCALE_YUV_2_PACKED1_C(type,alpha)\
629 r = (type *)c->table_rV[V];\
630 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
631 b = (type *)c->table_bU[U];
/* _1B variants: single luma line, but chroma averaged from both chroma
 * lines (used on the uvalpha >= 2048 path of yuv2packed1_c). */
633 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
634 for (i=0; i<(dstW>>1); i++) {\
636 int Y1= buf0[i2 ]>>7;\
637 int Y2= buf0[i2+1]>>7;\
638 int U= (ubuf0[i] + ubuf1[i])>>8;\
639 int V= (vbuf0[i] + vbuf1[i])>>8;\
640 type av_unused *r, *b, *g;\
641 int av_unused A1, A2;\
647 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
648 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
649 r = (type *)c->table_rV[V];\
650 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
651 b = (type *)c->table_bU[U];
/* Monochrome (1 bpp) writers: look up dithered gray values through the
 * neutral-chroma table and shift 8 pixels into one output byte; the
 * result is inverted (~acc) unless the target is MONOBLACK. */
653 #define YSCALE_YUV_2_MONO2_C \
654 const uint8_t * const d128=dither_8x8_220[y&7];\
655 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
656 for (i=0; i<dstW-7; i+=8) {\
658 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
659 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
660 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
661 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
662 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
663 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
664 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
665 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
666 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
670 #define YSCALE_YUV_2_MONOX_C \
671 const uint8_t * const d128=dither_8x8_220[y&7];\
672 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
674 for (i=0; i<dstW-1; i+=2) {\
679 for (j=0; j<lumFilterSize; j++) {\
680 Y1 += lumSrc[j][i] * lumFilter[j];\
681 Y2 += lumSrc[j][i+1] * lumFilter[j];\
685 if ((Y1|Y2)&0x100) {\
686 Y1 = av_clip_uint8(Y1); \
687 Y2 = av_clip_uint8(Y2); \
689 acc+= acc + g[Y1+d128[(i+0)&7]];\
690 acc+= acc + g[Y2+d128[(i+1)&7]];\
692 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
697 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
/* Per-format writer: emits one output line of pixel pairs (Y1,Y2,U,V from 'func') in the layout selected by c->dstFormat. */\
698 switch(c->dstFormat) {\
699 case PIX_FMT_RGB48BE:\
700 case PIX_FMT_RGB48LE:\
702 ((uint8_t*)dest)[ 0]= r[Y1];\
703 ((uint8_t*)dest)[ 1]= r[Y1];\
704 ((uint8_t*)dest)[ 2]= g[Y1];\
705 ((uint8_t*)dest)[ 3]= g[Y1];\
706 ((uint8_t*)dest)[ 4]= b[Y1];\
707 ((uint8_t*)dest)[ 5]= b[Y1];\
708 ((uint8_t*)dest)[ 6]= r[Y2];\
709 ((uint8_t*)dest)[ 7]= r[Y2];\
710 ((uint8_t*)dest)[ 8]= g[Y2];\
711 ((uint8_t*)dest)[ 9]= g[Y2];\
712 ((uint8_t*)dest)[10]= b[Y2];\
713 ((uint8_t*)dest)[11]= b[Y2];\
717 case PIX_FMT_BGR48BE:\
718 case PIX_FMT_BGR48LE:\
720 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
721 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
722 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
723 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
724 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
725 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
732 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
733 func(uint32_t,needAlpha)\
734 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
735 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
738 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
740 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
741 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
745 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
746 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
754 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
755 func(uint32_t,needAlpha)\
756 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
757 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
760 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
762 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
763 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
767 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
768 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
775 ((uint8_t*)dest)[0]= r[Y1];\
776 ((uint8_t*)dest)[1]= g[Y1];\
777 ((uint8_t*)dest)[2]= b[Y1];\
778 ((uint8_t*)dest)[3]= r[Y2];\
779 ((uint8_t*)dest)[4]= g[Y2];\
780 ((uint8_t*)dest)[5]= b[Y2];\
786 ((uint8_t*)dest)[0]= b[Y1];\
787 ((uint8_t*)dest)[1]= g[Y1];\
788 ((uint8_t*)dest)[2]= r[Y1];\
789 ((uint8_t*)dest)[3]= b[Y2];\
790 ((uint8_t*)dest)[4]= g[Y2];\
791 ((uint8_t*)dest)[5]= r[Y2];\
795 case PIX_FMT_RGB565BE:\
796 case PIX_FMT_RGB565LE:\
797 case PIX_FMT_BGR565BE:\
798 case PIX_FMT_BGR565LE:\
800 const int dr1= dither_2x2_8[y&1 ][0];\
801 const int dg1= dither_2x2_4[y&1 ][0];\
802 const int db1= dither_2x2_8[(y&1)^1][0];\
803 const int dr2= dither_2x2_8[y&1 ][1];\
804 const int dg2= dither_2x2_4[y&1 ][1];\
805 const int db2= dither_2x2_8[(y&1)^1][1];\
807 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
808 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
812 case PIX_FMT_RGB555BE:\
813 case PIX_FMT_RGB555LE:\
814 case PIX_FMT_BGR555BE:\
815 case PIX_FMT_BGR555LE:\
817 const int dr1= dither_2x2_8[y&1 ][0];\
818 const int dg1= dither_2x2_8[y&1 ][1];\
819 const int db1= dither_2x2_8[(y&1)^1][0];\
820 const int dr2= dither_2x2_8[y&1 ][1];\
821 const int dg2= dither_2x2_8[y&1 ][0];\
822 const int db2= dither_2x2_8[(y&1)^1][1];\
824 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
825 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
829 case PIX_FMT_RGB444BE:\
830 case PIX_FMT_RGB444LE:\
831 case PIX_FMT_BGR444BE:\
832 case PIX_FMT_BGR444LE:\
834 const int dr1= dither_4x4_16[y&3 ][0];\
835 const int dg1= dither_4x4_16[y&3 ][1];\
836 const int db1= dither_4x4_16[(y&3)^3][0];\
837 const int dr2= dither_4x4_16[y&3 ][1];\
838 const int dg2= dither_4x4_16[y&3 ][0];\
839 const int db2= dither_4x4_16[(y&3)^3][1];\
841 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
842 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
849 const uint8_t * const d64= dither_8x8_73[y&7];\
850 const uint8_t * const d32= dither_8x8_32[y&7];\
852 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
853 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
860 const uint8_t * const d64= dither_8x8_73 [y&7];\
861 const uint8_t * const d128=dither_8x8_220[y&7];\
863 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
864 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
868 case PIX_FMT_RGB4_BYTE:\
869 case PIX_FMT_BGR4_BYTE:\
871 const uint8_t * const d64= dither_8x8_73 [y&7];\
872 const uint8_t * const d128=dither_8x8_220[y&7];\
874 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
875 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
879 case PIX_FMT_MONOBLACK:\
880 case PIX_FMT_MONOWHITE:\
885 case PIX_FMT_YUYV422:\
887 ((uint8_t*)dest)[2*i2+0]= Y1;\
888 ((uint8_t*)dest)[2*i2+1]= U;\
889 ((uint8_t*)dest)[2*i2+2]= Y2;\
890 ((uint8_t*)dest)[2*i2+3]= V;\
893 case PIX_FMT_UYVY422:\
895 ((uint8_t*)dest)[2*i2+0]= U;\
896 ((uint8_t*)dest)[2*i2+1]= Y1;\
897 ((uint8_t*)dest)[2*i2+2]= V;\
898 ((uint8_t*)dest)[2*i2+3]= Y2;\
903 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
904 const int16_t **lumSrc, int lumFilterSize,
905 const int16_t *chrFilter, const int16_t **chrUSrc,
906 const int16_t **chrVSrc, int chrFilterSize,
907 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
/* Full vertical filter pass for packed output: dispatch on c->dstFormat
 * with the filtered per-pixel loop (YSCALE_YUV_2_RGBX_C). */
910 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution vertical filter pass writing packed RGB: converts
 * one YUV sample per output pixel with the context's yuv2rgb_* coefficients
 * (no 2x1 chroma subsampling on output); 'step' is the bytes-per-pixel
 * advance of dest. */
913 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
914 const int16_t **lumSrc, int lumFilterSize,
915 const int16_t *chrFilter, const int16_t **chrUSrc,
916 const int16_t **chrVSrc, int chrFilterSize,
917 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
920 int step= c->dstFormatBpp/8;
923 switch(c->dstFormat) {
/* NOTE(review): the case labels and the aidx setup are not visible in this
 * excerpt; the first group presumably handles alpha-last layouts, the
 * second alpha-first ones -- confirm against the complete file. */
931 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
932 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
933 dest[aidx]= needAlpha ? A : 255;
940 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
941 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
949 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
966 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
967 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
968 dest[aidx]= needAlpha ? A : 255;
975 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
976 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
984 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/**
 * Fill a rectangular region of a plane with a constant byte value.
 *
 * Writes 'width' bytes on each of 'height' consecutive rows, starting at
 * row 'y' of 'plane'; rows are 'stride' bytes apart.
 *
 * NOTE(review): this span of the extraction was truncated (missing the
 * trailing parameters, the per-row stride advance and the closing braces);
 * restored here from the visible uses of y/val/stride.
 */
#ifndef av_always_inline
#define av_always_inline inline
#endif
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
                                       int y, uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
        ptr += stride;  // advance to the next row
    }
}
/* Generates the ToY, ToUV and ToUV_half input converters for one 48-bit
 * RGB/BGR variant.  'rfunc' reads one 16-bit component (little- or
 * big-endian) and the top 8 bits feed the fixed-point RGB->YUV dot
 * products; the comp* macro arguments name the r/g/b variables in pixel
 * order.  The _half variants sum two horizontally adjacent pixels before
 * the dot product, for 2:1 horizontally subsampled chroma. */
1011 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
1012 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
1013 uint8_t *dst, const uint8_t *src, int width, \
1017 for (i = 0; i < width; i++) { \
1018 int compA = rfunc(&src[i*6+0]) >> 8; \
1019 int compB = rfunc(&src[i*6+2]) >> 8; \
1020 int compC = rfunc(&src[i*6+4]) >> 8; \
1022 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1026 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
1027 uint8_t *dstU, uint8_t *dstV, \
1028 const uint8_t *src1, const uint8_t *src2, \
1029 int width, uint32_t *unused) \
1032 assert(src1==src2); \
1033 for (i = 0; i < width; i++) { \
1034 int compA = rfunc(&src1[6*i + 0]) >> 8; \
1035 int compB = rfunc(&src1[6*i + 2]) >> 8; \
1036 int compC = rfunc(&src1[6*i + 4]) >> 8; \
1038 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1039 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1043 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
1044 uint8_t *dstU, uint8_t *dstV, \
1045 const uint8_t *src1, const uint8_t *src2, \
1046 int width, uint32_t *unused) \
1049 assert(src1==src2); \
1050 for (i = 0; i < width; i++) { \
1051 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
1052 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
1053 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1055 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1056 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1059 rgb48funcs(LE, AV_RL16, r, g, b);
1060 rgb48funcs(BE, AV_RB16, r, g, b);
1061 rgb48funcs(LE, AV_RL16, b, g, r);
1062 rgb48funcs(BE, AV_RB16, b, g, r);
/* Generates a packed RGB/BGR -> Y converter: extract r/g/b with the given
 * shift/mask pairs and apply the fixed-point luma dot product with
 * rounding; instantiated below for the 32/16/15-bit packed layouts, with
 * pre-shifted coefficients compensating for components stored above
 * bit 0. */
1064 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1065 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1066 int width, uint32_t *unused)\
1069 for (i=0; i<width; i++) {\
1070 int b= (((const type*)src)[i]>>shb)&maskb;\
1071 int g= (((const type*)src)[i]>>shg)&maskg;\
1072 int r= (((const type*)src)[i]>>shr)&maskr;\
1074 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1078 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1079 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1080 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1081 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1082 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1083 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1084 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1085 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* NOTE(review): loop body elided in this excerpt; presumably copies the
 * alpha byte of each ABGR pixel into dst. */
1087 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1090 for (i=0; i<width; i++) {
/* NOTE(review): loop body elided; presumably copies the alpha byte of
 * each RGBA pixel into dst. */
1095 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1098 for (i=0; i<width; i++) {
/* Generates packed RGB/BGR -> U,V converters.  The plain version converts
 * one pixel per output sample; the _half version sums two adjacent pixels
 * before the dot product (the mask arithmetic keeps the per-component
 * sums separate), for 2:1 horizontally subsampled chroma.  'shp' drops a
 * leading padding/alpha byte before the component masks are applied. */
1103 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1104 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1105 const uint8_t *src, const uint8_t *dummy, \
1106 int width, uint32_t *unused)\
1109 for (i=0; i<width; i++) {\
1110 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1111 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1112 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1114 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1115 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1118 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1119 const uint8_t *src, const uint8_t *dummy, \
1120 int width, uint32_t *unused)\
1123 for (i=0; i<width; i++) {\
1124 int pix0= ((const type*)src)[2*i+0]>>shp;\
1125 int pix1= ((const type*)src)[2*i+1]>>shp;\
1126 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1127 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1128 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1129 g&= maskg|(2*maskg);\
1133 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1134 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1138 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1139 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1140 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1141 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1142 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1143 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1144 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1145 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
/* PAL8 input: look each index up in the 32-bit palette and keep the low
 * byte as luma. */
1147 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1150 for (i=0; i<width; i++) {
1153 dst[i]= pal[d] & 0xFF;
/* PAL8 input: fetch the palette entry per pixel; the U/V extraction from
 * 'p' is elided in this excerpt. */
1157 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1158 const uint8_t *src1, const uint8_t *src2,
1159 int width, uint32_t *pal)
1162 assert(src1 == src2);
1163 for (i=0; i<width; i++) {
1164 int p= pal[src1[i]];
/* 1 bpp input: expand each bit of a source byte to a 0/255 luma sample.
 * NOTE(review): the line loading 'd' is elided here; monowhite presumably
 * inverts the source byte before expansion -- confirm in the full file. */
1171 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1172 int width, uint32_t *unused)
1175 for (i=0; i<width/8; i++) {
1178 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp input, black-is-zero variant: same bit expansion, no inversion. */
1182 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1183 int width, uint32_t *unused)
1186 for (i=0; i<width/8; i++) {
1189 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1:1 vertical pass (no filtering): convert the fixed-point intermediate
 * samples straight to 8 bits with rounding ((x+64)>>7) and clipping. */
1193 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1194 const int16_t *chrUSrc, const int16_t *chrVSrc,
1195 const int16_t *alpSrc,
1196 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1197 uint8_t *aDest, int dstW, int chrDstW)
1200 for (i=0; i<dstW; i++) {
1201 int val= (lumSrc[i]+64)>>7;
1202 dest[i]= av_clip_uint8(val);
/* Chroma planes. */
1206 for (i=0; i<chrDstW; i++) {
1207 int u=(chrUSrc[i]+64)>>7;
1208 int v=(chrVSrc[i]+64)>>7;
1209 uDest[i]= av_clip_uint8(u);
1210 vDest[i]= av_clip_uint8(v);
/* Optional alpha plane. */
1213 if (CONFIG_SWSCALE_ALPHA && aDest)
1214 for (i=0; i<dstW; i++) {
1215 int val= (alpSrc[i]+64)>>7;
1216 aDest[i]= av_clip_uint8(val);
1221 * vertical bilinear scale YV12 to RGB
/* Blend two adjacent source lines with weights (4096 - alpha, alpha) and
 * emit one packed output line; the actual per-format pixel emission is
 * expanded by the YSCALE_YUV_2_ANYRGB_C macro. */
1223 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1224 const uint16_t *buf1, const uint16_t *ubuf0,
1225 const uint16_t *ubuf1, const uint16_t *vbuf0,
1226 const uint16_t *vbuf1, const uint16_t *abuf0,
1227 const uint16_t *abuf1, uint8_t *dest, int dstW,
1228 int yalpha, int uvalpha, int y)
/* complementary weights: the two lines' contributions sum to 4095 */
1230 int yalpha1=4095- yalpha;
1231 int uvalpha1=4095-uvalpha;
1234 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
1238 * YV12 to RGB without scaling or interpolating
/* Single-line packed output.  Chroma may still be blended: uvalpha < 2048
 * selects the nearest chroma line, otherwise the two chroma lines are
 * averaged (the "B" macro variants). */
1240 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1241 const uint16_t *ubuf0, const uint16_t *ubuf1,
1242 const uint16_t *vbuf0, const uint16_t *vbuf1,
1243 const uint16_t *abuf0, uint8_t *dest, int dstW,
1244 int uvalpha, enum PixelFormat dstFormat,
1247 const int yalpha1=0;
1250 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1251 const int yalpha= 4096; //FIXME ...
1253 if (uvalpha < 2048) {
1254 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
1256 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
1260 //FIXME yuy2* can read up to 7 samples too much
/* Extract luma from YUY2 (Y U Y V ...): Y lives at every even byte.
 * NOTE(review): the dst[i] = src[2*i] line is elided from this view. */
1262 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1266 for (i=0; i<width; i++)
/* Extract chroma from YUY2: U at byte 1 and V at byte 3 of every 4-byte
 * pair of pixels.  Both source pointers must alias (asserted). */
1270 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1271 const uint8_t *src2, int width, uint32_t *unused)
1274 for (i=0; i<width; i++) {
1275 dstU[i]= src1[4*i + 1];
1276 dstV[i]= src1[4*i + 3];
1278 assert(src1 == src2);
/* 16-bit little-endian planar chroma -> 8-bit: keep the high byte (offset 1)
 * of each LE 16-bit sample from the separate U and V planes. */
1281 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1282 const uint8_t *src2, int width, uint32_t *unused)
1285 for (i=0; i<width; i++) {
1286 dstU[i]= src1[2*i + 1];
1287 dstV[i]= src2[2*i + 1];
1291 /* This is almost identical to the previous, and exists only because
1292 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma from UYVY (U Y V Y ...): Y lives at every odd byte.
 * NOTE(review): the dst[i] = src[2*i + 1] line is elided from this view. */
1293 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1297 for (i=0; i<width; i++)
/* Extract chroma from UYVY: U at byte 0 and V at byte 2 of every 4-byte
 * pair of pixels.  Both source pointers must alias (asserted). */
1301 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1302 const uint8_t *src2, int width, uint32_t *unused)
1305 for (i=0; i<width; i++) {
1306 dstU[i]= src1[4*i + 0];
1307 dstV[i]= src1[4*i + 2];
1309 assert(src1 == src2);
/* 16-bit big-endian planar chroma -> 8-bit.
 * NOTE(review): the loop body is elided here; presumably it keeps the high
 * byte (offset 0) of each BE sample, mirroring LEToUV_c — confirm. */
1312 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1313 const uint8_t *src2, int width, uint32_t *unused)
1316 for (i=0; i<width; i++) {
/* De-interleave an NV12/NV21-style packed chroma plane: even bytes go to
 * dst1, odd bytes to dst2.  Callers choose the U/V order. */
1322 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1323 const uint8_t *src, int width)
1326 for (i = 0; i < width; i++) {
1327 dst1[i] = src[2*i+0];
1328 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — pass destinations in (U, V) order. */
1332 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1333 const uint8_t *src1, const uint8_t *src2,
1334 int width, uint32_t *unused)
1336 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — swap the destinations. */
1339 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1340 const uint8_t *src1, const uint8_t *src2,
1341 int width, uint32_t *unused)
1343 nvXXtoUV_c(dstV, dstU, src1, width);
1346 // FIXME Maybe dither instead.
/* Reduce 9/10-bit planar chroma to 8 bits by truncating the (depth - 8)
 * low bits.  Endianness of the source is resolved per pixel through the
 * input_pixel() macro based on `origin`. */
1347 static av_always_inline void
1348 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1349 const uint8_t *_srcU, const uint8_t *_srcV,
1350 int width, enum PixelFormat origin, int depth)
1353 const uint16_t *srcU = (const uint16_t *) _srcU;
1354 const uint16_t *srcV = (const uint16_t *) _srcV;
/* byte-order-aware 16-bit load, selected by the source pixel format */
1356 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1357 for (i = 0; i < width; i++) {
1358 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1359 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
/* Reduce 9/10-bit planar luma to 8 bits by truncating the (depth - 8)
 * low bits; uses the same endian-aware input_pixel() macro as the UV
 * template above. */
1363 static av_always_inline void
1364 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1365 int width, enum PixelFormat origin, int depth)
1368 const uint16_t *srcY = (const uint16_t*)_srcY;
1370 for (i = 0; i < width; i++)
1371 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
/* Generate the concrete {LE,BE}{9,10}To{Y,UV}_c wrappers by binding the
 * depth-reduction templates above to a fixed depth and byte order. */
1375 #define YUV_NBPS(depth, BE_LE, origin) \
1376 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1377 const uint8_t *srcU, const uint8_t *srcV, \
1378 int width, uint32_t *unused) \
1380 yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1382 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1383 int width, uint32_t *unused) \
1385 yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1388 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1389 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1390 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1391 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
/* BGR24 -> luma: fixed-point BT.601 dot product with rounding offset.
 * NOTE(review): the b/g/r byte loads are elided from this view; presumably
 * b = src[3*i+0], g = src[3*i+1], r = src[3*i+2] as in bgr24ToUV_c. */
1393 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1394 int width, uint32_t *unused)
1397 for (i=0; i<width; i++) {
/* 33<<(SHIFT-1) folds the +16 luma offset and the rounding term */
1402 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> chroma: per-pixel fixed-point BT.601 dot products.  The
 * 257<<(SHIFT-1) term folds the +128 chroma offset and rounding.  Both
 * source pointers must alias (asserted). */
1406 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1407 const uint8_t *src2, int width, uint32_t *unused)
1410 for (i=0; i<width; i++) {
1411 int b= src1[3*i + 0];
1412 int g= src1[3*i + 1];
1413 int r= src1[3*i + 2];
1415 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1416 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1418 assert(src1 == src2);
/* BGR24 -> chroma at half horizontal resolution: sum each pair of adjacent
 * pixels, then convert; the extra factor of two is absorbed by shifting by
 * RGB2YUV_SHIFT+1.  Both source pointers must alias (asserted). */
1421 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1422 const uint8_t *src2, int width, uint32_t *unused)
1425 for (i=0; i<width; i++) {
/* component sums of two horizontally adjacent pixels */
1426 int b= src1[6*i + 0] + src1[6*i + 3];
1427 int g= src1[6*i + 1] + src1[6*i + 4];
1428 int r= src1[6*i + 2] + src1[6*i + 5];
1430 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1431 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1433 assert(src1 == src2);
/* RGB24 -> luma: identical math to bgr24ToY_c but with R/B byte order
 * swapped.  NOTE(review): the r/g/b byte loads are elided from this view. */
1436 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1440 for (i=0; i<width; i++) {
1445 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 -> chroma: identical math to bgr24ToUV_c with R/B byte order
 * swapped (r at offset 0, b at offset 2). */
1449 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1450 const uint8_t *src2, int width, uint32_t *unused)
1454 for (i=0; i<width; i++) {
1455 int r= src1[3*i + 0];
1456 int g= src1[3*i + 1];
1457 int b= src1[3*i + 2];
1459 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1460 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 -> chroma at half horizontal resolution: pairwise pixel sums with
 * R/B byte order swapped relative to bgr24ToUV_half_c. */
1464 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1465 const uint8_t *src2, int width, uint32_t *unused)
1469 for (i=0; i<width; i++) {
1470 int r= src1[6*i + 0] + src1[6*i + 3];
1471 int g= src1[6*i + 1] + src1[6*i + 4];
1472 int b= src1[6*i + 2] + src1[6*i + 5];
1474 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1475 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1480 // bilinear / bicubic scaling
/* Generic horizontal scaler: for each output sample, apply an FIR filter of
 * `filterSize` taps starting at filterPos[i] and store the 15-bit clamped
 * result in the intermediate int16 buffer. */
1481 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1483 const int16_t *filter, const int16_t *filterPos,
1487 for (i=0; i<dstW; i++) {
1489 int srcPos= filterPos[i];
1491 for (j=0; j<filterSize; j++) {
1492 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1494 //filter += hFilterSize;
1495 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1500 //FIXME all pal and rgb srcFormats could do this convertion as well
1501 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma (16..240) to full JPEG range in the 15-bit
 * intermediate domain; input is clamped first so the result cannot wrap. */
1502 static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1505 for (i = 0; i < width; i++) {
1506 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1507 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma to limited range in the 15-bit
 * intermediate domain; inverse of chrRangeToJpeg_c. */
1510 static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1513 for (i = 0; i < width; i++) {
1514 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1515 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand limited-range luma (16..235) to full JPEG range in the 15-bit
 * intermediate domain; input is clamped first to avoid overflow. */
1518 static void lumRangeToJpeg_c(uint16_t *dst, int width)
1521 for (i = 0; i < width; i++)
1522 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress full-range (JPEG) luma to limited range in the 15-bit
 * intermediate domain; inverse of lumRangeToJpeg_c. */
1524 static void lumRangeFromJpeg_c(uint16_t *dst, int width)
1527 for (i = 0; i < width; i++)
1528 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Fast bilinear horizontal luma scaler: walk the source with a 16.16
 * fixed-point position and linearly interpolate between the two nearest
 * samples, producing the 15-bit intermediate format.
 * NOTE(review): the xpos += xInc step is elided from this view. */
1531 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1532 const uint8_t *src, int srcW, int xInc)
1535 unsigned int xpos=0;
1536 for (i=0;i<dstWidth;i++) {
/* integer part = source index, top 7 bits of the fraction = blend weight */
1537 register unsigned int xx=xpos>>16;
1538 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1539 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1544 // *** horizontal scale Y line to temp buffer
/* Horizontal scaling front-end for one luma (or alpha) line: optionally
 * convert the input to 8-bit Y via the per-format toYV12 hook, then scale
 * with either the generic FIR scaler or the fast bilinear path, and finally
 * apply range conversion (luma only — alpha skips it). */
1545 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1546 const uint8_t *src, int srcW, int xInc,
1547 const int16_t *hLumFilter,
1548 const int16_t *hLumFilterPos, int hLumFilterSize,
1549 uint8_t *formatConvBuffer,
1550 uint32_t *pal, int isAlpha)
1552 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1553 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* input conversion (when needed) lands in formatConvBuffer */
1556 toYV12(formatConvBuffer, src, srcW, pal);
1557 src= formatConvBuffer;
1560 if (!c->hyscale_fast) {
1561 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1562 } else { // fast bilinear upscale / crap downscale
1563 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1567 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c, interpolating U and V planes in one pass.  Note the
 * (xalpha^127) complement form instead of (src<<7)+delta*alpha.
 * NOTE(review): the xpos += xInc step is elided from this view. */
1570 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1571 int dstWidth, const uint8_t *src1,
1572 const uint8_t *src2, int srcW, int xInc)
1575 unsigned int xpos=0;
1576 for (i=0;i<dstWidth;i++) {
1577 register unsigned int xx=xpos>>16;
1578 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1579 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1580 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontal scaling front-end for one chroma line pair: optionally
 * convert the input via chrToYV12 (U into formatConvBuffer, V into a
 * 16-byte-aligned second half), scale both planes, then apply chroma
 * range conversion if configured. */
1585 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1586 const uint8_t *src1, const uint8_t *src2,
1587 int srcW, int xInc, const int16_t *hChrFilter,
1588 const int16_t *hChrFilterPos, int hChrFilterSize,
1589 uint8_t *formatConvBuffer, uint32_t *pal)
/* V conversion target: second half of the buffer, aligned to 16 bytes */
1592 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1593 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1594 src1= formatConvBuffer;
1598 if (!c->hcscale_fast) {
1599 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1600 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1601 } else { // fast bilinear upscale / crap downscale
1602 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1605 if (c->chrConvertRange)
1606 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions (planar 1-tap/X-tap and packed 1/2/X-tap)
 * appropriate for the destination pixel format and write them through the
 * supplied pointers.  Called from init and re-run near the last two output
 * lines of swScale() (where MMX variants cannot be used safely). */
1609 static av_always_inline void
1610 find_c_packed_planar_out_funcs(SwsContext *c,
1611 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1612 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1613 yuv2packedX_fn *yuv2packedX)
1615 enum PixelFormat dstFormat = c->dstFormat;
/* planar selection: NV12/21, 16-bit, 9/10-bit, then plain 8-bit */
1617 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1618 *yuv2yuvX = yuv2nv12X_c;
1619 } else if (is16BPS(dstFormat)) {
1620 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1621 } else if (is9_OR_10BPS(dstFormat)) {
1622 if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
1623 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
1625 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1628 *yuv2yuv1 = yuv2yuv1_c;
1629 *yuv2yuvX = yuv2yuvX_c;
/* packed selection: full-chroma RGB path, gray16, or the generic trio */
1631 if(c->flags & SWS_FULL_CHR_H_INT) {
1632 *yuv2packedX = yuv2rgbX_c_full;
1634 switch (dstFormat) {
1635 case PIX_FMT_GRAY16BE:
1636 *yuv2packed1 = yuv2gray16BE_1_c;
1637 *yuv2packed2 = yuv2gray16BE_2_c;
1638 *yuv2packedX = yuv2gray16BE_X_c;
1640 case PIX_FMT_GRAY16LE:
1641 *yuv2packed1 = yuv2gray16LE_1_c;
1642 *yuv2packed2 = yuv2gray16LE_2_c;
1643 *yuv2packedX = yuv2gray16LE_X_c;
1646 *yuv2packed1 = yuv2packed1_c;
1647 *yuv2packed2 = yuv2packed2_c;
1648 *yuv2packedX = yuv2packedX_c;
/* Compile-time switch for ring-buffer tracing in swScale(). */
1654 #define DEBUG_SWSCALE_BUFFERS 0
/* NOTE(review): DEBUG_BUFFERS expands to a bare `if` — an `else` following a
 * DEBUG_BUFFERS(...) call would bind to it.  Wrapping the expansion in
 * do { ... } while (0) would be safer; confirm no such call site exists. */
1655 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling entry: consume one horizontal slice of the source image
 * (srcSliceH lines starting at srcSliceY), horizontally scale new input
 * lines into the luma/chroma ring buffers, and vertically filter them into
 * output lines until the slice is exhausted.  Returns the number of output
 * lines produced (dstY - lastDstY).
 * NOTE(review): many interior lines are elided in this view; comments below
 * describe only what the visible code shows. */
1657 static int swScale(SwsContext *c, const uint8_t* src[],
1658 int srcStride[], int srcSliceY,
1659 int srcSliceH, uint8_t* dst[], int dstStride[])
1661 /* load a few things into local vars to make the code more readable? and faster */
1662 const int srcW= c->srcW;
1663 const int dstW= c->dstW;
1664 const int dstH= c->dstH;
1665 const int chrDstW= c->chrDstW;
1666 const int chrSrcW= c->chrSrcW;
1667 const int lumXInc= c->lumXInc;
1668 const int chrXInc= c->chrXInc;
1669 const enum PixelFormat dstFormat= c->dstFormat;
1670 const int flags= c->flags;
1671 int16_t *vLumFilterPos= c->vLumFilterPos;
1672 int16_t *vChrFilterPos= c->vChrFilterPos;
1673 int16_t *hLumFilterPos= c->hLumFilterPos;
1674 int16_t *hChrFilterPos= c->hChrFilterPos;
1675 int16_t *vLumFilter= c->vLumFilter;
1676 int16_t *vChrFilter= c->vChrFilter;
1677 int16_t *hLumFilter= c->hLumFilter;
1678 int16_t *hChrFilter= c->hChrFilter;
1679 int32_t *lumMmxFilter= c->lumMmxFilter;
1680 int32_t *chrMmxFilter= c->chrMmxFilter;
1681 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1682 const int vLumFilterSize= c->vLumFilterSize;
1683 const int vChrFilterSize= c->vChrFilterSize;
1684 const int hLumFilterSize= c->hLumFilterSize;
1685 const int hChrFilterSize= c->hChrFilterSize;
1686 int16_t **lumPixBuf= c->lumPixBuf;
1687 int16_t **chrUPixBuf= c->chrUPixBuf;
1688 int16_t **chrVPixBuf= c->chrVPixBuf;
1689 int16_t **alpPixBuf= c->alpPixBuf;
1690 const int vLumBufSize= c->vLumBufSize;
1691 const int vChrBufSize= c->vChrBufSize;
1692 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* slice coordinates in chroma-plane units (rounded up for the height) */
1693 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1694 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1696 uint32_t *pal=c->pal_yuv;
1697 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
1698 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
1699 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
1700 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
1701 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
1703 /* vars which will change and which we need to store back in the context */
1705 int lumBufIndex= c->lumBufIndex;
1706 int chrBufIndex= c->chrBufIndex;
1707 int lastInLumBuf= c->lastInLumBuf;
1708 int lastInChrBuf= c->lastInChrBuf;
/* normalize plane pointers/strides for packed and chroma-dropped input */
1710 if (isPacked(c->srcFormat)) {
1718 srcStride[3]= srcStride[0];
1720 srcStride[1]<<= c->vChrDrop;
1721 srcStride[2]<<= c->vChrDrop;
1723 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1724 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1725 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1726 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1727 srcSliceY, srcSliceH, dstY, dstH);
1728 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1729 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
1731 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1732 static int warnedAlready=0; //FIXME move this into the context perhaps
1733 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1734 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1735 " ->cannot do aligned memory accesses anymore\n");
1740 /* Note the user might start scaling the picture in the middle so this
1741 will not get executed. This is not really intended but works
1742 currently, so people might do it. */
1743 if (srcSliceY ==0) {
/* main loop: one destination line per iteration */
1753 for (;dstY < dstH; dstY++) {
1754 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1755 const int chrDstY= dstY>>c->chrDstVSubSample;
1756 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1757 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1758 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1760 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1761 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1762 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1763 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1764 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1765 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1768 //handle holes (FAST_BILINEAR & weird filters)
1769 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1770 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1771 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1772 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1774 DEBUG_BUFFERS("dstY: %d\n", dstY);
1775 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1776 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1777 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1778 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1780 // Do we have enough lines in this slice to output the dstY line
1781 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
1783 if (!enough_lines) {
/* not enough input yet: just buffer everything this slice provides */
1784 lastLumSrcY = srcSliceY + srcSliceH - 1;
1785 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1786 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1787 lastLumSrcY, lastChrSrcY);
1790 //Do horizontal scaling
1791 while(lastInLumBuf < lastLumSrcY) {
1792 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1793 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1795 assert(lumBufIndex < 2*vLumBufSize);
1796 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1797 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1798 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1799 hLumFilter, hLumFilterPos, hLumFilterSize,
1802 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1803 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1804 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1808 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1809 lumBufIndex, lastInLumBuf);
1811 while(lastInChrBuf < lastChrSrcY) {
1812 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1813 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1815 assert(chrBufIndex < 2*vChrBufSize);
1816 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1817 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1818 //FIXME replace parameters through context struct (some at least)
1820 if (c->needs_hcscale)
1821 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1822 chrDstW, src1, src2, chrSrcW, chrXInc,
1823 hChrFilter, hChrFilterPos, hChrFilterSize,
1824 formatConvBuffer, pal);
1826 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1827 chrBufIndex, lastInChrBuf);
1829 //wrap buf index around to stay inside the ring buffer
1830 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1831 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1833 break; //we can't output a dstY line so let's try with the next slice
1836 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
1838 if (dstY >= dstH-2) {
1839 // hmm looks like we can't use MMX here without overwriting this array's tail
1840 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
1841 &yuv2packed1, &yuv2packed2,
/* pointers into the ring buffers for this output line's filter window */
1846 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1847 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1848 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1849 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1850 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1851 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1852 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1853 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1854 const int16_t *lumBuf = lumSrcPtr[0];
1855 const int16_t *chrUBuf= chrUSrcPtr[0];
1856 const int16_t *chrVBuf= chrVSrcPtr[0];
1857 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1858 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1859 uDest, vDest, aDest, dstW, chrDstW);
1860 } else { //General YV12
1862 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1863 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1864 chrVSrcPtr, vChrFilterSize,
1865 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1868 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1869 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1870 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1871 int chrAlpha= vChrFilter[2*dstY+1];
1872 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1873 *chrVSrcPtr, *(chrVSrcPtr+1),
1874 alpPixBuf ? *alpSrcPtr : NULL,
1875 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1876 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1877 int lumAlpha= vLumFilter[2*dstY+1];
1878 int chrAlpha= vChrFilter[2*dstY+1];
1880 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1882 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1883 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1884 *chrVSrcPtr, *(chrVSrcPtr+1),
1885 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1886 dest, dstW, lumAlpha, chrAlpha, dstY);
1887 } else { //general RGB
1889 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1890 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1891 alpSrcPtr, dest, dstW, dstY);
/* destination wants alpha but the source provided none: fill with opaque */
1897 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1898 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
1901 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1902 __asm__ volatile("sfence":::"memory");
1906 /* store changed local vars back in the context */
1908 c->lumBufIndex= lumBufIndex;
1909 c->chrBufIndex= chrBufIndex;
1910 c->lastInLumBuf= lastInLumBuf;
1911 c->lastInChrBuf= lastInChrBuf;
1913 return dstY - lastDstY;
/* One-time C-path initialization: pick output functions for the destination
 * format, the horizontal scaler (generic or fast-bilinear), the per-source-
 * format input converters (chrToYV12 / lumToYV12 / alpToYV12), the optional
 * range-conversion hooks, and whether chroma scaling is needed at all. */
1916 static av_cold void sws_init_swScale_c(SwsContext *c)
1918 enum PixelFormat srcFormat = c->srcFormat;
1920 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
1921 &c->yuv2packed1, &c->yuv2packed2,
1924 c->hScale = hScale_c;
1926 if (c->flags & SWS_FAST_BILINEAR) {
1927 c->hyscale_fast = hyscale_fast_c;
1928 c->hcscale_fast = hcscale_fast_c;
/* chroma input converter: NULL means the source is already planar 8-bit */
1931 c->chrToYV12 = NULL;
1933 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1934 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1935 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1936 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1940 case PIX_FMT_BGR4_BYTE:
1941 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1942 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1943 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1944 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1945 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1946 case PIX_FMT_YUV420P16BE:
1947 case PIX_FMT_YUV422P16BE:
1948 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1949 case PIX_FMT_YUV420P16LE:
1950 case PIX_FMT_YUV422P16LE:
1951 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB sources: use the _half variants when chroma is horizontally
 * subsampled so two source pixels are averaged per chroma sample */
1953 if (c->chrSrcHSubSample) {
1955 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
1956 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
1957 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
1958 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
1959 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1960 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1961 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1962 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1963 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1964 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1965 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1966 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1967 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1968 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1972 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
1973 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
1974 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
1975 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
1976 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
1977 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
1978 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
1979 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
1980 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
1981 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
1982 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
1983 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
1984 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
1985 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
/* luma and alpha input converters, selected per source format */
1989 c->lumToYV12 = NULL;
1990 c->alpToYV12 = NULL;
1991 switch (srcFormat) {
1992 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
1993 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
1994 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
1995 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
1996 case PIX_FMT_YUYV422 :
1997 case PIX_FMT_YUV420P16BE:
1998 case PIX_FMT_YUV422P16BE:
1999 case PIX_FMT_YUV444P16BE:
2000 case PIX_FMT_Y400A :
2001 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2002 case PIX_FMT_UYVY422 :
2003 case PIX_FMT_YUV420P16LE:
2004 case PIX_FMT_YUV422P16LE:
2005 case PIX_FMT_YUV444P16LE:
2006 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2007 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2008 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
2009 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
2010 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2011 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
2012 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
2016 case PIX_FMT_BGR4_BYTE:
2017 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2018 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2019 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2020 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2021 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2022 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2023 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2024 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2025 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2026 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2027 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2030 switch (srcFormat) {
2032 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2034 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A keeps alpha at odd bytes, same layout uyvyToY_c extracts */
2035 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* range conversion only for YUV outputs whose range differs from input */
2039 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2041 c->lumConvertRange = lumRangeFromJpeg_c;
2042 c->chrConvertRange = chrRangeFromJpeg_c;
2044 c->lumConvertRange = lumRangeToJpeg_c;
2045 c->chrConvertRange = chrRangeToJpeg_c;
/* grayscale and 1bpp sources have no chroma to scale */
2049 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2050 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2051 c->needs_hcscale = 1;
2054 SwsFunc ff_getSwsFunc(SwsContext *c)
2056 sws_init_swScale_c(c);
2059 ff_sws_init_swScale_mmx(c);
2061 ff_sws_init_swScale_altivec(c);