2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most have been tested, but the full list was not recorded)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
// Fixed-point RGB->YUV conversion coefficients (ITU-R BT.601, "TV range":
// luma scaled by 219/255, chroma by 224/255), rounded to RGB2YUV_SHIFT
// fractional bits.  {R,G,B}{Y,U,V} is the contribution of that RGB channel
// to the Y/U/V component.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
// Floating-point RGB->YUV coefficient sets, one row of 9 per colorspace
// (order: GY, BY, RY, GU, BU, RU? -- layout matches the consumers in this
// file; NOTE(review): confirm column order against upstream before reuse).
// Indexed by the SWS_CS_* colorspace selector; duplicate rows fill the
// slots that all map to ITU-601.
// NOTE(review): the closing "};" of this array is not visible in this excerpt.
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
// 2x2 ordered-dither matrix with 4 levels (used for the 6-bit green channel
// of RGB565-style output).
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 ordered-dither matrix with 8 levels (used for the 5-bit channels of
// RGB555/RGB565 output).
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 ordered-dither matrix with 16 levels (used for 4-bit-per-channel
// RGB444/BGR444 output).  Non-static: referenced from other units.
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 ordered-dither matrix with 32 levels (used for the 3-bit R/G parts of
// RGB8-style output).  Non-static: referenced from other units.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 ordered-dither matrix spanning ~73 levels (used for the 2-bit channel
// of RGB8/RGB4 output).  Non-static: referenced from other units.
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 ordered-dither matrix spanning ~220 levels (used for 1-bit mono and
// the coarse channels of RGB4/RGB1 output).  Non-static: referenced from
// other units.
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
// NOTE(review): this re-declares dither_8x8_220 with the same name as the
// table above; in the upstream file these gamma-corrected alternatives are
// selected by #if/#else preprocessor guards so only ONE is compiled.  The
// guard lines are not visible in this excerpt -- confirm against upstream
// before building, as an unguarded duplicate is a redefinition error.
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
// NOTE(review): alternate dither_8x8_220 (gamma 2.0) -- in upstream this is
// under a preprocessor guard so only one definition is compiled; the guard
// lines are not visible in this excerpt.
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
// NOTE(review): alternate dither_8x8_220 (gamma 2.5) -- in upstream this is
// under a preprocessor guard so only one definition is compiled; the guard
// lines are not visible in this excerpt.
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/**
 * Vertical scaler template for planar YUV output with >8 bits per component.
 * Applies the vertical FIR filters (lumFilter/chrFilter over
 * lumFilterSize/chrFilterSize source rows) and writes one destination row of
 * 16-bit samples to dest/uDest/vDest (and aDest for alpha when enabled).
 *
 * big_endian / output_bits are compile-time constants in each instantiation
 * (see the yuv2NBPS wrapper macro below), so the branches fold away.
 * NOTE(review): the output_pixel macro clearly has a big-endian (AV_WB16)
 * and a little-endian (AV_WL16) branch; the #if/#else/#endif lines selecting
 * them are not visible in this excerpt -- confirm against upstream.
 */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
// 11 filter fraction bits + 16-bit intermediate, reduced to output_bits.
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
// Luma plane: accumulate with rounding bias of 0.5 in the output scale.
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
// Chroma planes (half width for subsampled formats).
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
// Optional alpha plane, filtered with the luma filter.
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/*
 * Instantiates a concrete yuv2yuvX<bits><BE|LE>_c vertical scaler that
 * forwards to yuv2yuvX16_c_template with compile-time bit depth and
 * endianness, after casting the byte pointers to uint16_t*.
 * NOTE(review): the forwarded argument list is missing its alpSrc line in
 * this excerpt (internal line 277 absent) -- confirm against upstream.
 */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/**
 * Vertical scaler for 8-bit planar YUV output: applies the vertical filters
 * and writes clipped 8-bit samples.  Accumulators carry 12 filter-fraction
 * bits times 7 sample-fraction bits, hence the >>19 on output.
 */
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
// Luma plane.
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
// Chroma planes.
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
// Optional alpha plane, filtered with the luma filter.
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/**
 * Vertical scaler for NV12/NV21 output: planar luma plus one interleaved
 * chroma plane.  NV12 stores U,V pairs; the other branch (NV21) swaps the
 * order to V,U.  Only uDest is written for chroma (it is the interleaved
 * UV plane); vDest is unused here.
 */
332 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
333 const int16_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int16_t **chrUSrc,
335 const int16_t **chrVSrc, int chrFilterSize,
336 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
337 uint8_t *vDest, uint8_t *aDest,
338 int dstW, int chrDstW)
340 enum PixelFormat dstFormat = c->dstFormat;
342 //FIXME Optimize (just quickly written not optimized..)
// Luma plane, same accumulation as yuv2yuvX_c.
344 for (i=0; i<dstW; i++) {
347 for (j=0; j<lumFilterSize; j++)
348 val += lumSrc[j][i] * lumFilter[j];
350 dest[i]= av_clip_uint8(val>>19);
// NV12: U at even offsets, V at odd; else (NV21) V first.
356 if (dstFormat == PIX_FMT_NV12)
357 for (i=0; i<chrDstW; i++) {
361 for (j=0; j<chrFilterSize; j++) {
362 u += chrUSrc[j][i] * chrFilter[j];
363 v += chrVSrc[j][i] * chrFilter[j];
366 uDest[2*i]= av_clip_uint8(u>>19);
367 uDest[2*i+1]= av_clip_uint8(v>>19);
370 for (i=0; i<chrDstW; i++) {
374 for (j=0; j<chrFilterSize; j++) {
375 u += chrUSrc[j][i] * chrFilter[j];
376 v += chrVSrc[j][i] * chrFilter[j];
379 uDest[2*i]= av_clip_uint8(v>>19);
380 uDest[2*i+1]= av_clip_uint8(u>>19);
/**
 * Multi-tap vertical scaler template for 16-bit grayscale output, two luma
 * pixels per iteration.  target selects GRAY16BE vs GRAY16LE byte order.
 * NOTE(review): only the big-endian branch of output_pixel is visible here;
 * the #else (little-endian) lines are missing from this excerpt.
 */
384 static av_always_inline void
385 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
386 const int16_t **lumSrc, int lumFilterSize,
387 const int16_t *chrFilter, const int16_t **chrUSrc,
388 const int16_t **chrVSrc, int chrFilterSize,
389 const int16_t **alpSrc, uint8_t *dest, int dstW,
390 int y, enum PixelFormat target)
394 #define output_pixel(pos, val) \
395 if (target == PIX_FMT_GRAY16BE) { \
400 for (i = 0; i < (dstW >> 1); i++) {
404 const int i2 = 2 * i;
406 for (j = 0; j < lumFilterSize; j++) {
407 Y1 += lumSrc[j][i2] * lumFilter[j];
408 Y2 += lumSrc[j][i2+1] * lumFilter[j];
// Clip only when either value overflowed 16 bits (cheap common-case test).
412 if ((Y1 | Y2) & 0x10000) {
413 Y1 = av_clip_uint16(Y1);
414 Y2 = av_clip_uint16(Y2);
416 output_pixel(&dest[2 * i2 + 0], Y1);
417 output_pixel(&dest[2 * i2 + 2], Y2);
/**
 * Two-row bilinear vertical scaler template for 16-bit grayscale output:
 * blends buf0/buf1 with 12-bit weights yalpha1/yalpha (yalpha1 = 4095 -
 * yalpha), then >>11 leaves 16-bit samples.  Chroma/alpha buffers are
 * accepted for signature uniformity but unused for grayscale.
 * NOTE(review): the trailing '\' on the yalpha1 line below looks like a
 * stray line-continuation (this is not a macro body) -- verify upstream.
 */
421 static av_always_inline void
422 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
423 const uint16_t *buf1, const uint16_t *ubuf0,
424 const uint16_t *ubuf1, const uint16_t *vbuf0,
425 const uint16_t *vbuf1, const uint16_t *abuf0,
426 const uint16_t *abuf1, uint8_t *dest, int dstW,
427 int yalpha, int uvalpha, int y,
428 enum PixelFormat target)
430 int yalpha1 = 4095 - yalpha; \
433 for (i = 0; i < (dstW >> 1); i++) {
434 const int i2 = 2 * i;
435 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
436 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
438 output_pixel(&dest[2 * i2 + 0], Y1);
439 output_pixel(&dest[2 * i2 + 2], Y2);
/**
 * Single-row (no vertical interpolation) template for 16-bit grayscale
 * output: buf0 holds 15-bit samples, so <<1 widens them to 16 bits.
 */
443 static av_always_inline void
444 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
445 const uint16_t *ubuf0, const uint16_t *ubuf1,
446 const uint16_t *vbuf0, const uint16_t *vbuf1,
447 const uint16_t *abuf0, uint8_t *dest, int dstW,
448 int uvalpha, enum PixelFormat dstFormat,
449 int flags, int y, enum PixelFormat target)
453 for (i = 0; i < (dstW >> 1); i++) {
454 const int i2 = 2 * i;
455 int Y1 = buf0[i2 ] << 1;
456 int Y2 = buf0[i2+1] << 1;
458 output_pixel(&dest[2 * i2 + 0], Y1);
459 output_pixel(&dest[2 * i2 + 2], Y2);
/*
 * Generates the three public entry points for a packed-output writer from
 * its _X/_2/_1 templates, binding the pixel format `fmt` as a compile-time
 * constant:
 *   name##ext##_X_c  -- multi-tap vertical filter variant
 *   name##ext##_2_c  -- two-row bilinear blend variant
 *   name##ext##_1_c  -- single-row (no interpolation) variant
 */
464 #define YUV2PACKEDWRAPPER(name, ext, fmt) \
465 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
466 const int16_t **lumSrc, int lumFilterSize, \
467 const int16_t *chrFilter, const int16_t **chrUSrc, \
468 const int16_t **chrVSrc, int chrFilterSize, \
469 const int16_t **alpSrc, uint8_t *dest, int dstW, \
472 name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
473 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
474 alpSrc, dest, dstW, y, fmt); \
477 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
478 const uint16_t *buf1, const uint16_t *ubuf0, \
479 const uint16_t *ubuf1, const uint16_t *vbuf0, \
480 const uint16_t *vbuf1, const uint16_t *abuf0, \
481 const uint16_t *abuf1, uint8_t *dest, int dstW, \
482 int yalpha, int uvalpha, int y) \
484 name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
485 vbuf0, vbuf1, abuf0, abuf1, \
486 dest, dstW, yalpha, uvalpha, y, fmt); \
489 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
490 const uint16_t *ubuf0, const uint16_t *ubuf1, \
491 const uint16_t *vbuf0, const uint16_t *vbuf1, \
492 const uint16_t *abuf0, uint8_t *dest, int dstW, \
493 int uvalpha, enum PixelFormat dstFormat, \
496 name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
497 vbuf1, abuf0, dest, dstW, uvalpha, \
498 dstFormat, flags, y, fmt); \
// Instantiate the 16-bit grayscale writers for both byte orders.
501 YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
502 YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
/*
 * Per-pixel-pair loop header for multi-tap packed output: computes filtered
 * Y1/Y2 (two luma samples) and shared U/V for pixel pair i2 = 2*i, clipping
 * to 8 bits only when the cheap overflow test fires.  When `alpha` is set it
 * also produces filtered A1/A2.  The loop body (and closing brace) is
 * supplied by the code that expands this macro.
 */
504 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
505 for (i=0; i<(dstW>>1); i++) {\
511 int av_unused A1, A2;\
512 type av_unused *r, *b, *g;\
515 for (j=0; j<lumFilterSize; j++) {\
516 Y1 += lumSrc[j][i2] * lumFilter[j];\
517 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
519 for (j=0; j<chrFilterSize; j++) {\
520 U += chrUSrc[j][i] * chrFilter[j];\
521 V += chrVSrc[j][i] * chrFilter[j];\
527 if ((Y1|Y2|U|V)&0x100) {\
528 Y1 = av_clip_uint8(Y1); \
529 Y2 = av_clip_uint8(Y2); \
530 U = av_clip_uint8(U); \
531 V = av_clip_uint8(V); \
536 for (j=0; j<lumFilterSize; j++) {\
537 A1 += alpSrc[j][i2 ] * lumFilter[j];\
538 A2 += alpSrc[j][i2+1] * lumFilter[j];\
542 if ((A1|A2)&0x100) {\
543 A1 = av_clip_uint8(A1); \
544 A2 = av_clip_uint8(A2); \
/*
 * Full-chroma-resolution per-pixel loop header: filters Y/U/V (and A when
 * `alpha`), then converts to R/G/B with the context's fixed-point yuv2rgb
 * coefficients, clipping to 30 bits.  Used by the "full" RGB writers that
 * do not go through the lookup tables.
 */
548 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
549 for (i=0; i<dstW; i++) {\
557 for (j=0; j<lumFilterSize; j++) {\
558 Y += lumSrc[j][i ] * lumFilter[j];\
560 for (j=0; j<chrFilterSize; j++) {\
561 U += chrUSrc[j][i] * chrFilter[j];\
562 V += chrVSrc[j][i] * chrFilter[j];\
569 for (j=0; j<lumFilterSize; j++)\
570 A += alpSrc[j][i ] * lumFilter[j];\
573 A = av_clip_uint8(A);\
575 Y-= c->yuv2rgb_y_offset;\
576 Y*= c->yuv2rgb_y_coeff;\
578 R= Y + V*c->yuv2rgb_v2r_coeff;\
579 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
580 B= Y + U*c->yuv2rgb_u2b_coeff;\
581 if ((R|G|B)&(0xC0000000)) {\
582 R = av_clip_uintp2(R, 30); \
583 G = av_clip_uintp2(G, 30); \
584 B = av_clip_uintp2(B, 30); \
/*
 * Multi-tap packed-RGB loop header: like PACKEDX plus per-pixel r/g/b
 * lookup-table pointers derived from the filtered U/V values.
 */
587 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
588 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
589 r = (type *)c->table_rV[V]; \
590 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
591 b = (type *)c->table_bU[U];
/*
 * Two-row bilinear loop header: blends buf0/buf1 (and abuf0/abuf1 when
 * `alpha`) with the 12-bit yalpha/uvalpha weights, >>19 to 8-bit samples.
 */
593 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
594 for (i=0; i<(dstW>>1); i++) { \
596 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
597 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
598 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
599 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
600 type av_unused *r, *b, *g; \
601 int av_unused A1, A2; \
603 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
604 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
// Two-row bilinear RGB loop header: PACKED2 plus r/g/b LUT pointers.
607 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
608 YSCALE_YUV_2_PACKED2_C(type,alpha)\
609 r = (type *)c->table_rV[V];\
610 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
611 b = (type *)c->table_bU[U];
/*
 * Single-row loop header (no vertical interpolation): 15-bit samples are
 * reduced to 8 bits with >>7; chroma is taken from ubuf1/vbuf1 only.
 */
613 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
614 for (i=0; i<(dstW>>1); i++) {\
616 int Y1= buf0[i2 ]>>7;\
617 int Y2= buf0[i2+1]>>7;\
618 int U= (ubuf1[i])>>7;\
619 int V= (vbuf1[i])>>7;\
620 type av_unused *r, *b, *g;\
621 int av_unused A1, A2;\
// Single-row RGB loop header: PACKED1 plus r/g/b LUT pointers.
627 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
628 YSCALE_YUV_2_PACKED1_C(type,alpha)\
629 r = (type *)c->table_rV[V];\
630 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
631 b = (type *)c->table_bU[U];
/*
 * Single-row loop header with chroma averaging: U/V are the mean of the two
 * chroma rows ((a+b)>>8 on 15-bit samples == average >>7), used when the
 * chroma phase falls between source rows.
 */
633 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
634 for (i=0; i<(dstW>>1); i++) {\
636 int Y1= buf0[i2 ]>>7;\
637 int Y2= buf0[i2+1]>>7;\
638 int U= (ubuf0[i] + ubuf1[i])>>8;\
639 int V= (vbuf0[i] + vbuf1[i])>>8;\
640 type av_unused *r, *b, *g;\
641 int av_unused A1, A2;\
// Single-row chroma-averaged RGB loop header: PACKED1B plus r/g/b LUTs.
647 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
648 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
649 r = (type *)c->table_rV[V];\
650 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
651 b = (type *)c->table_bU[U];
/*
 * 1-bit monochrome writer, bilinear two-row variant: dithers 8 blended luma
 * samples through the 220-level matrix and the gray LUT, packing them MSB
 * first into one output byte; MONOWHITE inverts the bits.
 */
653 #define YSCALE_YUV_2_MONO2_C \
654 const uint8_t * const d128=dither_8x8_220[y&7];\
655 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
656 for (i=0; i<dstW-7; i+=8) {\
658 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
659 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
660 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
661 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
662 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
663 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
664 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
665 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
666 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/*
 * 1-bit monochrome writer, multi-tap variant: vertically filters two luma
 * samples at a time, clips, dithers and shifts them into the accumulator
 * byte; MONOWHITE inverts the bits on output.
 */
670 #define YSCALE_YUV_2_MONOX_C \
671 const uint8_t * const d128=dither_8x8_220[y&7];\
672 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
674 for (i=0; i<dstW-1; i+=2) {\
679 for (j=0; j<lumFilterSize; j++) {\
680 Y1 += lumSrc[j][i] * lumFilter[j];\
681 Y2 += lumSrc[j][i+1] * lumFilter[j];\
685 if ((Y1|Y2)&0x100) {\
686 Y1 = av_clip_uint8(Y1); \
687 Y2 = av_clip_uint8(Y2); \
689 acc+= acc + g[Y1+d128[(i+0)&7]];\
690 acc+= acc + g[Y2+d128[(i+1)&7]];\
692 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/*
 * Packed-output dispatcher: switches on c->dstFormat and expands one of the
 * loop-header macros passed in -- `func` for table-driven RGB formats,
 * `func2` for raw YUV packed formats (YUYV/UYVY) and 48-bit RGB, and
 * `func_monoblack` for 1-bit output.  Each case writes one pixel pair per
 * iteration; 16/15/12-bit formats add the per-position dither offsets
 * (dr/dg/db) before the table lookups.  Several `break`s, braces and the
 * `dest +=` advance lines fall in gaps of this excerpt.
 */
697 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
698 switch(c->dstFormat) {\
699 case PIX_FMT_RGB48BE:\
700 case PIX_FMT_RGB48LE:\
702 ((uint8_t*)dest)[ 0]= r[Y1];\
703 ((uint8_t*)dest)[ 1]= r[Y1];\
704 ((uint8_t*)dest)[ 2]= g[Y1];\
705 ((uint8_t*)dest)[ 3]= g[Y1];\
706 ((uint8_t*)dest)[ 4]= b[Y1];\
707 ((uint8_t*)dest)[ 5]= b[Y1];\
708 ((uint8_t*)dest)[ 6]= r[Y2];\
709 ((uint8_t*)dest)[ 7]= r[Y2];\
710 ((uint8_t*)dest)[ 8]= g[Y2];\
711 ((uint8_t*)dest)[ 9]= g[Y2];\
712 ((uint8_t*)dest)[10]= b[Y2];\
713 ((uint8_t*)dest)[11]= b[Y2];\
717 case PIX_FMT_BGR48BE:\
718 case PIX_FMT_BGR48LE:\
720 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
721 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
722 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
723 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
724 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
725 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
732 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
733 func(uint32_t,needAlpha)\
734 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
735 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
738 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
740 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
741 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
745 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
746 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
754 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
755 func(uint32_t,needAlpha)\
756 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
757 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
760 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
762 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
763 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
767 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
768 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
775 ((uint8_t*)dest)[0]= r[Y1];\
776 ((uint8_t*)dest)[1]= g[Y1];\
777 ((uint8_t*)dest)[2]= b[Y1];\
778 ((uint8_t*)dest)[3]= r[Y2];\
779 ((uint8_t*)dest)[4]= g[Y2];\
780 ((uint8_t*)dest)[5]= b[Y2];\
786 ((uint8_t*)dest)[0]= b[Y1];\
787 ((uint8_t*)dest)[1]= g[Y1];\
788 ((uint8_t*)dest)[2]= r[Y1];\
789 ((uint8_t*)dest)[3]= b[Y2];\
790 ((uint8_t*)dest)[4]= g[Y2];\
791 ((uint8_t*)dest)[5]= r[Y2];\
795 case PIX_FMT_RGB565BE:\
796 case PIX_FMT_RGB565LE:\
797 case PIX_FMT_BGR565BE:\
798 case PIX_FMT_BGR565LE:\
800 const int dr1= dither_2x2_8[y&1 ][0];\
801 const int dg1= dither_2x2_4[y&1 ][0];\
802 const int db1= dither_2x2_8[(y&1)^1][0];\
803 const int dr2= dither_2x2_8[y&1 ][1];\
804 const int dg2= dither_2x2_4[y&1 ][1];\
805 const int db2= dither_2x2_8[(y&1)^1][1];\
807 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
808 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
812 case PIX_FMT_RGB555BE:\
813 case PIX_FMT_RGB555LE:\
814 case PIX_FMT_BGR555BE:\
815 case PIX_FMT_BGR555LE:\
817 const int dr1= dither_2x2_8[y&1 ][0];\
818 const int dg1= dither_2x2_8[y&1 ][1];\
819 const int db1= dither_2x2_8[(y&1)^1][0];\
820 const int dr2= dither_2x2_8[y&1 ][1];\
821 const int dg2= dither_2x2_8[y&1 ][0];\
822 const int db2= dither_2x2_8[(y&1)^1][1];\
824 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
825 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
829 case PIX_FMT_RGB444BE:\
830 case PIX_FMT_RGB444LE:\
831 case PIX_FMT_BGR444BE:\
832 case PIX_FMT_BGR444LE:\
834 const int dr1= dither_4x4_16[y&3 ][0];\
835 const int dg1= dither_4x4_16[y&3 ][1];\
836 const int db1= dither_4x4_16[(y&3)^3][0];\
837 const int dr2= dither_4x4_16[y&3 ][1];\
838 const int dg2= dither_4x4_16[y&3 ][0];\
839 const int db2= dither_4x4_16[(y&3)^3][1];\
841 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
842 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
849 const uint8_t * const d64= dither_8x8_73[y&7];\
850 const uint8_t * const d32= dither_8x8_32[y&7];\
852 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
853 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
860 const uint8_t * const d64= dither_8x8_73 [y&7];\
861 const uint8_t * const d128=dither_8x8_220[y&7];\
863 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
864 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
868 case PIX_FMT_RGB4_BYTE:\
869 case PIX_FMT_BGR4_BYTE:\
871 const uint8_t * const d64= dither_8x8_73 [y&7];\
872 const uint8_t * const d128=dither_8x8_220[y&7];\
874 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
875 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
879 case PIX_FMT_MONOBLACK:\
880 case PIX_FMT_MONOWHITE:\
885 case PIX_FMT_YUYV422:\
887 ((uint8_t*)dest)[2*i2+0]= Y1;\
888 ((uint8_t*)dest)[2*i2+1]= U;\
889 ((uint8_t*)dest)[2*i2+2]= Y2;\
890 ((uint8_t*)dest)[2*i2+3]= V;\
893 case PIX_FMT_UYVY422:\
895 ((uint8_t*)dest)[2*i2+0]= U;\
896 ((uint8_t*)dest)[2*i2+1]= Y1;\
897 ((uint8_t*)dest)[2*i2+2]= V;\
898 ((uint8_t*)dest)[2*i2+3]= Y2;\
/**
 * Multi-tap vertical scaler writing any supported packed output format;
 * delegates all per-format work to the ANYRGB dispatcher with the
 * multi-tap (X) loop headers.
 */
903 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
904 const int16_t **lumSrc, int lumFilterSize,
905 const int16_t *chrFilter, const int16_t **chrUSrc,
906 const int16_t **chrVSrc, int chrFilterSize,
907 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
910 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
/**
 * Full-chroma-resolution RGB writer: converts every pixel through the
 * fixed-point coefficients (no 2:1 chroma pairing, no LUTs).  The switch
 * cases (RGBA vs BGRA byte order, aidx alpha offset) fall partly in gaps of
 * this excerpt; each branch picks the alpha handling at compile/run time:
 * per-pixel filtered alpha when an alpha plane exists, constant 255 text
 * otherwise.
 */
913 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
914 const int16_t **lumSrc, int lumFilterSize,
915 const int16_t *chrFilter, const int16_t **chrUSrc,
916 const int16_t **chrVSrc, int chrFilterSize,
917 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
920 int step= c->dstFormatBpp/8;
923 switch(c->dstFormat) {
931 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
932 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
933 dest[aidx]= needAlpha ? A : 255;
940 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
941 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
949 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
966 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
967 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
968 dest[aidx]= needAlpha ? A : 255;
975 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
976 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
984 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/**
 * Fills `height` rows of a plane (starting at row y) with a constant byte.
 * NOTE(review): the remaining parameters (y offset and fill value, per the
 * body's use of `y` and `val`) and the stride advance fall in gaps of this
 * excerpt -- confirm the full signature against upstream.
 */
999 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1000 int width, int height,
1004 uint8_t *ptr = plane + stride*y;
1005 for (i=0; i<height; i++) {
1006 memset(ptr, val, width);
/*
 * Generates the 48-bit (16-bit-per-component) RGB/BGR input readers for one
 * endianness: ...ToY_c (luma), ...ToUV_c (chroma at full horizontal
 * resolution) and ...ToUV_half_c (chroma averaging horizontal pixel pairs).
 * compA/compB/compC name the component order (r,g,b or b,g,r) so the same
 * body serves RGB48 and BGR48; rfunc is AV_RL16 or AV_RB16.  Each 16-bit
 * component is reduced to 8 bits (>>8) before the fixed-point dot product;
 * (33<<(S-1)) / (257<<(S-1)) are the rounding + bias terms for Y (16 offset)
 * and U/V (128 offset) respectively.
 */
1011 #define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
1012 static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
1013 uint8_t *dst, const uint8_t *src, int width, \
1017 for (i = 0; i < width; i++) { \
1018 int compA = rfunc(&src[i*6+0]) >> 8; \
1019 int compB = rfunc(&src[i*6+2]) >> 8; \
1020 int compC = rfunc(&src[i*6+4]) >> 8; \
1022 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1026 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
1027 uint8_t *dstU, uint8_t *dstV, \
1028 const uint8_t *src1, const uint8_t *src2, \
1029 int width, uint32_t *unused) \
1032 assert(src1==src2); \
1033 for (i = 0; i < width; i++) { \
1034 int compA = rfunc(&src1[6*i + 0]) >> 8; \
1035 int compB = rfunc(&src1[6*i + 2]) >> 8; \
1036 int compC = rfunc(&src1[6*i + 4]) >> 8; \
1038 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1039 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
1043 static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
1044 uint8_t *dstU, uint8_t *dstV, \
1045 const uint8_t *src1, const uint8_t *src2, \
1046 int width, uint32_t *unused) \
1049 assert(src1==src2); \
1050 for (i = 0; i < width; i++) { \
1051 int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
1052 int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
1053 int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
1055 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
1056 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
// Instantiate readers for RGB48 and BGR48 in both byte orders.
1059 rgb48funcs(LE, AV_RL16, r, g, b);
1060 rgb48funcs(BE, AV_RB16, r, g, b);
1061 rgb48funcs(LE, AV_RL16, b, g, r);
1062 rgb48funcs(BE, AV_RB16, b, g, r);
/*
 * Generates a packed-RGB -> luma reader: each pixel is loaded as `type`,
 * the channels extracted with the shr/shg/shb shifts and mask parameters,
 * then combined with the (pre-shifted) fixed-point coefficients at scale S.
 * (33<<(S-1)) is the rounding term plus the 16-level luma offset.
 */
1064 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1065 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1066 int width, uint32_t *unused)\
1069 for (i=0; i<width; i++) {\
1070 int b= (((const type*)src)[i]>>shb)&maskb;\
1071 int g= (((const type*)src)[i]>>shg)&maskg;\
1072 int r= (((const type*)src)[i]>>shr)&maskr;\
1074 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
// Luma readers for the 32/16/15-bit RGB and BGR packings; the coefficient
// pre-shifts compensate for each channel's bit position so one scale S works.
1078 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1079 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1080 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1081 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1082 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1083 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1084 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1085 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
// Extracts the alpha channel of ABGR input into dst.
// NOTE(review): the per-pixel assignment (presumably reading the alpha byte
// of each 4-byte pixel) falls in a gap of this excerpt -- confirm upstream.
1087 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1090 for (i=0; i<width; i++) {
// Extracts the alpha channel of RGBA input into dst.
// NOTE(review): the per-pixel assignment falls in a gap of this excerpt --
// confirm upstream.
1095 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1098 for (i=0; i<width; i++) {
/*
 * Generates two packed-RGB -> chroma readers:
 *   name##_c      -- one U/V sample per input pixel;
 *   name##_half_c -- one U/V sample per horizontal pixel pair, summing the
 *                    pair's channels with mask arithmetic that keeps the
 *                    carry of each field (hence the maskX|(2*maskX) masks)
 *                    and averaging via the extra +1 on the final shift.
 * (257<<(S-1)) / (257<<S) are the rounding + 128-offset bias terms.
 */
1103 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1104 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1105 const uint8_t *src, const uint8_t *dummy, \
1106 int width, uint32_t *unused)\
1109 for (i=0; i<width; i++) {\
1110 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1111 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1112 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1114 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1115 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1118 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1119 const uint8_t *src, const uint8_t *dummy, \
1120 int width, uint32_t *unused)\
1123 for (i=0; i<width; i++) {\
1124 int pix0= ((const type*)src)[2*i+0]>>shp;\
1125 int pix1= ((const type*)src)[2*i+1]>>shp;\
1126 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1127 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1128 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1129 g&= maskg|(2*maskg);\
1133 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1134 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
// Chroma readers for the 32/16/15-bit RGB and BGR packings; the "1" variants
// (bgr321/rgb321) use shp=8 to skip a leading padding/alpha byte.
1138 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1139 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1140 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1141 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1142 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1143 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1144 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1145 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
// PAL8 -> luma: looks each index up in the palette and keeps the low byte.
// NOTE(review): the declaration of `d` (presumably `int d = src[i];`) falls
// in a gap of this excerpt -- confirm upstream.
1147 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1150 for (i=0; i<width; i++) {
1153 dst[i]= pal[d] & 0xFF;
// PAL8 -> chroma: the palette entries carry pre-computed YUV; the U/V byte
// extraction from `p` falls in a gap of this excerpt.
1157 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1158 const uint8_t *src1, const uint8_t *src2,
1159 int width, uint32_t *pal)
1162 assert(src1 == src2);
1163 for (i=0; i<width; i++) {
1164 int p= pal[src1[i]];
// 1-bit mono (white = 0) -> 8-bit luma, expanding each bit to 0/255.
// NOTE(review): the declaration of `d` falls in a gap of this excerpt; for
// monowhite it should invert the source byte (d = ~src[i]) so that the body
// differs from monoblack2Y_c below -- confirm upstream.
1171 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1172 int width, uint32_t *unused)
1175 for (i=0; i<width/8; i++) {
1178 dst[8*i+j]= ((d>>(7-j))&1)*255;
// 1-bit mono (black = 0) -> 8-bit luma, expanding each bit to 0/255.
// NOTE(review): the declaration of `d` (presumably d = src[i]) falls in a
// gap of this excerpt -- confirm upstream.
1182 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1183 int width, uint32_t *unused)
1186 for (i=0; i<width/8; i++) {
1189 dst[8*i+j]= ((d>>(7-j))&1)*255;
/**
 * Unfiltered planar output: converts the 7-bit-fraction intermediate samples
 * straight to 8 bits with round-to-nearest ((x+64)>>7), no vertical filter.
 */
1193 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1194 const int16_t *chrUSrc, const int16_t *chrVSrc,
1195 const int16_t *alpSrc,
1196 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1197 uint8_t *aDest, int dstW, int chrDstW)
// Luma plane.
1200 for (i=0; i<dstW; i++) {
1201 int val= (lumSrc[i]+64)>>7;
1202 dest[i]= av_clip_uint8(val);
// Chroma planes.
1206 for (i=0; i<chrDstW; i++) {
1207 int u=(chrUSrc[i]+64)>>7;
1208 int v=(chrVSrc[i]+64)>>7;
1209 uDest[i]= av_clip_uint8(u);
1210 vDest[i]= av_clip_uint8(v);
// Optional alpha plane.
1213 if (CONFIG_SWSCALE_ALPHA && aDest)
1214 for (i=0; i<dstW; i++) {
1215 int val= (alpSrc[i]+64)>>7;
1216 aDest[i]= av_clip_uint8(val);
/**
 * Vertical bilinear scale YV12 to packed RGB: blends two source rows per
 * plane with 12-bit weights before packing.  The locals yalpha1/uvalpha1/i
 * are referenced by name inside the YSCALE_YUV_2_* macros.
 */
static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *buf1, const uint16_t *ubuf0,
                          const uint16_t *ubuf1, const uint16_t *vbuf0,
                          const uint16_t *vbuf1, const uint16_t *abuf0,
                          const uint16_t *abuf1, uint8_t *dest, int dstW,
                          int yalpha, int uvalpha, int y)
{
    /* complementary weights for the two-tap vertical blend (0..4095) */
    int yalpha1=4095- yalpha;
    int uvalpha1=4095-uvalpha;
    int i;

    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
}
/**
 * YV12 to packed RGB without vertical scaling or interpolation.  When
 * uvalpha >= 2048 the two chroma rows are averaged (the *1B macro variants);
 * otherwise only the first chroma row is used.
 */
static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *ubuf0, const uint16_t *ubuf1,
                          const uint16_t *vbuf0, const uint16_t *vbuf1,
                          const uint16_t *abuf0, uint8_t *dest, int dstW,
                          int uvalpha, enum PixelFormat dstFormat,
                          int flags, int y)
{
    const int yalpha1=0;
    int i;

    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
    const int yalpha= 4096; //FIXME ...

    if (uvalpha < 2048) {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
    } else {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
    }
}
//FIXME yuy2* can read up to 7 samples too much

/* Extract luma from packed YUY2 (Y at even byte offsets). */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *in = src;
    uint8_t *out = dst;
    while (width-- > 0) {
        *out++ = *in;
        in += 2;
    }
}
/* Extract chroma from packed YUY2: per 4-byte group, U is byte 1 and V is
 * byte 3.  Both source pointers must alias the same row. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
/* Reduce little-endian 16-bit planar chroma to 8 bits by keeping the high
 * (second) byte of each sample; U and V come from separate planes. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *hiU = src1 + 1;
    const uint8_t *hiV = src2 + 1;
    int n;
    for (n = 0; n < width; n++, hiU += 2, hiV += 2) {
        dstU[n] = *hiU;
        dstV[n] = *hiV;
    }
}
/* Almost identical to yuy2ToY_c; it exists only because
 * yuy2To(Y|UV)_c(dst, src+1, ...) would make every access unaligned.
 * In UYVY the luma sits at odd byte offsets. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *in = src + 1;
    int n;
    for (n = 0; n < width; n++, in += 2)
        dst[n] = *in;
}
/* Extract chroma from packed UYVY: per 4-byte group, U is byte 0 and V is
 * byte 2.  Both source pointers must alias the same row. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/* Reduce big-endian 16-bit planar chroma to 8 bits by keeping the high
 * (first) byte of each sample; U and V come from separate planes. */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *hiU = src1;
    const uint8_t *hiV = src2;
    int n;
    for (n = 0; n < width; n++, hiU += 2, hiV += 2) {
        dstU[n] = *hiU;
        dstV[n] = *hiV;
    }
}
/* Deinterleave a packed two-channel chroma row into two planes. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        dst1[n] = src[2 * n];
        dst2[n] = src[2 * n + 1];
    }
}

/* NV12: interleaved chroma is U,V,U,V,... */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}

/* NV21: interleaved chroma is V,U,V,U,... so the planes are swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
// FIXME Maybe dither instead.
/* Template for planar 9/10-bit -> 8-bit input converters, generated for both
 * endiannesses.  rfunc is the 16-bit load macro (AV_RL16/AV_RB16); samples
 * are reduced to 8 bits by a plain right shift of (depth-8), i.e. truncated
 * rather than dithered. */
#define YUV_NBPS(depth, endianness, rfunc) \
static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint8_t *_srcU, const uint8_t *_srcV, \
                                          int width, uint32_t *unused) \
    const uint16_t *srcU = (const uint16_t*)_srcU; \
    const uint16_t *srcV = (const uint16_t*)_srcV; \
    for (i = 0; i < width; i++) { \
        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
    } \
static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
                                         int width, uint32_t *unused) \
    const uint16_t *srcY = (const uint16_t*)_srcY; \
    for (i = 0; i < width; i++) \
        dstY[i] = rfunc(&srcY[i])>>(depth-8) ; \

/* instantiations: LE9/BE9/LE10/BE10 {ToY,ToUV}_c */
YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
1375 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1376 int width, uint32_t *unused)
1379 for (i=0; i<width; i++) {
1384 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1388 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1389 const uint8_t *src2, int width, uint32_t *unused)
1392 for (i=0; i<width; i++) {
1393 int b= src1[3*i + 0];
1394 int g= src1[3*i + 1];
1395 int r= src1[3*i + 2];
1397 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1398 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1400 assert(src1 == src2);
1403 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1404 const uint8_t *src2, int width, uint32_t *unused)
1407 for (i=0; i<width; i++) {
1408 int b= src1[6*i + 0] + src1[6*i + 3];
1409 int g= src1[6*i + 1] + src1[6*i + 4];
1410 int r= src1[6*i + 2] + src1[6*i + 5];
1412 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1413 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1415 assert(src1 == src2);
1418 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1422 for (i=0; i<width; i++) {
1427 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1431 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1432 const uint8_t *src2, int width, uint32_t *unused)
1436 for (i=0; i<width; i++) {
1437 int r= src1[3*i + 0];
1438 int g= src1[3*i + 1];
1439 int b= src1[3*i + 2];
1441 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1442 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1446 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1447 const uint8_t *src2, int width, uint32_t *unused)
1451 for (i=0; i<width; i++) {
1452 int r= src1[6*i + 0] + src1[6*i + 3];
1453 int g= src1[6*i + 1] + src1[6*i + 4];
1454 int b= src1[6*i + 2] + src1[6*i + 5];
1456 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1457 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
// bilinear / bicubic scaling
/* Generic horizontal scaler: for each output sample, accumulate filterSize
 * taps of 8-bit source * 16-bit filter coefficients starting at
 * filterPos[i], then narrow the 14.18-ish accumulator to 9.7 with
 * saturation against the int16_t ceiling (the cubic filter can overflow). */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int acc = 0;
        for (j = 0; j < filterSize; j++)
            acc += (int)src[srcPos + j] * filter[filterSize * i + j];
        int narrowed = acc >> 7;
        dst[i] = narrowed < (1 << 15) - 1 ? narrowed : (1 << 15) - 1;
    }
}
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/* Compress full-range (JPEG) chroma to MPEG range, in 9.7 fixed point; the
 * input is clamped at 30775 so the multiply cannot overflow. */
static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u * 4663 - 9289992) >> 12; //-264
        dstV[i] = (v * 4663 - 9289992) >> 12; //-264
    }
}
/* Expand MPEG-range chroma back toward full (JPEG) range, in 9.7 fixed
 * point; inverse of chrRangeToJpeg_c. */
static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        unsigned cu = dstU[n];
        unsigned cv = dstV[n];
        dstU[n] = (cu * 1799 + 4081085) >> 11; //1469
        dstV[n] = (cv * 1799 + 4081085) >> 11; //1469
    }
}
1500 static void lumRangeToJpeg_c(uint16_t *dst, int width)
1503 for (i = 0; i < width; i++)
1504 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Expand MPEG-range luma back toward full (JPEG) range, in 9.7 fixed point;
 * inverse of lumRangeToJpeg_c. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        unsigned y = dst[n];
        dst[n] = (y * 14071 + 33561947) >> 14;
    }
}
1513 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1514 const uint8_t *src, int srcW, int xInc)
1517 unsigned int xpos=0;
1518 for (i=0;i<dstWidth;i++) {
1519 register unsigned int xx=xpos>>16;
1520 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1521 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1526 // *** horizontal scale Y line to temp buffer
1527 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1528 const uint8_t *src, int srcW, int xInc,
1529 const int16_t *hLumFilter,
1530 const int16_t *hLumFilterPos, int hLumFilterSize,
1531 uint8_t *formatConvBuffer,
1532 uint32_t *pal, int isAlpha)
1534 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1535 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1538 toYV12(formatConvBuffer, src, srcW, pal);
1539 src= formatConvBuffer;
1542 if (!c->hyscale_fast) {
1543 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1544 } else { // fast bilinear upscale / crap downscale
1545 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1549 convertRange(dst, dstWidth);
1552 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1553 int dstWidth, const uint8_t *src1,
1554 const uint8_t *src2, int srcW, int xInc)
1557 unsigned int xpos=0;
1558 for (i=0;i<dstWidth;i++) {
1559 register unsigned int xx=xpos>>16;
1560 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1561 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1562 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
1567 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1568 const uint8_t *src1, const uint8_t *src2,
1569 int srcW, int xInc, const int16_t *hChrFilter,
1570 const int16_t *hChrFilterPos, int hChrFilterSize,
1571 uint8_t *formatConvBuffer, uint32_t *pal)
1574 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1575 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1576 src1= formatConvBuffer;
1580 if (!c->hcscale_fast) {
1581 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1582 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1583 } else { // fast bilinear upscale / crap downscale
1584 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1587 if (c->chrConvertRange)
1588 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output (vertical scaling / packing) functions matching the
 * destination format; written through the out-parameters so callers can also
 * use it to re-select functions mid-scale (see swScale's dstH-2 special
 * case). */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    /* planar outputs, picked by interleaving / bit depth */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        /* plain 8-bit planar: the only case with an unscaled 1-tap path */
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;

    /* packed outputs */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        /* full horizontal chroma resolution requested */
        *yuv2packedX = yuv2rgbX_c_full;
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        /* generic packed RGB/YUV writers */
            *yuv2packed1 = yuv2packed1_c;
            *yuv2packed2 = yuv2packed2_c;
            *yuv2packedX = yuv2packedX_c;
/* Compile-time switch for tracing swScale()'s line ring-buffer state. */
#define DEBUG_SWSCALE_BUFFERS 0
/* NOTE: expands to a bare if(), so it is only safe as a full statement. */
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Core scaling loop: for each destination row, pull the needed source rows
 * through the input converters / horizontal scalers into per-plane ring
 * buffers (lum/chrU/chrV/alpPixBuf), then vertically scale and pack them
 * into dst.  Returns the number of destination lines produced for this
 * slice (slices may arrive incrementally).
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* ceiling division: chroma slice height rounded up */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        /* packed input: all plane pointers/strides alias plane 0 */
        srcStride[3]= srcStride[0];
    /* vChrDrop skips chroma source lines by widening the effective stride */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        /* last luma row of the chroma-aligned group this dstY belongs to */
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* not enough input yet: just buffer what this slice provides */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);

        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
            /* base pointers into the ring buffers for this output row */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                             uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                             chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                             vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                             vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, dstW, dstY);

    /* fill the alpha plane when the output has alpha but the input had none */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/* One-time init: fill in all per-format C function pointers used by
 * swScale() (output writers, horizontal scalers, input converters and range
 * converters).  Arch-specific init may override these afterwards. */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    /* output (vertical scaling / packing) functions */
    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,

    c->hScale       = hScale_c;

    if (c->flags & SWS_FAST_BILINEAR) {
        c->hyscale_fast = hyscale_fast_c;
        c->hcscale_fast = hcscale_fast_c;

    /* input chroma -> planar 8-bit converters */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
    case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
    case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
    case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
    /* RGB -> chroma: use the _half variants when the output is horizontally
     * subsampled, so two input pixels are averaged per chroma sample */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;

    /* input luma (and alpha) -> planar 8-bit converters */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
    /* formats whose luma sits at even byte offsets reuse yuy2ToY_c,
     * odd offsets reuse uyvyToY_c */
    case PIX_FMT_YUYV422  :
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_Y400A    :
    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422  :
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
    /* Y400A stores alpha at odd offsets, so the UYVY luma reader fits */
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;

    /* range conversion is done on the planar intermediates, so it is skipped
     * for RGB output (handled inside the yuv2rgb tables instead) */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        c->lumConvertRange = lumRangeFromJpeg_c;
        c->chrConvertRange = chrRangeFromJpeg_c;
        c->lumConvertRange = lumRangeToJpeg_c;
        c->chrConvertRange = chrRangeToJpeg_c;

    /* gray and 1 bpp sources have no chroma to scale */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2036 SwsFunc ff_getSwsFunc(SwsContext *c)
2038 sws_init_swScale_c(c);
2041 ff_sws_init_swScale_mmx(c);
2043 ff_sws_init_swScale_altivec(c);