2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients, ITU-R BT.601, in Q15 fixed point
 * (RGB2YUV_SHIFT = 15). Luma terms are scaled by 219/255 and chroma terms
 * by 224/255, i.e. limited ("MPEG") range output. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB -> YUV coefficient rows, indexed by colorspace id.
 * Each row is {GY, BY, RY, GU, BU, RU, GV, BV, RV} as the inline comments
 * on the rows indicate (709 / 601 / FCC / SMPTE 240M variants).
 * NOTE(review): the closing "};" of this table is missing from this extract. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* 2x2 ordered-dither matrix with 4 levels, used for the low bits of
 * 15/16 bpp RGB output (see the RGB565/555 cases further down). */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither matrix with 8 levels (for 5-bit RGB components). */
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither matrix with 16 levels, used for RGB444/BGR444 output. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix with 32 levels, used for the r/g components of
 * 8 bpp RGB output (see the RGB8/BGR8 case further down). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither matrix with 73 levels, used for the blue component of
 * RGB8 output and for RGB4/BGR4 (see the ANYRGB macro below). */
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither matrix with 220 levels, used for monochrome and
 * 1/4 bpp RGB output (indexed by y&7 in the MONO/RGB4 code paths below). */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative 220-level dither table, gamma-corrected for ~1.5.
 * NOTE(review): this reuses the identifier dither_8x8_220 already defined
 * above — in the upstream source these gamma variants are selected by
 * preprocessor conditionals that appear to be missing from this extract;
 * confirm against the original file before building. */
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
/* Alternative 220-level dither table, gamma-corrected for ~2.0.
 * NOTE(review): duplicate identifier — see the remark on the gamma 1.5
 * variant; upstream selects one of these via preprocessor conditionals. */
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
/* Alternative 220-level dither table, gamma-corrected for ~2.5.
 * NOTE(review): duplicate identifier — see the remark on the gamma 1.5
 * variant; upstream selects one of these via preprocessor conditionals. */
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* Vertical FIR filter to planar YUV with >8 bits per sample.
 * Accumulates lumSrc/chrUSrc/chrVSrc (and alpSrc when alpha is compiled in
 * and aDest is non-NULL) through lumFilter/chrFilter and writes clipped
 * 16-bit words; the output_pixel() helper picks AV_WB16 vs AV_WL16 from
 * big_endian and clips to output_bits via av_clip_uint16/av_clip_uintp2.
 * big_endian and output_bits are compile-time constants in each
 * instantiation (see the yuv2NBPS wrapper below).
 * NOTE(review): braces, loop-variable declarations and parts of the
 * output_pixel #if/#else structure are missing from this extract. */
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* Generates yuv2yuvX<bits><BE|LE>_c wrappers around yuv2yuvX16_c_template,
 * casting the byte destination pointers to uint16_t* and baking in the
 * endianness (is_be) and bit depth (bits) as constants.
 * NOTE(review): the closing brace and the yuv2NBPS(...) instantiation lines
 * are missing from this extract. */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest, \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
/* Vertical FIR filter to 8-bit planar YUV (plus optional alpha plane).
 * Accumulated values are in a 19-bit fixed-point domain: each output byte
 * is the clipped accumulator >> 19.
 * NOTE(review): braces, accumulator initializations and loop-variable
 * declarations are missing from this extract. */
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/* Vertical FIR filter to NV12/NV21: luma goes to dest as in yuv2yuvX_c,
 * chroma is written interleaved into uDest — U,V order for PIX_FMT_NV12,
 * V,U order otherwise (NV21). vDest/aDest are unused for these formats. */
332 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
333 const int16_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int16_t **chrUSrc,
335 const int16_t **chrVSrc, int chrFilterSize,
336 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
337 uint8_t *vDest, uint8_t *aDest,
338 int dstW, int chrDstW)
340 enum PixelFormat dstFormat = c->dstFormat;
342 //FIXME Optimize (just quickly written not optimized..)
344 for (i=0; i<dstW; i++) {
347 for (j=0; j<lumFilterSize; j++)
348 val += lumSrc[j][i] * lumFilter[j];
350 dest[i]= av_clip_uint8(val>>19);
356 if (dstFormat == PIX_FMT_NV12)
357 for (i=0; i<chrDstW; i++) {
361 for (j=0; j<chrFilterSize; j++) {
362 u += chrUSrc[j][i] * chrFilter[j];
363 v += chrVSrc[j][i] * chrFilter[j];
366 uDest[2*i]= av_clip_uint8(u>>19);
367 uDest[2*i+1]= av_clip_uint8(v>>19);
370 for (i=0; i<chrDstW; i++) {
374 for (j=0; j<chrFilterSize; j++) {
375 u += chrUSrc[j][i] * chrFilter[j];
376 v += chrVSrc[j][i] * chrFilter[j];
379 uDest[2*i]= av_clip_uint8(v>>19);
380 uDest[2*i+1]= av_clip_uint8(u>>19);
/* Vertical FIR filter to 16-bit grayscale, two pixels per iteration.
 * output_pixel() writes big- or little-endian according to target
 * (PIX_FMT_GRAY16BE / GRAY16LE); values are clipped only when the 0x10000
 * overflow bit is set, to keep the common case cheap.
 * NOTE(review): the output_pixel #else branch and several braces are
 * missing from this extract. */
384 static av_always_inline void
385 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
386 const int16_t **lumSrc, int lumFilterSize,
387 const int16_t *chrFilter, const int16_t **chrUSrc,
388 const int16_t **chrVSrc, int chrFilterSize,
389 const int16_t **alpSrc, uint8_t *dest, int dstW,
390 int y, enum PixelFormat target)
394 #define output_pixel(pos, val) \
395 if (target == PIX_FMT_GRAY16BE) { \
400 for (i = 0; i < (dstW >> 1); i++) {
404 const int i2 = 2 * i;
406 for (j = 0; j < lumFilterSize; j++) {
407 Y1 += lumSrc[j][i2] * lumFilter[j];
408 Y2 += lumSrc[j][i2+1] * lumFilter[j];
412 if ((Y1 | Y2) & 0x10000) {
413 Y1 = av_clip_uint16(Y1);
414 Y2 = av_clip_uint16(Y2);
416 output_pixel(&dest[2 * i2 + 0], Y1);
417 output_pixel(&dest[2 * i2 + 2], Y2);
/* Bilinear (two source line) blend to 16-bit grayscale: each output sample
 * is (buf0*yalpha1 + buf1*yalpha) >> 11, written via output_pixel().
 * Chroma/alpha buffers are accepted for signature uniformity but unused. */
421 static av_always_inline void
422 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
423 const uint16_t *buf1, const uint16_t *ubuf0,
424 const uint16_t *ubuf1, const uint16_t *vbuf0,
425 const uint16_t *vbuf1, const uint16_t *abuf0,
426 const uint16_t *abuf1, uint8_t *dest, int dstW,
427 int yalpha, int uvalpha, int y,
428 enum PixelFormat target)
430 int yalpha1 = 4095 - yalpha; \
433 for (i = 0; i < (dstW >> 1); i++) {
434 const int i2 = 2 * i;
435 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
436 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
438 output_pixel(&dest[2 * i2 + 0], Y1);
439 output_pixel(&dest[2 * i2 + 2], Y2);
/* Unscaled single-source-line path to 16-bit grayscale: each 15-bit input
 * sample is shifted up by 1 to fill 16 bits, then stored via output_pixel(). */
443 static av_always_inline void
444 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
445 const uint16_t *ubuf0, const uint16_t *ubuf1,
446 const uint16_t *vbuf0, const uint16_t *vbuf1,
447 const uint16_t *abuf0, uint8_t *dest, int dstW,
448 int uvalpha, enum PixelFormat dstFormat,
449 int flags, int y, enum PixelFormat target)
453 for (i = 0; i < (dstW >> 1); i++) {
454 const int i2 = 2 * i;
455 int Y1 = buf0[i2 ] << 1;
456 int Y2 = buf0[i2+1] << 1;
458 output_pixel(&dest[2 * i2 + 0], Y1);
459 output_pixel(&dest[2 * i2 + 2], Y2);
/* Generates the three public entry points (_X_c full-filter, _2_c bilinear,
 * _1_c unscaled) for a packed-output template family, baking the pixel
 * format `fmt` into each call. Instantiated below for gray16 LE/BE. */
464 #define YUV2PACKEDWRAPPER(name, ext, fmt) \
465 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
466 const int16_t **lumSrc, int lumFilterSize, \
467 const int16_t *chrFilter, const int16_t **chrUSrc, \
468 const int16_t **chrVSrc, int chrFilterSize, \
469 const int16_t **alpSrc, uint8_t *dest, int dstW, \
472 name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
473 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
474 alpSrc, dest, dstW, y, fmt); \
477 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
478 const uint16_t *buf1, const uint16_t *ubuf0, \
479 const uint16_t *ubuf1, const uint16_t *vbuf0, \
480 const uint16_t *vbuf1, const uint16_t *abuf0, \
481 const uint16_t *abuf1, uint8_t *dest, int dstW, \
482 int yalpha, int uvalpha, int y) \
484 name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
485 vbuf0, vbuf1, abuf0, abuf1, \
486 dest, dstW, yalpha, uvalpha, y, fmt); \
489 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
490 const uint16_t *ubuf0, const uint16_t *ubuf1, \
491 const uint16_t *vbuf0, const uint16_t *vbuf1, \
492 const uint16_t *abuf0, uint8_t *dest, int dstW, \
493 int uvalpha, enum PixelFormat dstFormat, \
496 name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
497 vbuf1, abuf0, dest, dstW, uvalpha, \
498 dstFormat, flags, y, fmt); \
501 YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
502 YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
/* Per-pixel-pair loop body for full-filter packed output: accumulates two
 * luma samples (Y1, Y2), one chroma pair (U, V) and — when `alpha` is set —
 * two alpha samples (A1, A2) in 19-bit fixed point, shifting down to 8 bits
 * and clipping only when the 0x100 overflow bit is set. */
504 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
505 for (i=0; i<(dstW>>1); i++) {\
511 int av_unused A1, A2;\
512 type av_unused *r, *b, *g;\
515 for (j=0; j<lumFilterSize; j++) {\
516 Y1 += lumSrc[j][i2] * lumFilter[j];\
517 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
519 for (j=0; j<chrFilterSize; j++) {\
520 U += chrUSrc[j][i] * chrFilter[j];\
521 V += chrVSrc[j][i] * chrFilter[j];\
527 if ((Y1|Y2|U|V)&0x100) {\
528 Y1 = av_clip_uint8(Y1); \
529 Y2 = av_clip_uint8(Y2); \
530 U = av_clip_uint8(U); \
531 V = av_clip_uint8(V); \
536 for (j=0; j<lumFilterSize; j++) {\
537 A1 += alpSrc[j][i2 ] * lumFilter[j];\
538 A2 += alpSrc[j][i2+1] * lumFilter[j];\
542 if ((A1|A2)&0x100) {\
543 A1 = av_clip_uint8(A1); \
544 A2 = av_clip_uint8(A2); \
/* Per-pixel loop body for full-chroma-resolution RGB output: filters Y/U/V
 * (and optionally A), applies the SwsContext yuv2rgb offset/coefficients,
 * and clips R/G/B to 30 bits only when the sign/overflow bits (0xC0000000)
 * are set. `rnd` is the rounding constant added to the accumulators. */
548 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
549 for (i=0; i<dstW; i++) {\
557 for (j=0; j<lumFilterSize; j++) {\
558 Y += lumSrc[j][i ] * lumFilter[j];\
560 for (j=0; j<chrFilterSize; j++) {\
561 U += chrUSrc[j][i] * chrFilter[j];\
562 V += chrVSrc[j][i] * chrFilter[j];\
569 for (j=0; j<lumFilterSize; j++)\
570 A += alpSrc[j][i ] * lumFilter[j];\
573 A = av_clip_uint8(A);\
575 Y-= c->yuv2rgb_y_offset;\
576 Y*= c->yuv2rgb_y_coeff;\
578 R= Y + V*c->yuv2rgb_v2r_coeff;\
579 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
580 B= Y + U*c->yuv2rgb_u2b_coeff;\
581 if ((R|G|B)&(0xC0000000)) {\
582 R = av_clip_uintp2(R, 30); \
583 G = av_clip_uintp2(G, 30); \
584 B = av_clip_uintp2(B, 30); \
/* Full-filter packed loop body plus table lookups: resolves the r/g/b
 * component tables from the SwsContext yuv2rgb tables for the filtered U/V. */
587 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
588 YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
589 r = (type *)c->table_rV[V]; \
590 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
591 b = (type *)c->table_bU[U];
/* Per-pixel-pair loop body for the bilinear (two source line) packed path:
 * blends buf0/buf1 with yalpha and ubuf0/ubuf1, vbuf0/vbuf1 with uvalpha,
 * all in a 19-bit fixed-point domain; alpha blended the same way when on. */
593 #define YSCALE_YUV_2_PACKED2_C(type,alpha) \
594 for (i=0; i<(dstW>>1); i++) { \
596 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
597 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
598 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
599 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
600 type av_unused *r, *b, *g; \
601 int av_unused A1, A2; \
603 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
604 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
/* Bilinear packed loop body plus yuv2rgb table lookups (as RGBX_C above). */
607 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
608 YSCALE_YUV_2_PACKED2_C(type,alpha)\
609 r = (type *)c->table_rV[V];\
610 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
611 b = (type *)c->table_bU[U];
/* Per-pixel-pair loop body for the unscaled path with non-interpolated
 * chroma: Y from buf0, U/V taken from ubuf1/vbuf1 only, all >> 7 to 8 bits. */
613 #define YSCALE_YUV_2_PACKED1_C(type,alpha) \
614 for (i=0; i<(dstW>>1); i++) {\
616 int Y1= buf0[i2 ]>>7;\
617 int Y2= buf0[i2+1]>>7;\
618 int U= (ubuf1[i])>>7;\
619 int V= (vbuf1[i])>>7;\
620 type av_unused *r, *b, *g;\
621 int av_unused A1, A2;\
/* Unscaled packed loop body plus yuv2rgb table lookups. */
627 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
628 YSCALE_YUV_2_PACKED1_C(type,alpha)\
629 r = (type *)c->table_rV[V];\
630 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
631 b = (type *)c->table_bU[U];
/* Like PACKED1_C but averages the two chroma source lines:
 * U/V = (ubuf0+ubuf1) >> 8 (the extra shift performs the /2). */
633 #define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
634 for (i=0; i<(dstW>>1); i++) {\
636 int Y1= buf0[i2 ]>>7;\
637 int Y2= buf0[i2+1]>>7;\
638 int U= (ubuf0[i] + ubuf1[i])>>8;\
639 int V= (vbuf0[i] + vbuf1[i])>>8;\
640 type av_unused *r, *b, *g;\
641 int av_unused A1, A2;\
/* Chroma-averaging unscaled loop body plus yuv2rgb table lookups. */
647 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
648 YSCALE_YUV_2_PACKED1B_C(type,alpha)\
649 r = (type *)c->table_rV[V];\
650 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
651 b = (type *)c->table_bU[U];
/* Bilinear path to 1 bpp monochrome: packs 8 dithered luma samples per
 * output byte (MSB first via repeated acc+=acc), inverting the byte for
 * MONOWHITE. Uses the dither_8x8_220 row selected by y&7. */
653 #define YSCALE_YUV_2_MONO2_C \
654 const uint8_t * const d128=dither_8x8_220[y&7];\
655 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
656 for (i=0; i<dstW-7; i+=8) {\
658 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
659 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
660 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
661 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
662 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
663 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
664 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
665 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
666 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Full-filter path to 1 bpp monochrome: filters two luma samples at a time,
 * clips on the 0x100 overflow bit, dithers with dither_8x8_220[y&7], and
 * shifts each bit into the accumulator byte (inverted for MONOWHITE). */
670 #define YSCALE_YUV_2_MONOX_C \
671 const uint8_t * const d128=dither_8x8_220[y&7];\
672 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
674 for (i=0; i<dstW-1; i+=2) {\
679 for (j=0; j<lumFilterSize; j++) {\
680 Y1 += lumSrc[j][i] * lumFilter[j];\
681 Y2 += lumSrc[j][i+1] * lumFilter[j];\
685 if ((Y1|Y2)&0x100) {\
686 Y1 = av_clip_uint8(Y1); \
687 Y2 = av_clip_uint8(Y2); \
689 acc+= acc + g[Y1+d128[(i+0)&7]];\
690 acc+= acc + g[Y2+d128[(i+1)&7]];\
692 ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
/* Dispatch macro over c->dstFormat: expands one of the supplied loop-body
 * macros (`func` for table-driven RGB, `func2` for plain packed YUV output,
 * `func_monoblack` for 1 bpp) and emits the per-format store code —
 * RGB48/BGR48 (each 8-bit table value doubled into 16 bits), RGBA/ARGB
 * 32-bit with optional alpha, RGB24/BGR24, dithered 565/555/444, dithered
 * 8/4/4-byte palettes, monochrome, and YUYV/UYVY interleaving.
 * NOTE(review): many structural lines (case labels for the 32-bit and
 * 24-bit paths, closing braces, break statements) are missing from this
 * extract; dither index rows for 555/444 reuse index [1]/[0] deliberately
 * to swap the r/g dither between even and odd pixels. */
697 #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
698 switch(c->dstFormat) {\
699 case PIX_FMT_RGB48BE:\
700 case PIX_FMT_RGB48LE:\
702 ((uint8_t*)dest)[ 0]= r[Y1];\
703 ((uint8_t*)dest)[ 1]= r[Y1];\
704 ((uint8_t*)dest)[ 2]= g[Y1];\
705 ((uint8_t*)dest)[ 3]= g[Y1];\
706 ((uint8_t*)dest)[ 4]= b[Y1];\
707 ((uint8_t*)dest)[ 5]= b[Y1];\
708 ((uint8_t*)dest)[ 6]= r[Y2];\
709 ((uint8_t*)dest)[ 7]= r[Y2];\
710 ((uint8_t*)dest)[ 8]= g[Y2];\
711 ((uint8_t*)dest)[ 9]= g[Y2];\
712 ((uint8_t*)dest)[10]= b[Y2];\
713 ((uint8_t*)dest)[11]= b[Y2];\
717 case PIX_FMT_BGR48BE:\
718 case PIX_FMT_BGR48LE:\
720 ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
721 ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
722 ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
723 ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
724 ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
725 ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
732 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
733 func(uint32_t,needAlpha)\
734 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
735 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
738 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
740 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
741 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
745 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
746 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
754 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
755 func(uint32_t,needAlpha)\
756 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
757 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
760 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
762 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
763 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
767 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
768 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
775 ((uint8_t*)dest)[0]= r[Y1];\
776 ((uint8_t*)dest)[1]= g[Y1];\
777 ((uint8_t*)dest)[2]= b[Y1];\
778 ((uint8_t*)dest)[3]= r[Y2];\
779 ((uint8_t*)dest)[4]= g[Y2];\
780 ((uint8_t*)dest)[5]= b[Y2];\
786 ((uint8_t*)dest)[0]= b[Y1];\
787 ((uint8_t*)dest)[1]= g[Y1];\
788 ((uint8_t*)dest)[2]= r[Y1];\
789 ((uint8_t*)dest)[3]= b[Y2];\
790 ((uint8_t*)dest)[4]= g[Y2];\
791 ((uint8_t*)dest)[5]= r[Y2];\
795 case PIX_FMT_RGB565BE:\
796 case PIX_FMT_RGB565LE:\
797 case PIX_FMT_BGR565BE:\
798 case PIX_FMT_BGR565LE:\
800 const int dr1= dither_2x2_8[y&1 ][0];\
801 const int dg1= dither_2x2_4[y&1 ][0];\
802 const int db1= dither_2x2_8[(y&1)^1][0];\
803 const int dr2= dither_2x2_8[y&1 ][1];\
804 const int dg2= dither_2x2_4[y&1 ][1];\
805 const int db2= dither_2x2_8[(y&1)^1][1];\
807 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
808 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
812 case PIX_FMT_RGB555BE:\
813 case PIX_FMT_RGB555LE:\
814 case PIX_FMT_BGR555BE:\
815 case PIX_FMT_BGR555LE:\
817 const int dr1= dither_2x2_8[y&1 ][0];\
818 const int dg1= dither_2x2_8[y&1 ][1];\
819 const int db1= dither_2x2_8[(y&1)^1][0];\
820 const int dr2= dither_2x2_8[y&1 ][1];\
821 const int dg2= dither_2x2_8[y&1 ][0];\
822 const int db2= dither_2x2_8[(y&1)^1][1];\
824 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
825 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
829 case PIX_FMT_RGB444BE:\
830 case PIX_FMT_RGB444LE:\
831 case PIX_FMT_BGR444BE:\
832 case PIX_FMT_BGR444LE:\
834 const int dr1= dither_4x4_16[y&3 ][0];\
835 const int dg1= dither_4x4_16[y&3 ][1];\
836 const int db1= dither_4x4_16[(y&3)^3][0];\
837 const int dr2= dither_4x4_16[y&3 ][1];\
838 const int dg2= dither_4x4_16[y&3 ][0];\
839 const int db2= dither_4x4_16[(y&3)^3][1];\
841 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
842 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
849 const uint8_t * const d64= dither_8x8_73[y&7];\
850 const uint8_t * const d32= dither_8x8_32[y&7];\
852 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
853 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
860 const uint8_t * const d64= dither_8x8_73 [y&7];\
861 const uint8_t * const d128=dither_8x8_220[y&7];\
863 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
864 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
868 case PIX_FMT_RGB4_BYTE:\
869 case PIX_FMT_BGR4_BYTE:\
871 const uint8_t * const d64= dither_8x8_73 [y&7];\
872 const uint8_t * const d128=dither_8x8_220[y&7];\
874 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
875 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
879 case PIX_FMT_MONOBLACK:\
880 case PIX_FMT_MONOWHITE:\
885 case PIX_FMT_YUYV422:\
887 ((uint8_t*)dest)[2*i2+0]= Y1;\
888 ((uint8_t*)dest)[2*i2+1]= U;\
889 ((uint8_t*)dest)[2*i2+2]= Y2;\
890 ((uint8_t*)dest)[2*i2+3]= V;\
893 case PIX_FMT_UYVY422:\
895 ((uint8_t*)dest)[2*i2+0]= U;\
896 ((uint8_t*)dest)[2*i2+1]= Y1;\
897 ((uint8_t*)dest)[2*i2+2]= V;\
898 ((uint8_t*)dest)[2*i2+3]= Y2;\
/* Full vertical-filter entry point for all packed output formats:
 * delegates format dispatch to YSCALE_YUV_2_ANYRGB_C with the full-filter
 * loop bodies. */
903 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
904 const int16_t **lumSrc, int lumFilterSize,
905 const int16_t *chrFilter, const int16_t **chrUSrc,
906 const int16_t **chrVSrc, int chrFilterSize,
907 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
910 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
/* Full-chroma-resolution RGB output (no chroma subsampling on output):
 * steps through the destination `step = bpp/8` bytes per pixel, switching
 * on dstFormat; each branch runs YSCALE_YUV_2_RGBX_FULL_C with rounding
 * 1<<21 and writes an alpha byte (A or opaque 255) at the format's alpha
 * index when alpha is compiled in.
 * NOTE(review): the case labels, R/G/B store lines and closing braces are
 * missing from this extract. */
913 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
914 const int16_t **lumSrc, int lumFilterSize,
915 const int16_t *chrFilter, const int16_t **chrUSrc,
916 const int16_t **chrVSrc, int chrFilterSize,
917 const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
920 int step= c->dstFormatBpp/8;
923 switch(c->dstFormat) {
931 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
932 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
933 dest[aidx]= needAlpha ? A : 255;
940 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
941 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
949 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
966 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
967 YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
968 dest[aidx]= needAlpha ? A : 255;
975 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
976 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
984 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
/* Fills `height` rows of `width` bytes with `val`, starting at row `y`
 * of a plane with the given stride.
 * NOTE(review): the val/y parameters and the per-row stride advance are
 * missing from this extract. */
999 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1000 int width, int height,
1004 uint8_t *ptr = plane + stride*y;
1005 for (i=0; i<height; i++) {
1006 memset(ptr, val, width);
/* Luma extraction from 48-bit RGB/BGR: reads three 16-bit components per
 * pixel (endianness from `origin` via input_pixel), keeps the high byte,
 * and applies the Q15 BT.601 luma coefficients. The r/b macros swap the
 * first/last component for the BGR48 variants. */
1011 static av_always_inline void
1012 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
1013 enum PixelFormat origin)
1016 for (i = 0; i < width; i++) {
1017 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1018 int a = input_pixel(&src[i*6+0]) >> 8;
1019 int g = input_pixel(&src[i*6+2]) >> 8;
1020 int c = input_pixel(&src[i*6+4]) >> 8;
1022 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? c : a)
1023 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? a : c)
1024 dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma extraction from 48-bit RGB/BGR at full horizontal resolution:
 * same component decoding as rgb48ToY_c_template (r/b/input_pixel macros
 * are shared), producing U and V with the Q15 BT.601 coefficients and a
 * 128-centered rounding bias (257 << (SHIFT-1)). */
1028 static av_always_inline void
1029 rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1030 const uint8_t *src1, const uint8_t *src2,
1031 int width, enum PixelFormat origin)
1035 for (i = 0; i < width; i++) {
1036 int a = input_pixel(&src1[6*i + 0]) >> 8;
1037 int g = input_pixel(&src1[6*i + 2]) >> 8;
1038 int c = input_pixel(&src1[6*i + 4]) >> 8;
1040 dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1041 dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma extraction from 48-bit RGB/BGR with 2:1 horizontal averaging:
 * sums the components of each horizontal pixel pair before applying the
 * coefficients, hence the extra +1 in the final shift. */
1045 static av_always_inline void
1046 rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1047 const uint8_t *src1, const uint8_t *src2,
1048 int width, enum PixelFormat origin)
1052 for (i = 0; i < width; i++) {
1053 int a = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
1054 int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
1055 int c = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
1057 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1058 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Generates the public ToY / ToUV / ToUV_half entry points for one 48-bit
 * format (rgb/bgr x LE/BE), binding `origin` into the shared templates.
 * Instantiated for all four PIX_FMT_{RGB,BGR}48{LE,BE} formats below. */
1065 #define rgb48funcs(pattern, BE_LE, origin) \
1066 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
1067 int width, uint32_t *unused) \
1069 rgb48ToY_c_template(dst, src, width, origin); \
1072 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1073 const uint8_t *src1, const uint8_t *src2, \
1074 int width, uint32_t *unused) \
1076 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1079 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1080 const uint8_t *src1, const uint8_t *src2, \
1081 int width, uint32_t *unused) \
1083 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1086 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1087 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1088 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1089 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Generates a luma-extraction function for one packed RGB/BGR layout:
 * shifts/masks out the b, g, r components of each `type`-sized pixel and
 * applies pre-shifted coefficients with total scale S. Instantiated below
 * for 32/16/15 bpp in both channel orders and both component positions. */
1091 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1092 static void name ## _c(uint8_t *dst, const uint8_t *src, \
1093 int width, uint32_t *unused)\
1096 for (i=0; i<width; i++) {\
1097 int b= (((const type*)src)[i]>>shb)&maskb;\
1098 int g= (((const type*)src)[i]>>shg)&maskg;\
1099 int r= (((const type*)src)[i]>>shr)&maskr;\
1101 dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1105 BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1106 BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1107 BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8)
1108 BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8)
1109 BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8)
1110 BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7)
1111 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1112 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
/* Extracts the alpha plane from packed ABGR input.
 * NOTE(review): the loop body (the per-pixel store) is missing from this
 * extract; only the signature and loop header remain. */
1114 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1117 for (i=0; i<width; i++) {
/* Extracts the alpha plane from packed RGBA input.
 * NOTE(review): the loop body is missing from this extract. */
1122 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1125 for (i=0; i<width; i++) {
/* Generates two chroma-extraction functions for one packed RGB/BGR layout:
 * name##_c at full resolution, and name##_half_c which averages each
 * horizontal pixel pair using a mask trick (sum components in-register,
 * mask with maskX|(2*maskX) to capture the carry) before the coefficients.
 * shp pre-shifts the whole pixel (used by the _1 variants with alpha low). */
1130 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
1131 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
1132 const uint8_t *src, const uint8_t *dummy, \
1133 int width, uint32_t *unused)\
1136 for (i=0; i<width; i++) {\
1137 int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
1138 int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
1139 int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
1141 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1142 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1145 static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
1146 const uint8_t *src, const uint8_t *dummy, \
1147 int width, uint32_t *unused)\
1150 for (i=0; i<width; i++) {\
1151 int pix0= ((const type*)src)[2*i+0]>>shp;\
1152 int pix1= ((const type*)src)[2*i+1]>>shp;\
1153 int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1154 int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1155 int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1156 g&= maskg|(2*maskg);\
1160 dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1161 dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1165 BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1166 BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1167 BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1168 BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8)
1169 BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8)
1170 BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7)
1171 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1172 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
1174 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1177 for (i=0; i<width; i++) {
1180 dst[i]= pal[d] & 0xFF;
1184 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1185 const uint8_t *src1, const uint8_t *src2,
1186 int width, uint32_t *pal)
1189 assert(src1 == src2);
1190 for (i=0; i<width; i++) {
1191 int p= pal[src1[i]];
1198 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1199 int width, uint32_t *unused)
1202 for (i=0; i<width/8; i++) {
1205 dst[8*i+j]= ((d>>(7-j))&1)*255;
1209 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1210 int width, uint32_t *unused)
1213 for (i=0; i<width/8; i++) {
1216 dst[8*i+j]= ((d>>(7-j))&1)*255;
1220 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
1221 const int16_t *chrUSrc, const int16_t *chrVSrc,
1222 const int16_t *alpSrc,
1223 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
1224 uint8_t *aDest, int dstW, int chrDstW)
1227 for (i=0; i<dstW; i++) {
1228 int val= (lumSrc[i]+64)>>7;
1229 dest[i]= av_clip_uint8(val);
1233 for (i=0; i<chrDstW; i++) {
1234 int u=(chrUSrc[i]+64)>>7;
1235 int v=(chrVSrc[i]+64)>>7;
1236 uDest[i]= av_clip_uint8(u);
1237 vDest[i]= av_clip_uint8(v);
1240 if (CONFIG_SWSCALE_ALPHA && aDest)
1241 for (i=0; i<dstW; i++) {
1242 int val= (alpSrc[i]+64)>>7;
1243 aDest[i]= av_clip_uint8(val);
1248 * vertical bilinear scale YV12 to RGB
1250 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1251 const uint16_t *buf1, const uint16_t *ubuf0,
1252 const uint16_t *ubuf1, const uint16_t *vbuf0,
1253 const uint16_t *vbuf1, const uint16_t *abuf0,
1254 const uint16_t *abuf1, uint8_t *dest, int dstW,
1255 int yalpha, int uvalpha, int y)
1257 int yalpha1=4095- yalpha;
1258 int uvalpha1=4095-uvalpha;
1261 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
1265 * YV12 to RGB without scaling or interpolating
1267 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1268 const uint16_t *ubuf0, const uint16_t *ubuf1,
1269 const uint16_t *vbuf0, const uint16_t *vbuf1,
1270 const uint16_t *abuf0, uint8_t *dest, int dstW,
1271 int uvalpha, enum PixelFormat dstFormat,
1274 const int yalpha1=0;
1277 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1278 const int yalpha= 4096; //FIXME ...
1280 if (uvalpha < 2048) {
1281 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
1283 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
1287 //FIXME yuy2* can read up to 7 samples too much
1289 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1293 for (i=0; i<width; i++)
1297 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1298 const uint8_t *src2, int width, uint32_t *unused)
1301 for (i=0; i<width; i++) {
1302 dstU[i]= src1[4*i + 1];
1303 dstV[i]= src1[4*i + 3];
1305 assert(src1 == src2);
1308 static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1309 const uint8_t *src2, int width, uint32_t *unused)
1312 for (i=0; i<width; i++) {
1313 dstU[i]= src1[2*i + 1];
1314 dstV[i]= src2[2*i + 1];
1318 /* This is almost identical to the previous, and exists only because
1319 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
1320 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1324 for (i=0; i<width; i++)
1328 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1329 const uint8_t *src2, int width, uint32_t *unused)
1332 for (i=0; i<width; i++) {
1333 dstU[i]= src1[4*i + 0];
1334 dstV[i]= src1[4*i + 2];
1336 assert(src1 == src2);
1339 static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1340 const uint8_t *src2, int width, uint32_t *unused)
1343 for (i=0; i<width; i++) {
1349 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1350 const uint8_t *src, int width)
1353 for (i = 0; i < width; i++) {
1354 dst1[i] = src[2*i+0];
1355 dst2[i] = src[2*i+1];
1359 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1360 const uint8_t *src1, const uint8_t *src2,
1361 int width, uint32_t *unused)
1363 nvXXtoUV_c(dstU, dstV, src1, width);
1366 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1367 const uint8_t *src1, const uint8_t *src2,
1368 int width, uint32_t *unused)
1370 nvXXtoUV_c(dstV, dstU, src1, width);
1373 // FIXME Maybe dither instead.
1374 static av_always_inline void
1375 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1376 const uint8_t *_srcU, const uint8_t *_srcV,
1377 int width, enum PixelFormat origin, int depth)
1380 const uint16_t *srcU = (const uint16_t *) _srcU;
1381 const uint16_t *srcV = (const uint16_t *) _srcV;
1383 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1384 for (i = 0; i < width; i++) {
1385 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1386 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1390 static av_always_inline void
1391 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1392 int width, enum PixelFormat origin, int depth)
1395 const uint16_t *srcY = (const uint16_t*)_srcY;
1397 for (i = 0; i < width; i++)
1398 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
1402 #define YUV_NBPS(depth, BE_LE, origin) \
1403 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1404 const uint8_t *srcU, const uint8_t *srcV, \
1405 int width, uint32_t *unused) \
1407 yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1409 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1410 int width, uint32_t *unused) \
1412 yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1415 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1416 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1417 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1418 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
1420 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1421 int width, uint32_t *unused)
1424 for (i=0; i<width; i++) {
1429 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1433 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1434 const uint8_t *src2, int width, uint32_t *unused)
1437 for (i=0; i<width; i++) {
1438 int b= src1[3*i + 0];
1439 int g= src1[3*i + 1];
1440 int r= src1[3*i + 2];
1442 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1443 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1445 assert(src1 == src2);
1448 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1449 const uint8_t *src2, int width, uint32_t *unused)
1452 for (i=0; i<width; i++) {
1453 int b= src1[6*i + 0] + src1[6*i + 3];
1454 int g= src1[6*i + 1] + src1[6*i + 4];
1455 int r= src1[6*i + 2] + src1[6*i + 5];
1457 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1458 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1460 assert(src1 == src2);
1463 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1467 for (i=0; i<width; i++) {
1472 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1476 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1477 const uint8_t *src2, int width, uint32_t *unused)
1481 for (i=0; i<width; i++) {
1482 int r= src1[3*i + 0];
1483 int g= src1[3*i + 1];
1484 int b= src1[3*i + 2];
1486 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1487 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1491 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1492 const uint8_t *src2, int width, uint32_t *unused)
1496 for (i=0; i<width; i++) {
1497 int r= src1[6*i + 0] + src1[6*i + 3];
1498 int g= src1[6*i + 1] + src1[6*i + 4];
1499 int b= src1[6*i + 2] + src1[6*i + 5];
1501 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1502 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1507 // bilinear / bicubic scaling
1508 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
1510 const int16_t *filter, const int16_t *filterPos,
1514 for (i=0; i<dstW; i++) {
1516 int srcPos= filterPos[i];
1518 for (j=0; j<filterSize; j++) {
1519 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1521 //filter += hFilterSize;
1522 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1527 //FIXME all pal and rgb srcFormats could do this conversion as well
1528 //FIXME all scalers more complex than bilinear could do half of this transform
1529 static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1532 for (i = 0; i < width; i++) {
1533 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1534 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1537 static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
1540 for (i = 0; i < width; i++) {
1541 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1542 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1545 static void lumRangeToJpeg_c(uint16_t *dst, int width)
1548 for (i = 0; i < width; i++)
1549 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1551 static void lumRangeFromJpeg_c(uint16_t *dst, int width)
1554 for (i = 0; i < width; i++)
1555 dst[i] = (dst[i]*14071 + 33561947)>>14;
1558 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1559 const uint8_t *src, int srcW, int xInc)
1562 unsigned int xpos=0;
1563 for (i=0;i<dstWidth;i++) {
1564 register unsigned int xx=xpos>>16;
1565 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1566 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1571 // *** horizontal scale Y line to temp buffer
1572 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
1573 const uint8_t *src, int srcW, int xInc,
1574 const int16_t *hLumFilter,
1575 const int16_t *hLumFilterPos, int hLumFilterSize,
1576 uint8_t *formatConvBuffer,
1577 uint32_t *pal, int isAlpha)
1579 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1580 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1583 toYV12(formatConvBuffer, src, srcW, pal);
1584 src= formatConvBuffer;
1587 if (!c->hyscale_fast) {
1588 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
1589 } else { // fast bilinear upscale / crap downscale
1590 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1594 convertRange(dst, dstWidth);
1597 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1598 int dstWidth, const uint8_t *src1,
1599 const uint8_t *src2, int srcW, int xInc)
1602 unsigned int xpos=0;
1603 for (i=0;i<dstWidth;i++) {
1604 register unsigned int xx=xpos>>16;
1605 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1606 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1607 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
1612 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
1613 const uint8_t *src1, const uint8_t *src2,
1614 int srcW, int xInc, const int16_t *hChrFilter,
1615 const int16_t *hChrFilterPos, int hChrFilterSize,
1616 uint8_t *formatConvBuffer, uint32_t *pal)
1619 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
1620 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1621 src1= formatConvBuffer;
1625 if (!c->hcscale_fast) {
1626 c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1627 c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
1628 } else { // fast bilinear upscale / crap downscale
1629 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1632 if (c->chrConvertRange)
1633 c->chrConvertRange(dst1, dst2, dstWidth);
1636 static av_always_inline void
1637 find_c_packed_planar_out_funcs(SwsContext *c,
1638 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
1639 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1640 yuv2packedX_fn *yuv2packedX)
1642 enum PixelFormat dstFormat = c->dstFormat;
1644 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
1645 *yuv2yuvX = yuv2nv12X_c;
1646 } else if (is16BPS(dstFormat)) {
1647 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
1648 } else if (is9_OR_10BPS(dstFormat)) {
1649 if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
1650 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
1652 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
1655 *yuv2yuv1 = yuv2yuv1_c;
1656 *yuv2yuvX = yuv2yuvX_c;
1658 if(c->flags & SWS_FULL_CHR_H_INT) {
1659 *yuv2packedX = yuv2rgbX_c_full;
1661 switch (dstFormat) {
1662 case PIX_FMT_GRAY16BE:
1663 *yuv2packed1 = yuv2gray16BE_1_c;
1664 *yuv2packed2 = yuv2gray16BE_2_c;
1665 *yuv2packedX = yuv2gray16BE_X_c;
1667 case PIX_FMT_GRAY16LE:
1668 *yuv2packed1 = yuv2gray16LE_1_c;
1669 *yuv2packed2 = yuv2gray16LE_2_c;
1670 *yuv2packedX = yuv2gray16LE_X_c;
1673 *yuv2packed1 = yuv2packed1_c;
1674 *yuv2packed2 = yuv2packed2_c;
1675 *yuv2packedX = yuv2packedX_c;
1681 #define DEBUG_SWSCALE_BUFFERS 0
1682 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
1684 static int swScale(SwsContext *c, const uint8_t* src[],
1685 int srcStride[], int srcSliceY,
1686 int srcSliceH, uint8_t* dst[], int dstStride[])
1688 /* load a few things into local vars to make the code more readable? and faster */
1689 const int srcW= c->srcW;
1690 const int dstW= c->dstW;
1691 const int dstH= c->dstH;
1692 const int chrDstW= c->chrDstW;
1693 const int chrSrcW= c->chrSrcW;
1694 const int lumXInc= c->lumXInc;
1695 const int chrXInc= c->chrXInc;
1696 const enum PixelFormat dstFormat= c->dstFormat;
1697 const int flags= c->flags;
1698 int16_t *vLumFilterPos= c->vLumFilterPos;
1699 int16_t *vChrFilterPos= c->vChrFilterPos;
1700 int16_t *hLumFilterPos= c->hLumFilterPos;
1701 int16_t *hChrFilterPos= c->hChrFilterPos;
1702 int16_t *vLumFilter= c->vLumFilter;
1703 int16_t *vChrFilter= c->vChrFilter;
1704 int16_t *hLumFilter= c->hLumFilter;
1705 int16_t *hChrFilter= c->hChrFilter;
1706 int32_t *lumMmxFilter= c->lumMmxFilter;
1707 int32_t *chrMmxFilter= c->chrMmxFilter;
1708 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1709 const int vLumFilterSize= c->vLumFilterSize;
1710 const int vChrFilterSize= c->vChrFilterSize;
1711 const int hLumFilterSize= c->hLumFilterSize;
1712 const int hChrFilterSize= c->hChrFilterSize;
1713 int16_t **lumPixBuf= c->lumPixBuf;
1714 int16_t **chrUPixBuf= c->chrUPixBuf;
1715 int16_t **chrVPixBuf= c->chrVPixBuf;
1716 int16_t **alpPixBuf= c->alpPixBuf;
1717 const int vLumBufSize= c->vLumBufSize;
1718 const int vChrBufSize= c->vChrBufSize;
1719 uint8_t *formatConvBuffer= c->formatConvBuffer;
1720 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1721 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1723 uint32_t *pal=c->pal_yuv;
1724 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
1725 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
1726 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
1727 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
1728 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
1730 /* vars which will change and which we need to store back in the context */
1732 int lumBufIndex= c->lumBufIndex;
1733 int chrBufIndex= c->chrBufIndex;
1734 int lastInLumBuf= c->lastInLumBuf;
1735 int lastInChrBuf= c->lastInChrBuf;
1737 if (isPacked(c->srcFormat)) {
1745 srcStride[3]= srcStride[0];
1747 srcStride[1]<<= c->vChrDrop;
1748 srcStride[2]<<= c->vChrDrop;
1750 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1751 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1752 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1753 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1754 srcSliceY, srcSliceH, dstY, dstH);
1755 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1756 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
1758 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1759 static int warnedAlready=0; //FIXME move this into the context perhaps
1760 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1761 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1762 " ->cannot do aligned memory accesses anymore\n");
1767 /* Note the user might start scaling the picture in the middle so this
1768 will not get executed. This is not really intended but works
1769 currently, so people might do it. */
1770 if (srcSliceY ==0) {
1780 for (;dstY < dstH; dstY++) {
1781 unsigned char *dest =dst[0]+dstStride[0]*dstY;
1782 const int chrDstY= dstY>>c->chrDstVSubSample;
1783 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
1784 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
1785 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
1787 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
1788 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1789 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1790 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
1791 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
1792 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
1795 //handle holes (FAST_BILINEAR & weird filters)
1796 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1797 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1798 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1799 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1801 DEBUG_BUFFERS("dstY: %d\n", dstY);
1802 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1803 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1804 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1805 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1807 // Do we have enough lines in this slice to output the dstY line
1808 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
1810 if (!enough_lines) {
1811 lastLumSrcY = srcSliceY + srcSliceH - 1;
1812 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1813 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1814 lastLumSrcY, lastChrSrcY);
1817 //Do horizontal scaling
1818 while(lastInLumBuf < lastLumSrcY) {
1819 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
1820 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
1822 assert(lumBufIndex < 2*vLumBufSize);
1823 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1824 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1825 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1826 hLumFilter, hLumFilterPos, hLumFilterSize,
1829 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1830 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
1831 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1835 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1836 lumBufIndex, lastInLumBuf);
1838 while(lastInChrBuf < lastChrSrcY) {
1839 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
1840 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
1842 assert(chrBufIndex < 2*vChrBufSize);
1843 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1844 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1845 //FIXME replace parameters through context struct (some at least)
1847 if (c->needs_hcscale)
1848 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1849 chrDstW, src1, src2, chrSrcW, chrXInc,
1850 hChrFilter, hChrFilterPos, hChrFilterSize,
1851 formatConvBuffer, pal);
1853 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1854 chrBufIndex, lastInChrBuf);
1856 //wrap buf index around to stay inside the ring buffer
1857 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1858 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1860 break; //we can't output a dstY line so let's try with the next slice
1863 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
1865 if (dstY >= dstH-2) {
1866 // hmm looks like we can't use MMX here without overwriting this array's tail
1867 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
1868 &yuv2packed1, &yuv2packed2,
1873 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
1874 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1875 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
1876 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
1877 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
1878 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
1879 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
1880 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
1881 const int16_t *lumBuf = lumSrcPtr[0];
1882 const int16_t *chrUBuf= chrUSrcPtr[0];
1883 const int16_t *chrVBuf= chrVSrcPtr[0];
1884 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
1885 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
1886 uDest, vDest, aDest, dstW, chrDstW);
1887 } else { //General YV12
1889 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
1890 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
1891 chrVSrcPtr, vChrFilterSize,
1892 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
1895 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
1896 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
1897 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
1898 int chrAlpha= vChrFilter[2*dstY+1];
1899 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
1900 *chrVSrcPtr, *(chrVSrcPtr+1),
1901 alpPixBuf ? *alpSrcPtr : NULL,
1902 dest, dstW, chrAlpha, dstFormat, flags, dstY);
1903 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
1904 int lumAlpha= vLumFilter[2*dstY+1];
1905 int chrAlpha= vChrFilter[2*dstY+1];
1907 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
1909 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
1910 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
1911 *chrVSrcPtr, *(chrVSrcPtr+1),
1912 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
1913 dest, dstW, lumAlpha, chrAlpha, dstY);
1914 } else { //general RGB
1916 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
1917 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
1918 alpSrcPtr, dest, dstW, dstY);
1924 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
1925 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
1928 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
1929 __asm__ volatile("sfence":::"memory");
1933 /* store changed local vars back in the context */
1935 c->lumBufIndex= lumBufIndex;
1936 c->chrBufIndex= chrBufIndex;
1937 c->lastInLumBuf= lastInLumBuf;
1938 c->lastInChrBuf= lastInChrBuf;
1940 return dstY - lastDstY;
1943 static av_cold void sws_init_swScale_c(SwsContext *c)
1945 enum PixelFormat srcFormat = c->srcFormat;
1947 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
1948 &c->yuv2packed1, &c->yuv2packed2,
1951 c->hScale = hScale_c;
1953 if (c->flags & SWS_FAST_BILINEAR) {
1954 c->hyscale_fast = hyscale_fast_c;
1955 c->hcscale_fast = hcscale_fast_c;
1958 c->chrToYV12 = NULL;
1960 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
1961 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
1962 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
1963 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
1967 case PIX_FMT_BGR4_BYTE:
1968 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
1969 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
1970 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
1971 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
1972 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
1973 case PIX_FMT_YUV420P16BE:
1974 case PIX_FMT_YUV422P16BE:
1975 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
1976 case PIX_FMT_YUV420P16LE:
1977 case PIX_FMT_YUV422P16LE:
1978 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
1980 if (c->chrSrcHSubSample) {
1982 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
1983 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
1984 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
1985 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
1986 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
1987 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
1988 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
1989 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
1990 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
1991 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
1992 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
1993 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
1994 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
1995 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
1999 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
2000 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
2001 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
2002 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
2003 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2004 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
2005 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2006 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
2007 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
2008 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2009 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
2010 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2011 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
2012 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
2016 c->lumToYV12 = NULL;
2017 c->alpToYV12 = NULL;
2018 switch (srcFormat) {
2019 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2020 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2021 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2022 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
2023 case PIX_FMT_YUYV422 :
2024 case PIX_FMT_YUV420P16BE:
2025 case PIX_FMT_YUV422P16BE:
2026 case PIX_FMT_YUV444P16BE:
2027 case PIX_FMT_Y400A :
2028 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2029 case PIX_FMT_UYVY422 :
2030 case PIX_FMT_YUV420P16LE:
2031 case PIX_FMT_YUV422P16LE:
2032 case PIX_FMT_YUV444P16LE:
2033 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2034 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2035 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break;
2036 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break;
2037 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2038 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break;
2039 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break;
2043 case PIX_FMT_BGR4_BYTE:
2044 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2045 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2046 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2047 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2048 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2049 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2050 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2051 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2052 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2053 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2054 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2057 switch (srcFormat) {
2059 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2061 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2062 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2066 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2068 c->lumConvertRange = lumRangeFromJpeg_c;
2069 c->chrConvertRange = chrRangeFromJpeg_c;
2071 c->lumConvertRange = lumRangeToJpeg_c;
2072 c->chrConvertRange = chrRangeToJpeg_c;
2076 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2077 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2078 c->needs_hcscale = 1;
2081 SwsFunc ff_getSwsFunc(SwsContext *c)
2083 sws_init_swScale_c(c);
2086 ff_sws_init_swScale_mmx(c);
2088 ff_sws_init_swScale_altivec(c);