2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients, rounded to Q15 fixed point
 * (RGB2YUV_SHIFT).  The 0.299/0.587/0.114 etc. weights match the ITU-R
 * BT.601 matrix; the 219/255 factor scales luma and 224/255 scales chroma
 * to limited ("TV") range.  Naming: <channel><output>, e.g. BY = blue's
 * contribution to Y, GV = green's contribution to V. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB->YUV coefficient rows (9 doubles each), indexed by the
 * SWS colorspace constant.  Row layout per the comments: luma weights for
 * G/B/R followed by the chroma weights.  Several indices deliberately share
 * the ITU-R BT.601 row (the default).  NOTE(review): the closing "};" of
 * this initializer is not visible in this chunk. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* Ordered-dither threshold matrices for low-depth RGB/BGR output.
 * dither_2x2_4 / dither_2x2_8 are used by the 15/16-bit packers below
 * (see the PIX_FMT_RGB565/555 cases in YSCALE_YUV_2_ANYRGB_C);
 * dither_4x4_16 by the RGB444/BGR444 case.  8-byte aligned so rows can be
 * loaded as a single MMX quadword.  NOTE(review): the closing "};" of each
 * initializer is not visible in this chunk. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrices.  dither_8x8_32 and dither_8x8_73 are used
 * by the 8-bit RGB8/BGR8 and 4-bit RGB4/BGR4 packed-output cases in
 * YSCALE_YUV_2_ANYRGB_C (indexed by output line y&7 and pixel x&7).
 * Non-static: also referenced from assembler/SIMD code elsewhere in
 * libswscale — presumably why DECLARE_ALIGNED lacks "static" here. */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* dither_8x8_220: 8x8 dither matrix with amplitude ~220, used by the
 * monochrome (MONOWHITE/MONOBLACK) and 1/4-bit RGB output paths.
 * NOTE(review): four same-named variants follow — a linear one and three
 * gamma-compensating ones (gamma 1.5 / 2.0 / 2.5 per their comments).
 * In the full file these are presumably selected by #if/#elif preprocessor
 * conditionals that are not visible in this chunk — confirm against the
 * complete source before editing. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
197 static av_always_inline void
198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
199 int lumFilterSize, const int16_t *chrFilter,
200 const int16_t **chrUSrc, const int16_t **chrVSrc,
201 int chrFilterSize, const int16_t **alpSrc,
202 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
203 uint16_t *aDest, int dstW, int chrDstW,
204 int big_endian, int output_bits)
206 //FIXME Optimize (just quickly written not optimized..)
208 int shift = 11 + 16 - output_bits;
/* Vertical filtering to planar YUV with 9..16 bits per sample.
 * For each output pixel, lumFilterSize (resp. chrFilterSize) source lines
 * are accumulated with their filter weights, then shifted down by
 * 'shift', clipped (full uint16 range for 16-bit output, output_bits
 * otherwise) and stored big- or little-endian via AV_WB16/AV_WL16.
 * big_endian and output_bits are compile-time constants so each
 * yuv2NBPS() instantiation below specializes away the branches.
 * The 1 << (26-output_bits) initializer is the rounding bias for the
 * subsequent >> shift.  aDest (alpha) may be NULL. */
210 #define output_pixel(pos, val) \
212 if (output_bits == 16) { \
213 AV_WB16(pos, av_clip_uint16(val >> shift)); \
215 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
218 if (output_bits == 16) { \
219 AV_WL16(pos, av_clip_uint16(val >> shift)); \
221 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
224 for (i = 0; i < dstW; i++) {
225 int val = 1 << (26-output_bits);
228 for (j = 0; j < lumFilterSize; j++)
229 val += lumSrc[j][i] * lumFilter[j];
231 output_pixel(&dest[i], val);
235 for (i = 0; i < chrDstW; i++) {
236 int u = 1 << (26-output_bits);
237 int v = 1 << (26-output_bits);
240 for (j = 0; j < chrFilterSize; j++) {
241 u += chrUSrc[j][i] * chrFilter[j];
242 v += chrVSrc[j][i] * chrFilter[j];
245 output_pixel(&uDest[i], u);
246 output_pixel(&vDest[i], v);
250 if (CONFIG_SWSCALE_ALPHA && aDest) {
251 for (i = 0; i < dstW; i++) {
252 int val = 1 << (26-output_bits);
255 for (j = 0; j < lumFilterSize; j++)
256 val += alpSrc[j][i] * lumFilter[j];
258 output_pixel(&aDest[i], val);
/* Stamps out a yuv2yuvX<bits><BE|LE>_c wrapper with the generic
 * output-function signature (uint8_t* planes), casting the plane pointers
 * to uint16_t* and delegating to yuv2yuvX16_c_template with is_be/bits as
 * compile-time constants.  (No comments may be inserted inside the macro:
 * every body line ends in a backslash continuation.) */
264 #define yuv2NBPS(bits, BE_LE, is_be) \
265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
266 const int16_t **lumSrc, int lumFilterSize, \
267 const int16_t *chrFilter, const int16_t **chrUSrc, \
268 const int16_t **chrVSrc, \
269 int chrFilterSize, const int16_t **alpSrc, \
270 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
271 uint8_t *_aDest, int dstW, int chrDstW) \
273 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
274 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
275 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
276 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
278 dest, uDest, vDest, aDest, \
279 dstW, chrDstW, is_be, bits); \
288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
289 const int16_t **lumSrc, int lumFilterSize,
290 const int16_t *chrFilter, const int16_t **chrUSrc,
291 const int16_t **chrVSrc,
292 int chrFilterSize, const int16_t **alpSrc,
293 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
294 uint8_t *aDest, int dstW, int chrDstW)
296 //FIXME Optimize (just quickly written not optimized..)
/* Vertical filtering to 8-bit planar YUV: accumulate the weighted source
 * lines, then >>19 and clip to 0..255.  (Filter coefficients and source
 * samples are int16_t, hence the 19-bit descale — matches the other 8-bit
 * output functions in this file.)  aDest (alpha plane) may be NULL. */
298 for (i=0; i<dstW; i++) {
301 for (j=0; j<lumFilterSize; j++)
302 val += lumSrc[j][i] * lumFilter[j];
304 dest[i]= av_clip_uint8(val>>19);
/* chroma planes: chrDstW is the (possibly subsampled) chroma width */
308 for (i=0; i<chrDstW; i++) {
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 uDest[i]= av_clip_uint8(u>>19);
318 vDest[i]= av_clip_uint8(v>>19);
/* alpha plane, only when alpha support is compiled in and requested */
321 if (CONFIG_SWSCALE_ALPHA && aDest)
322 for (i=0; i<dstW; i++) {
325 for (j=0; j<lumFilterSize; j++)
326 val += alpSrc[j][i] * lumFilter[j];
328 aDest[i]= av_clip_uint8(val>>19);
/* 1-tap ("no vertical filtering") planar YUV output: each 14-ish-bit
 * intermediate sample is rounded (+64) and descaled by 7 bits to 8-bit,
 * then clipped.  Used when no vertical scaling/interpolation is needed. */
332 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
333 const int16_t *chrUSrc, const int16_t *chrVSrc,
334 const int16_t *alpSrc,
335 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
336 uint8_t *aDest, int dstW, int chrDstW)
339 for (i=0; i<dstW; i++) {
340 int val= (lumSrc[i]+64)>>7;
341 dest[i]= av_clip_uint8(val);
345 for (i=0; i<chrDstW; i++) {
346 int u=(chrUSrc[i]+64)>>7;
347 int v=(chrVSrc[i]+64)>>7;
348 uDest[i]= av_clip_uint8(u);
349 vDest[i]= av_clip_uint8(v);
/* alpha plane, only when compiled in and an alpha buffer is supplied */
352 if (CONFIG_SWSCALE_ALPHA && aDest)
353 for (i=0; i<dstW; i++) {
354 int val= (alpSrc[i]+64)>>7;
355 aDest[i]= av_clip_uint8(val);
/* Vertical filtering to NV12/NV21: planar luma plus one interleaved
 * chroma plane (uDest).  NV12 stores U,V pairs; otherwise (NV21) the
 * pair order is swapped to V,U — see the two chroma loops below. */
359 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
360 const int16_t **lumSrc, int lumFilterSize,
361 const int16_t *chrFilter, const int16_t **chrUSrc,
362 const int16_t **chrVSrc, int chrFilterSize,
363 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
364 uint8_t *vDest, uint8_t *aDest,
365 int dstW, int chrDstW)
367 enum PixelFormat dstFormat = c->dstFormat;
369 //FIXME Optimize (just quickly written not optimized..)
371 for (i=0; i<dstW; i++) {
374 for (j=0; j<lumFilterSize; j++)
375 val += lumSrc[j][i] * lumFilter[j];
377 dest[i]= av_clip_uint8(val>>19);
/* interleaved chroma: NV12 = U then V per pair ... */
383 if (dstFormat == PIX_FMT_NV12)
384 for (i=0; i<chrDstW; i++) {
388 for (j=0; j<chrFilterSize; j++) {
389 u += chrUSrc[j][i] * chrFilter[j];
390 v += chrVSrc[j][i] * chrFilter[j];
393 uDest[2*i]= av_clip_uint8(u>>19);
394 uDest[2*i+1]= av_clip_uint8(v>>19);
/* ... else (NV21) V then U */
397 for (i=0; i<chrDstW; i++) {
401 for (j=0; j<chrFilterSize; j++) {
402 u += chrUSrc[j][i] * chrFilter[j];
403 v += chrVSrc[j][i] * chrFilter[j];
406 uDest[2*i]= av_clip_uint8(v>>19);
407 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel() for the gray16 writers: stores a 16-bit sample with the
 * byte order selected by the compile-time 'target' (GRAY16BE vs LE).
 * NOTE(review): the macro body is truncated in this chunk. */
411 #define output_pixel(pos, val) \
412 if (target == PIX_FMT_GRAY16BE) { \
418 static av_always_inline void
419 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
420 const int16_t **lumSrc, int lumFilterSize,
421 const int16_t *chrFilter, const int16_t **chrUSrc,
422 const int16_t **chrVSrc, int chrFilterSize,
423 const int16_t **alpSrc, uint8_t *dest, int dstW,
424 int y, enum PixelFormat target)
/* Vertical filtering to 16-bit grayscale: two luma pixels per iteration;
 * chroma inputs are ignored.  The (Y1|Y2) & 0x10000 test cheaply detects
 * out-of-range values before the (rarely needed) clip. */
428 for (i = 0; i < (dstW >> 1); i++) {
432 const int i2 = 2 * i;
434 for (j = 0; j < lumFilterSize; j++) {
435 Y1 += lumSrc[j][i2] * lumFilter[j];
436 Y2 += lumSrc[j][i2+1] * lumFilter[j];
440 if ((Y1 | Y2) & 0x10000) {
441 Y1 = av_clip_uint16(Y1);
442 Y2 = av_clip_uint16(Y2);
444 output_pixel(&dest[2 * i2 + 0], Y1);
445 output_pixel(&dest[2 * i2 + 2], Y2);
/* 2-tap (vertical bilinear) 16-bit grayscale output: blends buf0/buf1 by
 * yalpha (0..4095, Q12) and descales by 11 bits.  Chroma/alpha buffers
 * are part of the shared signature but unused here. */
449 static av_always_inline void
450 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
451 const uint16_t *buf1, const uint16_t *ubuf0,
452 const uint16_t *ubuf1, const uint16_t *vbuf0,
453 const uint16_t *vbuf1, const uint16_t *abuf0,
454 const uint16_t *abuf1, uint8_t *dest, int dstW,
455 int yalpha, int uvalpha, int y,
456 enum PixelFormat target)
/* NOTE(review): the trailing backslash on the next line is a stray
 * line-continuation (this is a function, not a macro); harmless since it
 * merely splices the following line, but it should be removed upstream. */
458 int yalpha1 = 4095 - yalpha; \
461 for (i = 0; i < (dstW >> 1); i++) {
462 const int i2 = 2 * i;
463 int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11;
464 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
466 output_pixel(&dest[2 * i2 + 0], Y1);
467 output_pixel(&dest[2 * i2 + 2], Y2);
/* 1-tap 16-bit grayscale output: buf0 samples are widened by << 1 to fill
 * the 16-bit range (no vertical interpolation).  Chroma inputs unused. */
471 static av_always_inline void
472 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
473 const uint16_t *ubuf0, const uint16_t *ubuf1,
474 const uint16_t *vbuf0, const uint16_t *vbuf1,
475 const uint16_t *abuf0, uint8_t *dest, int dstW,
476 int uvalpha, enum PixelFormat dstFormat,
477 int flags, int y, enum PixelFormat target)
481 for (i = 0; i < (dstW >> 1); i++) {
482 const int i2 = 2 * i;
483 int Y1 = buf0[i2 ] << 1;
484 int Y2 = buf0[i2+1] << 1;
486 output_pixel(&dest[2 * i2 + 0], Y1);
487 output_pixel(&dest[2 * i2 + 2], Y2);
/* Generates the three public entry points (_X_c full vertical filter,
 * _2_c two-tap blend, _1_c single-tap) for one packed output format by
 * forwarding to the matching name##base##_*_c_template with 'fmt' as a
 * compile-time constant.  The two instantiations at the end produce the
 * little/big-endian gray16 writers.  (No comments inside the macro: every
 * body line ends in a backslash continuation.) */
493 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
494 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
495 const int16_t **lumSrc, int lumFilterSize, \
496 const int16_t *chrFilter, const int16_t **chrUSrc, \
497 const int16_t **chrVSrc, int chrFilterSize, \
498 const int16_t **alpSrc, uint8_t *dest, int dstW, \
501 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
502 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
503 alpSrc, dest, dstW, y, fmt); \
506 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
507 const uint16_t *buf1, const uint16_t *ubuf0, \
508 const uint16_t *ubuf1, const uint16_t *vbuf0, \
509 const uint16_t *vbuf1, const uint16_t *abuf0, \
510 const uint16_t *abuf1, uint8_t *dest, int dstW, \
511 int yalpha, int uvalpha, int y) \
513 name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
514 vbuf0, vbuf1, abuf0, abuf1, \
515 dest, dstW, yalpha, uvalpha, y, fmt); \
518 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
519 const uint16_t *ubuf0, const uint16_t *ubuf1, \
520 const uint16_t *vbuf0, const uint16_t *vbuf1, \
521 const uint16_t *abuf0, uint8_t *dest, int dstW, \
522 int uvalpha, enum PixelFormat dstFormat, \
525 name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
526 vbuf1, abuf0, dest, dstW, uvalpha, \
527 dstFormat, flags, y, fmt); \
530 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
531 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel() for the monochrome writers: emits one packed byte of
 * 8 dithered 1-bit pixels; MONOBLACK vs MONOWHITE selects the polarity.
 * NOTE(review): the macro body is truncated in this chunk. */
533 #define output_pixel(pos, acc) \
534 if (target == PIX_FMT_MONOBLACK) { \
540 static av_always_inline void
541 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
542 const int16_t **lumSrc, int lumFilterSize,
543 const int16_t *chrFilter, const int16_t **chrUSrc,
544 const int16_t **chrVSrc, int chrFilterSize,
545 const int16_t **alpSrc, uint8_t *dest, int dstW,
546 int y, enum PixelFormat target)
/* Full vertical filtering to 1 bpp: luma is filtered, dithered with the
 * 220-amplitude 8x8 matrix, then thresholded through the g lookup table
 * (built from table_gU/table_gV at neutral chroma 128) and shifted into
 * the bit accumulator 'acc' two pixels per loop iteration. */
548 const uint8_t * const d128=dither_8x8_220[y&7];
549 uint8_t *g = c->table_gU[128] + c->table_gV[128];
553 for (i = 0; i < dstW - 1; i += 2) {
558 for (j = 0; j < lumFilterSize; j++) {
559 Y1 += lumSrc[j][i] * lumFilter[j];
560 Y2 += lumSrc[j][i+1] * lumFilter[j];
564 if ((Y1 | Y2) & 0x100) {
565 Y1 = av_clip_uint8(Y1);
566 Y2 = av_clip_uint8(Y2);
568 acc += acc + g[Y1 + d128[(i + 0) & 7]];
569 acc += acc + g[Y2 + d128[(i + 1) & 7]];
571 output_pixel(*dest++, acc);
/* 2-tap (vertical bilinear) monochrome output: blends buf0/buf1 by yalpha
 * (Q12), dithers and packs 8 pixels per output byte.  'acc += acc + bit'
 * shifts the accumulator left while appending the new 1-bit pixel. */
576 static av_always_inline void
577 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
578 const uint16_t *buf1, const uint16_t *ubuf0,
579 const uint16_t *ubuf1, const uint16_t *vbuf0,
580 const uint16_t *vbuf1, const uint16_t *abuf0,
581 const uint16_t *abuf1, uint8_t *dest, int dstW,
582 int yalpha, int uvalpha, int y,
583 enum PixelFormat target)
585 const uint8_t * const d128 = dither_8x8_220[y & 7];
586 uint8_t *g = c->table_gU[128] + c->table_gV[128];
587 int yalpha1 = 4095 - yalpha;
590 for (i = 0; i < dstW - 7; i += 8) {
591 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
592 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
593 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
594 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
595 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
596 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
597 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
598 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
599 output_pixel(*dest++, acc);
/* 1-tap monochrome output: buf0 samples are descaled by >> 7, dithered
 * and packed 8 pixels per byte — no vertical interpolation.  The two
 * YUV2PACKEDWRAPPER lines at the end instantiate the MONOWHITE and
 * MONOBLACK entry points from these templates. */
603 static av_always_inline void
604 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
605 const uint16_t *ubuf0, const uint16_t *ubuf1,
606 const uint16_t *vbuf0, const uint16_t *vbuf1,
607 const uint16_t *abuf0, uint8_t *dest, int dstW,
608 int uvalpha, enum PixelFormat dstFormat,
609 int flags, int y, enum PixelFormat target)
611 const uint8_t * const d128 = dither_8x8_220[y & 7];
612 uint8_t *g = c->table_gU[128] + c->table_gV[128];
615 for (i = 0; i < dstW - 7; i += 8) {
616 int acc = g[(buf0[i ] >> 7) + d128[0]];
617 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
618 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
619 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
620 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
621 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
622 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
623 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
624 output_pixel(*dest++, acc);
630 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
631 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels() for the 4:2:2 packed writers: stores one Y1 U Y2 V
 * quad with the byte positions selected by the compile-time 'target'
 * (YUYV: Y at even offsets; UYVY: Y at odd offsets).
 * NOTE(review): the macro body is partially truncated in this chunk. */
633 #define output_pixels(pos, Y1, U, Y2, V) \
634 if (target == PIX_FMT_YUYV422) { \
635 dest[pos + 0] = Y1; \
637 dest[pos + 2] = Y2; \
641 dest[pos + 1] = Y1; \
643 dest[pos + 3] = Y2; \
646 static av_always_inline void
647 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
648 const int16_t **lumSrc, int lumFilterSize,
649 const int16_t *chrFilter, const int16_t **chrUSrc,
650 const int16_t **chrVSrc, int chrFilterSize,
651 const int16_t **alpSrc, uint8_t *dest, int dstW,
652 int y, enum PixelFormat target)
/* Full vertical filtering to packed 4:2:2: two luma and one chroma pair
 * per iteration; the combined & 0x100 test skips clipping when all four
 * values are already in 0..255. */
656 for (i = 0; i < (dstW >> 1); i++) {
663 for (j = 0; j < lumFilterSize; j++) {
664 Y1 += lumSrc[j][i * 2] * lumFilter[j];
665 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
667 for (j = 0; j < chrFilterSize; j++) {
668 U += chrUSrc[j][i] * chrFilter[j];
669 V += chrVSrc[j][i] * chrFilter[j];
675 if ((Y1 | Y2 | U | V) & 0x100) {
676 Y1 = av_clip_uint8(Y1);
677 Y2 = av_clip_uint8(Y2);
678 U = av_clip_uint8(U);
679 V = av_clip_uint8(V);
681 output_pixels(4*i, Y1, U, Y2, V);
/* 2-tap (vertical bilinear) packed 4:2:2 output: luma blended by yalpha,
 * chroma by uvalpha (both Q12, descaled by >> 19). */
685 static av_always_inline void
686 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
687 const uint16_t *buf1, const uint16_t *ubuf0,
688 const uint16_t *ubuf1, const uint16_t *vbuf0,
689 const uint16_t *vbuf1, const uint16_t *abuf0,
690 const uint16_t *abuf1, uint8_t *dest, int dstW,
691 int yalpha, int uvalpha, int y,
692 enum PixelFormat target)
694 int yalpha1 = 4095 - yalpha;
695 int uvalpha1 = 4095 - uvalpha;
698 for (i = 0; i < (dstW >> 1); i++) {
699 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
700 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
701 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
702 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
704 output_pixels(i * 4, Y1, U, Y2, V);
/* 1-tap packed 4:2:2 output.  uvalpha < 2048 means the chroma lines need
 * no blending (take ubuf1/vbuf1 as-is); otherwise average ubuf0/ubuf1
 * (>> 8 instead of >> 7 folds in the /2).  The trailing wrappers
 * instantiate the YUYV422 and UYVY422 entry points. */
708 static av_always_inline void
709 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
710 const uint16_t *ubuf0, const uint16_t *ubuf1,
711 const uint16_t *vbuf0, const uint16_t *vbuf1,
712 const uint16_t *abuf0, uint8_t *dest, int dstW,
713 int uvalpha, enum PixelFormat dstFormat,
714 int flags, int y, enum PixelFormat target)
718 if (uvalpha < 2048) {
719 for (i = 0; i < (dstW >> 1); i++) {
720 int Y1 = buf0[i * 2] >> 7;
721 int Y2 = buf0[i * 2 + 1] >> 7;
722 int U = ubuf1[i] >> 7;
723 int V = vbuf1[i] >> 7;
725 output_pixels(i * 4, Y1, U, Y2, V);
728 for (i = 0; i < (dstW >> 1); i++) {
729 int Y1 = buf0[i * 2] >> 7;
730 int Y2 = buf0[i * 2 + 1] >> 7;
731 int U = (ubuf0[i] + ubuf1[i]) >> 8;
732 int V = (vbuf0[i] + vbuf1[i]) >> 8;
734 output_pixels(i * 4, Y1, U, Y2, V);
741 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
742 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* r_b / b_r swap the red and blue lookup tables depending on whether the
 * compile-time target is RGB48 or BGR48, so one template serves both
 * component orders. */
744 #define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
745 #define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
/* Full vertical filtering to 48-bit RGB/BGR: YUV->RGB via the per-context
 * lookup tables (table_rV/gU+gV/bU), writing each 8-bit table value into
 * both bytes of the 16-bit component (dest[2k] = dest[2k+1]). */
747 static av_always_inline void
748 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
749 const int16_t **lumSrc, int lumFilterSize,
750 const int16_t *chrFilter, const int16_t **chrUSrc,
751 const int16_t **chrVSrc, int chrFilterSize,
752 const int16_t **alpSrc, uint8_t *dest, int dstW,
753 int y, enum PixelFormat target)
757 for (i = 0; i < (dstW >> 1); i++) {
763 const uint8_t *r, *g, *b;
765 for (j = 0; j < lumFilterSize; j++) {
766 Y1 += lumSrc[j][i * 2] * lumFilter[j];
767 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
769 for (j = 0; j < chrFilterSize; j++) {
770 U += chrUSrc[j][i] * chrFilter[j];
771 V += chrVSrc[j][i] * chrFilter[j];
777 if ((Y1 | Y2 | U | V) & 0x100) {
778 Y1 = av_clip_uint8(Y1);
779 Y2 = av_clip_uint8(Y2);
780 U = av_clip_uint8(U);
781 V = av_clip_uint8(V);
784 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
785 r = (const uint8_t *) c->table_rV[V];
786 g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
787 b = (const uint8_t *) c->table_bU[U];
789 dest[ 0] = dest[ 1] = r_b[Y1];
790 dest[ 2] = dest[ 3] = g[Y1];
791 dest[ 4] = dest[ 5] = b_r[Y1];
792 dest[ 6] = dest[ 7] = r_b[Y2];
793 dest[ 8] = dest[ 9] = g[Y2];
794 dest[10] = dest[11] = b_r[Y2];
/* 2-tap (vertical bilinear) 48-bit RGB/BGR output: blend by yalpha /
 * uvalpha (Q12, >> 19), then table lookup and 16-bit component store as
 * in the _X_ variant above. */
799 static av_always_inline void
800 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
801 const uint16_t *buf1, const uint16_t *ubuf0,
802 const uint16_t *ubuf1, const uint16_t *vbuf0,
803 const uint16_t *vbuf1, const uint16_t *abuf0,
804 const uint16_t *abuf1, uint8_t *dest, int dstW,
805 int yalpha, int uvalpha, int y,
806 enum PixelFormat target)
808 int yalpha1 = 4095 - yalpha;
809 int uvalpha1 = 4095 - uvalpha;
812 for (i = 0; i < (dstW >> 1); i++) {
813 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
814 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
815 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
816 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
817 const uint8_t *r = (const uint8_t *) c->table_rV[V],
818 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
819 *b = (const uint8_t *) c->table_bU[U];
821 dest[ 0] = dest[ 1] = r_b[Y1];
822 dest[ 2] = dest[ 3] = g[Y1];
823 dest[ 4] = dest[ 5] = b_r[Y1];
824 dest[ 6] = dest[ 7] = r_b[Y2];
825 dest[ 8] = dest[ 9] = g[Y2];
826 dest[10] = dest[11] = b_r[Y2];
/* 1-tap 48-bit RGB/BGR output.  uvalpha < 2048: use ubuf1/vbuf1 directly;
 * otherwise average the two chroma lines (>> 8 folds in the /2).
 * The trailing wrappers instantiate the big-endian RGB48/BGR48 writers;
 * the little-endian ones are deliberately commented out. */
831 static av_always_inline void
832 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
833 const uint16_t *ubuf0, const uint16_t *ubuf1,
834 const uint16_t *vbuf0, const uint16_t *vbuf1,
835 const uint16_t *abuf0, uint8_t *dest, int dstW,
836 int uvalpha, enum PixelFormat dstFormat,
837 int flags, int y, enum PixelFormat target)
841 if (uvalpha < 2048) {
842 for (i = 0; i < (dstW >> 1); i++) {
843 int Y1 = buf0[i * 2] >> 7;
844 int Y2 = buf0[i * 2 + 1] >> 7;
845 int U = ubuf1[i] >> 7;
846 int V = vbuf1[i] >> 7;
847 const uint8_t *r = (const uint8_t *) c->table_rV[V],
848 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
849 *b = (const uint8_t *) c->table_bU[U];
851 dest[ 0] = dest[ 1] = r_b[Y1];
852 dest[ 2] = dest[ 3] = g[Y1];
853 dest[ 4] = dest[ 5] = b_r[Y1];
854 dest[ 6] = dest[ 7] = r_b[Y2];
855 dest[ 8] = dest[ 9] = g[Y2];
856 dest[10] = dest[11] = b_r[Y2];
860 for (i = 0; i < (dstW >> 1); i++) {
861 int Y1 = buf0[i * 2] >> 7;
862 int Y2 = buf0[i * 2 + 1] >> 7;
863 int U = (ubuf0[i] + ubuf1[i]) >> 8;
864 int V = (vbuf0[i] + vbuf1[i]) >> 8;
865 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
866 const uint8_t *r = (const uint8_t *) c->table_rV[V],
867 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
868 *b = (const uint8_t *) c->table_bU[U];
870 dest[ 0] = dest[ 1] = r_b[Y1];
871 dest[ 2] = dest[ 3] = g[Y1];
872 dest[ 4] = dest[ 5] = b_r[Y1];
873 dest[ 6] = dest[ 7] = r_b[Y2];
874 dest[ 8] = dest[ 9] = g[Y2];
875 dest[10] = dest[11] = b_r[Y2];
884 YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
885 //YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
886 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
887 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Loop header macro for the full-vertical-filter RGB writers: computes
 * clipped Y1/Y2/U/V (and optionally A1/A2 when 'alpha') for a pixel pair,
 * then sets up the r/g/b lookup-table pointers; the per-format store code
 * is appended by YSCALE_YUV_2_ANYRGB_C at each call site.  (No comments
 * may be inserted inside: the body is one long backslash continuation.) */
889 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
890 for (i=0; i<(dstW>>1); i++) {\
896 int av_unused A1, A2;\
897 type av_unused *r, *b, *g;\
900 for (j=0; j<lumFilterSize; j++) {\
901 Y1 += lumSrc[j][i2] * lumFilter[j];\
902 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
904 for (j=0; j<chrFilterSize; j++) {\
905 U += chrUSrc[j][i] * chrFilter[j];\
906 V += chrVSrc[j][i] * chrFilter[j];\
912 if ((Y1|Y2|U|V)&0x100) {\
913 Y1 = av_clip_uint8(Y1); \
914 Y2 = av_clip_uint8(Y2); \
915 U = av_clip_uint8(U); \
916 V = av_clip_uint8(V); \
921 for (j=0; j<lumFilterSize; j++) {\
922 A1 += alpSrc[j][i2 ] * lumFilter[j];\
923 A2 += alpSrc[j][i2+1] * lumFilter[j];\
927 if ((A1|A2)&0x100) {\
928 A1 = av_clip_uint8(A1); \
929 A2 = av_clip_uint8(A2); \
932 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
933 r = (type *)c->table_rV[V]; \
934 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
935 b = (type *)c->table_bU[U];
/* Loop header for the full-chroma-resolution ("_full") RGB path: one
 * pixel per iteration, converted arithmetically with the context's
 * yuv2rgb_* coefficients instead of lookup tables; R/G/B are kept at
 * high precision and clipped to 30 bits for the caller's final store. */
937 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
938 for (i=0; i<dstW; i++) {\
946 for (j=0; j<lumFilterSize; j++) {\
947 Y += lumSrc[j][i ] * lumFilter[j];\
949 for (j=0; j<chrFilterSize; j++) {\
950 U += chrUSrc[j][i] * chrFilter[j];\
951 V += chrVSrc[j][i] * chrFilter[j];\
958 for (j=0; j<lumFilterSize; j++)\
959 A += alpSrc[j][i ] * lumFilter[j];\
962 A = av_clip_uint8(A);\
964 Y-= c->yuv2rgb_y_offset;\
965 Y*= c->yuv2rgb_y_coeff;\
967 R= Y + V*c->yuv2rgb_v2r_coeff;\
968 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
969 B= Y + U*c->yuv2rgb_u2b_coeff;\
970 if ((R|G|B)&(0xC0000000)) {\
971 R = av_clip_uintp2(R, 30); \
972 G = av_clip_uintp2(G, 30); \
973 B = av_clip_uintp2(B, 30); \
976 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
977 for (i=0; i<(dstW>>1); i++) { \
979 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
980 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
981 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \
982 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \
983 type av_unused *r, *b, *g; \
984 int av_unused A1, A2; \
986 A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \
987 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \
989 r = (type *)c->table_rV[V];\
990 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
991 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_RGB2_C (above): 2-tap vertical-bilinear loop header —
 * blends buf0/buf1 (and abuf0/abuf1 when alpha) by yalpha/uvalpha.
 * YSCALE_YUV_2_RGB1_C (below): 1-tap header using the second chroma line
 * (ubuf1/vbuf1) unblended. */
993 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
994 for (i=0; i<(dstW>>1); i++) {\
996 int Y1= buf0[i2 ]>>7;\
997 int Y2= buf0[i2+1]>>7;\
998 int U= (ubuf1[i])>>7;\
999 int V= (vbuf1[i])>>7;\
1000 type av_unused *r, *b, *g;\
1001 int av_unused A1, A2;\
1004 A2= abuf0[i2+1]>>7;\
1006 r = (type *)c->table_rV[V];\
1007 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1008 b = (type *)c->table_bU[U];
/* YSCALE_YUV_2_RGB1B_C: 1-tap luma but averaged chroma — used when the
 * two chroma input lines must be blended 50/50 (>> 8 folds in the /2). */
1010 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
1011 for (i=0; i<(dstW>>1); i++) {\
1013 int Y1= buf0[i2 ]>>7;\
1014 int Y2= buf0[i2+1]>>7;\
1015 int U= (ubuf0[i] + ubuf1[i])>>8;\
1016 int V= (vbuf0[i] + vbuf1[i])>>8;\
1017 type av_unused *r, *b, *g;\
1018 int av_unused A1, A2;\
1021 A2= abuf0[i2+1]>>7;\
1023 r = (type *)c->table_rV[V];\
1024 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1025 b = (type *)c->table_bU[U];
/* Dispatches on c->dstFormat and, for each packed RGB/BGR output format,
 * expands 'func' (one of the YSCALE_YUV_2_RGB*_C loop headers above) and
 * appends the format-specific store code: 32-bit with optional alpha
 * (A<<24 or low-byte alpha variant), 24-bit byte stores, dithered
 * 565/555/444 via the 2x2/4x4 matrices, and dithered 8/4/4_BYTE via the
 * 8x8 matrices.  CONFIG_SMALL selects the size-optimized needAlpha
 * runtime check over separate compile-time branches.  (No comments may
 * be inserted inside: the body is one long backslash continuation.) */
1027 #define YSCALE_YUV_2_ANYRGB_C(func)\
1028 switch(c->dstFormat) {\
1031 if (CONFIG_SMALL) {\
1032 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1033 func(uint32_t,needAlpha)\
1034 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
1035 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
1038 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1040 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
1041 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
1045 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1046 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1053 if (CONFIG_SMALL) {\
1054 int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1055 func(uint32_t,needAlpha)\
1056 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
1057 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
1060 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1062 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
1063 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
1067 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1068 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1073 case PIX_FMT_RGB24:\
1075 ((uint8_t*)dest)[0]= r[Y1];\
1076 ((uint8_t*)dest)[1]= g[Y1];\
1077 ((uint8_t*)dest)[2]= b[Y1];\
1078 ((uint8_t*)dest)[3]= r[Y2];\
1079 ((uint8_t*)dest)[4]= g[Y2];\
1080 ((uint8_t*)dest)[5]= b[Y2];\
1084 case PIX_FMT_BGR24:\
1086 ((uint8_t*)dest)[0]= b[Y1];\
1087 ((uint8_t*)dest)[1]= g[Y1];\
1088 ((uint8_t*)dest)[2]= r[Y1];\
1089 ((uint8_t*)dest)[3]= b[Y2];\
1090 ((uint8_t*)dest)[4]= g[Y2];\
1091 ((uint8_t*)dest)[5]= r[Y2];\
1095 case PIX_FMT_RGB565:\
1096 case PIX_FMT_BGR565:\
1098 const int dr1= dither_2x2_8[y&1 ][0];\
1099 const int dg1= dither_2x2_4[y&1 ][0];\
1100 const int db1= dither_2x2_8[(y&1)^1][0];\
1101 const int dr2= dither_2x2_8[y&1 ][1];\
1102 const int dg2= dither_2x2_4[y&1 ][1];\
1103 const int db2= dither_2x2_8[(y&1)^1][1];\
1105 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1106 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1110 case PIX_FMT_RGB555:\
1111 case PIX_FMT_BGR555:\
1113 const int dr1= dither_2x2_8[y&1 ][0];\
1114 const int dg1= dither_2x2_8[y&1 ][1];\
1115 const int db1= dither_2x2_8[(y&1)^1][0];\
1116 const int dr2= dither_2x2_8[y&1 ][1];\
1117 const int dg2= dither_2x2_8[y&1 ][0];\
1118 const int db2= dither_2x2_8[(y&1)^1][1];\
1120 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1121 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1125 case PIX_FMT_RGB444:\
1126 case PIX_FMT_BGR444:\
1128 const int dr1= dither_4x4_16[y&3 ][0];\
1129 const int dg1= dither_4x4_16[y&3 ][1];\
1130 const int db1= dither_4x4_16[(y&3)^3][0];\
1131 const int dr2= dither_4x4_16[y&3 ][1];\
1132 const int dg2= dither_4x4_16[y&3 ][0];\
1133 const int db2= dither_4x4_16[(y&3)^3][1];\
1135 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1136 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1143 const uint8_t * const d64= dither_8x8_73[y&7];\
1144 const uint8_t * const d32= dither_8x8_32[y&7];\
1146 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
1147 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
1154 const uint8_t * const d64= dither_8x8_73 [y&7];\
1155 const uint8_t * const d128=dither_8x8_220[y&7];\
1157 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
1158 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
1162 case PIX_FMT_RGB4_BYTE:\
1163 case PIX_FMT_BGR4_BYTE:\
1165 const uint8_t * const d64= dither_8x8_73 [y&7];\
1166 const uint8_t * const d128=dither_8x8_220[y&7];\
1168 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
1169 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
/**
 * Vertically and horizontally scaled YUV -> packed pixel output.
 * The whole body is generated by YSCALE_YUV_2_ANYRGB_C, which dispatches on
 * c->dstFormat; YSCALE_YUV_2_RGBX_C performs the vertical filtering of the
 * luma/chroma/alpha input slices for each output pixel pair.
 */
static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
/**
 * Vertically filtered YUV -> packed RGB with per-pixel chroma
 * (SWS_FULL_CHR_H_INT path). YSCALE_YUV_2_RGBX_FULL_C does the vertical
 * filtering and RGB conversion; for alpha-capable formats the alpha byte at
 * dest[aidx] is either the scaled alpha value A or opaque 255.
 */
static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
                            const int16_t **lumSrc, int lumFilterSize,
                            const int16_t *chrFilter, const int16_t **chrUSrc,
                            const int16_t **chrVSrc, int chrFilterSize,
                            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
    int step= c->dstFormatBpp/8; /* bytes per packed output pixel */

    switch(c->dstFormat) {
        /* only compute A when an alpha plane is actually being scaled */
        int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
        YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
        dest[aidx]= needAlpha ? A : 255;
        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
        YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
        int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
        YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
        dest[aidx]= needAlpha ? A : 255;
        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
        YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
 * vertical bilinear scale YV12 to RGB: blends two buffered input lines with
 * weights yalpha/uvalpha (12-bit, 0..4095) before packed-RGB conversion.
static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *buf1, const uint16_t *ubuf0,
                          const uint16_t *ubuf1, const uint16_t *vbuf0,
                          const uint16_t *vbuf1, const uint16_t *abuf0,
                          const uint16_t *abuf1, uint8_t *dest, int dstW,
                          int yalpha, int uvalpha, int y)
    int yalpha1=4095- yalpha;   /* complementary weight for buf0 */
    int uvalpha1=4095-uvalpha;
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
 * YV12 to RGB without scaling or interpolating
static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
                          const uint16_t *ubuf0, const uint16_t *ubuf1,
                          const uint16_t *vbuf0, const uint16_t *vbuf1,
                          const uint16_t *abuf0, uint8_t *dest, int dstW,
                          int uvalpha, enum PixelFormat dstFormat,
    /* for small uvalpha use the first chroma line only; otherwise use the
     * "B" macro variant (presumably averaging both chroma lines -- see the
     * YSCALE_YUV_2_RGB1/RGB1B macro definitions to confirm) */
    if (uvalpha < 2048) {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
/* Fill a width x height rectangle of an 8-bit plane with a constant byte
 * value, starting 'y' rows into the plane (used e.g. to write an opaque
 * alpha plane). */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
1319 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1321 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1322 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Packed 16-bit-per-component RGB48/BGR48 -> 8-bit luma.
 * input_pixel() handles the byte order; the r/b macros above swap the
 * channel roles for the BGR variants. */
static av_always_inline void
rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src[i*6+0]) >> 8;   /* keep the high byte */
        int g   = input_pixel(&src[i*6+2]) >> 8;
        int b_r = input_pixel(&src[i*6+4]) >> 8;

        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Packed RGB48/BGR48 -> full-width (4:4:4) chroma. Note only src1 is read;
 * src2 is presumably the same line, as with the other ToUV converters --
 * confirm against the callers. */
static av_always_inline void
rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                     const uint8_t *src1, const uint8_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*6+0]) >> 8;
        int g   = input_pixel(&src1[i*6+2]) >> 8;
        int b_r = input_pixel(&src1[i*6+4]) >> 8;

        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Packed RGB48/BGR48 -> horizontally halved (2:1) chroma: each output sample
 * is computed from the sum of two adjacent input pixels. */
static av_always_inline void
rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                          const uint8_t *src1, const uint8_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
        int g   = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
        int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);

        /* sums are at 2x scale, hence the extra +1 in the final shift */
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
/* Instantiate {rgb,bgr}48{LE,BE} ToY / ToUV / ToUV_half wrappers around the
 * templates above. 'origin' is a compile-time constant, so the byte-order
 * and channel-order branches inside the templates fold away. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
                                            int width, uint32_t *unused) \
    rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                             const uint8_t *src1, const uint8_t *src2, \
                                             int width, uint32_t *unused) \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                                  const uint8_t *src1, const uint8_t *src2, \
                                                  int width, uint32_t *unused) \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \

rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
1402 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1403 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1404 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed 16/32-bit RGB -> 8-bit luma. All shift/mask parameters are
 * compile-time constants supplied by rgb16_32_wrapper(), so every
 * instantiation specializes into straight-line code. */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    /* pre-shift the coefficients to the per-format precision S */
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
              rnd = 33 << (S - 1);   /* rounding bias */
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic packed 16/32-bit RGB -> full-width (4:4:4) chroma; see
 * rgb16_32ToY_c_template() for the parameter scheme. */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << (S - 1);   /* rounding bias (includes +128 offset) */
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Like rgb16_32ToUV_c_template(), but averages two horizontally adjacent
 * input pixels per output chroma sample (2:1 horizontal subsampling). */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = 257 << S, maskgx = ~(maskr | maskb);
    /* widen the r/b/g masks by one bit so the two-pixel sums fit */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        /* green isolated via the complement mask; subtracting it from the
         * raw pixel sum leaves the summed r and b fields */
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Instantiate name##ToY_c / name##ToUV_c / name##ToUV_half_c for one packed
 * RGB layout. Every layout parameter is a compile-time constant, so each
 * expansion compiles to specialized code. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \

rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha channel from packed ABGR: A is the first byte of each
 * 4-byte pixel. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *p = src;
    int n;
    for (n = 0; n < width; n++, p += 4)
        dst[n] = *p;
}
/* Extract the alpha channel from packed RGBA: A is the last byte of each
 * 4-byte pixel. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *p = src + 3;
    int n;
    for (n = 0; n < width; n++, p += 4)
        dst[n] = *p;
}
/* PAL8 -> luma: look up each palette index and keep the low byte (Y) of the
 * pre-converted YUV palette entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = pal[src[n]] & 0xFF;
}
/* PAL8 -> chroma: U sits in byte 1 and V in byte 2 of each pre-converted
 * YUV palette entry. Both source pointers must reference the same line. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int n;
    assert(src1 == src2);
    for (n = 0; n < width; n++) {
        int entry = pal[src1[n]];
        dstU[n] = entry >> 8;
        dstV[n] = entry >> 16;
    }
}
/* Expand 1 bpp monochrome ("white is zero") to 8-bit luma: a set bit maps
 * to 0, a clear bit to 255. Any trailing (width % 8) pixels are ignored. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int byte, bit;
    for (byte = 0; byte < width / 8; byte++) {
        int bits = ~src[byte];                  /* invert: 0 bit -> white */
        for (bit = 7; bit >= 0; bit--)
            *dst++ = ((bits >> bit) & 1) * 255; /* MSB is the leftmost pixel */
    }
}
/* Expand 1 bpp monochrome ("black is zero") to 8-bit luma: a set bit maps
 * to 255, a clear bit to 0. Any trailing (width % 8) pixels are ignored. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int byte, bit;
    for (byte = 0; byte < width / 8; byte++) {
        int bits = src[byte];
        for (bit = 7; bit >= 0; bit--)
            *dst++ = ((bits >> bit) & 1) * 255; /* MSB is the leftmost pixel */
    }
}
1587 //FIXME yuy2* can read up to 7 samples too much
/* YUYV (YUY2) -> planar luma: Y samples sit at even byte positions. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = src[n << 1];
}
/* YUYV (YUY2) -> planar chroma: U at byte 1 and V at byte 3 of every
 * 4-byte Y0-U-Y1-V group. Both source pointers must be the same line. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
/* Reduce little-endian 16-bit chroma samples to 8 bits by taking the high
 * byte of each sample from the separate U and V input planes. */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *u = src1 + 1, *v = src2 + 1;
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = u[2 * n];
        dstV[n] = v[2 * n];
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY -> planar luma: Y samples sit at odd byte positions. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *y = src + 1;
    int n;
    for (n = 0; n < width; n++)
        dst[n] = y[2 * n];
}
/* UYVY -> planar chroma: U at byte 0 and V at byte 2 of every 4-byte
 * U-Y0-V-Y1 group. Both source pointers must be the same line. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/* Reduce big-endian 16-bit chroma samples to 8 bits by taking the high
 * byte (the first byte) of each sample from the U and V input planes. */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[n << 1];
        dstV[n] = src2[n << 1];
    }
}
1649 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1650 const uint8_t *src, int width)
1653 for (i = 0; i < width; i++) {
1654 dst1[i] = src[2*i+0];
1655 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V -> U lands in dstU, V in dstV. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U -> swap the destination planes. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
1673 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1675 // FIXME Maybe dither instead.
1676 static av_always_inline void
1677 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1678 const uint8_t *_srcU, const uint8_t *_srcV,
1679 int width, enum PixelFormat origin, int depth)
1682 const uint16_t *srcU = (const uint16_t *) _srcU;
1683 const uint16_t *srcV = (const uint16_t *) _srcV;
1685 for (i = 0; i < width; i++) {
1686 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1687 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1691 static av_always_inline void
1692 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1693 int width, enum PixelFormat origin, int depth)
1696 const uint16_t *srcY = (const uint16_t*)_srcY;
1698 for (i = 0; i < width; i++)
1699 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
/* Instantiate {LE,BE}{9,10}To{Y,UV}_c wrappers: 8-bit output by dropping
 * the low bits of 9/10-bit planar samples (no dithering, see FIXME above). */
#define YUV_NBPS(depth, BE_LE, origin) \
static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                     const uint8_t *srcU, const uint8_t *srcV, \
                                     int width, uint32_t *unused) \
    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
                                    int width, uint32_t *unused) \
    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \

YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
1722 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1723 int width, uint32_t *unused)
1726 for (i=0; i<width; i++) {
1731 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1735 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1736 const uint8_t *src2, int width, uint32_t *unused)
1739 for (i=0; i<width; i++) {
1740 int b= src1[3*i + 0];
1741 int g= src1[3*i + 1];
1742 int r= src1[3*i + 2];
1744 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1745 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1747 assert(src1 == src2);
1750 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1751 const uint8_t *src2, int width, uint32_t *unused)
1754 for (i=0; i<width; i++) {
1755 int b= src1[6*i + 0] + src1[6*i + 3];
1756 int g= src1[6*i + 1] + src1[6*i + 4];
1757 int r= src1[6*i + 2] + src1[6*i + 5];
1759 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1760 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1762 assert(src1 == src2);
1765 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1769 for (i=0; i<width; i++) {
1774 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1778 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1779 const uint8_t *src2, int width, uint32_t *unused)
1783 for (i=0; i<width; i++) {
1784 int r= src1[3*i + 0];
1785 int g= src1[3*i + 1];
1786 int b= src1[3*i + 2];
1788 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1789 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1793 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1794 const uint8_t *src2, int width, uint32_t *unused)
1798 for (i=0; i<width; i++) {
1799 int r= src1[6*i + 0] + src1[6*i + 3];
1800 int g= src1[6*i + 1] + src1[6*i + 4];
1801 int b= src1[6*i + 2] + src1[6*i + 5];
1803 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1804 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
// bilinear / bicubic scaling
/* Generic horizontal scaler: for each output sample, convolve filterSize
 * 8-bit source samples starting at filterPos[i] with 16-bit filter taps,
 * then clip the >>7 result into the positive 15-bit intermediate range. */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
1830 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to JPEG full range in place, working on
 * the 15-bit intermediate representation. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int u = dstU[n] < 30775 ? dstU[n] : 30775;  /* clamp to avoid overflow */
        int v = dstV[n] < 30775 ? dstV[n] : 30775;
        dstU[n] = (u * 4663 - 9289992) >> 12;       //-264
        dstV[n] = (v * 4663 - 9289992) >> 12;       //-264
    }
}
/* Compress JPEG full-range chroma to limited (MPEG) range in place, working
 * on the 15-bit intermediate representation. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n = 0;
    while (n < width) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11;  //1469
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11;  //1469
        n++;
    }
}
/* Expand limited-range (MPEG) luma to JPEG full range in place, working on
 * the 15-bit intermediate representation. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;
    for (n = 0; n < width; n++) {
        int y = dst[n] < 30189 ? dst[n] : 30189;  /* clamp to avoid overflow */
        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress JPEG full-range luma to limited (MPEG) range in place, working on
 * the 15-bit intermediate representation. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p   = dst;
    int16_t *end = dst + width;
    while (p < end) {
        *p = (*p * 14071 + 33561947) >> 14;
        p++;
    }
}
1860 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1861 const uint8_t *src, int srcW, int xInc)
1864 unsigned int xpos=0;
1865 for (i=0;i<dstWidth;i++) {
1866 register unsigned int xx=xpos>>16;
1867 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1868 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) line: first convert
 * the input to planar 8-bit via the per-format toYV12 callback when one is
 * set, then filter (or fast-bilinear) it, then apply the optional range
 * conversion (luma only -- alpha passes NULL). */
static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        convertRange(dst, dstWidth);
1899 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1900 int dstWidth, const uint8_t *src1,
1901 const uint8_t *src2, int srcW, int xInc)
1904 unsigned int xpos=0;
1905 for (i=0;i<dstWidth;i++) {
1906 register unsigned int xx=xpos>>16;
1907 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1908 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1909 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one chroma line pair (U and V): convert the input to
 * two planar 8-bit buffers via chrToYV12 when the source format needs it,
 * then filter (or fast-bilinear) both planes and apply the optional
 * chroma range conversion. */
static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16); /* V plane after U */
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    /* e.g. MPEG <-> JPEG level conversion */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions (planar 1-tap / X-tap, packed 1/2/X-tap)
 * matching c->dstFormat. Also called late in swScale() to restore the C
 * versions for the last output lines (see the MMX tail-overwrite note). */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    /* planar output selection */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;
    /* packed output selection */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        *yuv2packedX = yuv2rgbX_c_full;
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
        case PIX_FMT_RGB48LE:
            /* little-endian 48-bit RGB output not implemented yet */
            //*yuv2packed1 = yuv2rgb48le_1_c;
            //*yuv2packed2 = yuv2rgb48le_2_c;
            //*yuv2packedX = yuv2rgb48le_X_c;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
        case PIX_FMT_BGR48LE:
            /* little-endian 48-bit BGR output not implemented yet */
            //*yuv2packed1 = yuv2bgr48le_1_c;
            //*yuv2packed2 = yuv2bgr48le_2_c;
            //*yuv2packedX = yuv2bgr48le_X_c;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            /* default: generic macro-dispatched packed output */
            *yuv2packed1 = yuv2packed1_c;
            *yuv2packed2 = yuv2packed2_c;
            *yuv2packedX = yuv2packedX_c;
2023 #define DEBUG_SWSCALE_BUFFERS 0
2024 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * The main scaling loop.
 * For each output line: horizontally scale (converting to planar YUV on the
 * fly) just enough new input lines into the luma/chroma ring buffers, then
 * vertically filter/convert the buffered lines into the destination. Input
 * may arrive in slices; the ring-buffer state persists in the context.
 * @return number of destination lines written for this slice (dstY - lastDstY)
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); /* ceil division */
    uint32_t *pal=c->pal_yuv;
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* only buffer what this slice provides; output happens later */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);

        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,

            /* ring-buffer pointers for the vertical filter input window */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrUBuf= chrUSrcPtr[0];
                    const int16_t *chrVBuf= chrVSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                             uDest, vDest, aDest, dstW, chrDstW);
                } else { //General YV12
                        vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                        chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);

                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha= vChrFilter[2*dstY+1];
                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest, dstW, chrAlpha, dstFormat, flags, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                *chrVSrcPtr, *(chrVSrcPtr+1),
                                alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                dest, dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, dstW, dstY);

    /* destination wants an alpha plane but the source has none: opaque fill */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/* Initialize the pure-C (unaccelerated) scaler entry points in the SwsContext:
 * the vertical output functions, the horizontal scaler, the optional
 * fast-bilinear paths, and the per-source-pixel-format input converters that
 * unpack luma, chroma and alpha into the internal planar (YV12-like) buffers.
 * Assembly-optimized backends (MMX/AltiVec) may later override these pointers.
 * NOTE(review): this chunk is an extract — several structural lines (switch
 * headers, else branches, closing braces) are missing from the visible text;
 * the leading numbers on each line are extraction artifacts, not code. */
2285 static av_cold void sws_init_swScale_c(SwsContext *c)
2287 enum PixelFormat srcFormat = c->srcFormat;
/* Select the C vertical-scale/output functions (planar and packed variants)
 * for the destination format. */
2289 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2290 &c->yuv2packed1, &c->yuv2packed2,
/* Default C horizontal scaler; replaced below by the approximate
 * fast-bilinear scalers when SWS_FAST_BILINEAR is requested. */
2293 c->hScale = hScale_c;
2295 if (c->flags & SWS_FAST_BILINEAR) {
2296 c->hyscale_fast = hyscale_fast_c;
2297 c->hcscale_fast = hcscale_fast_c;
/* --- chroma (U/V) input converter selection, keyed on srcFormat ---
 * NULL means the source chroma is already in the planar layout the
 * scaler expects and needs no unpacking. */
2300 c->chrToYV12 = NULL;
2302 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2303 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2304 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2305 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Paletted / 4-bit formats go through the palette-lookup converter. */
2309 case PIX_FMT_BGR4_BYTE:
2310 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* High-bit-depth planar YUV: pick the converter matching the sample
 * depth (9/10/16 bit) and endianness (BE/LE). */
2311 case PIX_FMT_YUV444P9BE:
2312 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
2313 case PIX_FMT_YUV444P9LE:
2314 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
2315 case PIX_FMT_YUV444P10BE:
2316 case PIX_FMT_YUV422P10BE:
2317 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
2318 case PIX_FMT_YUV422P10LE:
2319 case PIX_FMT_YUV444P10LE:
2320 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
2321 case PIX_FMT_YUV420P16BE:
2322 case PIX_FMT_YUV422P16BE:
2323 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
2324 case PIX_FMT_YUV420P16LE:
2325 case PIX_FMT_YUV422P16LE:
2326 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB-family sources: when the destination chroma is horizontally
 * subsampled, use the *_half_c converters, which average two source
 * pixels per output chroma sample; otherwise use the full-resolution
 * variants below.  Note the deliberate RGB<->BGR swap in the function
 * names (e.g. PIX_FMT_RGB32 -> bgr32ToUV_half_c): the converters are
 * named after the in-memory byte order — presumably; confirm against
 * the converter definitions elsewhere in this file. */
2328 if (c->chrSrcHSubSample) {
2330 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2331 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2332 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2333 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2334 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2335 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2336 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2337 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2338 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2339 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2340 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2341 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2342 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2343 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2344 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2345 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2346 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2347 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Full-resolution RGB->UV converters (no horizontal chroma averaging);
 * mirrors the *_half_c table above entry for entry. */
2351 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2352 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2353 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2354 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2355 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2356 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2357 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2358 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2359 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2360 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2361 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2362 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2363 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2364 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2365 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2366 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2367 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2368 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* --- luma (Y) and alpha input converter selection --- */
2372 c->lumToYV12 = NULL;
2373 c->alpToYV12 = NULL;
2374 switch (srcFormat) {
2375 case PIX_FMT_YUV444P9BE:
2376 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
2377 case PIX_FMT_YUV444P9LE:
2378 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
2379 case PIX_FMT_YUV444P10BE:
2380 case PIX_FMT_YUV422P10BE:
2381 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
2382 case PIX_FMT_YUV444P10LE:
2383 case PIX_FMT_YUV422P10LE:
2384 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
/* yuy2ToY_c takes every other byte starting at offset 0, which makes it
 * reusable for YUYV luma, big-endian 16-bit planar/gray luma, and the
 * luma byte of Y400A; uyvyToY_c is the offset-1 counterpart used for
 * the little-endian / UYVY group below — presumably; confirm against
 * the converter implementations. */
2385 case PIX_FMT_YUYV422 :
2386 case PIX_FMT_YUV420P16BE:
2387 case PIX_FMT_YUV422P16BE:
2388 case PIX_FMT_YUV444P16BE:
2389 case PIX_FMT_Y400A :
2390 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
2391 case PIX_FMT_UYVY422 :
2392 case PIX_FMT_YUV420P16LE:
2393 case PIX_FMT_YUV422P16LE:
2394 case PIX_FMT_YUV444P16LE:
2395 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
2396 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2397 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2398 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2399 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2400 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2401 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2402 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2403 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2404 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2405 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2409 case PIX_FMT_BGR4_BYTE:
2410 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2411 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2412 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
/* Same name-vs-format swap as in the chroma tables above. */
2413 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2414 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2415 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2416 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2417 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2418 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2419 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2420 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha-plane extraction for formats that carry alpha.  Y400A reuses
 * uyvyToY_c because its alpha occupies the odd bytes, the same layout
 * that converter reads — NOTE(review): confirm against uyvyToY_c. */
2423 switch (srcFormat) {
2425 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2427 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2428 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* --- full/limited (JPEG/MPEG) range conversion ---
 * Only needed when source and destination ranges differ and the output
 * is YUV (RGB output handles range inside the yuv2rgb tables). */
2432 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2434 c->lumConvertRange = lumRangeFromJpeg_c;
2435 c->chrConvertRange = chrRangeFromJpeg_c;
2437 c->lumConvertRange = lumRangeToJpeg_c;
2438 c->chrConvertRange = chrRangeToJpeg_c;
/* Horizontal chroma scaling is unnecessary only when neither side has
 * chroma at all (gray or 1-bit monochrome sources/destinations). */
2442 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2443 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2444 c->needs_hcscale = 1;
2447 SwsFunc ff_getSwsFunc(SwsContext *c)
2449 sws_init_swScale_c(c);
2452 ff_sws_init_swScale_mmx(c);
2454 ff_sws_init_swScale_altivec(c);