2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in Q15 fixed point (RGB2YUV_SHIFT).
 * The 0.299/0.587/0.114 luma and 0.500/0.419/... chroma weights are the
 * BT.601 matrix; luma terms are scaled by 219/255 (limited range 16..235)
 * and chroma terms by 224/255 (16..240). +0.5 rounds to nearest. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* 2x2 ordered-dither matrix, amplitude 4; each row is repeated 4x so a
 * full 8-byte row can be loaded at once. NOTE(review): the closing "};"
 * is not visible in this view of the file. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither matrix, amplitude 8 (used below for the 5-bit
 * red/blue components of 15/16-bit RGB output). */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither matrix, amplitude 16 (used for 4-bit-per-component
 * RGB444/BGR444 output in yuv2rgb_write()). */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix, amplitude 32 (used for the 3-bit r/g
 * components of 8-bit RGB332-style output in yuv2rgb_write()). */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither matrix, amplitude 73 (used for the 2-bit blue
 * component of RGB8 and for green in RGB4 output in yuv2rgb_write()). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither matrix, amplitude 220 (used for 1-bit mono output
 * and for the r/b components of RGB4/RGB8 in yuv2rgb_write()).
 * NOTE(review): four same-named definitions of dither_8x8_220 follow —
 * a linear one and three gamma-compensated variants (1.5 / 2.0 / 2.5).
 * In the upstream source these are alternates selected by preprocessor
 * conditionals; the #if/#elif/#endif guard lines are not visible in this
 * view — confirm the guards are intact before touching this region. */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* Vertical scaler for planar YUV output with more than 8 bits per sample.
 * Convolves lumFilterSize / chrFilterSize source rows (32-bit intermediates)
 * with the given filters and writes 16-bit samples, big- or little-endian
 * per big_endian, clipped to output_bits. dest[0..3] are the Y/U/V/A planes;
 * alpha is written only when compiled with CONFIG_SWSCALE_ALPHA and dest[3]
 * is non-NULL. NOTE(review): several interior lines (loop-variable
 * declarations, closing braces, the big_endian branch of output_pixel)
 * appear elided from this view. */
186 static av_always_inline void
187 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
188 int lumFilterSize, const int16_t *chrFilter,
189 const int32_t **chrUSrc, const int32_t **chrVSrc,
190 int chrFilterSize, const int32_t **alpSrc,
191 uint16_t *dest[4], int dstW, int chrDstW,
192 int big_endian, int output_bits)
194 //FIXME Optimize (just quickly written not optimized..)
196 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
197 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* Accumulators carry 15 (filter) + 16 (sample) fractional bits; this
 * shift brings them down to output_bits. */
198 int shift = 15 + 16 - output_bits;
200 #define output_pixel(pos, val) \
202 if (output_bits == 16) { \
203 AV_WB16(pos, av_clip_uint16(val >> shift)); \
205 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
208 if (output_bits == 16) { \
209 AV_WL16(pos, av_clip_uint16(val >> shift)); \
211 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Luma plane: initial value is the rounding constant for the shift. */
214 for (i = 0; i < dstW; i++) {
215 int val = 1 << (30-output_bits);
218 for (j = 0; j < lumFilterSize; j++)
219 val += lumSrc[j][i] * lumFilter[j];
221 output_pixel(&yDest[i], val);
/* Chroma planes (half width for subsampled formats). */
225 for (i = 0; i < chrDstW; i++) {
226 int u = 1 << (30-output_bits);
227 int v = 1 << (30-output_bits);
230 for (j = 0; j < chrFilterSize; j++) {
231 u += chrUSrc[j][i] * chrFilter[j];
232 v += chrVSrc[j][i] * chrFilter[j];
235 output_pixel(&uDest[i], u);
236 output_pixel(&vDest[i], v);
/* Alpha plane shares the luma filter. */
240 if (CONFIG_SWSCALE_ALPHA && aDest) {
241 for (i = 0; i < dstW; i++) {
242 int val = 1 << (30-output_bits);
245 for (j = 0; j < lumFilterSize; j++)
246 val += alpSrc[j][i] * lumFilter[j];
248 output_pixel(&aDest[i], val);
/* Instantiates a yuv2yuvX<bits><BE|LE>_c wrapper with the generic
 * SwsContext vertical-scaler signature. The int16_t** source arrays are
 * reinterpreted as int32_t** because >8-bit intermediates are stored as
 * 32-bit values; endianness and depth are compile-time constants so the
 * template specializes fully. */
254 #define yuv2NBPS(bits, BE_LE, is_be) \
255 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
256 const int16_t **_lumSrc, int lumFilterSize, \
257 const int16_t *chrFilter, const int16_t **_chrUSrc, \
258 const int16_t **_chrVSrc, \
259 int chrFilterSize, const int16_t **_alpSrc, \
260 uint8_t *_dest[4], int dstW, int chrDstW) \
262 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
263 **chrUSrc = (const int32_t **) _chrUSrc, \
264 **chrVSrc = (const int32_t **) _chrVSrc, \
265 **alpSrc = (const int32_t **) _alpSrc, \
266 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
267 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
268 alpSrc, (uint16_t **) _dest, \
269 dstW, chrDstW, is_be, bits); \
/* Vertical scaler for 8-bit planar YUV(A) output: convolves the source
 * rows with the lum/chr filters (Q12 coefficients on 15-bit samples,
 * hence the >>19) and clips each result to 8 bits. Alpha is produced
 * with the luma filter when dest[3] is available. NOTE(review): the
 * accumulator initializations (rounding constants) and closing braces
 * are elided from this view. */
278 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
279 const int16_t **lumSrc, int lumFilterSize,
280 const int16_t *chrFilter, const int16_t **chrUSrc,
281 const int16_t **chrVSrc,
282 int chrFilterSize, const int16_t **alpSrc,
283 uint8_t *dest[4], int dstW, int chrDstW)
285 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
286 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
289 //FIXME Optimize (just quickly written not optimized..)
290 for (i=0; i<dstW; i++) {
293 for (j=0; j<lumFilterSize; j++)
294 val += lumSrc[j][i] * lumFilter[j];
296 yDest[i]= av_clip_uint8(val>>19);
300 for (i=0; i<chrDstW; i++) {
304 for (j=0; j<chrFilterSize; j++) {
305 u += chrUSrc[j][i] * chrFilter[j];
306 v += chrVSrc[j][i] * chrFilter[j];
309 uDest[i]= av_clip_uint8(u>>19);
310 vDest[i]= av_clip_uint8(v>>19);
313 if (CONFIG_SWSCALE_ALPHA && aDest)
314 for (i=0; i<dstW; i++) {
317 for (j=0; j<lumFilterSize; j++)
318 val += alpSrc[j][i] * lumFilter[j];
320 aDest[i]= av_clip_uint8(val>>19);
/* Unfiltered (1-tap) vertical output for 8-bit planar YUV(A): each 15-bit
 * intermediate sample is rounded (+64) and shifted down by 7 to 8 bits,
 * then clipped. Used when no vertical filtering is needed. */
324 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
325 const int16_t *chrUSrc, const int16_t *chrVSrc,
326 const int16_t *alpSrc,
327 uint8_t *dest[4], int dstW, int chrDstW)
329 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
330 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
333 for (i=0; i<dstW; i++) {
334 int val= (lumSrc[i]+64)>>7;
335 yDest[i]= av_clip_uint8(val);
339 for (i=0; i<chrDstW; i++) {
340 int u=(chrUSrc[i]+64)>>7;
341 int v=(chrVSrc[i]+64)>>7;
342 uDest[i]= av_clip_uint8(u);
343 vDest[i]= av_clip_uint8(v);
346 if (CONFIG_SWSCALE_ALPHA && aDest)
347 for (i=0; i<dstW; i++) {
348 int val= (alpSrc[i]+64)>>7;
349 aDest[i]= av_clip_uint8(val);
/* Vertical scaler for NV12/NV21 output: planar 8-bit luma plus one
 * interleaved chroma plane. For NV12 the interleave order is U,V; for
 * NV21 (the else branch) it is V,U. Same Q12-filter/>>19 arithmetic as
 * yuv2yuvX_c. */
353 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
354 const int16_t **lumSrc, int lumFilterSize,
355 const int16_t *chrFilter, const int16_t **chrUSrc,
356 const int16_t **chrVSrc, int chrFilterSize,
357 const int16_t **alpSrc, uint8_t *dest[4],
358 int dstW, int chrDstW)
360 uint8_t *yDest = dest[0], *uDest = dest[1];
361 enum PixelFormat dstFormat = c->dstFormat;
363 //FIXME Optimize (just quickly written not optimized..)
365 for (i=0; i<dstW; i++) {
368 for (j=0; j<lumFilterSize; j++)
369 val += lumSrc[j][i] * lumFilter[j];
371 yDest[i]= av_clip_uint8(val>>19);
/* NV12: chroma stored as U,V pairs. */
377 if (dstFormat == PIX_FMT_NV12)
378 for (i=0; i<chrDstW; i++) {
382 for (j=0; j<chrFilterSize; j++) {
383 u += chrUSrc[j][i] * chrFilter[j];
384 v += chrVSrc[j][i] * chrFilter[j];
387 uDest[2*i]= av_clip_uint8(u>>19);
388 uDest[2*i+1]= av_clip_uint8(v>>19);
/* NV21: swapped interleave, V first. */
391 for (i=0; i<chrDstW; i++) {
395 for (j=0; j<chrFilterSize; j++) {
396 u += chrUSrc[j][i] * chrFilter[j];
397 v += chrVSrc[j][i] * chrFilter[j];
400 uDest[2*i]= av_clip_uint8(v>>19);
401 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel: store one 16-bit gray sample, byte-swapped for the BE
 * target. NOTE(review): the LE branch of this macro is elided from this
 * view of the file. */
405 #define output_pixel(pos, val) \
406 if (target == PIX_FMT_GRAY16BE) { \
/* Filtered vertical scaler for 16-bit grayscale output; processes two
 * luma pixels per iteration (chroma/alpha inputs are unused). Results
 * are clipped to 16 bits only when they actually overflowed (the
 * (Y1|Y2)&0x10000 test). */
412 static av_always_inline void
413 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
414 const int32_t **lumSrc, int lumFilterSize,
415 const int16_t *chrFilter, const int32_t **chrUSrc,
416 const int32_t **chrVSrc, int chrFilterSize,
417 const int32_t **alpSrc, uint16_t *dest, int dstW,
418 int y, enum PixelFormat target)
422 for (i = 0; i < (dstW >> 1); i++) {
427 for (j = 0; j < lumFilterSize; j++) {
428 Y1 += lumSrc[j][i * 2] * lumFilter[j];
429 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
433 if ((Y1 | Y2) & 0x10000) {
434 Y1 = av_clip_uint16(Y1);
435 Y2 = av_clip_uint16(Y2);
437 output_pixel(&dest[i * 2 + 0], Y1);
438 output_pixel(&dest[i * 2 + 1], Y2);
/* 2-tap (bilinear) vertical blend for 16-bit grayscale: interpolates
 * between two source rows with yalpha/4096 weighting; >>15 removes the
 * 12-bit weight plus 3 extra fractional bits. */
442 static av_always_inline void
443 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
444 const int32_t *ubuf[2], const int32_t *vbuf[2],
445 const int32_t *abuf[2], uint16_t *dest, int dstW,
446 int yalpha, int uvalpha, int y,
447 enum PixelFormat target)
449 int yalpha1 = 4095 - yalpha;
451 const int32_t *buf0 = buf[0], *buf1 = buf[1];
453 for (i = 0; i < (dstW >> 1); i++) {
454 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
455 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
457 output_pixel(&dest[i * 2 + 0], Y1);
458 output_pixel(&dest[i * 2 + 1], Y2);
/* 1-tap (no vertical filtering) output for 16-bit grayscale: the <<1
 * rescales the intermediate samples to full 16-bit range. */
462 static av_always_inline void
463 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
464 const int32_t *ubuf[2], const int32_t *vbuf[2],
465 const int32_t *abuf0, uint16_t *dest, int dstW,
466 int uvalpha, int y, enum PixelFormat target)
470 for (i = 0; i < (dstW >> 1); i++) {
471 int Y1 = buf0[i * 2 ] << 1;
472 int Y2 = buf0[i * 2 + 1] << 1;
474 output_pixel(&dest[i * 2 + 0], Y1);
475 output_pixel(&dest[i * 2 + 1], Y2);
/* Instantiates the three public entry points (_X_c: N-tap filter,
 * _2_c: bilinear, _1_c: unfiltered) for a 16-bit packed output format,
 * delegating to <name><base>_{X,2,1}_c_template with the pixel format
 * baked in. The int16_t* buffers are reinterpreted as int32_t* because
 * >8-bit pipelines store intermediates as 32-bit values. */
481 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
482 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
483 const int16_t **_lumSrc, int lumFilterSize, \
484 const int16_t *chrFilter, const int16_t **_chrUSrc, \
485 const int16_t **_chrVSrc, int chrFilterSize, \
486 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
489 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
490 **chrUSrc = (const int32_t **) _chrUSrc, \
491 **chrVSrc = (const int32_t **) _chrVSrc, \
492 **alpSrc = (const int32_t **) _alpSrc; \
493 uint16_t *dest = (uint16_t *) _dest; \
494 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
495 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
496 alpSrc, dest, dstW, y, fmt); \
499 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
500 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
501 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
502 int yalpha, int uvalpha, int y) \
504 const int32_t **buf = (const int32_t **) _buf, \
505 **ubuf = (const int32_t **) _ubuf, \
506 **vbuf = (const int32_t **) _vbuf, \
507 **abuf = (const int32_t **) _abuf; \
508 uint16_t *dest = (uint16_t *) _dest; \
509 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
510 dest, dstW, yalpha, uvalpha, y, fmt); \
513 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
514 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
515 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
516 int uvalpha, int y) \
518 const int32_t *buf0 = (const int32_t *) _buf0, \
519 **ubuf = (const int32_t **) _ubuf, \
520 **vbuf = (const int32_t **) _vbuf, \
521 *abuf0 = (const int32_t *) _abuf0; \
522 uint16_t *dest = (uint16_t *) _dest; \
523 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
524 dstW, uvalpha, y, fmt); \
/* Concrete gray16 output functions, one per endianness. */
527 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
528 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel: emit one packed byte of 8 mono pixels; for MONOBLACK
 * the accumulated bits are written as-is (the MONOWHITE branch, which
 * inverts them, is elided from this view). */
530 #define output_pixel(pos, acc) \
531 if (target == PIX_FMT_MONOBLACK) { \
/* Filtered vertical scaler for 1-bit monochrome output: luma is filtered
 * (Q12 weights, >>19), dithered with the 8x8/220 matrix, thresholded via
 * the g[] lookup table, and packed MSB-first into bytes. */
537 static av_always_inline void
538 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
539 const int16_t **lumSrc, int lumFilterSize,
540 const int16_t *chrFilter, const int16_t **chrUSrc,
541 const int16_t **chrVSrc, int chrFilterSize,
542 const int16_t **alpSrc, uint8_t *dest, int dstW,
543 int y, enum PixelFormat target)
545 const uint8_t * const d128=dither_8x8_220[y&7];
546 uint8_t *g = c->table_gU[128] + c->table_gV[128];
550 for (i = 0; i < dstW - 1; i += 2) {
555 for (j = 0; j < lumFilterSize; j++) {
556 Y1 += lumSrc[j][i] * lumFilter[j];
557 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* Clip only on overflow past 8 bits. */
561 if ((Y1 | Y2) & 0x100) {
562 Y1 = av_clip_uint8(Y1);
563 Y2 = av_clip_uint8(Y2);
/* acc += acc shifts the bit accumulator left by one before adding
 * the new pixel's bit. */
565 acc += acc + g[Y1 + d128[(i + 0) & 7]];
566 acc += acc + g[Y2 + d128[(i + 1) & 7]];
568 output_pixel(*dest++, acc);
/* Bilinear (2-tap) vertical blend for 1-bit monochrome: blends two rows
 * with yalpha/4096 weighting, dithers, and packs 8 pixels per output
 * byte (MSB first, via repeated acc += acc doubling). */
573 static av_always_inline void
574 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
575 const int16_t *ubuf[2], const int16_t *vbuf[2],
576 const int16_t *abuf[2], uint8_t *dest, int dstW,
577 int yalpha, int uvalpha, int y,
578 enum PixelFormat target)
580 const int16_t *buf0 = buf[0], *buf1 = buf[1];
581 const uint8_t * const d128 = dither_8x8_220[y & 7];
582 uint8_t *g = c->table_gU[128] + c->table_gV[128];
583 int yalpha1 = 4095 - yalpha;
586 for (i = 0; i < dstW - 7; i += 8) {
587 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
588 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
589 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
590 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
591 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
592 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
593 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
594 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
595 output_pixel(*dest++, acc);
/* Unfiltered (1-tap) output for 1-bit monochrome: samples are simply
 * scaled down (>>7), dithered and packed 8 per byte. */
599 static av_always_inline void
600 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
601 const int16_t *ubuf[2], const int16_t *vbuf[2],
602 const int16_t *abuf0, uint8_t *dest, int dstW,
603 int uvalpha, int y, enum PixelFormat target)
605 const uint8_t * const d128 = dither_8x8_220[y & 7];
606 uint8_t *g = c->table_gU[128] + c->table_gV[128];
609 for (i = 0; i < dstW - 7; i += 8) {
610 int acc = g[(buf0[i ] >> 7) + d128[0]];
611 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
612 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
613 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
614 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
615 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
616 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
617 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
618 output_pixel(*dest++, acc);
/* 8-bit analogue of YUV2PACKED16WRAPPER: instantiates the _X_c / _2_c /
 * _1_c entry points for a packed output format, forwarding directly to
 * the templates with the pixel format baked in (no pointer reinterpret
 * needed since intermediates stay 16-bit). */
624 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
625 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
626 const int16_t **lumSrc, int lumFilterSize, \
627 const int16_t *chrFilter, const int16_t **chrUSrc, \
628 const int16_t **chrVSrc, int chrFilterSize, \
629 const int16_t **alpSrc, uint8_t *dest, int dstW, \
632 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
633 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
634 alpSrc, dest, dstW, y, fmt); \
637 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
638 const int16_t *ubuf[2], const int16_t *vbuf[2], \
639 const int16_t *abuf[2], uint8_t *dest, int dstW, \
640 int yalpha, int uvalpha, int y) \
642 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
643 dest, dstW, yalpha, uvalpha, y, fmt); \
646 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
647 const int16_t *ubuf[2], const int16_t *vbuf[2], \
648 const int16_t *abuf0, uint8_t *dest, int dstW, \
649 int uvalpha, int y) \
651 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
652 abuf0, dest, dstW, uvalpha, \
/* Concrete monochrome output functions (white = 0 bits lit vs black). */
656 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
657 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels: store one 4:2:2 macropixel. YUYV order is Y1 U Y2 V
 * (Y at even offsets); UYVY (the else branch) is U Y1 V Y2. Some lines
 * of the macro are elided from this view. */
659 #define output_pixels(pos, Y1, U, Y2, V) \
660 if (target == PIX_FMT_YUYV422) { \
661 dest[pos + 0] = Y1; \
663 dest[pos + 2] = Y2; \
667 dest[pos + 1] = Y1; \
669 dest[pos + 3] = Y2; \
/* Filtered vertical scaler for packed 4:2:2 output: two luma and one
 * U/V pair per iteration, Q12 filter weights (>>19 elided with the
 * accumulator init lines in this view), clipped to 8 bits only when an
 * overflow bit is set. */
672 static av_always_inline void
673 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
674 const int16_t **lumSrc, int lumFilterSize,
675 const int16_t *chrFilter, const int16_t **chrUSrc,
676 const int16_t **chrVSrc, int chrFilterSize,
677 const int16_t **alpSrc, uint8_t *dest, int dstW,
678 int y, enum PixelFormat target)
682 for (i = 0; i < (dstW >> 1); i++) {
689 for (j = 0; j < lumFilterSize; j++) {
690 Y1 += lumSrc[j][i * 2] * lumFilter[j];
691 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
693 for (j = 0; j < chrFilterSize; j++) {
694 U += chrUSrc[j][i] * chrFilter[j];
695 V += chrVSrc[j][i] * chrFilter[j];
701 if ((Y1 | Y2 | U | V) & 0x100) {
702 Y1 = av_clip_uint8(Y1);
703 Y2 = av_clip_uint8(Y2);
704 U = av_clip_uint8(U);
705 V = av_clip_uint8(V);
707 output_pixels(4*i, Y1, U, Y2, V);
/* Bilinear (2-tap) vertical blend for packed 4:2:2: interpolates luma
 * and chroma between two rows with yalpha/uvalpha out of 4096; >>19
 * removes the 12-bit weight and reduces to 8 bits. */
711 static av_always_inline void
712 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
713 const int16_t *ubuf[2], const int16_t *vbuf[2],
714 const int16_t *abuf[2], uint8_t *dest, int dstW,
715 int yalpha, int uvalpha, int y,
716 enum PixelFormat target)
718 const int16_t *buf0 = buf[0], *buf1 = buf[1],
719 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
720 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
721 int yalpha1 = 4095 - yalpha;
722 int uvalpha1 = 4095 - uvalpha;
725 for (i = 0; i < (dstW >> 1); i++) {
726 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
727 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
728 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
729 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
731 output_pixels(i * 4, Y1, U, Y2, V);
/* 1-tap output for packed 4:2:2. When uvalpha < 2048 the chroma comes
 * from a single row; otherwise the two chroma rows are averaged
 * ((a+b)>>8 == ((a+b)/2)>>7). NOTE(review): the single-row branch reads
 * ubuf1/vbuf1 while the averaging branch uses ubuf0+ubuf1 — which row
 * is "current" is not visible here; confirm against the caller. */
735 static av_always_inline void
736 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
737 const int16_t *ubuf[2], const int16_t *vbuf[2],
738 const int16_t *abuf0, uint8_t *dest, int dstW,
739 int uvalpha, int y, enum PixelFormat target)
741 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
742 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
745 if (uvalpha < 2048) {
746 for (i = 0; i < (dstW >> 1); i++) {
747 int Y1 = buf0[i * 2] >> 7;
748 int Y2 = buf0[i * 2 + 1] >> 7;
749 int U = ubuf1[i] >> 7;
750 int V = vbuf1[i] >> 7;
752 output_pixels(i * 4, Y1, U, Y2, V);
755 for (i = 0; i < (dstW >> 1); i++) {
756 int Y1 = buf0[i * 2] >> 7;
757 int Y2 = buf0[i * 2 + 1] >> 7;
758 int U = (ubuf0[i] + ubuf1[i]) >> 8;
759 int V = (vbuf0[i] + vbuf1[i]) >> 8;
761 output_pixels(i * 4, Y1, U, Y2, V);
/* Concrete packed-4:2:2 output functions (YUYV and UYVY byte orders). */
768 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
769 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B / B_R swap the red and blue lookups so one template serves both
 * RGB48 and BGR48 component orders; output_pixel stores one 16-bit
 * component with the target's endianness (LE branch elided here). */
771 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
772 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
773 #define output_pixel(pos, val) \
774 if (isBE(target)) { \
/* Filtered vertical scaler + YUV->RGB conversion for 48-bit RGB/BGR:
 * filters two luma and one chroma pair, applies the context's
 * fixed-point conversion coefficients, clips to 30 bits and scales
 * down to 16 bits per component (see the bit-depth bookkeeping in the
 * inline comments below). */
780 static av_always_inline void
781 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
782 const int32_t **lumSrc, int lumFilterSize,
783 const int16_t *chrFilter, const int32_t **chrUSrc,
784 const int32_t **chrVSrc, int chrFilterSize,
785 const int32_t **alpSrc, uint16_t *dest, int dstW,
786 int y, enum PixelFormat target)
790 for (i = 0; i < (dstW >> 1); i++) {
794 int U = -128 << 23; // 19
798 for (j = 0; j < lumFilterSize; j++) {
799 Y1 += lumSrc[j][i * 2] * lumFilter[j];
800 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
802 for (j = 0; j < chrFilterSize; j++) {
803 U += chrUSrc[j][i] * chrFilter[j];
804 V += chrVSrc[j][i] * chrFilter[j];
807 // 8bit: 12+15=27; 16-bit: 12+19=31
813 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
814 Y1 -= c->yuv2rgb_y_offset;
815 Y2 -= c->yuv2rgb_y_offset;
816 Y1 *= c->yuv2rgb_y_coeff;
817 Y2 *= c->yuv2rgb_y_coeff;
820 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
822 R = V * c->yuv2rgb_v2r_coeff;
823 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
824 B = U * c->yuv2rgb_u2b_coeff;
826 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
827 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
828 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
829 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
830 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
831 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
832 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Bilinear (2-tap) vertical blend + YUV->RGB for 48-bit RGB/BGR.
 * Chroma is re-centred by -128<<23 before the >>14; the per-component
 * clip/scale matches the _X_ variant above. */
837 static av_always_inline void
838 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
839 const int32_t *ubuf[2], const int32_t *vbuf[2],
840 const int32_t *abuf[2], uint16_t *dest, int dstW,
841 int yalpha, int uvalpha, int y,
842 enum PixelFormat target)
844 const int32_t *buf0 = buf[0], *buf1 = buf[1],
845 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
846 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
847 int yalpha1 = 4095 - yalpha;
848 int uvalpha1 = 4095 - uvalpha;
851 for (i = 0; i < (dstW >> 1); i++) {
852 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
853 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
854 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
855 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
858 Y1 -= c->yuv2rgb_y_offset;
859 Y2 -= c->yuv2rgb_y_offset;
860 Y1 *= c->yuv2rgb_y_coeff;
861 Y2 *= c->yuv2rgb_y_coeff;
865 R = V * c->yuv2rgb_v2r_coeff;
866 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
867 B = U * c->yuv2rgb_u2b_coeff;
869 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
870 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
871 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
872 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
873 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
874 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 1-tap output + YUV->RGB for 48-bit RGB/BGR. uvalpha < 2048 selects a
 * single chroma row (>>2 after re-centring by -128<<11); otherwise the
 * two chroma rows are averaged (>>3). The conversion / clip / scale
 * pipeline mirrors the _X_ and _2_ variants above. */
879 static av_always_inline void
880 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
881 const int32_t *ubuf[2], const int32_t *vbuf[2],
882 const int32_t *abuf0, uint16_t *dest, int dstW,
883 int uvalpha, int y, enum PixelFormat target)
885 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
886 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
889 if (uvalpha < 2048) {
890 for (i = 0; i < (dstW >> 1); i++) {
891 int Y1 = (buf0[i * 2] ) >> 2;
892 int Y2 = (buf0[i * 2 + 1]) >> 2;
893 int U = (ubuf0[i] + (-128 << 11)) >> 2;
894 int V = (vbuf0[i] + (-128 << 11)) >> 2;
897 Y1 -= c->yuv2rgb_y_offset;
898 Y2 -= c->yuv2rgb_y_offset;
899 Y1 *= c->yuv2rgb_y_coeff;
900 Y2 *= c->yuv2rgb_y_coeff;
904 R = V * c->yuv2rgb_v2r_coeff;
905 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
906 B = U * c->yuv2rgb_u2b_coeff;
908 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
909 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
910 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
911 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
912 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
913 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
917 for (i = 0; i < (dstW >> 1); i++) {
918 int Y1 = (buf0[i * 2] ) >> 2;
919 int Y2 = (buf0[i * 2 + 1]) >> 2;
920 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
921 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
924 Y1 -= c->yuv2rgb_y_offset;
925 Y2 -= c->yuv2rgb_y_offset;
926 Y1 *= c->yuv2rgb_y_coeff;
927 Y2 *= c->yuv2rgb_y_coeff;
931 R = V * c->yuv2rgb_v2r_coeff;
932 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2b_coeff;
933 B = U * c->yuv2rgb_u2b_coeff;
935 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
936 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
937 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
938 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
939 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
940 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Concrete 48-bit RGB/BGR output functions (both endiannesses). */
950 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
951 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
952 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
953 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Writes two horizontally adjacent RGB pixels to dest using the
 * context's precomputed per-component lookup tables _r/_g/_b (one table
 * entry per luma value; summing the three lookups yields the packed
 * pixel). The format branches select element width and layout:
 *   - 32-bit RGBA-family: 32-bit table entries, optional alpha merged
 *     in at a shift that depends on alpha-first vs alpha-last layouts;
 *   - RGB24/BGR24: three separate bytes per pixel, R/B lookups swapped
 *     via the r_b/b_r macros;
 *   - 16/15/12-bit: 16-bit entries with per-component ordered dithering
 *     (2x2 matrices for 565/555, 4x4 for 444);
 *   - 8/4-bit: byte entries with 8x8 dithering; RGB4/BGR4 packs two
 *     pixels per byte.
 * Since target is a compile-time constant in every instantiation, the
 * branches fold away. NOTE(review): several lines (hasAlpha sub-branch
 * structure, some closing braces) are elided from this view. */
955 static av_always_inline void
956 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
957 int U, int V, int A1, int A2,
958 const void *_r, const void *_g, const void *_b, int y,
959 enum PixelFormat target, int hasAlpha)
961 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
962 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
963 uint32_t *dest = (uint32_t *) _dest;
964 const uint32_t *r = (const uint32_t *) _r;
965 const uint32_t *g = (const uint32_t *) _g;
966 const uint32_t *b = (const uint32_t *) _b;
/* Alpha lands in byte 0 for the *_1 (alpha-first) layouts, byte 3
 * otherwise. */
969 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
971 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
972 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
975 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
977 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
978 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
980 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
981 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
984 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
985 uint8_t *dest = (uint8_t *) _dest;
986 const uint8_t *r = (const uint8_t *) _r;
987 const uint8_t *g = (const uint8_t *) _g;
988 const uint8_t *b = (const uint8_t *) _b;
990 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
991 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
992 dest[i * 6 + 0] = r_b[Y1];
993 dest[i * 6 + 1] = g[Y1];
994 dest[i * 6 + 2] = b_r[Y1];
995 dest[i * 6 + 3] = r_b[Y2];
996 dest[i * 6 + 4] = g[Y2];
997 dest[i * 6 + 5] = b_r[Y2];
1000 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1001 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1002 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1003 uint16_t *dest = (uint16_t *) _dest;
1004 const uint16_t *r = (const uint16_t *) _r;
1005 const uint16_t *g = (const uint16_t *) _g;
1006 const uint16_t *b = (const uint16_t *) _b;
1007 int dr1, dg1, db1, dr2, dg2, db2;
/* Pick per-component dither offsets; green gets the smaller-amplitude
 * matrix for 565 (6-bit green), blue uses the row-inverted matrix to
 * decorrelate it from red. */
1009 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1010 dr1 = dither_2x2_8[ y & 1 ][0];
1011 dg1 = dither_2x2_4[ y & 1 ][0];
1012 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1013 dr2 = dither_2x2_8[ y & 1 ][1];
1014 dg2 = dither_2x2_4[ y & 1 ][1];
1015 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1016 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1017 dr1 = dither_2x2_8[ y & 1 ][0];
1018 dg1 = dither_2x2_8[ y & 1 ][1];
1019 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1020 dr2 = dither_2x2_8[ y & 1 ][1];
1021 dg2 = dither_2x2_8[ y & 1 ][0];
1022 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1024 dr1 = dither_4x4_16[ y & 3 ][0];
1025 dg1 = dither_4x4_16[ y & 3 ][1];
1026 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1027 dr2 = dither_4x4_16[ y & 3 ][1];
1028 dg2 = dither_4x4_16[ y & 3 ][0];
1029 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1032 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1033 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1034 } else /* 8/4-bit */ {
1035 uint8_t *dest = (uint8_t *) _dest;
1036 const uint8_t *r = (const uint8_t *) _r;
1037 const uint8_t *g = (const uint8_t *) _g;
1038 const uint8_t *b = (const uint8_t *) _b;
1039 int dr1, dg1, db1, dr2, dg2, db2;
1041 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1042 const uint8_t * const d64 = dither_8x8_73[y & 7];
1043 const uint8_t * const d32 = dither_8x8_32[y & 7];
1044 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1045 db1 = d64[(i * 2 + 0) & 7];
1046 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1047 db2 = d64[(i * 2 + 1) & 7];
1049 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1050 const uint8_t * const d128 = dither_8x8_220[y & 7];
1051 dr1 = db1 = d128[(i * 2 + 0) & 7];
1052 dg1 = d64[(i * 2 + 0) & 7];
1053 dr2 = db2 = d128[(i * 2 + 1) & 7];
1054 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4: two pixels packed into one byte (second in high nibble). */
1057 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1058 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1059 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1061 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1062 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Filtered vertical scaler for table-driven 8-bit RGB output: filters
 * two luma, one U/V pair and (when hasAlpha) two alpha samples, clips
 * them to 8 bits only on overflow, looks up the per-component tables
 * and hands the pair to yuv2rgb_write(). NOTE(review): accumulator
 * initializations and some braces are elided from this view. */
1067 static av_always_inline void
1068 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1069 const int16_t **lumSrc, int lumFilterSize,
1070 const int16_t *chrFilter, const int16_t **chrUSrc,
1071 const int16_t **chrVSrc, int chrFilterSize,
1072 const int16_t **alpSrc, uint8_t *dest, int dstW,
1073 int y, enum PixelFormat target, int hasAlpha)
1077 for (i = 0; i < (dstW >> 1); i++) {
1083 int av_unused A1, A2;
1084 const void *r, *g, *b;
1086 for (j = 0; j < lumFilterSize; j++) {
1087 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1088 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1090 for (j = 0; j < chrFilterSize; j++) {
1091 U += chrUSrc[j][i] * chrFilter[j];
1092 V += chrVSrc[j][i] * chrFilter[j];
1098 if ((Y1 | Y2 | U | V) & 0x100) {
1099 Y1 = av_clip_uint8(Y1);
1100 Y2 = av_clip_uint8(Y2);
1101 U = av_clip_uint8(U);
1102 V = av_clip_uint8(V);
1107 for (j = 0; j < lumFilterSize; j++) {
1108 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1109 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1113 if ((A1 | A2) & 0x100) {
1114 A1 = av_clip_uint8(A1);
1115 A2 = av_clip_uint8(A2);
1119 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1121 g = (c->table_gU[U] + c->table_gV[V]);
1124 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1125 r, g, b, y, target, hasAlpha);
/*
 * Bilinear (2-line) vertical blend + packed RGB output.
 * Blends buf0/buf1 (and ubuf/vbuf/abuf pairs) with 12-bit weights
 * yalpha/uvalpha (yalpha1 = 4095 - yalpha), >>19 brings the product back
 * to 8 bit; no clipping is performed on this path.
 */
1129 static av_always_inline void
1130 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1131                      const int16_t *ubuf[2], const int16_t *vbuf[2],
1132                      const int16_t *abuf[2], uint8_t *dest, int dstW,
1133                      int yalpha, int uvalpha, int y,
1134                      enum PixelFormat target, int hasAlpha)
1136     const int16_t *buf0 = buf[0], *buf1 = buf[1],
1137                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1139                   *abuf0 = hasAlpha ? abuf[0] : NULL,
1140                   *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights: out = (a*w1 + b*w) / 4096, then >>7 to 8 bit */
1141     int yalpha1 = 4095 - yalpha;
1142     int uvalpha1 = 4095 - uvalpha;
1145     for (i = 0; i < (dstW >> 1); i++) {
1146         int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1147         int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1148         int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1149         int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1151         const void *r = c->table_rV[V],
1152                    *g = (c->table_gU[U] + c->table_gV[V]),
1153                    *b = c->table_bU[U];
1156             A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1157             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1160         yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1161                       r, g, b, y, target, hasAlpha);
/*
 * Single-source-line (no vertical blend) packed RGB output.
 * uvalpha < 2048: take one chroma line directly; otherwise average the
 * two chroma lines ((a+b)>>8 combines the average with the >>7 rescale).
 */
1165 static av_always_inline void
1166 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1167                      const int16_t *ubuf[2], const int16_t *vbuf[2],
1168                      const int16_t *abuf0, uint8_t *dest, int dstW,
1169                      int uvalpha, int y, enum PixelFormat target,
1172     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1173                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1176     if (uvalpha < 2048) {
1177         for (i = 0; i < (dstW >> 1); i++) {
1178             int Y1 = buf0[i * 2] >> 7;
1179             int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): this path reads ubuf1/vbuf1 while the blended path below
 * mixes buf 0 and 1; upstream later changed these to ubuf0/vbuf0 —
 * looks like the wrong chroma buffer is used here, confirm against HEAD. */
1180             int U = ubuf1[i] >> 7;
1181             int V = vbuf1[i] >> 7;
1183             const void *r = c->table_rV[V],
1184                        *g = (c->table_gU[U] + c->table_gV[V]),
1185                        *b = c->table_bU[U];
1188                 A1 = abuf0[i * 2 ] >> 7;
1189                 A2 = abuf0[i * 2 + 1] >> 7;
1192             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1193                           r, g, b, y, target, hasAlpha);
/* chroma lines averaged (uvalpha ~0.5): (u0+u1)>>8 == ((u0+u1)/2)>>7 */
1196         for (i = 0; i < (dstW >> 1); i++) {
1197             int Y1 = buf0[i * 2] >> 7;
1198             int Y2 = buf0[i * 2 + 1] >> 7;
1199             int U = (ubuf0[i] + ubuf1[i]) >> 8;
1200             int V = (vbuf0[i] + vbuf1[i]) >> 8;
1202             const void *r = c->table_rV[V],
1203                        *g = (c->table_gU[U] + c->table_gV[V]),
1204                        *b = c->table_bU[U];
1207                 A1 = abuf0[i * 2 ] >> 7;
1208                 A2 = abuf0[i * 2 + 1] >> 7;
1211             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1212                           r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: stamp out non-inline _X/_2/_1 entry points that
 * call the av_always_inline templates with a compile-time 'fmt' and
 * 'hasAlpha', so each pixel format gets fully specialized code.
 */
1217 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1218 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1219                                 const int16_t **lumSrc, int lumFilterSize, \
1220                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
1221                                 const int16_t **chrVSrc, int chrFilterSize, \
1222                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1225     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1226                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1227                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
1229 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1230 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1231 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1232                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1233                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1234                                 int yalpha, int uvalpha, int y) \
1236     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1237                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1240 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1241                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1242                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
1243                                 int uvalpha, int y) \
1245     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1246                                   dstW, uvalpha, y, fmt, hasAlpha); \
/* instantiations; the "hasAlpha" expression is evaluated inside the
 * generated functions, so the runtime alpPixBuf check is legal here */
1250 YUV2RGBWRAPPER(yuv2rgb,, 32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1251 YUV2RGBWRAPPER(yuv2rgb,, 32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1253 #if CONFIG_SWSCALE_ALPHA
1254 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1,   1);
1255 YUV2RGBWRAPPER(yuv2rgb,, a32,   PIX_FMT_RGB32,     1);
1257 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1,   0);
1258 YUV2RGBWRAPPER(yuv2rgb,, x32,   PIX_FMT_RGB32,     0);
1260 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
1261 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
1262 YUV2RGBWRAPPER(yuv2rgb,, 16,    PIX_FMT_RGB565,    0);
1263 YUV2RGBWRAPPER(yuv2rgb,, 15,    PIX_FMT_RGB555,    0);
1264 YUV2RGBWRAPPER(yuv2rgb,, 12,    PIX_FMT_RGB444,    0);
1265 YUV2RGBWRAPPER(yuv2rgb,, 8,     PIX_FMT_RGB8,      0);
1266 YUV2RGBWRAPPER(yuv2rgb,, 4,     PIX_FMT_RGB4,      0);
1267 YUV2RGBWRAPPER(yuv2rgb,, 4b,    PIX_FMT_RGB4_BYTE, 0);
/*
 * Full-chroma-resolution RGB output: one chroma sample per output pixel
 * (no 2-pixel pairing), computed arithmetically from the per-context
 * yuv2rgb coefficients instead of LUTs. 'step' is the bytes-per-pixel
 * stride of the packed destination (3 for 24-bit, else 4).
 */
1269 static av_always_inline void
1270 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1271                           const int16_t **lumSrc, int lumFilterSize,
1272                           const int16_t *chrFilter, const int16_t **chrUSrc,
1273                           const int16_t **chrVSrc, int chrFilterSize,
1274                           const int16_t **alpSrc, uint8_t *dest,
1275                           int dstW, int y, enum PixelFormat target, int hasAlpha)
1278     int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1280     for (i = 0; i < dstW; i++) {
1288         for (j = 0; j < lumFilterSize; j++) {
1289             Y += lumSrc[j][i] * lumFilter[j];
1291         for (j = 0; j < chrFilterSize; j++) {
1292             U += chrUSrc[j][i] * chrFilter[j];
1293             V += chrVSrc[j][i] * chrFilter[j];
1300             for (j = 0; j < lumFilterSize; j++) {
1301                 A += alpSrc[j][i] * lumFilter[j];
1305                 A = av_clip_uint8(A);
/* fixed-point colorspace conversion using the context coefficients */
1307         Y -= c->yuv2rgb_y_offset;
1308         Y *= c->yuv2rgb_y_coeff;
1310         R = Y + V*c->yuv2rgb_v2r_coeff;
1311         G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1312         B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip to 30 bit only on overflow; final >> happens in the store code */
1313         if ((R | G | B) & 0xC0000000) {
1314             R = av_clip_uintp2(R, 30);
1315             G = av_clip_uintp2(G, 30);
1316             B = av_clip_uintp2(B, 30);
/* alpha byte position depends on ARGB-vs-RGBA layout (elided switch) */
1321             dest[0] = hasAlpha ? A : 255;
1335             dest[3] = hasAlpha ? A : 255;
1338             dest[0] = hasAlpha ? A : 255;
1353             dest[3] = hasAlpha ? A : 255;
/* full-chroma X-only wrappers (no _2/_1 variants exist for this path) */
1361 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1362 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1363 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1364 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1366 #if CONFIG_SWSCALE_ALPHA
1367 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1);
1368 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1);
1369 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1);
1370 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1);
1372 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0);
1373 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0);
1374 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0);
1375 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0);
1377 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
1378 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
/* memset a rectangular region of a plane, one row at a time (rows are
 * 'stride' apart, only 'width' bytes of each are filled).
 * NOTE(review): the tail of the parameter list (fill value, start y) is
 * elided in this extraction. */
1380 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1381                                        int width, int height,
1385     uint8_t *ptr = plane + stride*y;
1386     for (i=0; i<height; i++) {
1387         memset(ptr, val, width);
/* 16-bit-per-component readers: load native- or byte-swapped 16-bit
 * samples depending on the origin's endianness */
1392 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* r/b swap for BGR48: the same template handles both component orders */
1394 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1395 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* RGB48 -> 16-bit luma using the RY/GY/BY fixed-point coefficients */
1397 static av_always_inline void
1398 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1399                     enum PixelFormat origin)
1402     for (i = 0; i < width; i++) {
1403         unsigned int r_b = input_pixel(&src[i*3+0]);
1404         unsigned int g   = input_pixel(&src[i*3+1]);
1405         unsigned int b_r = input_pixel(&src[i*3+2]);
1407         dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* RGB48 -> 16-bit chroma; src2 is unused here (assumed equal to src1,
 * as the 8-bit readers assert) */
1411 static av_always_inline void
1412 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1413                      const uint16_t *src1, const uint16_t *src2,
1414                      int width, enum PixelFormat origin)
1418     for (i = 0; i < width; i++) {
1419         int r_b = input_pixel(&src1[i*3+0]);
1420         int g   = input_pixel(&src1[i*3+1]);
1421         int b_r = input_pixel(&src1[i*3+2]);
/* 0x10001<<(SHIFT-1): rounding plus the +0x8000 chroma bias */
1423         dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1424         dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same as rgb48ToUV but horizontally averages each pair of input pixels
 * (chroma subsampled output): rounded average of samples i*2 and i*2+1 */
1428 static av_always_inline void
1429 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1430                           const uint16_t *src1, const uint16_t *src2,
1431                           int width, enum PixelFormat origin)
1435     for (i = 0; i < width; i++) {
1436         int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1437         int g   = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1438         int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1440         dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1441         dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Stamp out the public uint8_t* entry points for one 48-bit format:
 * ToY, ToUV, and ToUV_half, all forwarding to the templates above with
 * a compile-time origin so endian/order checks fold away. */
1449 #define rgb48funcs(pattern, BE_LE, origin) \
1450 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1451                                             int width, uint32_t *unused) \
1453     const uint16_t *src = (const uint16_t *) _src; \
1454     uint16_t *dst = (uint16_t *) _dst; \
1455     rgb48ToY_c_template(dst, src, width, origin); \
1458 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1459                                              const uint8_t *_src1, const uint8_t *_src2, \
1460                                              int width, uint32_t *unused) \
1462     const uint16_t *src1 = (const uint16_t *) _src1, \
1463                    *src2 = (const uint16_t *) _src2; \
1464     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1465     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1468 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1469                                                   const uint8_t *_src1, const uint8_t *_src2, \
1470                                                   int width, uint32_t *unused) \
1472     const uint16_t *src1 = (const uint16_t *) _src1, \
1473                    *src2 = (const uint16_t *) _src2; \
1474     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1475     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1478 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1479 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1480 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1481 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Generic 16/32-bit packed RGB reader: 32-bit formats load a native
 * aligned word, 16-bit formats load with the origin's endianness. */
1483 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1484                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1485                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Packed RGB (any mask/shift layout) -> 8-bit luma.
 * shr/shg/shb extract components after masking, shp pre-shifts the whole
 * pixel, rsh/gsh/bsh scale the coefficients, S is the final shift. */
1487 static av_always_inline void
1488 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1489                        int width, enum PixelFormat origin,
1490                        int shr, int shg, int shb, int shp,
1491                        int maskr, int maskg, int maskb,
1492                        int rsh, int gsh, int bsh, int S)
1494     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1495               rnd = 33 << (S - 1);
1498     for (i = 0; i < width; i++) {
1499         int px = input_pixel(i) >> shp;
1500         int b = (px & maskb) >> shb;
1501         int g = (px & maskg) >> shg;
1502         int r = (px & maskr) >> shr;
1504         dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Packed RGB -> 8-bit chroma, same mask/shift parameterization as the
 * luma template; rnd = 257<<(S-1) folds rounding + the 128 chroma bias */
1508 static av_always_inline void
1509 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1510                         const uint8_t *src, int width,
1511                         enum PixelFormat origin,
1512                         int shr, int shg, int shb, int shp,
1513                         int maskr, int maskg, int maskb,
1514                         int rsh, int gsh, int bsh, int S)
1516     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1517               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1518               rnd = 257 << (S - 1);
1521     for (i = 0; i < width; i++) {
1522         int px = input_pixel(i) >> shp;
1523         int b = (px & maskb) >> shb;
1524         int g = (px & maskg) >> shg;
1525         int r = (px & maskr) >> shr;
1527         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1528         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Packed RGB -> chroma with 2:1 horizontal averaging.
 * Trick: sum two pixels as raw words; (px0+px1) minus the summed green
 * field leaves r+b sums in place, using widened masks (maskX<<1|maskX)
 * to hold the 1-bit carry. S+1 in the final shift divides by 2. */
1532 static av_always_inline void
1533 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1534                              const uint8_t *src, int width,
1535                              enum PixelFormat origin,
1536                              int shr, int shg, int shb, int shp,
1537                              int maskr, int maskg, int maskb,
1538                              int rsh, int gsh, int bsh, int S)
1540     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1541               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1542               rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen the masks so the per-field sums (max 2x field max) still fit */
1545     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1546     for (i = 0; i < width; i++) {
1547         int px0 = input_pixel(2 * i + 0) >> shp;
1548         int px1 = input_pixel(2 * i + 1) >> shp;
1549         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1550         int rb = px0 + px1 - g;
1552         b = (rb & maskb) >> shb;
/* 565-style layouts need an extra green adjustment (body elided here) */
1553         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1554             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1557             g = (g & maskg) >> shg;
1559         r = (rb & maskr) >> shr;
1561         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1562         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Stamp out ToY/ToUV/ToUV_half entry points for one packed-RGB layout;
 * all mask/shift parameters are compile-time constants per instantiation */
1568 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1569                          maskg, maskb, rsh, gsh, bsh, S) \
1570 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1571                           int width, uint32_t *unused) \
1573     rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1574                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
1577 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1578                            const uint8_t *src, const uint8_t *dummy, \
1579                            int width, uint32_t *unused) \
1581     rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1582                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
1585 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1586                                 const uint8_t *src, const uint8_t *dummy, \
1587                                 int width, uint32_t *unused) \
1589     rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1590                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* one instantiation per supported packed layout; 15-bit formats use S-7
 * scaling, 16/32-bit use S-8 */
1593 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1594 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1595 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1596 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1597 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1598 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1599 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1600 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
1601 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1602 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1603 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1604 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from ABGR input (loop body elided in this
 * extraction — presumably dst[i] = src[4*i]; verify). */
1606 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1609     for (i=0; i<width; i++) {
/* Extract the alpha plane from RGBA input (body elided — presumably
 * dst[i] = src[4*i+3]; verify). */
1614 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1617     for (i=0; i<width; i++) {
/* PAL8 -> luma: low byte of the palette entry is the Y value */
1622 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1625     for (i=0; i<width; i++) {
1628         dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma: U/V live in the higher bytes of the palette entry
 * (extraction elided the byte-shift stores) */
1632 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1633                       const uint8_t *src1, const uint8_t *src2,
1634                       int width, uint32_t *pal)
1637     assert(src1 == src2);
1638     for (i=0; i<width; i++) {
1639         int p= pal[src1[i]];
/* 1-bit -> 8-bit luma expansion, 8 pixels per input byte.
 * NOTE(review): the line loading/inverting 'd' is elided; monowhite
 * presumably inverts the byte before expansion — confirm upstream. */
1646 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1647                           int width, uint32_t *unused)
1650     for (i=0; i<width/8; i++) {
1653             dst[8*i+j]= ((d>>(7-j))&1)*255;
1657 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1658                           int width, uint32_t *unused)
1661     for (i=0; i<width/8; i++) {
1664             dst[8*i+j]= ((d>>(7-j))&1)*255;
1668 //FIXME yuy2* can read up to 7 samples too much
/* YUYV -> luma: every even byte is a Y sample (body line elided —
 * presumably dst[i] = src[2*i]; verify). */
1670 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1674     for (i=0; i<width; i++)
/* Split the chroma samples out of packed YUYV (Y0 U Y1 V) input.
 * Both line pointers must reference the same data; only src1 is read. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *quad = src1 + 4 * x;  /* Y0 U Y1 V */
        dstU[x] = quad[1];
        dstV[x] = quad[3];
    }
    assert(src1 == src2);
}
/* Byte-swap a 16-bit luma line (used for opposite-endian 16-bit input) */
1689 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1692     const uint16_t *src = (const uint16_t *) _src;
1693     uint16_t *dst = (uint16_t *) _dst;
1694     for (i=0; i<width; i++) {
1695         dst[i] = av_bswap16(src[i]);
/* Byte-swap 16-bit U and V lines in one pass */
1699 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1700                         const uint8_t *_src2, int width, uint32_t *unused)
1703     const uint16_t *src1 = (const uint16_t *) _src1,
1704                    *src2 = (const uint16_t *) _src2;
1705     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1706     for (i=0; i<width; i++) {
1707         dstU[i] = av_bswap16(src1[i]);
1708         dstV[i] = av_bswap16(src2[i]);
1712 /* This is almost identical to the previous, and exists only because
1713  * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY -> luma: every odd byte is a Y sample (body line elided —
 * presumably dst[i] = src[2*i+1]; verify). */
1714 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1718     for (i=0; i<width; i++)
/* Split the chroma samples out of packed UYVY (U Y0 V Y1) input.
 * Both line pointers must reference the same data; only src1 is read. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *quad = src1 + 4 * x;  /* U Y0 V Y1 */
        dstU[x] = quad[0];
        dstV[x] = quad[2];
    }
    assert(src1 == src2);
}
/* De-interleave an NV12/NV21-style packed chroma line into two planes */
1733 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1734                                         const uint8_t *src, int width)
1737     for (i = 0; i < width; i++) {
1738         dst1[i] = src[2*i+0];
1739         dst2[i] = src[2*i+1];
/* NV12: interleaved order is U,V */
1743 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1744                        const uint8_t *src1, const uint8_t *src2,
1745                        int width, uint32_t *unused)
1747     nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved order is V,U — same helper, swapped destinations */
1750 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1751                        const uint8_t *src1, const uint8_t *src2,
1752                        int width, uint32_t *unused)
1754     nvXXtoUV_c(dstV, dstU, src1, width);
1757 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> 8-bit luma (component loads elided in this extraction;
 * byte order is b,g,r as in bgr24ToUV_c below) */
1759 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1760                        int width, uint32_t *unused)
1763     for (i=0; i<width; i++) {
1768         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> 8-bit chroma; 257<<(SHIFT-1) = rounding + 128 bias */
1772 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1773                         const uint8_t *src2, int width, uint32_t *unused)
1776     for (i=0; i<width; i++) {
1777         int b= src1[3*i + 0];
1778         int g= src1[3*i + 1];
1779         int r= src1[3*i + 2];
1781         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1782         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1784     assert(src1 == src2);
/* BGR24 -> chroma with 2:1 horizontal averaging: sums two pixels, so the
 * final shift is SHIFT+1 and the rounding constant doubles accordingly */
1787 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1788                              const uint8_t *src2, int width, uint32_t *unused)
1791     for (i=0; i<width; i++) {
1792         int b= src1[6*i + 0] + src1[6*i + 3];
1793         int g= src1[6*i + 1] + src1[6*i + 4];
1794         int r= src1[6*i + 2] + src1[6*i + 5];
1796         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1797         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1799     assert(src1 == src2);
/* RGB24 -> luma: same math as bgr24ToY_c with r and b byte positions
 * swapped (component loads elided in this extraction) */
1802 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1806     for (i=0; i<width; i++) {
1811         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 -> chroma (r,g,b byte order) */
1815 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1816                         const uint8_t *src2, int width, uint32_t *unused)
1820     for (i=0; i<width; i++) {
1821         int r= src1[3*i + 0];
1822         int g= src1[3*i + 1];
1823         int b= src1[3*i + 2];
1825         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1826         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 -> chroma with 2:1 horizontal averaging */
1830 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1831                              const uint8_t *src2, int width, uint32_t *unused)
1835     for (i=0; i<width; i++) {
1836         int r= src1[6*i + 0] + src1[6*i + 3];
1837         int g= src1[6*i + 1] + src1[6*i + 4];
1838         int b= src1[6*i + 2] + src1[6*i + 5];
1840         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1841         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal FIR scaling for >8-bit input: 16-bit samples * 14-bit filter
 * taps accumulated into 32-bit, then shifted down to 19 bits and clipped.
 * The shift adapts to the source bit depth so the output range is fixed. */
1845 static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1846                        const int16_t *filter,
1847                        const int16_t *filterPos, int filterSize)
1850     int32_t *dst = (int32_t *) _dst;
1851     const uint16_t *src = (const uint16_t *) _src;
1852     int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1853     int sh = (bits <= 7) ? 11 : (bits - 4);
1855     for (i = 0; i < dstW; i++) {
1857         int srcPos = filterPos[i];
1860         for (j = 0; j < filterSize; j++) {
1861             val += src[srcPos + j] * filter[filterSize * i + j];
1863         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1864         dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1868 // bilinear / bicubic scaling
/* Horizontal FIR scaling for 8-bit input: 8-bit samples * filter taps,
 * >>7 to 15-bit output, clipped because cubic filters can overshoot. */
1869 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1870                      const int16_t *filter, const int16_t *filterPos,
1874     for (i=0; i<dstW; i++) {
1876         int srcPos= filterPos[i];
1878         for (j=0; j<filterSize; j++) {
1879             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1881         //filter += hFilterSize;
1882         dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1887 //FIXME all pal and rgb srcFormats could do this convertion as well
1888 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range (MPEG) -> full-range (JPEG) chroma, 15-bit fixed point;
 * FFMIN guards against overflow before the multiply */
1889 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1892     for (i = 0; i < width; i++) {
1893         dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1894         dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Full-range -> limited-range chroma (inverse of the above) */
1897 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1900     for (i = 0; i < width; i++) {
1901         dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1902         dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Limited-range -> full-range luma */
1905 static void lumRangeToJpeg_c(int16_t *dst, int width)
1908     for (i = 0; i < width; i++)
1909         dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Full-range -> limited-range luma */
1911 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1914     for (i = 0; i < width; i++)
1915         dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Same range conversions for the 19-bit (int32 element) pipeline; the
 * int16_t* signatures are kept for the shared function-pointer type and
 * the constants are scaled by <<4 to match the wider sample range. */
1918 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1921     int32_t *dstU = (int32_t *) _dstU;
1922     int32_t *dstV = (int32_t *) _dstV;
1923     for (i = 0; i < width; i++) {
1924         dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1925         dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1928 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1931     int32_t *dstU = (int32_t *) _dstU;
1932     int32_t *dstV = (int32_t *) _dstV;
1933     for (i = 0; i < width; i++) {
1934         dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1935         dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
1938 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1941     int32_t *dst = (int32_t *) _dst;
1942     for (i = 0; i < width; i++)
1943         dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
1945 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
1948     int32_t *dst = (int32_t *) _dst;
1949     for (i = 0; i < width; i++)
1950         dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scale: 16.16 fixed-point position, the
 * fraction's top 7 bits select the blend weight; output is 15-bit.
 * NOTE(review): src[xx+1] can read one sample past the row end at the
 * rightmost position — presumably padded by the caller; confirm. */
1953 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1954                            const uint8_t *src, int srcW, int xInc)
1957     unsigned int xpos=0;
1958     for (i=0;i<dstWidth;i++) {
1959         register unsigned int xx=xpos>>16;
1960         register unsigned int xalpha=(xpos&0xFFFF)>>9;
1961         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/* Expand 8-bit samples to 16 bit by duplicating each byte into both
 * halves of the output word. Iterates in Reverse so the expansion is
 * safe when src and _dst alias (in-place widening). */
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    uint8_t *out = (uint8_t *) _dst;
    int idx;

    for (idx = len; idx-- > 0; ) {
        uint8_t v = src[idx];        /* read before both writes (aliasing) */
        out[2 * idx]     = v;
        out[2 * idx + 1] = v;
    }
}
/* Narrow 19-bit (int32) samples to 15-bit (int16) with a >>4.
 * Iterates Forward, which keeps the callers' in-place use safe
 * (dst overlays the low halves of src). */
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = (int16_t) (src[n] >> 4);
}
1983 // *** horizontal scale Y line to temp buffer
/* Driver for one luma (or alpha) line: optional input conversion to
 * YV12-style samples, optional 8->16 widening, horizontal scale
 * (FIR or fast-bilinear), optional range conversion, then optional
 * 19->15 narrowing when the destination pipeline is 15-bit. */
1984 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1985                                      const uint8_t *src, int srcW, int xInc,
1986                                      const int16_t *hLumFilter,
1987                                      const int16_t *hLumFilterPos, int hLumFilterSize,
1988                                      uint8_t *formatConvBuffer,
1989                                      uint32_t *pal, int isAlpha)
1991     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1992     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* input format conversion into the scratch buffer, when needed */
1995         toYV12(formatConvBuffer, src, srcW, pal);
1996         src= formatConvBuffer;
/* widen 8-bit input in place when scaling at 16-bit precision */
1999     if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2000         c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
2001         src = formatConvBuffer;
2004     if (!c->hyscale_fast) {
2005         c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2006     } else { // fast bilinear upscale / crap downscale
2007         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2011         convertRange(dst, dstWidth);
2013     if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2014         c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/* Fast bilinear horizontal chroma scale: both planes share one position
 * walk; (xalpha^127) approximates (128 - xalpha) as the complement weight */
2018 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2019                            int dstWidth, const uint8_t *src1,
2020                            const uint8_t *src2, int srcW, int xInc)
2023     unsigned int xpos=0;
2024     for (i=0;i<dstWidth;i++) {
2025         register unsigned int xx=xpos>>16;
2026         register unsigned int xalpha=(xpos&0xFFFF)>>9;
2027         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2028         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Chroma counterpart of hyscale(): converts both chroma lines (U into
 * formatConvBuffer, V into buf2), optionally widens to 16 bit, scales
 * both planes, applies range conversion and optional 19->15 narrowing. */
2033 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2034                                      const uint8_t *src1, const uint8_t *src2,
2035                                      int srcW, int xInc, const int16_t *hChrFilter,
2036                                      const int16_t *hChrFilterPos, int hChrFilterSize,
2037                                      uint8_t *formatConvBuffer, uint32_t *pal)
/* second half of the scratch buffer holds the converted V plane */
2040         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
2041         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2042         src1= formatConvBuffer;
2046     if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2047         uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
2048         c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2049         c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2050         src1 = formatConvBuffer;
2054     if (!c->hcscale_fast) {
2055         c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2056         c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2057     } else { // fast bilinear upscale / crap downscale
2058         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2061     if (c->chrConvertRange)
2062         c->chrConvertRange(dst1, dst2, dstWidth);
2064     if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2065         c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2066         c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/*
 * Select the C output functions (planar yuv2yuv1/X and packed
 * yuv2packed1/2/X) matching c->dstFormat, writing them through the
 * out-parameters. Full-chroma (SWS_FULL_CHR_H_INT) formats only get an
 * _X variant. NOTE(review): this extraction elides most case labels,
 * break statements and #else branches; visible lines kept verbatim.
 */
2070 static av_always_inline void
2071 find_c_packed_planar_out_funcs(SwsContext *c,
2072                                yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2073                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2074                                yuv2packedX_fn *yuv2packedX)
2076     enum PixelFormat dstFormat = c->dstFormat;
/* planar outputs first: NV12/21, 16-bit, 9/10-bit, then plain 8-bit */
2078     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2079         *yuv2yuvX     = yuv2nv12X_c;
2080     } else if (is16BPS(dstFormat)) {
2081         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
2082     } else if (is9_OR_10BPS(dstFormat)) {
2083         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2084             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
2086             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2089         *yuv2yuv1     = yuv2yuv1_c;
2090         *yuv2yuvX     = yuv2yuvX_c;
/* packed outputs: full-chroma variants when requested, else the LUT path */
2092     if(c->flags & SWS_FULL_CHR_H_INT) {
2093         switch (dstFormat) {
2096                 *yuv2packedX = yuv2rgba32_full_X_c;
2098 #if CONFIG_SWSCALE_ALPHA
2100                     *yuv2packedX = yuv2rgba32_full_X_c;
2102 #endif /* CONFIG_SWSCALE_ALPHA */
2104                     *yuv2packedX = yuv2rgbx32_full_X_c;
2106 #endif /* !CONFIG_SMALL */
2110                 *yuv2packedX = yuv2argb32_full_X_c;
2112 #if CONFIG_SWSCALE_ALPHA
2114                     *yuv2packedX = yuv2argb32_full_X_c;
2116 #endif /* CONFIG_SWSCALE_ALPHA */
2118                     *yuv2packedX = yuv2xrgb32_full_X_c;
2120 #endif /* !CONFIG_SMALL */
2124                 *yuv2packedX = yuv2bgra32_full_X_c;
2126 #if CONFIG_SWSCALE_ALPHA
2128                     *yuv2packedX = yuv2bgra32_full_X_c;
2130 #endif /* CONFIG_SWSCALE_ALPHA */
2132                     *yuv2packedX = yuv2bgrx32_full_X_c;
2134 #endif /* !CONFIG_SMALL */
2138                 *yuv2packedX = yuv2abgr32_full_X_c;
2140 #if CONFIG_SWSCALE_ALPHA
2142                     *yuv2packedX = yuv2abgr32_full_X_c;
2144 #endif /* CONFIG_SWSCALE_ALPHA */
2146                     *yuv2packedX = yuv2xbgr32_full_X_c;
2148 #endif /* !CONFIG_SMALL */
2151             *yuv2packedX = yuv2rgb24_full_X_c;
2154             *yuv2packedX = yuv2bgr24_full_X_c;
/* non-full-chroma packed formats: 1/2/X variants each */
2158         switch (dstFormat) {
2159         case PIX_FMT_GRAY16BE:
2160             *yuv2packed1 = yuv2gray16BE_1_c;
2161             *yuv2packed2 = yuv2gray16BE_2_c;
2162             *yuv2packedX = yuv2gray16BE_X_c;
2164         case PIX_FMT_GRAY16LE:
2165             *yuv2packed1 = yuv2gray16LE_1_c;
2166             *yuv2packed2 = yuv2gray16LE_2_c;
2167             *yuv2packedX = yuv2gray16LE_X_c;
2169         case PIX_FMT_MONOWHITE:
2170             *yuv2packed1 = yuv2monowhite_1_c;
2171             *yuv2packed2 = yuv2monowhite_2_c;
2172             *yuv2packedX = yuv2monowhite_X_c;
2174         case PIX_FMT_MONOBLACK:
2175             *yuv2packed1 = yuv2monoblack_1_c;
2176             *yuv2packed2 = yuv2monoblack_2_c;
2177             *yuv2packedX = yuv2monoblack_X_c;
2179         case PIX_FMT_YUYV422:
2180             *yuv2packed1 = yuv2yuyv422_1_c;
2181             *yuv2packed2 = yuv2yuyv422_2_c;
2182             *yuv2packedX = yuv2yuyv422_X_c;
2184         case PIX_FMT_UYVY422:
2185             *yuv2packed1 = yuv2uyvy422_1_c;
2186             *yuv2packed2 = yuv2uyvy422_2_c;
2187             *yuv2packedX = yuv2uyvy422_X_c;
2189         case PIX_FMT_RGB48LE:
2190             *yuv2packed1 = yuv2rgb48le_1_c;
2191             *yuv2packed2 = yuv2rgb48le_2_c;
2192             *yuv2packedX = yuv2rgb48le_X_c;
2194         case PIX_FMT_RGB48BE:
2195             *yuv2packed1 = yuv2rgb48be_1_c;
2196             *yuv2packed2 = yuv2rgb48be_2_c;
2197             *yuv2packedX = yuv2rgb48be_X_c;
2199         case PIX_FMT_BGR48LE:
2200             *yuv2packed1 = yuv2bgr48le_1_c;
2201             *yuv2packed2 = yuv2bgr48le_2_c;
2202             *yuv2packedX = yuv2bgr48le_X_c;
2204         case PIX_FMT_BGR48BE:
2205             *yuv2packed1 = yuv2bgr48be_1_c;
2206             *yuv2packed2 = yuv2bgr48be_2_c;
2207             *yuv2packedX = yuv2bgr48be_X_c;
2212                 *yuv2packed1 = yuv2rgb32_1_c;
2213                 *yuv2packed2 = yuv2rgb32_2_c;
2214                 *yuv2packedX = yuv2rgb32_X_c;
2216 #if CONFIG_SWSCALE_ALPHA
2218                     *yuv2packed1 = yuv2rgba32_1_c;
2219                     *yuv2packed2 = yuv2rgba32_2_c;
2220                     *yuv2packedX = yuv2rgba32_X_c;
2222 #endif /* CONFIG_SWSCALE_ALPHA */
2224                     *yuv2packed1 = yuv2rgbx32_1_c;
2225                     *yuv2packed2 = yuv2rgbx32_2_c;
2226                     *yuv2packedX = yuv2rgbx32_X_c;
2228 #endif /* !CONFIG_SMALL */
2230         case PIX_FMT_RGB32_1:
2231         case PIX_FMT_BGR32_1:
2233                 *yuv2packed1 = yuv2rgb32_1_1_c;
2234                 *yuv2packed2 = yuv2rgb32_1_2_c;
2235                 *yuv2packedX = yuv2rgb32_1_X_c;
2237 #if CONFIG_SWSCALE_ALPHA
2239                     *yuv2packed1 = yuv2rgba32_1_1_c;
2240                     *yuv2packed2 = yuv2rgba32_1_2_c;
2241                     *yuv2packedX = yuv2rgba32_1_X_c;
2243 #endif /* CONFIG_SWSCALE_ALPHA */
2245                     *yuv2packed1 = yuv2rgbx32_1_1_c;
2246                     *yuv2packed2 = yuv2rgbx32_1_2_c;
2247                     *yuv2packedX = yuv2rgbx32_1_X_c;
2249 #endif /* !CONFIG_SMALL */
2252             *yuv2packed1 = yuv2rgb24_1_c;
2253             *yuv2packed2 = yuv2rgb24_2_c;
2254             *yuv2packedX = yuv2rgb24_X_c;
2257             *yuv2packed1 = yuv2bgr24_1_c;
2258             *yuv2packed2 = yuv2bgr24_2_c;
2259             *yuv2packedX = yuv2bgr24_X_c;
2261         case PIX_FMT_RGB565LE:
2262         case PIX_FMT_RGB565BE:
2263         case PIX_FMT_BGR565LE:
2264         case PIX_FMT_BGR565BE:
2265             *yuv2packed1 = yuv2rgb16_1_c;
2266             *yuv2packed2 = yuv2rgb16_2_c;
2267             *yuv2packedX = yuv2rgb16_X_c;
2269         case PIX_FMT_RGB555LE:
2270         case PIX_FMT_RGB555BE:
2271         case PIX_FMT_BGR555LE:
2272         case PIX_FMT_BGR555BE:
2273             *yuv2packed1 = yuv2rgb15_1_c;
2274             *yuv2packed2 = yuv2rgb15_2_c;
2275             *yuv2packedX = yuv2rgb15_X_c;
2277         case PIX_FMT_RGB444LE:
2278         case PIX_FMT_RGB444BE:
2279         case PIX_FMT_BGR444LE:
2280         case PIX_FMT_BGR444BE:
2281             *yuv2packed1 = yuv2rgb12_1_c;
2282             *yuv2packed2 = yuv2rgb12_2_c;
2283             *yuv2packedX = yuv2rgb12_X_c;
2287             *yuv2packed1 = yuv2rgb8_1_c;
2288             *yuv2packed2 = yuv2rgb8_2_c;
2289             *yuv2packedX = yuv2rgb8_X_c;
2293             *yuv2packed1 = yuv2rgb4_1_c;
2294             *yuv2packed2 = yuv2rgb4_2_c;
2295             *yuv2packedX = yuv2rgb4_X_c;
2297         case PIX_FMT_RGB4_BYTE:
2298         case PIX_FMT_BGR4_BYTE:
2299             *yuv2packed1 = yuv2rgb4b_1_c;
2300             *yuv2packed2 = yuv2rgb4b_2_c;
2301             *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for verbose ring-buffer tracing inside swScale().
 * When DEBUG_SWSCALE_BUFFERS is 0 (the default) every DEBUG_BUFFERS(...)
 * call compiles to a dead `if (0)` and is removed by the optimizer.
 * NOTE(review): the macro expansion references a variable `c` (the
 * SwsContext) that must be in scope at every call site. */
2307 #define DEBUG_SWSCALE_BUFFERS 0
2308 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main C scaling loop: consumes one horizontal slice of the source picture
 * (srcSliceH lines starting at srcSliceY) and produces as many destination
 * lines as the vertical filters allow.
 *
 * The algorithm keeps horizontally-scaled source lines in ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf).  For each output line it
 * first horizontally scales any missing input lines into the ring buffers,
 * then runs the vertical filter (yuv2yuv* for planar output, yuv2packed* for
 * packed output) over the buffered lines.
 *
 * @return number of destination lines written (dstY - lastDstY); slices that
 *         only buffer input without producing output return fewer lines.
 *
 * NOTE(review): callers may feed slices incrementally; the ring-buffer
 * indices and "lastIn*Buf" positions are persisted in the context between
 * calls so a later slice continues where the previous one stopped.
 */
2310 static int swScale(SwsContext *c, const uint8_t* src[],
2311 int srcStride[], int srcSliceY,
2312 int srcSliceH, uint8_t* dst[], int dstStride[])
2314 /* load a few things into local vars to make the code more readable? and faster */
2315 const int srcW= c->srcW;
2316 const int dstW= c->dstW;
2317 const int dstH= c->dstH;
2318 const int chrDstW= c->chrDstW;
2319 const int chrSrcW= c->chrSrcW;
2320 const int lumXInc= c->lumXInc;
2321 const int chrXInc= c->chrXInc;
2322 const enum PixelFormat dstFormat= c->dstFormat;
2323 const int flags= c->flags;
/* Vertical/horizontal filter coefficient tables and per-line filter
 * positions, precomputed at init time. */
2324 int16_t *vLumFilterPos= c->vLumFilterPos;
2325 int16_t *vChrFilterPos= c->vChrFilterPos;
2326 int16_t *hLumFilterPos= c->hLumFilterPos;
2327 int16_t *hChrFilterPos= c->hChrFilterPos;
2328 int16_t *vLumFilter= c->vLumFilter;
2329 int16_t *vChrFilter= c->vChrFilter;
2330 int16_t *hLumFilter= c->hLumFilter;
2331 int16_t *hChrFilter= c->hChrFilter;
/* MMX-specific coefficient mirrors; alpMmxFilter is unused in pure-C builds. */
2332 int32_t *lumMmxFilter= c->lumMmxFilter;
2333 int32_t *chrMmxFilter= c->chrMmxFilter;
2334 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2335 const int vLumFilterSize= c->vLumFilterSize;
2336 const int vChrFilterSize= c->vChrFilterSize;
2337 const int hLumFilterSize= c->hLumFilterSize;
2338 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally scaled lines (luma, chroma U/V, alpha). */
2339 int16_t **lumPixBuf= c->lumPixBuf;
2340 int16_t **chrUPixBuf= c->chrUPixBuf;
2341 int16_t **chrVPixBuf= c->chrVPixBuf;
2342 int16_t **alpPixBuf= c->alpPixBuf;
2343 const int vLumBufSize= c->vLumBufSize;
2344 const int vChrBufSize= c->vChrBufSize;
2345 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice extent: offset shifted down by the vertical subsampling;
 * the height uses -((-x)>>s) so it rounds up instead of down. */
2346 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2347 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2349 uint32_t *pal=c->pal_yuv;
2350 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2351 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2352 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2353 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2354 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2356 /* vars which will change and which we need to store back in the context */
2358 int lumBufIndex= c->lumBufIndex;
2359 int chrBufIndex= c->chrBufIndex;
2360 int lastInLumBuf= c->lastInLumBuf;
2361 int lastInChrBuf= c->lastInChrBuf;
/* Packed input has a single plane: mirror plane 0 into the other slots so
 * the generic per-plane code below works unchanged. */
2363 if (isPacked(c->srcFormat)) {
2371 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma input lines; compensate by widening the stride. */
2373 srcStride[1]<<= c->vChrDrop;
2374 srcStride[2]<<= c->vChrDrop;
2376 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2377 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2378 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2379 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2380 srcSliceY, srcSliceH, dstY, dstH);
2381 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2382 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Warn once if destination strides break 8-byte alignment; the SIMD paths
 * presumably rely on aligned stores — TODO confirm which paths care. */
2384 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2385 static int warnedAlready=0; //FIXME move this into the context perhaps
2386 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2387 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2388 " ->cannot do aligned memory accesses anymore\n");
2393 /* Note the user might start scaling the picture in the middle so this
2394 will not get executed. This is not really intended but works
2395 currently, so people might do it. */
2396 if (srcSliceY ==0) {
/* Main loop: one iteration per destination line. */
2406 for (;dstY < dstH; dstY++) {
2407 const int chrDstY= dstY>>c->chrDstVSubSample;
2408 uint8_t *dest[4] = {
2409 dst[0] + dstStride[0] * dstY,
2410 dst[1] + dstStride[1] * chrDstY,
2411 dst[2] + dstStride[2] * chrDstY,
2412 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* Determine the range of source lines the vertical filters need for this
 * output line.  firstLumSrcY2 looks ahead to the last luma line sharing
 * the current chroma line, so luma and chroma stay in sync. */
2415 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2416 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2417 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2418 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2419 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2420 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2423 //handle holes (FAST_BILINEAR & weird filters)
2424 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2425 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2426 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2427 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2429 DEBUG_BUFFERS("dstY: %d\n", dstY);
2430 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2431 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2432 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2433 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2435 // Do we have enough lines in this slice to output the dstY line
2436 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: clamp to what this slice provides, buffer it,
 * and bail out of the loop further below without emitting the line. */
2438 if (!enough_lines) {
2439 lastLumSrcY = srcSliceY + srcSliceH - 1;
2440 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2441 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2442 lastLumSrcY, lastChrSrcY);
2445 //Do horizontal scaling
/* Horizontally scale every still-missing luma (and alpha) input line into
 * the ring buffer.  src[3] is the alpha plane (or a mirror of plane 0 for
 * packed input, set up above). */
2446 while(lastInLumBuf < lastLumSrcY) {
2447 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2448 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2450 assert(lumBufIndex < 2*vLumBufSize);
2451 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2452 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2453 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2454 hLumFilter, hLumFilterPos, hLumFilterSize,
2457 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2458 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2459 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2463 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2464 lumBufIndex, lastInLumBuf);
/* Same for the chroma planes. */
2466 while(lastInChrBuf < lastChrSrcY) {
2467 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2468 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2470 assert(chrBufIndex < 2*vChrBufSize);
2471 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2472 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2473 //FIXME replace parameters through context struct (some at least)
/* needs_hcscale is 0 for gray/mono formats, which carry no chroma. */
2475 if (c->needs_hcscale)
2476 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2477 chrDstW, src1, src2, chrSrcW, chrXInc,
2478 hChrFilter, hChrFilterPos, hChrFilterSize,
2479 formatConvBuffer, pal);
2481 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2482 chrBufIndex, lastInChrBuf);
2484 //wrap buf index around to stay inside the ring buffer
2485 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2486 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2488 break; //we can't output a dstY line so let's try with the next slice
2491 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2493 if (dstY >= dstH-2) {
2494 // hmm looks like we can't use MMX here without overwriting this array's tail
/* Swap in the plain-C output functions for the last two lines. */
2495 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2496 &yuv2packed1, &yuv2packed2,
/* Pointers into the ring buffers positioned so that index 0 is the first
 * source line the vertical filter needs for this output line. */
2501 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2502 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2503 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2504 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2505 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2506 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2507 if ((dstY&chrSkipMask) || isGray(dstFormat))
2508 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2509 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2510 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2511 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2512 dest, dstW, chrDstW);
2513 } else { //General YV12
2514 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2515 lumSrcPtr, vLumFilterSize,
2516 vChrFilter + chrDstY * vChrFilterSize,
2517 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2518 alpSrcPtr, dest, dstW, chrDstW);
2521 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2522 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
/* Packed output: pick the cheapest vertical path that applies. */
2523 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2524 int chrAlpha = vChrFilter[2 * dstY + 1];
2525 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2526 alpPixBuf ? *alpSrcPtr : NULL,
2527 dest[0], dstW, chrAlpha, dstY);
2528 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2529 int lumAlpha = vLumFilter[2 * dstY + 1];
2530 int chrAlpha = vChrFilter[2 * dstY + 1];
/* Duplicate the 16-bit coefficient into both halves of the 32-bit
 * MMX filter slot (0x10001 == (1<<16)|1). */
2532 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2534 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2535 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2536 alpPixBuf ? alpSrcPtr : NULL,
2537 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2538 } else { //general RGB
2539 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2540 lumSrcPtr, vLumFilterSize,
2541 vChrFilter + dstY * vChrFilterSize,
2542 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2543 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA output without a source alpha plane: fill alpha with opaque 255. */
2549 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2550 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Make non-temporal (MMX2) stores globally visible before returning. */
2553 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2554 __asm__ volatile("sfence":::"memory");
2558 /* store changed local vars back in the context */
2560 c->lumBufIndex= lumBufIndex;
2561 c->chrBufIndex= chrBufIndex;
2562 c->lastInLumBuf= lastInLumBuf;
2563 c->lastInChrBuf= lastInChrBuf;
2565 return dstY - lastDstY;
/**
 * One-time (av_cold) initialization of the C code path: fills the context's
 * function pointers based on source/destination pixel formats.
 *
 * Selects:
 *  - the packed/planar output functions (find_c_packed_planar_out_funcs),
 *  - chrToYV12 / lumToYV12 / alpToYV12: input-format -> planar YUV
 *    unpack/convert functions,
 *  - hScale and the fast-bilinear horizontal scalers,
 *  - lum/chrConvertRange for JPEG (full) <-> MPEG (limited) range conversion,
 *  - needs_hcscale: 0 for gray/mono inputs that carry no chroma.
 */
2568 static av_cold void sws_init_swScale_c(SwsContext *c)
2570 enum PixelFormat srcFormat = c->srcFormat;
2572 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2573 &c->yuv2packed1, &c->yuv2packed2,
/* ---- chroma (U/V) input conversion ---- */
2576 c->chrToYV12 = NULL;
2578 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2579 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2580 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2581 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Palettized (and pseudo-palettized 4/8-bit RGB) inputs go through the
 * palette lookup table. */
2585 case PIX_FMT_BGR4_BYTE:
2586 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* High-bit-depth planar YUV whose endianness differs from the native one
 * only needs a byte swap; both LE and BE lists map to bswap16UV_c and the
 * surrounding (elided) #if HAVE_BIGENDIAN presumably selects which list is
 * active — TODO confirm against the full file. */
2588 case PIX_FMT_YUV444P9LE:
2589 case PIX_FMT_YUV420P9LE:
2590 case PIX_FMT_YUV422P10LE:
2591 case PIX_FMT_YUV444P10LE:
2592 case PIX_FMT_YUV420P10LE:
2593 case PIX_FMT_YUV420P16LE:
2594 case PIX_FMT_YUV422P16LE:
2595 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2597 case PIX_FMT_YUV444P9BE:
2598 case PIX_FMT_YUV420P9BE:
2599 case PIX_FMT_YUV444P10BE:
2600 case PIX_FMT_YUV422P10BE:
2601 case PIX_FMT_YUV420P10BE:
2602 case PIX_FMT_YUV420P16BE:
2603 case PIX_FMT_YUV422P16BE:
2604 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB-family inputs: when the destination chroma is horizontally
 * subsampled, use the _half_ variants (which, per their naming, presumably
 * consume two input pixels per output chroma sample — TODO confirm),
 * otherwise the 1:1 variants below.  Note the RGB<->BGR cross-naming
 * (PIX_FMT_RGB32 -> bgr32ToUV_*) follows the formats' byte-order
 * definitions. */
2607 if (c->chrSrcHSubSample) {
2609 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2610 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2611 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2612 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2613 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2614 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2615 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2616 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2617 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2618 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2619 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2620 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2621 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2622 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2623 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2624 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2625 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2626 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* No horizontal chroma subsampling: 1:1 per-pixel converters. */
2630 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2631 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2632 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2633 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2634 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2635 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2636 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2637 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2638 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2639 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2640 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2641 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2642 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2643 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2644 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2645 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2646 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2647 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* ---- luma (Y) and alpha input conversion ---- */
2651 c->lumToYV12 = NULL;
2652 c->alpToYV12 = NULL;
2653 switch (srcFormat) {
/* Byte-swap-only cases for high-bit-depth planar YUV and 16-bit gray
 * (same LE/BE split as for chroma above). */
2655 case PIX_FMT_YUV444P9LE:
2656 case PIX_FMT_YUV420P9LE:
2657 case PIX_FMT_YUV444P10LE:
2658 case PIX_FMT_YUV422P10LE:
2659 case PIX_FMT_YUV420P10LE:
2660 case PIX_FMT_YUV420P16LE:
2661 case PIX_FMT_YUV422P16LE:
2662 case PIX_FMT_YUV444P16LE:
2663 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2665 case PIX_FMT_YUV444P9BE:
2666 case PIX_FMT_YUV420P9BE:
2667 case PIX_FMT_YUV444P10BE:
2668 case PIX_FMT_YUV422P10BE:
2669 case PIX_FMT_YUV420P10BE:
2670 case PIX_FMT_YUV420P16BE:
2671 case PIX_FMT_YUV422P16BE:
2672 case PIX_FMT_YUV444P16BE:
2673 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A (gray+alpha) shares yuy2ToY_c with YUYV: both store luma in every
 * other byte starting at offset 0. */
2675 case PIX_FMT_YUYV422 :
2676 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2677 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2678 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2679 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2680 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2681 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2682 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2683 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2684 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2685 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2686 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2687 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2691 case PIX_FMT_BGR4_BYTE:
2692 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2693 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2694 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2695 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2696 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2697 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2698 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2699 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2700 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2701 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2702 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for formats that carry an alpha channel.
 * NOTE(review): Y400A reuses uyvyToY_c because its alpha sits at odd byte
 * offsets, like UYVY's luma — presumably intentional; verify. */
2705 switch (srcFormat) {
2707 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2709 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2710 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* ---- horizontal scaler and range conversion ---- */
/* 8-bit pipeline: plain hScale_c, optional fast-bilinear shortcuts. */
2714 if (c->scalingBpp == 8) {
2715 c->hScale = hScale_c;
2716 if (c->flags & SWS_FAST_BILINEAR) {
2717 c->hyscale_fast = hyscale_fast_c;
2718 c->hcscale_fast = hcscale_fast_c;
/* Range conversion only applies to YUV output (RGB output handles range
 * in the yuv2rgb tables). */
2721 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2723 c->lumConvertRange = lumRangeFromJpeg_c;
2724 c->chrConvertRange = chrRangeFromJpeg_c;
2726 c->lumConvertRange = lumRangeToJpeg_c;
2727 c->chrConvertRange = chrRangeToJpeg_c;
/* >8-bit pipeline: 16-bit horizontal scaler plus the 19<->15 bit and
 * 8->16 bit scaling helpers used by the high-depth code path. */
2731 c->hScale = hScale16_c;
2732 c->scale19To15Fw = scale19To15Fw_c;
2733 c->scale8To16Rv = scale8To16Rv_c;
2735 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2737 c->lumConvertRange = lumRangeFromJpeg16_c;
2738 c->chrConvertRange = chrRangeFromJpeg16_c;
2740 c->lumConvertRange = lumRangeToJpeg16_c;
2741 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray and mono formats have no chroma to scale horizontally. */
2746 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2747 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2748 c->needs_hcscale = 1;
2751 SwsFunc ff_getSwsFunc(SwsContext *c)
2753 sws_init_swScale_c(c);
2756 ff_sws_init_swScale_mmx(c);
2758 ff_sws_init_swScale_altivec(c);