2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients (BT.601-style), scaled to fixed point
 * by RGB2YUV_SHIFT.  Y terms use the 219-step limited luma range, U/V terms
 * the 224-step chroma range, both relative to full-range 8-bit RGB.
 * NOTE(review): every line in this extract starts with a stray integer
 * (leftover line number from a lossy extraction); compare with upstream
 * libswscale before editing. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither matrices used when reducing bit depth on output.
 * Naming convention: dither_AxB_N is an AxB pattern whose entries span a
 * dynamic range of roughly N steps. */
/* 2x2 pattern, 4 levels — used for the 6-bit green channel of RGB565. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 pattern, 8 levels — used for the 5-bit channels of RGB565/RGB555. */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 pattern, 16 levels — used for 4-bit-per-channel output (RGB444). */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 pattern, 32 levels — used for 3-bit channels (RGB8/BGR8 red+green). */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 pattern, ~73 levels — used for 2-bit channels (RGB8 blue, RGB4 green). */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 pattern, ~220 levels — used for 1-bit (monochrome) and RGB4 output.
 * NOTE(review): four tables named dither_8x8_220 follow; in the upstream
 * source they are alternatives selected by #if/#elif preprocessor lines
 * (different gamma assumptions) that this extraction appears to have
 * dropped — confirm against upstream before assuming all four compile. */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 pattern, 128 levels — used e.g. when dithering 9/10-bit down to 8. */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight bytes of 64 — a constant vector, presumably consumed by SIMD code
 * elsewhere (exported, hence the ff_ prefix) — TODO confirm users. */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/*
 * Vertical multi-tap filter writing 16-bit planar output: for each output
 * pixel, accumulate filterSize products of 32-bit intermediate samples and
 * coefficients, then shift down and store big- or little-endian.
 * NOTE(review): this extract is missing several lines (braces, the if/else
 * of the output_pixel macro, the #undef, loop-variable declarations) —
 * original line numbers at the start of each line are non-contiguous.
 */
198 static av_always_inline void
199 yuv2yuvX16_c_template(const int16_t *filter, int filterSize,
200 const int32_t **src, uint16_t *dest, int dstW,
201 int big_endian, int output_bits)
/* Store one pixel; the 0x8000 bias re-adds the constant subtracted from
 * val's initializer below so the final sample is back in unsigned range. */
203 #define output_pixel(pos, val) \
205 AV_WB16(pos, 0x8000 + av_clip_int16(val >> shift)); \
207 AV_WL16(pos, 0x8000 + av_clip_int16(val >> shift)); \
211 int shift = 15 + 16 - output_bits;
213 for (i = 0; i < dstW; i++) {
214 int val = 1 << (30-output_bits);
217 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
218 * filters (or anything with negative coeffs), the range can be slightly
219 * wider in both directions. To account for this overflow, we subtract
220 * a constant so it always fits in the signed range (assuming a
221 * reasonable filterSize), and re-add that at the end. */
223 for (j = 0; j < filterSize; j++)
224 val += src[j][i] * filter[j];
226 output_pixel(&dest[i], val);
/* output_pixel for 9/10-bit output: clip to output_bits unsigned and store
 * in the requested endianness (the else branch of the if is not visible in
 * this extract). */
231 #define output_pixel(pos, val) \
233 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
235 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Same vertical filter as the 16-bit variant above, but for 9/10-bit
 * output fed from 16-bit intermediates; the rounding constant and shift
 * are adjusted for the narrower intermediate precision. */
238 static av_always_inline void
239 yuv2yuvX10_c_template(const int16_t *filter, int filterSize,
240 const int16_t **src, uint16_t *dest, int dstW,
241 int big_endian, int output_bits)
244 int shift = 11 + 16 - output_bits;
246 for (i = 0; i < dstW; i++) {
247 int val = 1 << (26-output_bits);
250 for (j = 0; j < filterSize; j++)
251 val += src[j][i] * filter[j];
253 output_pixel(&dest[i], val);
/* Stamp out concrete yuv2yuvX<bits><BE|LE>_c entry points from the two
 * templates above.  The casts adapt the generic int16_t** signature to the
 * element type each template actually reads (int32_t for 16-bit). */
259 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
260 static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
261 const int16_t **src, uint16_t *dest, int dstW, \
262 const uint8_t *dither, int offset)\
264 yuv2yuvX_template_fn(filter, filterSize, (const typeX_t **) src, \
265 dest, dstW, is_be, bits); \
/* 9- and 10-bit use the 16-bit-intermediate template; 16-bit uses 32-bit. */
267 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
268 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
269 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
270 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
271 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
272 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
/* Vertical multi-tap filter writing 8-bit planar output.  The per-pixel
 * dither value (selected by (i+offset)&7) is pre-scaled by 12 so it sits at
 * the same fixed-point position as the filter accumulation; >>19 drops the
 * 12-bit coefficient precision plus the 7-bit sample headroom. */
274 static void yuv2yuvX_c(const int16_t *filter, int filterSize,
275 const int16_t **src, uint8_t *dest, int dstW,
276 const uint8_t *dither, int offset)
279 for (i=0; i<dstW; i++) {
280 int val = dither[(i + offset) & 7] << 12;
282 for (j=0; j<filterSize; j++)
283 val += src[j][i] * filter[j];
285 dest[i]= av_clip_uint8(val>>19);
/* 1-tap ("no vertical scaling") case: each 15-bit intermediate sample is
 * dithered and shifted straight down to 8 bits. */
289 static void yuv2yuv1_c(const int16_t *src, uint8_t *dest, int dstW,
290 const uint8_t *dither, int offset)
293 for (i=0; i<dstW; i++) {
294 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
295 dest[i]= av_clip_uint8(val);
/* Vertical chroma filter for semi-planar output: writes interleaved U,V
 * pairs.  The first loop handles NV12 (U first); the second loop — in the
 * upstream source this is the else branch, presumably for NV21 — stores V
 * first.  The +3 offset on the V dither decorrelates the two channels. */
299 static void yuv2nv12X_chroma_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
300 const int16_t **chrUSrc, const int16_t **chrVSrc,
301 uint8_t *dest, int chrDstW)
303 enum PixelFormat dstFormat = c->dstFormat;
304 const uint8_t *chrDither = c->chrDither8;
307 if (dstFormat == PIX_FMT_NV12)
308 for (i=0; i<chrDstW; i++) {
309 int u = chrDither[i & 7] << 12;
310 int v = chrDither[(i + 3) & 7] << 12;
312 for (j=0; j<chrFilterSize; j++) {
313 u += chrUSrc[j][i] * chrFilter[j];
314 v += chrVSrc[j][i] * chrFilter[j];
317 dest[2*i]= av_clip_uint8(u>>19);
318 dest[2*i+1]= av_clip_uint8(v>>19);
/* NV21-style ordering: V in the even byte, U in the odd byte. */
321 for (i=0; i<chrDstW; i++) {
322 int u = chrDither[i & 7] << 12;
323 int v = chrDither[(i + 3) & 7] << 12;
325 for (j=0; j<chrFilterSize; j++) {
326 u += chrUSrc[j][i] * chrFilter[j];
327 v += chrVSrc[j][i] * chrFilter[j];
330 dest[2*i]= av_clip_uint8(v>>19);
331 dest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel for 16-bit grayscale: endianness chosen by the compile-time
 * target format (the store statements and else branch are not visible in
 * this extract). */
335 #define output_pixel(pos, val) \
336 if (target == PIX_FMT_GRAY16BE) { \
/* Multi-tap vertical filter for 16-bit grayscale: two luma pixels per
 * iteration; chroma/alpha inputs are accepted for signature compatibility
 * but only luma is used.  The (Y1|Y2)&0x10000 test cheaply detects any
 * value outside 0..65535 before paying for a clip. */
342 static av_always_inline void
343 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
344 const int32_t **lumSrc, int lumFilterSize,
345 const int16_t *chrFilter, const int32_t **chrUSrc,
346 const int32_t **chrVSrc, int chrFilterSize,
347 const int32_t **alpSrc, uint16_t *dest, int dstW,
348 int y, enum PixelFormat target)
352 for (i = 0; i < (dstW >> 1); i++) {
357 for (j = 0; j < lumFilterSize; j++) {
358 Y1 += lumSrc[j][i * 2] * lumFilter[j];
359 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
363 if ((Y1 | Y2) & 0x10000) {
364 Y1 = av_clip_uint16(Y1);
365 Y2 = av_clip_uint16(Y2);
367 output_pixel(&dest[i * 2 + 0], Y1);
368 output_pixel(&dest[i * 2 + 1], Y2);
/* 2-tap (bilinear) vertical blend for 16-bit grayscale: interpolate two
 * source rows with yalpha/(4096-yalpha) weights; >>15 removes the 12-bit
 * weight plus the intermediate headroom. */
372 static av_always_inline void
373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374 const int32_t *ubuf[2], const int32_t *vbuf[2],
375 const int32_t *abuf[2], uint16_t *dest, int dstW,
376 int yalpha, int uvalpha, int y,
377 enum PixelFormat target)
379 int yalpha1 = 4095 - yalpha;
381 const int32_t *buf0 = buf[0], *buf1 = buf[1];
383 for (i = 0; i < (dstW >> 1); i++) {
384 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387 output_pixel(&dest[i * 2 + 0], Y1);
388 output_pixel(&dest[i * 2 + 1], Y2);
/* 1-tap case for 16-bit grayscale: a plain <<1 rescales the intermediate
 * sample to the 16-bit output range (no interpolation, no dither). */
392 static av_always_inline void
393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394 const int32_t *ubuf[2], const int32_t *vbuf[2],
395 const int32_t *abuf0, uint16_t *dest, int dstW,
396 int uvalpha, int y, enum PixelFormat target)
400 for (i = 0; i < (dstW >> 1); i++) {
401 int Y1 = buf0[i * 2 ] << 1;
402 int Y2 = buf0[i * 2 + 1] << 1;
404 output_pixel(&dest[i * 2 + 0], Y1);
405 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three standard output-function flavors (_X multi-tap,
 * _2 bilinear, _1 single-row) for a >8-bit packed format by wrapping a
 * *_c_template with the int16_t**->int32_t** pointer casts the generic
 * function-pointer signature requires.  Comments cannot be placed inside
 * the continuation lines below without breaking the macro. */
411 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
412 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
413 const int16_t **_lumSrc, int lumFilterSize, \
414 const int16_t *chrFilter, const int16_t **_chrUSrc, \
415 const int16_t **_chrVSrc, int chrFilterSize, \
416 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
419 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
420 **chrUSrc = (const int32_t **) _chrUSrc, \
421 **chrVSrc = (const int32_t **) _chrVSrc, \
422 **alpSrc = (const int32_t **) _alpSrc; \
423 uint16_t *dest = (uint16_t *) _dest; \
424 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
425 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
426 alpSrc, dest, dstW, y, fmt); \
429 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
430 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
431 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
432 int yalpha, int uvalpha, int y) \
434 const int32_t **buf = (const int32_t **) _buf, \
435 **ubuf = (const int32_t **) _ubuf, \
436 **vbuf = (const int32_t **) _vbuf, \
437 **abuf = (const int32_t **) _abuf; \
438 uint16_t *dest = (uint16_t *) _dest; \
439 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
440 dest, dstW, yalpha, uvalpha, y, fmt); \
443 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
444 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
445 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
446 int uvalpha, int y) \
448 const int32_t *buf0 = (const int32_t *) _buf0, \
449 **ubuf = (const int32_t **) _ubuf, \
450 **vbuf = (const int32_t **) _vbuf, \
451 *abuf0 = (const int32_t *) _abuf0; \
452 uint16_t *dest = (uint16_t *) _dest; \
453 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
454 dstW, uvalpha, y, fmt); \
/* Concrete 16-bit gray writers, both endiannesses. */
457 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
458 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel for 1-bit output: stores the accumulated 8-pixel bit group,
 * with MONOBLACK/MONOWHITE polarity chosen at compile time (the store
 * statements are missing from this extract). */
460 #define output_pixel(pos, acc) \
461 if (target == PIX_FMT_MONOBLACK) { \
/* Multi-tap vertical filter producing 1-bit-per-pixel output.  Each luma
 * value is dithered via dither_8x8_220 and thresholded through the g[]
 * lookup (green table at neutral chroma); bits are shifted into acc and a
 * full byte is flushed every 8 pixels (flush not visible here). */
467 static av_always_inline void
468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469 const int16_t **lumSrc, int lumFilterSize,
470 const int16_t *chrFilter, const int16_t **chrUSrc,
471 const int16_t **chrVSrc, int chrFilterSize,
472 const int16_t **alpSrc, uint8_t *dest, int dstW,
473 int y, enum PixelFormat target)
475 const uint8_t * const d128=dither_8x8_220[y&7];
476 uint8_t *g = c->table_gU[128] + c->table_gV[128];
480 for (i = 0; i < dstW - 1; i += 2) {
485 for (j = 0; j < lumFilterSize; j++) {
486 Y1 += lumSrc[j][i] * lumFilter[j];
487 Y2 += lumSrc[j][i+1] * lumFilter[j];
491 if ((Y1 | Y2) & 0x100) {
492 Y1 = av_clip_uint8(Y1);
493 Y2 = av_clip_uint8(Y2);
495 acc += acc + g[Y1 + d128[(i + 0) & 7]];
496 acc += acc + g[Y2 + d128[(i + 1) & 7]];
498 output_pixel(*dest++, acc);
/* Bilinear (2-row) vertical blend to 1-bit output: the inner 8-pixel group
 * is fully unrolled, packing one output byte per iteration. */
503 static av_always_inline void
504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505 const int16_t *ubuf[2], const int16_t *vbuf[2],
506 const int16_t *abuf[2], uint8_t *dest, int dstW,
507 int yalpha, int uvalpha, int y,
508 enum PixelFormat target)
510 const int16_t *buf0 = buf[0], *buf1 = buf[1];
511 const uint8_t * const d128 = dither_8x8_220[y & 7];
512 uint8_t *g = c->table_gU[128] + c->table_gV[128];
513 int yalpha1 = 4095 - yalpha;
516 for (i = 0; i < dstW - 7; i += 8) {
517 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
518 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525 output_pixel(*dest++, acc);
529 static av_always_inline void
530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531 const int16_t *ubuf[2], const int16_t *vbuf[2],
532 const int16_t *abuf0, uint8_t *dest, int dstW,
533 int uvalpha, int y, enum PixelFormat target)
535 const uint8_t * const d128 = dither_8x8_220[y & 7];
536 uint8_t *g = c->table_gU[128] + c->table_gV[128];
539 for (i = 0; i < dstW - 7; i += 8) {
540 int acc = g[(buf0[i ] >> 7) + d128[0]];
541 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548 output_pixel(*dest++, acc);
/* Like YUV2PACKED16WRAPPER but for 8-bit packed formats: no pointer-type
 * casts are needed, the wrappers just forward to the template with the
 * compile-time format constant. */
554 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
555 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
556 const int16_t **lumSrc, int lumFilterSize, \
557 const int16_t *chrFilter, const int16_t **chrUSrc, \
558 const int16_t **chrVSrc, int chrFilterSize, \
559 const int16_t **alpSrc, uint8_t *dest, int dstW, \
562 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
563 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
564 alpSrc, dest, dstW, y, fmt); \
567 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
568 const int16_t *ubuf[2], const int16_t *vbuf[2], \
569 const int16_t *abuf[2], uint8_t *dest, int dstW, \
570 int yalpha, int uvalpha, int y) \
572 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
573 dest, dstW, yalpha, uvalpha, y, fmt); \
576 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
577 const int16_t *ubuf[2], const int16_t *vbuf[2], \
578 const int16_t *abuf0, uint8_t *dest, int dstW, \
579 int uvalpha, int y) \
581 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
582 abuf0, dest, dstW, uvalpha, \
/* Concrete monochrome writers, both polarities. */
586 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
587 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels for packed 4:2:2: YUYV puts luma in the even bytes, UYVY
 * (the else branch, partially missing from this extract) in the odd ones. */
589 #define output_pixels(pos, Y1, U, Y2, V) \
590 if (target == PIX_FMT_YUYV422) { \
591 dest[pos + 0] = Y1; \
593 dest[pos + 2] = Y2; \
597 dest[pos + 1] = Y1; \
599 dest[pos + 3] = Y2; \
/* Multi-tap vertical filter for packed 4:2:2: two luma samples plus one
 * U,V pair per iteration; the single OR-test catches any of the four
 * values overflowing 8 bits before clipping. */
602 static av_always_inline void
603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604 const int16_t **lumSrc, int lumFilterSize,
605 const int16_t *chrFilter, const int16_t **chrUSrc,
606 const int16_t **chrVSrc, int chrFilterSize,
607 const int16_t **alpSrc, uint8_t *dest, int dstW,
608 int y, enum PixelFormat target)
612 for (i = 0; i < (dstW >> 1); i++) {
619 for (j = 0; j < lumFilterSize; j++) {
620 Y1 += lumSrc[j][i * 2] * lumFilter[j];
621 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623 for (j = 0; j < chrFilterSize; j++) {
624 U += chrUSrc[j][i] * chrFilter[j];
625 V += chrVSrc[j][i] * chrFilter[j];
631 if ((Y1 | Y2 | U | V) & 0x100) {
632 Y1 = av_clip_uint8(Y1);
633 Y2 = av_clip_uint8(Y2);
634 U = av_clip_uint8(U);
635 V = av_clip_uint8(V);
637 output_pixels(4*i, Y1, U, Y2, V);
/* Bilinear (2-row) vertical blend for packed 4:2:2; luma and chroma use
 * independent blend weights (yalpha vs. uvalpha). */
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643 const int16_t *ubuf[2], const int16_t *vbuf[2],
644 const int16_t *abuf[2], uint8_t *dest, int dstW,
645 int yalpha, int uvalpha, int y,
646 enum PixelFormat target)
648 const int16_t *buf0 = buf[0], *buf1 = buf[1],
649 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651 int yalpha1 = 4095 - yalpha;
652 int uvalpha1 = 4095 - uvalpha;
655 for (i = 0; i < (dstW >> 1); i++) {
656 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
661 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-row case for packed 4:2:2.  uvalpha < 2048 means the chroma phase
 * is close enough to one source row to use it directly (ubuf1/vbuf1);
 * otherwise the two chroma rows are averaged (>>8 = >>7 rescale plus /2). */
665 static av_always_inline void
666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667 const int16_t *ubuf[2], const int16_t *vbuf[2],
668 const int16_t *abuf0, uint8_t *dest, int dstW,
669 int uvalpha, int y, enum PixelFormat target)
671 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675 if (uvalpha < 2048) {
676 for (i = 0; i < (dstW >> 1); i++) {
677 int Y1 = buf0[i * 2] >> 7;
678 int Y2 = buf0[i * 2 + 1] >> 7;
679 int U = ubuf1[i] >> 7;
680 int V = vbuf1[i] >> 7;
682 output_pixels(i * 4, Y1, U, Y2, V);
685 for (i = 0; i < (dstW >> 1); i++) {
686 int Y1 = buf0[i * 2] >> 7;
687 int Y2 = buf0[i * 2 + 1] >> 7;
688 int U = (ubuf0[i] + ubuf1[i]) >> 8;
689 int V = (vbuf0[i] + vbuf1[i]) >> 8;
691 output_pixels(i * 4, Y1, U, Y2, V);
/* Concrete 4:2:2 writers for both byte orders. */
698 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
699 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B/B_R select the channel order at compile time so one template serves
 * both RGB48 and BGR48; output_pixel stores 16-bit components in the
 * target's endianness (store statements missing from this extract). */
701 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
702 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
703 #define output_pixel(pos, val) \
704 if (isBE(target)) { \
/* Multi-tap vertical filter converting to 48-bit RGB/BGR: accumulate Y/U/V
 * at high precision, apply the context's YUV->RGB coefficients, then clip
 * each 30-bit channel sum and shift down to 16 bits. */
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712 const int32_t **lumSrc, int lumFilterSize,
713 const int16_t *chrFilter, const int32_t **chrUSrc,
714 const int32_t **chrVSrc, int chrFilterSize,
715 const int32_t **alpSrc, uint16_t *dest, int dstW,
716 int y, enum PixelFormat target)
720 for (i = 0; i < (dstW >> 1); i++) {
724 int U = -128 << 23; // 19
728 for (j = 0; j < lumFilterSize; j++) {
729 Y1 += lumSrc[j][i * 2] * lumFilter[j];
730 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732 for (j = 0; j < chrFilterSize; j++) {
733 U += chrUSrc[j][i] * chrFilter[j];
734 V += chrVSrc[j][i] * chrFilter[j];
737 // 8bit: 12+15=27; 16-bit: 12+19=31
743 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
744 Y1 -= c->yuv2rgb_y_offset;
745 Y2 -= c->yuv2rgb_y_offset;
746 Y1 *= c->yuv2rgb_y_coeff;
747 Y2 *= c->yuv2rgb_y_coeff;
750 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
752 R = V * c->yuv2rgb_v2r_coeff;
753 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
754 B = U * c->yuv2rgb_u2b_coeff;
756 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
757 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
758 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
759 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
760 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
761 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
762 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Bilinear (2-row) vertical blend to 48-bit RGB/BGR; -128<<23 centers the
 * chroma before the blended sum is shifted down by the blend precision. */
767 static av_always_inline void
768 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
769 const int32_t *ubuf[2], const int32_t *vbuf[2],
770 const int32_t *abuf[2], uint16_t *dest, int dstW,
771 int yalpha, int uvalpha, int y,
772 enum PixelFormat target)
774 const int32_t *buf0 = buf[0], *buf1 = buf[1],
775 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
776 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
777 int yalpha1 = 4095 - yalpha;
778 int uvalpha1 = 4095 - uvalpha;
781 for (i = 0; i < (dstW >> 1); i++) {
782 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
783 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
784 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
785 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
788 Y1 -= c->yuv2rgb_y_offset;
789 Y2 -= c->yuv2rgb_y_offset;
790 Y1 *= c->yuv2rgb_y_coeff;
791 Y2 *= c->yuv2rgb_y_coeff;
795 R = V * c->yuv2rgb_v2r_coeff;
796 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
797 B = U * c->yuv2rgb_u2b_coeff;
799 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
800 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
801 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
802 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
803 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
804 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Single-row case for 48-bit RGB/BGR.  uvalpha < 2048: use one chroma row
 * (>>2 rescale); otherwise average both rows (>>3).  The -128<<11 centers
 * chroma at the intermediate precision before rescaling. */
809 static av_always_inline void
810 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
811 const int32_t *ubuf[2], const int32_t *vbuf[2],
812 const int32_t *abuf0, uint16_t *dest, int dstW,
813 int uvalpha, int y, enum PixelFormat target)
815 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
816 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
819 if (uvalpha < 2048) {
820 for (i = 0; i < (dstW >> 1); i++) {
821 int Y1 = (buf0[i * 2] ) >> 2;
822 int Y2 = (buf0[i * 2 + 1]) >> 2;
823 int U = (ubuf0[i] + (-128 << 11)) >> 2;
824 int V = (vbuf0[i] + (-128 << 11)) >> 2;
827 Y1 -= c->yuv2rgb_y_offset;
828 Y2 -= c->yuv2rgb_y_offset;
829 Y1 *= c->yuv2rgb_y_coeff;
830 Y2 *= c->yuv2rgb_y_coeff;
834 R = V * c->yuv2rgb_v2r_coeff;
835 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
836 B = U * c->yuv2rgb_u2b_coeff;
838 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
839 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
840 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
841 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
842 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
843 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Averaged-chroma branch (upstream else). */
847 for (i = 0; i < (dstW >> 1); i++) {
848 int Y1 = (buf0[i * 2] ) >> 2;
849 int Y2 = (buf0[i * 2 + 1]) >> 2;
850 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
851 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
854 Y1 -= c->yuv2rgb_y_offset;
855 Y2 -= c->yuv2rgb_y_offset;
856 Y1 *= c->yuv2rgb_y_coeff;
857 Y2 *= c->yuv2rgb_y_coeff;
861 R = V * c->yuv2rgb_v2r_coeff;
862 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
863 B = U * c->yuv2rgb_u2b_coeff;
865 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
866 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
867 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
868 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
869 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
870 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Concrete 48-bit writers: RGB/BGR x BE/LE. */
880 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
881 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
882 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
883 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/*
 * Store two horizontally adjacent RGB pixels (Y1,Y2 sharing U,V) using the
 * context's precomputed per-channel lookup tables (_r/_g/_b), dispatching
 * on the compile-time target format: 32-bit with optional alpha, 24-bit,
 * 15/16/12-bit with 2x2 or 4x4 ordered dither, and 8/4-bit with 8x8 dither.
 * target is a compile-time constant in every instantiation, so the format
 * branches fold away.  NOTE(review): several lines (blank/brace/else lines)
 * are missing from this extract — the original line numbers jump.
 */
885 static av_always_inline void
886 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
887 int U, int V, int A1, int A2,
888 const void *_r, const void *_g, const void *_b, int y,
889 enum PixelFormat target, int hasAlpha)
891 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
892 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
893 uint32_t *dest = (uint32_t *) _dest;
894 const uint32_t *r = (const uint32_t *) _r;
895 const uint32_t *g = (const uint32_t *) _g;
896 const uint32_t *b = (const uint32_t *) _b;
/* Alpha lands in byte 0 or byte 3 depending on the *_1 variants. */
899 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
901 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
902 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
905 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
907 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
908 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
910 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
911 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
914 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
915 uint8_t *dest = (uint8_t *) _dest;
916 const uint8_t *r = (const uint8_t *) _r;
917 const uint8_t *g = (const uint8_t *) _g;
918 const uint8_t *b = (const uint8_t *) _b;
/* r_b/b_r swap the outer channels so one store sequence serves both. */
920 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
921 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
922 dest[i * 6 + 0] = r_b[Y1];
923 dest[i * 6 + 1] = g[Y1];
924 dest[i * 6 + 2] = b_r[Y1];
925 dest[i * 6 + 3] = r_b[Y2];
926 dest[i * 6 + 4] = g[Y2];
927 dest[i * 6 + 5] = b_r[Y2];
930 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
931 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
932 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
933 uint16_t *dest = (uint16_t *) _dest;
934 const uint16_t *r = (const uint16_t *) _r;
935 const uint16_t *g = (const uint16_t *) _g;
936 const uint16_t *b = (const uint16_t *) _b;
937 int dr1, dg1, db1, dr2, dg2, db2;
/* Pick dither offsets per channel; green gets the finer table for 565
 * (6 bits), and the blue row is inverted to decorrelate from red. */
939 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
940 dr1 = dither_2x2_8[ y & 1 ][0];
941 dg1 = dither_2x2_4[ y & 1 ][0];
942 db1 = dither_2x2_8[(y & 1) ^ 1][0];
943 dr2 = dither_2x2_8[ y & 1 ][1];
944 dg2 = dither_2x2_4[ y & 1 ][1];
945 db2 = dither_2x2_8[(y & 1) ^ 1][1];
946 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
947 dr1 = dither_2x2_8[ y & 1 ][0];
948 dg1 = dither_2x2_8[ y & 1 ][1];
949 db1 = dither_2x2_8[(y & 1) ^ 1][0];
950 dr2 = dither_2x2_8[ y & 1 ][1];
951 dg2 = dither_2x2_8[ y & 1 ][0];
952 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* RGB444/BGR444 (upstream else branch). */
954 dr1 = dither_4x4_16[ y & 3 ][0];
955 dg1 = dither_4x4_16[ y & 3 ][1];
956 db1 = dither_4x4_16[(y & 3) ^ 3][0];
957 dr2 = dither_4x4_16[ y & 3 ][1];
958 dg2 = dither_4x4_16[ y & 3 ][0];
959 db2 = dither_4x4_16[(y & 3) ^ 3][1];
962 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
963 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
964 } else /* 8/4-bit */ {
965 uint8_t *dest = (uint8_t *) _dest;
966 const uint8_t *r = (const uint8_t *) _r;
967 const uint8_t *g = (const uint8_t *) _g;
968 const uint8_t *b = (const uint8_t *) _b;
969 int dr1, dg1, db1, dr2, dg2, db2;
971 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
972 const uint8_t * const d64 = dither_8x8_73[y & 7];
973 const uint8_t * const d32 = dither_8x8_32[y & 7];
974 dr1 = dg1 = d32[(i * 2 + 0) & 7];
975 db1 = d64[(i * 2 + 0) & 7];
976 dr2 = dg2 = d32[(i * 2 + 1) & 7];
977 db2 = d64[(i * 2 + 1) & 7];
/* 4-bit formats (upstream else branch): coarser dither tables. */
979 const uint8_t * const d64 = dither_8x8_73 [y & 7];
980 const uint8_t * const d128 = dither_8x8_220[y & 7];
981 dr1 = db1 = d128[(i * 2 + 0) & 7];
982 dg1 = d64[(i * 2 + 0) & 7];
983 dr2 = db2 = d128[(i * 2 + 1) & 7];
984 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 packs two pixels per byte (low then high nibble). */
987 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
988 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
989 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
991 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
992 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Multi-tap vertical filter for table-driven RGB output: accumulate Y/U/V
 * (and optionally alpha) per 2-pixel group, clip to 8 bits, look up the
 * per-channel tables, and hand off to yuv2rgb_write. */
997 static av_always_inline void
998 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
999 const int16_t **lumSrc, int lumFilterSize,
1000 const int16_t *chrFilter, const int16_t **chrUSrc,
1001 const int16_t **chrVSrc, int chrFilterSize,
1002 const int16_t **alpSrc, uint8_t *dest, int dstW,
1003 int y, enum PixelFormat target, int hasAlpha)
1007 for (i = 0; i < (dstW >> 1); i++) {
1013 int av_unused A1, A2;
1014 const void *r, *g, *b;
1016 for (j = 0; j < lumFilterSize; j++) {
1017 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1018 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1020 for (j = 0; j < chrFilterSize; j++) {
1021 U += chrUSrc[j][i] * chrFilter[j];
1022 V += chrVSrc[j][i] * chrFilter[j];
1028 if ((Y1 | Y2 | U | V) & 0x100) {
1029 Y1 = av_clip_uint8(Y1);
1030 Y2 = av_clip_uint8(Y2);
1031 U = av_clip_uint8(U);
1032 V = av_clip_uint8(V);
/* Alpha path — in upstream this is guarded by hasAlpha. */
1037 for (j = 0; j < lumFilterSize; j++) {
1038 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1039 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1043 if ((A1 | A2) & 0x100) {
1044 A1 = av_clip_uint8(A1);
1045 A2 = av_clip_uint8(A2);
1049 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1051 g = (c->table_gU[U] + c->table_gV[V]);
1054 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1055 r, g, b, y, target, hasAlpha);
/* Bilinear (2-row) vertical blend for table-driven RGB output, with
 * optional alpha blended by the luma weights. */
1059 static av_always_inline void
1060 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1061 const int16_t *ubuf[2], const int16_t *vbuf[2],
1062 const int16_t *abuf[2], uint8_t *dest, int dstW,
1063 int yalpha, int uvalpha, int y,
1064 enum PixelFormat target, int hasAlpha)
1066 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1067 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1068 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1069 *abuf0 = hasAlpha ? abuf[0] : NULL,
1070 *abuf1 = hasAlpha ? abuf[1] : NULL;
1071 int yalpha1 = 4095 - yalpha;
1072 int uvalpha1 = 4095 - uvalpha;
1075 for (i = 0; i < (dstW >> 1); i++) {
1076 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1077 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1078 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1079 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1081 const void *r = c->table_rV[V],
1082 *g = (c->table_gU[U] + c->table_gV[V]),
1083 *b = c->table_bU[U];
1086 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1087 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1090 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1091 r, g, b, y, target, hasAlpha);
/* Emit one packed-RGB line from a single pre-scaled input line (no vertical
 * blending of luma).  When uvalpha < 2048 the chroma of one source line is
 * used directly; otherwise the two chroma lines are averaged
 * ((u0 + u1) >> 8 on the 15-bit intermediates).
 * NOTE(review): the unweighted path reads ubuf1[]/vbuf1[] rather than
 * ubuf0[]/vbuf0[] — verify against upstream whether this is the intended
 * chroma line for the single-line case. */
1095 static av_always_inline void
1096 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1097 const int16_t *ubuf[2], const int16_t *vbuf[2],
1098 const int16_t *abuf0, uint8_t *dest, int dstW,
1099 int uvalpha, int y, enum PixelFormat target,
1102 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1103 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1106 if (uvalpha < 2048) {
1107 for (i = 0; i < (dstW >> 1); i++) {
1108 int Y1 = buf0[i * 2] >> 7;
1109 int Y2 = buf0[i * 2 + 1] >> 7;
1110 int U = ubuf1[i] >> 7;
1111 int V = vbuf1[i] >> 7;
1113 const void *r = c->table_rV[V],
1114 *g = (c->table_gU[U] + c->table_gV[V]),
1115 *b = c->table_bU[U];
1118 A1 = abuf0[i * 2 ] >> 7;
1119 A2 = abuf0[i * 2 + 1] >> 7;
1122 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1123 r, g, b, y, target, hasAlpha);
/* uvalpha >= 2048: average the two chroma lines */
1126 for (i = 0; i < (dstW >> 1); i++) {
1127 int Y1 = buf0[i * 2] >> 7;
1128 int Y2 = buf0[i * 2 + 1] >> 7;
1129 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1130 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1132 const void *r = c->table_rV[V],
1133 *g = (c->table_gU[U] + c->table_gV[V]),
1134 *b = c->table_bU[U];
1137 A1 = abuf0[i * 2 ] >> 7;
1138 A2 = abuf0[i * 2 + 1] >> 7;
1141 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1142 r, g, b, y, target, hasAlpha);
/* YUV2RGBWRAPPERX: instantiate only the _X (multi-tap vertical filter)
 * output function for a given pixel format, baking `fmt` and `hasAlpha`
 * into the _c_template call so the compiler can specialize it. */
1147 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1148 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1149 const int16_t **lumSrc, int lumFilterSize, \
1150 const int16_t *chrFilter, const int16_t **chrUSrc, \
1151 const int16_t **chrVSrc, int chrFilterSize, \
1152 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1155 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1156 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1157 alpSrc, dest, dstW, y, fmt, hasAlpha); \
/* YUV2RGBWRAPPER: instantiate all three variants (_X via the macro above,
 * _2 for two-line blending, _1 for single-line output) for one format. */
1159 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1160 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1161 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1162 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1163 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1164 int yalpha, int uvalpha, int y) \
1166 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1167 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1170 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1171 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1172 const int16_t *abuf0, uint8_t *dest, int dstW, \
1173 int uvalpha, int y) \
1175 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1176 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiations of the yuv2rgb output functions.  The 32-bit variants come
 * in three flavors: runtime alpha check (CONFIG_SWSCALE_ALPHA && c->alpPixBuf),
 * always-alpha (a32*) and never-alpha (x32*); the a32/x32 pair sits inside a
 * CONFIG_SWSCALE_ALPHA / !CONFIG_SMALL region whose #if/#else lines are not
 * visible in this chunk. */
1180 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1181 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1183 #if CONFIG_SWSCALE_ALPHA
1184 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1185 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1187 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1188 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1190 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1191 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1192 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1193 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1194 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1195 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1196 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1197 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-horizontal-chroma output: one chroma sample per output pixel (no 2:1
 * subsampling), using the context's integer coefficients (yuv2rgb_*_coeff)
 * instead of lookup tables.  `step` is the per-pixel byte stride: 3 for
 * 24-bit RGB/BGR, 4 for the 32-bit formats.  R/G/B are kept at extended
 * precision and clipped to 30 bits only on overflow; the per-format byte
 * stores between the dest[0]/dest[3] alpha writes are elided in this chunk. */
1199 static av_always_inline void
1200 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1201 const int16_t **lumSrc, int lumFilterSize,
1202 const int16_t *chrFilter, const int16_t **chrUSrc,
1203 const int16_t **chrVSrc, int chrFilterSize,
1204 const int16_t **alpSrc, uint8_t *dest,
1205 int dstW, int y, enum PixelFormat target, int hasAlpha)
1208 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1210 for (i = 0; i < dstW; i++) {
/* vertical luma filtering */
1218 for (j = 0; j < lumFilterSize; j++) {
1219 Y += lumSrc[j][i] * lumFilter[j];
/* vertical chroma filtering — one U/V pair per output pixel */
1221 for (j = 0; j < chrFilterSize; j++) {
1222 U += chrUSrc[j][i] * chrFilter[j];
1223 V += chrVSrc[j][i] * chrFilter[j];
1230 for (j = 0; j < lumFilterSize; j++) {
1231 A += alpSrc[j][i] * lumFilter[j];
1235 A = av_clip_uint8(A);
/* convert to RGB using the per-context integer coefficient matrix */
1237 Y -= c->yuv2rgb_y_offset;
1238 Y *= c->yuv2rgb_y_coeff;
1240 R = Y + V*c->yuv2rgb_v2r_coeff;
1241 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1242 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only on overflow past 30 bits (cheap combined test) */
1243 if ((R | G | B) & 0xC0000000) {
1244 R = av_clip_uintp2(R, 30);
1245 G = av_clip_uintp2(G, 30);
1246 B = av_clip_uintp2(B, 30);
/* alpha goes to byte 0 (ARGB/ABGR) or byte 3 (RGBA/BGRA); 255 = opaque */
1251 dest[0] = hasAlpha ? A : 255;
1265 dest[3] = hasAlpha ? A : 255;
1268 dest[0] = hasAlpha ? A : 255;
1283 dest[3] = hasAlpha ? A : 255;
/* Instantiations of the full-chroma (_X only) output functions: runtime
 * alpha check first, then always-alpha / never-alpha specializations under
 * CONFIG_SWSCALE_ALPHA (surrounding #if/#else lines elided in this chunk),
 * and the 24-bit formats, which never carry alpha. */
1291 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1292 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1293 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1294 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1296 #if CONFIG_SWSCALE_ALPHA
1297 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1298 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1299 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1300 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1303 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1305 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1307 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1308 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill a rectangular region of a plane with a constant byte value,
 * starting at row `y` (offset computed as plane + stride*y) and writing
 * `width` bytes per row for `height` rows. */
1310 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1311 int width, int height,
1315 uint8_t *ptr = plane + stride*y;
1316 for (i=0; i<height; i++) {
1317 memset(ptr, val, width);
/* input_pixel: read one 16-bit component with the source format's
 * endianness.  The r/b macros swap the red/blue roles for the BGR48
 * formats so the same template body serves both RGB48 and BGR48. */
1322 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1324 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1325 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Convert one line of 48-bit RGB/BGR (16 bits per component) to 16-bit
 * luma using the BT.601-derived RY/GY/BY coefficients with rounding. */
1327 static av_always_inline void
1328 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1329 enum PixelFormat origin)
1332 for (i = 0; i < width; i++) {
1333 unsigned int r_b = input_pixel(&src[i*3+0]);
1334 unsigned int g = input_pixel(&src[i*3+1]);
1335 unsigned int b_r = input_pixel(&src[i*3+2]);
1337 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert one line of 48-bit RGB/BGR to 16-bit U and V planes, one chroma
 * pair per input pixel (no horizontal decimation).  Only src1 is read;
 * src2 is part of the common converter signature. */
1341 static av_always_inline void
1342 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1343 const uint16_t *src1, const uint16_t *src2,
1344 int width, enum PixelFormat origin)
1348 for (i = 0; i < width; i++) {
1349 int r_b = input_pixel(&src1[i*3+0]);
1350 int g = input_pixel(&src1[i*3+1]);
1351 int b_r = input_pixel(&src1[i*3+2]);
1353 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1354 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same as rgb48ToUV_c_template but with 2:1 horizontal chroma decimation:
 * each output U/V sample is computed from the rounded average of two
 * adjacent input pixels. */
1358 static av_always_inline void
1359 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1360 const uint16_t *src1, const uint16_t *src2,
1361 int width, enum PixelFormat origin)
1365 for (i = 0; i < width; i++) {
1366 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1367 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1368 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1370 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1371 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* rgb48funcs: generate the uint8_t*-typed wrappers (ToY / ToUV / ToUV_half)
 * that the converter tables expect, casting to uint16_t* and forwarding to
 * the templates above with the concrete pixel format baked in. */
1379 #define rgb48funcs(pattern, BE_LE, origin) \
1380 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1381 int width, uint32_t *unused) \
1383 const uint16_t *src = (const uint16_t *) _src; \
1384 uint16_t *dst = (uint16_t *) _dst; \
1385 rgb48ToY_c_template(dst, src, width, origin); \
1388 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1389 const uint8_t *_src1, const uint8_t *_src2, \
1390 int width, uint32_t *unused) \
1392 const uint16_t *src1 = (const uint16_t *) _src1, \
1393 *src2 = (const uint16_t *) _src2; \
1394 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1395 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1398 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1399 const uint8_t *_src1, const uint8_t *_src2, \
1400 int width, uint32_t *unused) \
1402 const uint16_t *src1 = (const uint16_t *) _src1, \
1403 *src2 = (const uint16_t *) _src2; \
1404 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1405 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* one instantiation per RGB48/BGR48 endianness variant */
1408 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1409 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1410 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1411 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* input_pixel (redefined for the 16/32-bit family): native 32-bit aligned
 * read for the 4-byte-per-pixel formats, endianness-aware 16-bit read for
 * the 15/16-bit packed formats. */
1413 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1414 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1415 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed-RGB (15/16/32 bpp) to 8-bit luma converter.  The shr/shg/
 * shb + mask parameters extract each component; shp pre-shifts the whole
 * pixel (for the _1 formats); rsh/gsh/bsh scale the coefficients so that
 * the final >> S yields 8 bits.  All parameters are compile-time constants
 * at each instantiation site, so the compiler specializes the body. */
1417 static av_always_inline void
1418 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1419 int width, enum PixelFormat origin,
1420 int shr, int shg, int shb, int shp,
1421 int maskr, int maskg, int maskb,
1422 int rsh, int gsh, int bsh, int S)
1424 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1425 rnd = 33 << (S - 1);
1428 for (i = 0; i < width; i++) {
1429 int px = input_pixel(i) >> shp;
1430 int b = (px & maskb) >> shb;
1431 int g = (px & maskg) >> shg;
1432 int r = (px & maskr) >> shr;
1434 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Generic packed-RGB to 8-bit U/V converter, one chroma pair per pixel.
 * Same extraction parameters as rgb16_32ToY_c_template; the 257<<(S-1)
 * rounding constant also folds in the +128 chroma bias. */
1438 static av_always_inline void
1439 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1440 const uint8_t *src, int width,
1441 enum PixelFormat origin,
1442 int shr, int shg, int shb, int shp,
1443 int maskr, int maskg, int maskb,
1444 int rsh, int gsh, int bsh, int S)
1446 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1447 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1448 rnd = 257 << (S - 1);
1451 for (i = 0; i < width; i++) {
1452 int px = input_pixel(i) >> shp;
1453 int b = (px & maskb) >> shb;
1454 int g = (px & maskg) >> shg;
1455 int r = (px & maskr) >> shr;
1457 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1458 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Generic packed-RGB to U/V with 2:1 horizontal decimation.  Adjacent
 * pixel pairs are summed component-wise without unpacking first: green is
 * isolated via maskgx = ~(maskr|maskb) and red+blue via subtraction, then
 * the widened masks (each shifted left once) extract the summed fields.
 * The final >> (S+1) divides the pair sum back down.
 * NOTE(review): the 565-format special case between the b and g extraction
 * (extra shift of g for the 6-bit field) is partly elided in this chunk. */
1462 static av_always_inline void
1463 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1464 const uint8_t *src, int width,
1465 enum PixelFormat origin,
1466 int shr, int shg, int shb, int shp,
1467 int maskr, int maskg, int maskb,
1468 int rsh, int gsh, int bsh, int S)
1470 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1471 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1472 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen masks so they cover the sum of two component fields */
1475 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1476 for (i = 0; i < width; i++) {
1477 int px0 = input_pixel(2 * i + 0) >> shp;
1478 int px1 = input_pixel(2 * i + 1) >> shp;
1479 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1480 int rb = px0 + px1 - g;
1482 b = (rb & maskb) >> shb;
1483 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1484 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1487 g = (g & maskg) >> shg;
1489 r = (rb & maskr) >> shr;
1491 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1492 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* rgb16_32_wrapper: generate the three table-compatible converters
 * (ToY / ToUV / ToUV_half) for one packed-RGB format by forwarding the
 * per-format extraction constants to the templates above. */
1498 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1499 maskg, maskb, rsh, gsh, bsh, S) \
1500 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1501 int width, uint32_t *unused) \
1503 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1504 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1507 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1508 const uint8_t *src, const uint8_t *dummy, \
1509 int width, uint32_t *unused) \
1511 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1512 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1515 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1516 const uint8_t *src, const uint8_t *dummy, \
1517 int width, uint32_t *unused) \
1519 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1520 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* one instantiation per supported 15/16/32-bit packed-RGB input format;
 * the masks/shifts encode each format's component layout, and S folds the
 * coefficient pre-shift into the final right shift */
1523 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1524 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1525 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1526 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1527 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1528 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1529 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1530 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1531 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1532 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1533 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1534 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from 32-bit ABGR/RGBA input; the per-pixel byte
 * copy in each loop body is elided in this chunk (presumably src[4*i] for
 * ABGR and src[4*i+3] for RGBA — confirm against the full file). */
1536 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1539 for (i=0; i<width; i++) {
1544 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1547 for (i=0; i<width; i++) {
/* palToY_c: look each PAL8 index up in the 32-bit palette and keep the low
 * byte as luma (the palette entries are pre-converted to YUV elsewhere). */
1552 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1555 for (i=0; i<width; i++) {
1558 dst[i]= pal[d] & 0xFF;
/* palToUV_c: U and V come from higher bytes of the same palette entry;
 * src1 and src2 are required to alias (asserted below). */
1562 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1563 const uint8_t *src1, const uint8_t *src2,
1564 int width, uint32_t *pal)
1567 assert(src1 == src2);
1568 for (i=0; i<width; i++) {
1569 int p= pal[src1[i]];
/* Expand 1-bit-per-pixel input to 8-bit luma, 8 pixels per source byte:
 * each bit becomes 0 or 255.  The monowhite variant must invert the byte
 * before the bit loop (white = 0 in MONOWHITE); that inversion line is
 * elided in this chunk — confirm against the full file. */
1576 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1577 int width, uint32_t *unused)
1580 for (i=0; i<width/8; i++) {
1583 dst[8*i+j]= ((d>>(7-j))&1)*255;
1587 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1588 int width, uint32_t *unused)
1591 for (i=0; i<width/8; i++) {
1594 dst[8*i+j]= ((d>>(7-j))&1)*255;
1598 //FIXME yuy2* can read up to 7 samples too much
/* yuy2ToY_c: extract luma (even bytes) from packed YUYV input. */
1600 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1604 for (i=0; i<width; i++)
/* yuy2ToUV_c: extract U (byte 1) and V (byte 3) of each 4-byte YUYV group;
 * src1 and src2 must alias (asserted). */
1608 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1609 const uint8_t *src2, int width, uint32_t *unused)
1612 for (i=0; i<width; i++) {
1613 dstU[i]= src1[4*i + 1];
1614 dstV[i]= src1[4*i + 3];
1616 assert(src1 == src2);
/* Byte-swap one 16-bit plane (used to normalize opposite-endian 9/10/16-bit
 * input to native order before scaling). */
1619 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1622 const uint16_t *src = (const uint16_t *) _src;
1623 uint16_t *dst = (uint16_t *) _dst;
1624 for (i=0; i<width; i++) {
1625 dst[i] = av_bswap16(src[i]);
/* Same, for the two chroma planes at once. */
1629 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1630 const uint8_t *_src2, int width, uint32_t *unused)
1633 const uint16_t *src1 = (const uint16_t *) _src1,
1634 *src2 = (const uint16_t *) _src2;
1635 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1636 for (i=0; i<width; i++) {
1637 dstU[i] = av_bswap16(src1[i]);
1638 dstV[i] = av_bswap16(src2[i]);
1642 /* This is almost identical to the previous, and exists only because
1643 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* uyvyToY_c: extract luma (odd bytes) from packed UYVY input. */
1644 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1648 for (i=0; i<width; i++)
/* uyvyToUV_c: extract U (byte 0) and V (byte 2) of each 4-byte UYVY group;
 * src1 and src2 must alias (asserted). */
1652 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1653 const uint8_t *src2, int width, uint32_t *unused)
1656 for (i=0; i<width; i++) {
1657 dstU[i]= src1[4*i + 0];
1658 dstV[i]= src1[4*i + 2];
1660 assert(src1 == src2);
/* De-interleave one NV12/NV21-style packed chroma line into two planes. */
1663 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1664 const uint8_t *src, int width)
1667 for (i = 0; i < width; i++) {
1668 dst1[i] = src[2*i+0];
1669 dst2[i] = src[2*i+1];
/* NV12: interleaved order is U,V */
1673 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1674 const uint8_t *src1, const uint8_t *src2,
1675 int width, uint32_t *unused)
1677 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved order is V,U — note the swapped destination args */
1680 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1681 const uint8_t *src1, const uint8_t *src2,
1682 int width, uint32_t *unused)
1684 nvXXtoUV_c(dstV, dstU, src1, width);
/* input_pixel redefined again for later 16-bit helpers in this region. */
1687 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Convert packed 24-bit BGR to 8-bit luma with rounding; the b/g/r byte
 * loads inside the loop are elided in this chunk. */
1689 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1690 int width, uint32_t *unused)
1693 for (i=0; i<width; i++) {
1698 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Convert packed 24-bit BGR to 8-bit U/V, one chroma pair per pixel; the
 * 257<<(SHIFT-1) rounding constant folds in the +128 chroma bias. */
1702 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1703 const uint8_t *src2, int width, uint32_t *unused)
1706 for (i=0; i<width; i++) {
1707 int b= src1[3*i + 0];
1708 int g= src1[3*i + 1];
1709 int r= src1[3*i + 2];
1711 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1712 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1714 assert(src1 == src2);
/* Same with 2:1 horizontal decimation: component sums of two adjacent
 * pixels, divided out by the extra +1 in the final shift. */
1717 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1718 const uint8_t *src2, int width, uint32_t *unused)
1721 for (i=0; i<width; i++) {
1722 int b= src1[6*i + 0] + src1[6*i + 3];
1723 int g= src1[6*i + 1] + src1[6*i + 4];
1724 int r= src1[6*i + 2] + src1[6*i + 5];
1726 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1727 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1729 assert(src1 == src2);
/* rgb24ToY_c: RGB byte order counterpart of bgr24ToY_c; the per-pixel
 * component loads are elided in this chunk. */
1732 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1736 for (i=0; i<width; i++) {
1741 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* rgb24ToUV_c: RGB byte order counterpart of bgr24ToUV_c (r first). */
1745 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1746 const uint8_t *src2, int width, uint32_t *unused)
1750 for (i=0; i<width; i++) {
1751 int r= src1[3*i + 0];
1752 int g= src1[3*i + 1];
1753 int b= src1[3*i + 2];
1755 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1756 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* rgb24ToUV_half_c: 2:1 horizontally decimated variant (pair sums). */
1760 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1761 const uint8_t *src2, int width, uint32_t *unused)
1765 for (i=0; i<width; i++) {
1766 int r= src1[6*i + 0] + src1[6*i + 3];
1767 int g= src1[6*i + 1] + src1[6*i + 4];
1768 int b= src1[6*i + 2] + src1[6*i + 5];
1770 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1771 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Horizontal FIR scaler for 9..16-bit input producing 19-bit intermediates
 * (int32_t destination).  `bits` is the source depth minus one; the shift
 * `sh` derived from it is computed on lines elided in this chunk —
 * confirm its derivation against the full file. */
1775 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1776 const int16_t *filter,
1777 const int16_t *filterPos, int filterSize)
1780 int32_t *dst = (int32_t *) _dst;
1781 const uint16_t *src = (const uint16_t *) _src;
1782 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1785 for (i = 0; i < dstW; i++) {
1787 int srcPos = filterPos[i];
1790 for (j = 0; j < filterSize; j++) {
1791 val += src[srcPos + j] * filter[filterSize * i + j];
1793 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1794 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Same scaler producing 15-bit intermediates (int16_t destination); here
 * the shift is the source depth minus one directly. */
1798 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1799 const int16_t *filter,
1800 const int16_t *filterPos, int filterSize)
1803 const uint16_t *src = (const uint16_t *) _src;
1804 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1806 for (i = 0; i < dstW; i++) {
1808 int srcPos = filterPos[i];
1811 for (j = 0; j < filterSize; j++) {
1812 val += src[srcPos + j] * filter[filterSize * i + j];
1814 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1815 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1819 // bilinear / bicubic scaling
/* Horizontal FIR scaler for 8-bit input producing 15-bit intermediates;
 * the FFMIN clamp guards against positive overshoot from cubic filters. */
1820 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1821 const int16_t *filter, const int16_t *filterPos,
1825 for (i=0; i<dstW; i++) {
1827 int srcPos= filterPos[i];
1829 for (j=0; j<filterSize; j++) {
1830 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1832 //filter += hFilterSize;
1833 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Same, producing 19-bit intermediates into an int32_t destination. */
1838 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1839 const int16_t *filter, const int16_t *filterPos,
1843 int32_t *dst = (int32_t *) _dst;
1844 for (i=0; i<dstW; i++) {
1846 int srcPos= filterPos[i];
1848 for (j=0; j<filterSize; j++) {
1849 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1851 //filter += hFilterSize;
1852 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1857 //FIXME all pal and rgb srcFormats could do this conversion as well
1858 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range (MPEG) <-> full-range (JPEG) conversion on the 15-bit
 * intermediate planes, applied after horizontal scaling.  The fixed-point
 * constants encode the 224<->255 (chroma) and 219<->255 (luma) rescale
 * around the respective bias points; FFMIN clamps avoid overflow on the
 * expanding direction.  The *16 variants operate on the 19-bit (int32_t)
 * intermediates and shift the additive constants up accordingly. */
1859 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1862 for (i = 0; i < width; i++) {
1863 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1864 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1867 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1870 for (i = 0; i < width; i++) {
1871 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1872 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1875 static void lumRangeToJpeg_c(int16_t *dst, int width)
1878 for (i = 0; i < width; i++)
1879 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1881 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1884 for (i = 0; i < width; i++)
1885 dst[i] = (dst[i]*14071 + 33561947)>>14;
1888 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1891 int32_t *dstU = (int32_t *) _dstU;
1892 int32_t *dstV = (int32_t *) _dstV;
1893 for (i = 0; i < width; i++) {
1894 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1895 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1898 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1901 int32_t *dstU = (int32_t *) _dstU;
1902 int32_t *dstV = (int32_t *) _dstV;
1903 for (i = 0; i < width; i++) {
1904 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1905 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
1908 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1911 int32_t *dst = (int32_t *) _dst;
1912 for (i = 0; i < width; i++)
1913 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
1915 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
1918 int32_t *dst = (int32_t *) _dst;
1919 for (i = 0; i < width; i++)
1920 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point source position
 * `xpos` advances by xInc per output sample; the fraction (reduced to
 * 7 bits) linearly interpolates between the two neighboring source pixels,
 * producing 15-bit output (src << 7 scale). */
1923 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1924 const uint8_t *src, int srcW, int xInc)
1927 unsigned int xpos=0;
1928 for (i=0;i<dstWidth;i++) {
1929 register unsigned int xx=xpos>>16;
1930 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1931 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1936 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) line: optionally run
 * the per-format toYV12 converter into formatConvBuffer first, then either
 * the generic FIR hyScale or the fast bilinear path, and finally the
 * limited/full range conversion (luma only — alpha has no range step). */
1937 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1938 const uint8_t *src, int srcW, int xInc,
1939 const int16_t *hLumFilter,
1940 const int16_t *hLumFilterPos, int hLumFilterSize,
1941 uint8_t *formatConvBuffer,
1942 uint32_t *pal, int isAlpha)
1944 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1945 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1948 toYV12(formatConvBuffer, src, srcW, pal);
1949 src= formatConvBuffer;
1952 if (!c->hyscale_fast) {
1953 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1954 } else { // fast bilinear upscale / crap downscale
1955 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1959 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c but for both chroma planes at once; uses the complement
 * weight (xalpha^127) on the left neighbor instead of a subtract. */
1962 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1963 int dstWidth, const uint8_t *src1,
1964 const uint8_t *src2, int srcW, int xInc)
1967 unsigned int xpos=0;
1968 for (i=0;i<dstWidth;i++) {
1969 register unsigned int xx=xpos>>16;
1970 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1971 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1972 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one chroma line pair: optionally run the per-format
 * chrToYV12 converter into formatConvBuffer (buf2 is the second plane's
 * slot, offset by the 16-byte-aligned per-plane size derived from srcBpc),
 * then the FIR or fast-bilinear path, then the optional range conversion. */
1977 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
1978 const uint8_t *src1, const uint8_t *src2,
1979 int srcW, int xInc, const int16_t *hChrFilter,
1980 const int16_t *hChrFilterPos, int hChrFilterSize,
1981 uint8_t *formatConvBuffer, uint32_t *pal)
1984 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
1985 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1986 src1= formatConvBuffer;
1990 if (!c->hcscale_fast) {
1991 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1992 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1993 } else { // fast bilinear upscale / crap downscale
1994 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1997 if (c->chrConvertRange)
1998 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions for the destination format: planar writers
 * (yuv2yuv1 / yuv2planeX / the NV12-NV21 interleaved chroma writer) first,
 * then the packed writers.  With SWS_FULL_CHR_H_INT only a _X variant
 * exists per format; otherwise _1/_2/_X are all set.  Many `case` labels,
 * `break`s and the CONFIG_SMALL / alpPixBuf runtime-vs-compile-time alpha
 * branches are elided in this chunk — the general scheme for 32-bit
 * formats is: CONFIG_SMALL picks the runtime-alpha variant, otherwise the
 * alpha (a*/rgba*) or no-alpha (x*/rgbx*) specialization is chosen by
 * whether c->alpPixBuf is set. */
2001 static av_always_inline void
2002 find_c_packed_planar_out_funcs(SwsContext *c,
2003 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2planeX_luma,
2004 yuv2planarX_fn *yuv2planeX_chroma, yuv2interleavedX_fn *yuv2nv12X_chroma,
2005 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2006 yuv2packedX_fn *yuv2packedX)
2008 enum PixelFormat dstFormat = c->dstFormat;
/* planar / semi-planar writers */
2010 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2011 *yuv2planeX_luma = yuv2yuvX_c;
2012 *yuv2nv12X_chroma = yuv2nv12X_chroma_c;
2013 } else if (is16BPS(dstFormat)) {
2014 *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2015 } else if (is9_OR_10BPS(dstFormat)) {
2016 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2017 *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2019 *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2022 *yuv2yuv1 = yuv2yuv1_c;
2023 *yuv2planeX_luma = *yuv2planeX_chroma = yuv2yuvX_c;
/* packed writers: full-chroma interpolation variants */
2026 if(c->flags & SWS_FULL_CHR_H_INT) {
2027 switch (dstFormat) {
2030 *yuv2packedX = yuv2rgba32_full_X_c;
2032 #if CONFIG_SWSCALE_ALPHA
2034 *yuv2packedX = yuv2rgba32_full_X_c;
2036 #endif /* CONFIG_SWSCALE_ALPHA */
2038 *yuv2packedX = yuv2rgbx32_full_X_c;
2040 #endif /* !CONFIG_SMALL */
2044 *yuv2packedX = yuv2argb32_full_X_c;
2046 #if CONFIG_SWSCALE_ALPHA
2048 *yuv2packedX = yuv2argb32_full_X_c;
2050 #endif /* CONFIG_SWSCALE_ALPHA */
2052 *yuv2packedX = yuv2xrgb32_full_X_c;
2054 #endif /* !CONFIG_SMALL */
2058 *yuv2packedX = yuv2bgra32_full_X_c;
2060 #if CONFIG_SWSCALE_ALPHA
2062 *yuv2packedX = yuv2bgra32_full_X_c;
2064 #endif /* CONFIG_SWSCALE_ALPHA */
2066 *yuv2packedX = yuv2bgrx32_full_X_c;
2068 #endif /* !CONFIG_SMALL */
2072 *yuv2packedX = yuv2abgr32_full_X_c;
2074 #if CONFIG_SWSCALE_ALPHA
2076 *yuv2packedX = yuv2abgr32_full_X_c;
2078 #endif /* CONFIG_SWSCALE_ALPHA */
2080 *yuv2packedX = yuv2xbgr32_full_X_c;
2082 #endif /* !CONFIG_SMALL */
2085 *yuv2packedX = yuv2rgb24_full_X_c;
2088 *yuv2packedX = yuv2bgr24_full_X_c;
/* packed writers: standard (2:1 chroma) variants, all three flavors */
2092 switch (dstFormat) {
2093 case PIX_FMT_GRAY16BE:
2094 *yuv2packed1 = yuv2gray16BE_1_c;
2095 *yuv2packed2 = yuv2gray16BE_2_c;
2096 *yuv2packedX = yuv2gray16BE_X_c;
2098 case PIX_FMT_GRAY16LE:
2099 *yuv2packed1 = yuv2gray16LE_1_c;
2100 *yuv2packed2 = yuv2gray16LE_2_c;
2101 *yuv2packedX = yuv2gray16LE_X_c;
2103 case PIX_FMT_MONOWHITE:
2104 *yuv2packed1 = yuv2monowhite_1_c;
2105 *yuv2packed2 = yuv2monowhite_2_c;
2106 *yuv2packedX = yuv2monowhite_X_c;
2108 case PIX_FMT_MONOBLACK:
2109 *yuv2packed1 = yuv2monoblack_1_c;
2110 *yuv2packed2 = yuv2monoblack_2_c;
2111 *yuv2packedX = yuv2monoblack_X_c;
2113 case PIX_FMT_YUYV422:
2114 *yuv2packed1 = yuv2yuyv422_1_c;
2115 *yuv2packed2 = yuv2yuyv422_2_c;
2116 *yuv2packedX = yuv2yuyv422_X_c;
2118 case PIX_FMT_UYVY422:
2119 *yuv2packed1 = yuv2uyvy422_1_c;
2120 *yuv2packed2 = yuv2uyvy422_2_c;
2121 *yuv2packedX = yuv2uyvy422_X_c;
2123 case PIX_FMT_RGB48LE:
2124 *yuv2packed1 = yuv2rgb48le_1_c;
2125 *yuv2packed2 = yuv2rgb48le_2_c;
2126 *yuv2packedX = yuv2rgb48le_X_c;
2128 case PIX_FMT_RGB48BE:
2129 *yuv2packed1 = yuv2rgb48be_1_c;
2130 *yuv2packed2 = yuv2rgb48be_2_c;
2131 *yuv2packedX = yuv2rgb48be_X_c;
2133 case PIX_FMT_BGR48LE:
2134 *yuv2packed1 = yuv2bgr48le_1_c;
2135 *yuv2packed2 = yuv2bgr48le_2_c;
2136 *yuv2packedX = yuv2bgr48le_X_c;
2138 case PIX_FMT_BGR48BE:
2139 *yuv2packed1 = yuv2bgr48be_1_c;
2140 *yuv2packed2 = yuv2bgr48be_2_c;
2141 *yuv2packedX = yuv2bgr48be_X_c;
2146 *yuv2packed1 = yuv2rgb32_1_c;
2147 *yuv2packed2 = yuv2rgb32_2_c;
2148 *yuv2packedX = yuv2rgb32_X_c;
2150 #if CONFIG_SWSCALE_ALPHA
2152 *yuv2packed1 = yuv2rgba32_1_c;
2153 *yuv2packed2 = yuv2rgba32_2_c;
2154 *yuv2packedX = yuv2rgba32_X_c;
2156 #endif /* CONFIG_SWSCALE_ALPHA */
2158 *yuv2packed1 = yuv2rgbx32_1_c;
2159 *yuv2packed2 = yuv2rgbx32_2_c;
2160 *yuv2packedX = yuv2rgbx32_X_c;
2162 #endif /* !CONFIG_SMALL */
2164 case PIX_FMT_RGB32_1:
2165 case PIX_FMT_BGR32_1:
2167 *yuv2packed1 = yuv2rgb32_1_1_c;
2168 *yuv2packed2 = yuv2rgb32_1_2_c;
2169 *yuv2packedX = yuv2rgb32_1_X_c;
2171 #if CONFIG_SWSCALE_ALPHA
2173 *yuv2packed1 = yuv2rgba32_1_1_c;
2174 *yuv2packed2 = yuv2rgba32_1_2_c;
2175 *yuv2packedX = yuv2rgba32_1_X_c;
2177 #endif /* CONFIG_SWSCALE_ALPHA */
2179 *yuv2packed1 = yuv2rgbx32_1_1_c;
2180 *yuv2packed2 = yuv2rgbx32_1_2_c;
2181 *yuv2packedX = yuv2rgbx32_1_X_c;
2183 #endif /* !CONFIG_SMALL */
2186 *yuv2packed1 = yuv2rgb24_1_c;
2187 *yuv2packed2 = yuv2rgb24_2_c;
2188 *yuv2packedX = yuv2rgb24_X_c;
2191 *yuv2packed1 = yuv2bgr24_1_c;
2192 *yuv2packed2 = yuv2bgr24_2_c;
2193 *yuv2packedX = yuv2bgr24_X_c;
2195 case PIX_FMT_RGB565LE:
2196 case PIX_FMT_RGB565BE:
2197 case PIX_FMT_BGR565LE:
2198 case PIX_FMT_BGR565BE:
2199 *yuv2packed1 = yuv2rgb16_1_c;
2200 *yuv2packed2 = yuv2rgb16_2_c;
2201 *yuv2packedX = yuv2rgb16_X_c;
2203 case PIX_FMT_RGB555LE:
2204 case PIX_FMT_RGB555BE:
2205 case PIX_FMT_BGR555LE:
2206 case PIX_FMT_BGR555BE:
2207 *yuv2packed1 = yuv2rgb15_1_c;
2208 *yuv2packed2 = yuv2rgb15_2_c;
2209 *yuv2packedX = yuv2rgb15_X_c;
2211 case PIX_FMT_RGB444LE:
2212 case PIX_FMT_RGB444BE:
2213 case PIX_FMT_BGR444LE:
2214 case PIX_FMT_BGR444BE:
2215 *yuv2packed1 = yuv2rgb12_1_c;
2216 *yuv2packed2 = yuv2rgb12_2_c;
2217 *yuv2packedX = yuv2rgb12_X_c;
2221 *yuv2packed1 = yuv2rgb8_1_c;
2222 *yuv2packed2 = yuv2rgb8_2_c;
2223 *yuv2packedX = yuv2rgb8_X_c;
2227 *yuv2packed1 = yuv2rgb4_1_c;
2228 *yuv2packed2 = yuv2rgb4_2_c;
2229 *yuv2packedX = yuv2rgb4_X_c;
2231 case PIX_FMT_RGB4_BYTE:
2232 case PIX_FMT_BGR4_BYTE:
2233 *yuv2packed1 = yuv2rgb4b_1_c;
2234 *yuv2packed2 = yuv2rgb4b_2_c;
2235 *yuv2packedX = yuv2rgb4b_X_c;
/* Set to 1 to enable the verbose ring-buffer tracing emitted via DEBUG_BUFFERS
 * throughout swScale() below. */
2241 #define DEBUG_SWSCALE_BUFFERS 0
/* Expands to an av_log() call guarded by the compile-time flag above; the
 * optimizer removes it entirely when DEBUG_SWSCALE_BUFFERS is 0. Relies on a
 * variable named 'c' (the SwsContext *) being in scope at every call site.
 * NOTE(review): the unbraced 'if' makes this macro a dangling-else hazard if
 * ever used as the sole statement of an outer if/else — consider the usual
 * do { ... } while (0) wrapper; confirm no such call site exists first. */
2242 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main software-scaling driver: consumes one horizontal slice of the source
 * picture (rows srcSliceY .. srcSliceY+srcSliceH-1), horizontally scales the
 * required luma/chroma/alpha input lines into ring buffers, then vertically
 * scales/outputs as many destination lines as the buffered input permits.
 * Returns the number of destination lines produced (dstY - lastDstY).
 *
 * NOTE(review): this view of the file is elided — several original lines
 * (braces, else-branches, some statements) are missing between the numbered
 * lines — so the comments below describe only what the visible code shows.
 */
2244 static int swScale(SwsContext *c, const uint8_t* src[],
2245 int srcStride[], int srcSliceY,
2246 int srcSliceH, uint8_t* dst[], int dstStride[])
2248 /* load a few things into local vars to make the code more readable? and faster */
2249 const int srcW= c->srcW;
2250 const int dstW= c->dstW;
2251 const int dstH= c->dstH;
2252 const int chrDstW= c->chrDstW;
2253 const int chrSrcW= c->chrSrcW;
2254 const int lumXInc= c->lumXInc;
2255 const int chrXInc= c->chrXInc;
2256 const enum PixelFormat dstFormat= c->dstFormat;
2257 const int flags= c->flags;
/* Vertical/horizontal filter coefficient tables and their per-line positions,
 * precomputed at context-init time. */
2258 int16_t *vLumFilterPos= c->vLumFilterPos;
2259 int16_t *vChrFilterPos= c->vChrFilterPos;
2260 int16_t *hLumFilterPos= c->hLumFilterPos;
2261 int16_t *hChrFilterPos= c->hChrFilterPos;
2262 int16_t *vLumFilter= c->vLumFilter;
2263 int16_t *vChrFilter= c->vChrFilter;
2264 int16_t *hLumFilter= c->hLumFilter;
2265 int16_t *hChrFilter= c->hChrFilter;
/* MMX-specific filter copies; alpMmxFilter may be unused in C-only builds. */
2266 int32_t *lumMmxFilter= c->lumMmxFilter;
2267 int32_t *chrMmxFilter= c->chrMmxFilter;
2268 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2269 const int vLumFilterSize= c->vLumFilterSize;
2270 const int vChrFilterSize= c->vChrFilterSize;
2271 const int hLumFilterSize= c->hLumFilterSize;
2272 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally-scaled lines awaiting vertical scaling. */
2273 int16_t **lumPixBuf= c->lumPixBuf;
2274 int16_t **chrUPixBuf= c->chrUPixBuf;
2275 int16_t **chrVPixBuf= c->chrVPixBuf;
2276 int16_t **alpPixBuf= c->alpPixBuf;
2277 const int vLumBufSize= c->vLumBufSize;
2278 const int vChrBufSize= c->vChrBufSize;
2279 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma geometry of this slice; the negated-shift form rounds the height up
 * for odd slice heights. */
2280 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2281 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2283 uint32_t *pal=c->pal_yuv;
/* Output-function pointers selected at init; may be swapped to the C
 * versions near the bottom of the frame (see the dstY >= dstH-2 branch). */
2284 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2285 yuv2planarX_fn yuv2planeX_luma = c->yuv2planeX_luma;
2286 yuv2planarX_fn yuv2planeX_chroma = c->yuv2planeX_chroma;
2287 yuv2interleavedX_fn yuv2nv12X_chroma = c->yuv2nv12X_chroma;
2288 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2289 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2290 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* High-bit-depth sources get real dithering; 8-bit sources use a flat table. */
2291 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2293 /* vars which will change and which we need to store back in the context */
2295 int lumBufIndex= c->lumBufIndex;
2296 int chrBufIndex= c->chrBufIndex;
2297 int lastInLumBuf= c->lastInLumBuf;
2298 int lastInChrBuf= c->lastInChrBuf;
/* Packed input has a single plane; (elided code) duplicates plane 0 info so
 * the per-plane loops below work uniformly. */
2300 if (isPacked(c->srcFormat)) {
2308 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma source lines by widening the chroma strides. */
2310 srcStride[1]<<= c->vChrDrop;
2311 srcStride[2]<<= c->vChrDrop;
2313 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2314 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2315 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2316 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2317 srcSliceY, srcSliceH, dstY, dstH);
2318 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2319 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* One-shot warning when destination strides defeat aligned SIMD stores. */
2321 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2322 static int warnedAlready=0; //FIXME move this into the context perhaps
2323 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2324 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2325 " ->cannot do aligned memory accesses anymore\n");
2330 /* Note the user might start scaling the picture in the middle so this
2331 will not get executed. This is not really intended but works
2332 currently, so people might do it. */
2333 if (srcSliceY ==0) {
/* 8-bit sources: use the flat 64-valued table, i.e. effectively no dither. */
2341 if (!should_dither) {
2342 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main output loop: one iteration per destination line. */
2346 for (;dstY < dstH; dstY++) {
2347 const int chrDstY= dstY>>c->chrDstVSubSample;
2348 uint8_t *dest[4] = {
2349 dst[0] + dstStride[0] * dstY,
2350 dst[1] + dstStride[1] * chrDstY,
2351 dst[2] + dstStride[2] * chrDstY,
2352 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* Range of source lines this destination line (and its chroma partner
 * lines, via firstLumSrcY2) depends on. */
2355 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2356 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2357 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2358 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2359 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2360 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2363 //handle holes (FAST_BILINEAR & weird filters)
2364 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2365 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2366 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2367 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2369 DEBUG_BUFFERS("dstY: %d\n", dstY);
2370 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2371 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2372 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2373 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2375 // Do we have enough lines in this slice to output the dstY line
2376 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: buffer everything this slice provides, then
 * break out of the loop below to wait for the next slice. */
2378 if (!enough_lines) {
2379 lastLumSrcY = srcSliceY + srcSliceH - 1;
2380 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2381 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2382 lastLumSrcY, lastChrSrcY);
2385 //Do horizontal scaling
2386 while(lastInLumBuf < lastLumSrcY) {
2387 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2388 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2390 assert(lumBufIndex < 2*vLumBufSize);
2391 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2392 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2393 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2394 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha plane shares the luma horizontal scaler and ring index. */
2397 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2398 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2399 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2403 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2404 lumBufIndex, lastInLumBuf);
/* Same as above, for the two chroma planes. */
2406 while(lastInChrBuf < lastChrSrcY) {
2407 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2408 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2410 assert(chrBufIndex < 2*vChrBufSize);
2411 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2412 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2413 //FIXME replace parameters through context struct (some at least)
/* Skipped when chroma is passed through untouched (see needs_hcscale). */
2415 if (c->needs_hcscale)
2416 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2417 chrDstW, src1, src2, chrSrcW, chrXInc,
2418 hChrFilter, hChrFilterPos, hChrFilterSize,
2419 formatConvBuffer, pal);
2421 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2422 chrBufIndex, lastInChrBuf);
2424 //wrap buf index around to stay inside the ring buffer
2425 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2426 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2428 break; //we can't output a dstY line so let's try with the next slice
2431 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* High-bit-depth sources: pick the 8x8 ordered-dither row for this line. */
2433 if (should_dither) {
2434 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2435 c->lumDither8 = dither_8x8_128[dstY & 7];
2437 if (dstY >= dstH-2) {
2438 // hmm looks like we can't use MMX here without overwriting this array's tail
2439 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2planeX_luma, &yuv2planeX_chroma,
2440 &yuv2nv12X_chroma, &yuv2packed1, &yuv2packed2,
/* Translate ring-buffer indices into plain arrays of source-line pointers
 * for the vertical scalers. */
2445 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2446 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2447 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2448 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2449 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2450 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2451 if ((dstY&chrSkipMask) || isGray(dstFormat))
2452 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2453 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
/* NV12/NV21: planar luma plus one interleaved chroma plane. */
2455 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2456 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2459 yuv2nv12X_chroma(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2461 } else if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2462 yuv2yuv1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2465 yuv2yuv1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2466 yuv2yuv1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2469 if (alpBuf && dest[3])
2470 yuv2yuv1(alpBuf, dest[3], dstW, c->lumDither8, 0);
2471 } else { //General YV12
2472 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2475 yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2476 yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2479 if (alpBuf && dest[3])
2480 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed (RGB-like) output path. */
2483 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2484 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2485 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2486 int chrAlpha = vChrFilter[2 * dstY + 1];
2487 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2488 alpPixBuf ? *alpSrcPtr : NULL,
2489 dest[0], dstW, chrAlpha, dstY);
2490 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2491 int lumAlpha = vLumFilter[2 * dstY + 1];
2492 int chrAlpha = vChrFilter[2 * dstY + 1];
/* 0x10001 replicates the 16-bit coefficient into both halves of a
 * 32-bit word for the MMX filter layout. */
2494 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2496 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2497 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2498 alpPixBuf ? alpSrcPtr : NULL,
2499 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2500 } else { //general RGB
2501 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2502 lumSrcPtr, vLumFilterSize,
2503 vChrFilter + dstY * vChrFilterSize,
2504 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2505 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA destination with no source alpha: emit an opaque alpha plane. */
2511 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2512 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush non-temporal stores issued by the MMX2 output functions. */
2515 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2516 __asm__ volatile("sfence":::"memory");
2520 /* store changed local vars back in the context */
2522 c->lumBufIndex= lumBufIndex;
2523 c->chrBufIndex= chrBufIndex;
2524 c->lastInLumBuf= lastInLumBuf;
2525 c->lastInChrBuf= lastInChrBuf;
2527 return dstY - lastDstY;
/*
 * One-time (cold) initialization of the C code paths: selects the output
 * functions and the per-plane input "unpack to YV12-ish" converters,
 * horizontal scalers, and range converters, based on c->srcFormat,
 * c->dstFormat, bit depths and flags.
 *
 * NOTE(review): this view of the file is elided (switch headers, braces and
 * some case lines are missing between the numbered lines); comments describe
 * only what the visible code shows.
 */
2530 static av_cold void sws_init_swScale_c(SwsContext *c)
2532 enum PixelFormat srcFormat = c->srcFormat;
/* Fill in the vertical-scaler output function pointers on the context. */
2534 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2planeX_luma, &c->yuv2planeX_chroma,
2535 &c->yuv2nv12X_chroma, &c->yuv2packed1, &c->yuv2packed2,
/* --- chroma input converter selection (srcFormat -> planar U/V) --- */
2538 c->chrToYV12 = NULL;
2540 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2541 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2542 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2543 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Paletted 4-bit formats go through the palette lookup. */
2547 case PIX_FMT_BGR4_BYTE:
2548 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10/16-bit planar YUV in the non-native endianness: byteswap U/V.
 * NOTE(review): both the LE and BE groups map to the same bswap16UV_c
 * here; presumably elided preprocessor conditionals (HAVE_BIGENDIAN)
 * select which group is compiled in — confirm against the full file. */
2550 case PIX_FMT_YUV444P9LE:
2551 case PIX_FMT_YUV422P9LE:
2552 case PIX_FMT_YUV420P9LE:
2553 case PIX_FMT_YUV422P10LE:
2554 case PIX_FMT_YUV444P10LE:
2555 case PIX_FMT_YUV420P10LE:
2556 case PIX_FMT_YUV420P16LE:
2557 case PIX_FMT_YUV422P16LE:
2558 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2560 case PIX_FMT_YUV444P9BE:
2561 case PIX_FMT_YUV422P9BE:
2562 case PIX_FMT_YUV420P9BE:
2563 case PIX_FMT_YUV444P10BE:
2564 case PIX_FMT_YUV422P10BE:
2565 case PIX_FMT_YUV420P10BE:
2566 case PIX_FMT_YUV420P16BE:
2567 case PIX_FMT_YUV422P16BE:
2568 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB-family sources: when the destination chroma is horizontally
 * subsampled, use the *_half_c readers that average pixel pairs ... */
2571 if (c->chrSrcHSubSample) {
2573 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2574 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2575 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2576 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2577 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2578 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2579 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2580 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2581 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2582 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2583 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2584 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2585 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2586 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2587 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2588 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2589 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2590 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* ... otherwise use the full-resolution per-pixel readers. */
2594 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2595 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2596 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2597 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2598 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2599 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2600 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2601 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2602 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2603 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2604 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2605 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2606 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2607 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2608 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2609 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2610 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2611 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* --- luma (and alpha) input converter selection --- */
2615 c->lumToYV12 = NULL;
2616 c->alpToYV12 = NULL;
2617 switch (srcFormat) {
/* Same byteswap note as for chroma applies to the LE/BE groups below. */
2619 case PIX_FMT_YUV444P9LE:
2620 case PIX_FMT_YUV422P9LE:
2621 case PIX_FMT_YUV420P9LE:
2622 case PIX_FMT_YUV444P10LE:
2623 case PIX_FMT_YUV422P10LE:
2624 case PIX_FMT_YUV420P10LE:
2625 case PIX_FMT_YUV420P16LE:
2626 case PIX_FMT_YUV422P16LE:
2627 case PIX_FMT_YUV444P16LE:
2628 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2630 case PIX_FMT_YUV444P9BE:
2631 case PIX_FMT_YUV422P9BE:
2632 case PIX_FMT_YUV420P9BE:
2633 case PIX_FMT_YUV444P10BE:
2634 case PIX_FMT_YUV422P10BE:
2635 case PIX_FMT_YUV420P10BE:
2636 case PIX_FMT_YUV420P16BE:
2637 case PIX_FMT_YUV422P16BE:
2638 case PIX_FMT_YUV444P16BE:
2639 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A has the same Y-byte layout as YUYV, so it shares yuy2ToY_c. */
2641 case PIX_FMT_YUYV422 :
2642 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2643 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2644 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2645 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2646 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2647 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2648 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2649 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2650 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2651 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2652 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2653 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2657 case PIX_FMT_BGR4_BYTE:
2658 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2659 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2660 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2661 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2662 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2663 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2664 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2665 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2666 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2667 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2668 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for formats that carry an alpha channel. */
2671 switch (srcFormat) {
2673 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2675 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A alpha sits at odd byte offsets like UYVY's Y, hence uyvyToY_c. */
2676 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* --- horizontal scaler selection by source/destination bit depth --- */
2680 if (c->srcBpc == 8) {
2681 if (c->dstBpc <= 10) {
2682 c->hyScale = c->hcScale = hScale8To15_c;
/* FAST_BILINEAR gets the specialized fixed-increment fast paths. */
2683 if (c->flags & SWS_FAST_BILINEAR) {
2684 c->hyscale_fast = hyscale_fast_c;
2685 c->hcscale_fast = hcscale_fast_c;
2688 c->hyScale = c->hcScale = hScale8To19_c;
2691 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* --- limited <-> full range conversion (YUV destinations only) --- */
2694 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2695 if (c->dstBpc <= 10) {
2697 c->lumConvertRange = lumRangeFromJpeg_c;
2698 c->chrConvertRange = chrRangeFromJpeg_c;
2700 c->lumConvertRange = lumRangeToJpeg_c;
2701 c->chrConvertRange = chrRangeToJpeg_c;
/* 16-bit intermediate variants for deeper pipelines. */
2705 c->lumConvertRange = lumRangeFromJpeg16_c;
2706 c->chrConvertRange = chrRangeFromJpeg16_c;
2708 c->lumConvertRange = lumRangeToJpeg16_c;
2709 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray / mono formats have no real chroma, so horizontal chroma scaling
 * can be skipped in swScale(). */
2714 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2715 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2716 c->needs_hcscale = 1;
2719 SwsFunc ff_getSwsFunc(SwsContext *c)
2721 sws_init_swScale_c(c);
2724 ff_sws_init_swScale_mmx(c);
2726 ff_sws_init_swScale_altivec(c);