2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
// BT.601 RGB -> YUV conversion coefficients in 15-bit fixed point.
// Luma (xY) terms are scaled to the 219-step limited luma range, chroma
// (xU/xV) terms to the 224-step chroma range; the "+0.5" rounds to nearest.
39 #define RGB2YUV_SHIFT 15
40 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
41 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
42 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
43 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
44 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
45 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
46 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
47 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
48 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
52 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
55 more intelligent misalignment avoidance for the horizontal scaler
56 write special vertical cubic upscale version
57 optimize C code (YV12 / minmax)
58 add support for packed pixel YUV input & output
59 add support for Y8 output
60 optimize BGR24 & BGR32
61 add BGR4 output support
62 write special BGR->BGR scaler
// 2x2 ordered-dither matrix with 4 levels; each row is replicated to
// 8 bytes so SIMD code can load a full aligned qword per scanline.
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 { 1, 3, 1, 3, 1, 3, 1, 3, },
67 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 ordered-dither matrix with 8 levels (rows replicated to 8 bytes);
// used for the 5-bit R/B channels of 15/16-bit packed RGB output.
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 { 6, 2, 6, 2, 6, 2, 6, 2, },
72 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 ordered-dither matrix with 16 levels (rows replicated to 8 bytes);
// used for 12-bit (444) packed RGB output. Exported (non-static).
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 { 8, 4, 11, 7, 8, 4, 11, 7, },
77 { 2, 14, 1, 13, 2, 14, 1, 13, },
78 { 10, 6, 9, 5, 10, 6, 9, 5, },
79 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 ordered-dither matrix with 32 levels; used for the 3-bit
// R/G channels of RGB8/BGR8 output. Exported (non-static).
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17, 9, 23, 15, 16, 8, 22, 14, },
84 { 5, 29, 3, 27, 4, 28, 2, 26, },
85 { 21, 13, 19, 11, 20, 12, 18, 10, },
86 { 0, 24, 6, 30, 1, 25, 7, 31, },
87 { 16, 8, 22, 14, 17, 9, 23, 15, },
88 { 4, 28, 2, 26, 5, 29, 3, 27, },
89 { 20, 12, 18, 10, 21, 13, 19, 11, },
90 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 ordered-dither matrix with 73 levels; used for the 2-bit
// channels of 8-bit and 4-bit paletted RGB output. Exported.
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 { 0, 55, 14, 68, 3, 58, 17, 72, },
95 { 37, 18, 50, 32, 40, 22, 54, 35, },
96 { 9, 64, 5, 59, 13, 67, 8, 63, },
97 { 46, 27, 41, 23, 49, 31, 44, 26, },
98 { 2, 57, 16, 71, 1, 56, 15, 70, },
99 { 39, 21, 52, 34, 38, 19, 51, 33, },
100 { 11, 66, 7, 62, 10, 65, 6, 60, },
101 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 ordered-dither matrix with 220 levels; used for 1-bit
// monochrome and the 4-bit RGB4/BGR4 paths. Exported.
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117, 62, 158, 103, 113, 58, 155, 100, },
107 { 34, 199, 21, 186, 31, 196, 17, 182, },
108 {144, 89, 131, 76, 141, 86, 127, 72, },
109 { 0, 165, 41, 206, 10, 175, 52, 217, },
110 {110, 55, 151, 96, 120, 65, 162, 107, },
111 { 28, 193, 14, 179, 38, 203, 24, 189, },
112 {138, 83, 124, 69, 148, 93, 134, 79, },
113 { 7, 172, 48, 213, 3, 168, 45, 210, },
// NOTE(review): this re-declares dither_8x8_220 — in the upstream file the
// gamma-corrected alternative tables are disabled by preprocessor guards
// that are not visible in this chunk; confirm the #if/#else lines exist.
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 { 0, 143, 18, 200, 2, 156, 25, 215, },
119 { 78, 28, 125, 64, 89, 36, 138, 74, },
120 { 10, 180, 3, 161, 16, 195, 8, 175, },
121 {109, 51, 93, 38, 121, 60, 105, 47, },
122 { 1, 152, 23, 210, 0, 147, 20, 205, },
123 { 85, 33, 134, 71, 81, 30, 130, 67, },
124 { 14, 190, 6, 171, 12, 185, 5, 166, },
125 {117, 57, 101, 44, 113, 54, 97, 41, },
// NOTE(review): second alternative dither_8x8_220 table (gamma 2.0);
// presumably preprocessor-disabled upstream — verify the guards.
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 { 0, 124, 8, 193, 0, 140, 12, 213, },
131 { 55, 14, 104, 42, 66, 19, 119, 52, },
132 { 3, 168, 1, 145, 6, 187, 3, 162, },
133 { 86, 31, 70, 21, 99, 39, 82, 28, },
134 { 0, 134, 11, 206, 0, 129, 9, 200, },
135 { 62, 17, 114, 48, 58, 16, 109, 45, },
136 { 5, 181, 2, 157, 4, 175, 1, 151, },
137 { 95, 36, 78, 26, 90, 34, 74, 24, },
// NOTE(review): third alternative dither_8x8_220 table (gamma 2.5);
// presumably preprocessor-disabled upstream — verify the guards.
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 { 0, 107, 3, 187, 0, 125, 6, 212, },
143 { 39, 7, 86, 28, 49, 11, 102, 36, },
144 { 1, 158, 0, 131, 3, 180, 1, 151, },
145 { 68, 19, 52, 12, 81, 25, 64, 17, },
146 { 0, 119, 5, 203, 0, 113, 4, 195, },
147 { 45, 9, 96, 33, 42, 8, 91, 30, },
148 { 2, 172, 1, 144, 2, 165, 0, 137, },
149 { 77, 23, 60, 15, 72, 21, 56, 14, },
// 8x8 ordered-dither matrix with 128 levels; used when rounding the
// 15-bit intermediate luma/chroma down to 8 bits. Exported.
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 { 36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100, 4,124, 28, 98, 2,122, 26,},
155 { 52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 { 32, 64, 56, 88, 38, 70, 62, 94,},
158 { 96, 0,120, 24,102, 6,126, 30,},
159 { 48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104, 8,118, 22,110, 14,},
// Constant vector of eight 64s; shared with SIMD code as a packed-byte
// operand (hence the "pb" name and 8-byte alignment).
162 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
163 { 64, 64, 64, 64, 64, 64, 64, 64 };
// Store one 16-bit sample: shift down by the caller's local 'shift',
// clip via av_clip_{u,}int16 (selected by 'signedness'), add 'bias',
// and write big- or little-endian depending on the enclosing branch.
// Relies on 'shift' being in scope at every expansion site.
165 #define output_pixel(pos, val, bias, signedness) \
167 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
169 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
// Unscaled (1-tap) vertical output: round the 19-bit int32 intermediate
// down to 'output_bits' and store as 16-bit words of either endianness.
172 static av_always_inline void
173 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174 int big_endian, int output_bits)
177 int shift = 19 - output_bits;
179 for (i = 0; i < dstW; i++) {
180 int val = src[i] + (1 << (shift - 1)); // add half for round-to-nearest
181 output_pixel(&dest[i], val, 0, uint);
// Multi-tap vertical filter for 16-bit output: accumulate int32 source
// lines against the 16-bit filter, then shift/clip/store. The 0x8000
// bias re-centers the result after the signed-overflow avoidance below.
185 static av_always_inline void
186 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187 const int32_t **src, uint16_t *dest, int dstW,
188 int big_endian, int output_bits)
191 int shift = 15 + 16 - output_bits;
193 for (i = 0; i < dstW; i++) {
194 int val = 1 << (30-output_bits);
197 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198 * filters (or anything with negative coeffs, the range can be slightly
199 * wider in both directions. To account for this overflow, we subtract
200 * a constant so it always fits in the signed range (assuming a
201 * reasonable filterSize), and re-add that at the end. */
203 for (j = 0; j < filterSize; j++)
204 val += src[j][i] * filter[j];
206 output_pixel(&dest[i], val, 0x8000, int);
// Store one 9/10-bit sample as a 16-bit word: shift down by the local
// 'shift', clip to 'output_bits' unsigned bits, write BE or LE
// depending on the enclosing (not visible here) endianness branch.
212 #define output_pixel(pos, val) \
214 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
216 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
// Unscaled (1-tap) vertical output for 9/10-bit formats: round the
// 15-bit int16 intermediate down to 'output_bits' and store.
219 static av_always_inline void
220 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = src[i] + (1 << (shift - 1)); // add half for round-to-nearest
228 output_pixel(&dest[i], val);
// Multi-tap vertical filter for 9/10-bit output: accumulate int16
// source lines against the filter (11-bit headroom), shift and store.
232 static av_always_inline void
233 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234 const int16_t **src, uint16_t *dest, int dstW,
235 int big_endian, int output_bits)
238 int shift = 11 + 16 - output_bits;
240 for (i = 0; i < dstW; i++) {
241 int val = 1 << (26-output_bits); // rounding constant
244 for (j = 0; j < filterSize; j++)
245 val += src[j][i] * filter[j];
247 output_pixel(&dest[i], val);
// Generator macro: for a given bit depth and endianness, emit concrete
// yuv2plane1_<bits><BE|LE>_c / yuv2planeX_<bits><BE|LE>_c wrappers that
// forward to the 10/16-bit templates above with the right source type.
253 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
254 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
255 uint8_t *dest, int dstW, \
256 const uint8_t *dither, int offset)\
258 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
259 (uint16_t *) dest, dstW, is_be, bits); \
261 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
262 const int16_t **src, uint8_t *dest, int dstW, \
263 const uint8_t *dither, int offset)\
265 yuv2planeX_## template_size ## _c_template(filter, \
266 filterSize, (const typeX_t **) src, \
267 (uint16_t *) dest, dstW, is_be, bits); \
// Instantiate the wrappers: 9/10-bit depths use the int16 template,
// 16-bit uses the int32 template; one BE and one LE variant each.
269 yuv2NBPS( 9, BE, 1, 10, int16_t)
270 yuv2NBPS( 9, LE, 0, 10, int16_t)
271 yuv2NBPS(10, BE, 1, 10, int16_t)
272 yuv2NBPS(10, LE, 0, 10, int16_t)
273 yuv2NBPS(16, BE, 1, 16, int32_t)
274 yuv2NBPS(16, LE, 0, 16, int32_t)
// Multi-tap vertical filter for 8-bit planar output: seed the
// accumulator with the per-pixel dither value (pre-shifted to the
// 19-bit accumulator scale), filter, then shift down and clip.
276 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
277 const int16_t **src, uint8_t *dest, int dstW,
278 const uint8_t *dither, int offset)
281 for (i=0; i<dstW; i++) {
282 int val = dither[(i + offset) & 7] << 12;
284 for (j=0; j<filterSize; j++)
285 val += src[j][i] * filter[j];
287 dest[i]= av_clip_uint8(val>>19);
// Unscaled (1-tap) 8-bit planar output: dither, drop the 7 fractional
// bits of the 15-bit intermediate, and clip to [0,255].
291 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
292 const uint8_t *dither, int offset)
295 for (i=0; i<dstW; i++) {
296 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
297 dest[i]= av_clip_uint8(val);
// Vertical chroma filter for semi-planar NV12/NV21 output: U and V are
// filtered together and written interleaved into one plane.
301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302 const int16_t **chrUSrc, const int16_t **chrVSrc,
303 uint8_t *dest, int chrDstW)
305 enum PixelFormat dstFormat = c->dstFormat;
306 const uint8_t *chrDither = c->chrDither8;
309 if (dstFormat == PIX_FMT_NV12)
310 for (i=0; i<chrDstW; i++) {
311 int u = chrDither[i & 7] << 12;
312 int v = chrDither[(i + 3) & 7] << 12; // offset V dither to decorrelate from U
314 for (j=0; j<chrFilterSize; j++) {
315 u += chrUSrc[j][i] * chrFilter[j];
316 v += chrVSrc[j][i] * chrFilter[j];
319 dest[2*i]= av_clip_uint8(u>>19); // NV12: U first
320 dest[2*i+1]= av_clip_uint8(v>>19);
323 for (i=0; i<chrDstW; i++) {
324 int u = chrDither[i & 7] << 12;
325 int v = chrDither[(i + 3) & 7] << 12;
327 for (j=0; j<chrFilterSize; j++) {
328 u += chrUSrc[j][i] * chrFilter[j];
329 v += chrVSrc[j][i] * chrFilter[j];
332 dest[2*i]= av_clip_uint8(v>>19); // NV21: V first
333 dest[2*i+1]= av_clip_uint8(u>>19);
// Store one 16-bit gray sample; the BE branch is shown, the LE branch
// of this macro is on lines not visible in this chunk.
337 #define output_pixel(pos, val) \
338 if (target == PIX_FMT_GRAY16BE) { \
// Multi-tap vertical filter producing 16-bit grayscale: luma pairs are
// filtered with a -0x40000000 bias to dodge signed overflow, clipped to
// int16 and re-biased by 0x8000 on store (see yuv2planeX_16 comment).
344 static av_always_inline void
345 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346 const int32_t **lumSrc, int lumFilterSize,
347 const int16_t *chrFilter, const int32_t **chrUSrc,
348 const int32_t **chrVSrc, int chrFilterSize,
349 const int32_t **alpSrc, uint16_t *dest, int dstW,
350 int y, enum PixelFormat target)
354 for (i = 0; i < (dstW >> 1); i++) {
356 int Y1 = (1 << 14) - 0x40000000;
357 int Y2 = (1 << 14) - 0x40000000;
359 for (j = 0; j < lumFilterSize; j++) {
360 Y1 += lumSrc[j][i * 2] * lumFilter[j];
361 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
365 Y1 = av_clip_int16(Y1);
366 Y2 = av_clip_int16(Y2);
367 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
372 static av_always_inline void
373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374 const int32_t *ubuf[2], const int32_t *vbuf[2],
375 const int32_t *abuf[2], uint16_t *dest, int dstW,
376 int yalpha, int uvalpha, int y,
377 enum PixelFormat target)
379 int yalpha1 = 4095 - yalpha;
381 const int32_t *buf0 = buf[0], *buf1 = buf[1];
383 for (i = 0; i < (dstW >> 1); i++) {
384 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387 output_pixel(&dest[i * 2 + 0], Y1);
388 output_pixel(&dest[i * 2 + 1], Y2);
392 static av_always_inline void
393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394 const int32_t *ubuf[2], const int32_t *vbuf[2],
395 const int32_t *abuf0, uint16_t *dest, int dstW,
396 int uvalpha, int y, enum PixelFormat target)
400 for (i = 0; i < (dstW >> 1); i++) {
401 int Y1 = buf0[i * 2 ] << 1;
402 int Y2 = buf0[i * 2 + 1] << 1;
404 output_pixel(&dest[i * 2 + 0], Y1);
405 output_pixel(&dest[i * 2 + 1], Y2);
// Wrapper generator for >8-bit packed outputs: casts the generic
// int16_t** scaler buffers to the int32_t** the 16-bit templates use,
// and emits the _X (multi-tap), _2 (bilinear) and _1 (unscaled)
// entry points for a given pixel format.
411 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
412 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
413 const int16_t **_lumSrc, int lumFilterSize, \
414 const int16_t *chrFilter, const int16_t **_chrUSrc, \
415 const int16_t **_chrVSrc, int chrFilterSize, \
416 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
419 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
420 **chrUSrc = (const int32_t **) _chrUSrc, \
421 **chrVSrc = (const int32_t **) _chrVSrc, \
422 **alpSrc = (const int32_t **) _alpSrc; \
423 uint16_t *dest = (uint16_t *) _dest; \
424 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
425 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
426 alpSrc, dest, dstW, y, fmt); \
429 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
430 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
431 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
432 int yalpha, int uvalpha, int y) \
434 const int32_t **buf = (const int32_t **) _buf, \
435 **ubuf = (const int32_t **) _ubuf, \
436 **vbuf = (const int32_t **) _vbuf, \
437 **abuf = (const int32_t **) _abuf; \
438 uint16_t *dest = (uint16_t *) _dest; \
439 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
440 dest, dstW, yalpha, uvalpha, y, fmt); \
443 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
444 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
445 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
446 int uvalpha, int y) \
448 const int32_t *buf0 = (const int32_t *) _buf0, \
449 **ubuf = (const int32_t **) _ubuf, \
450 **vbuf = (const int32_t **) _vbuf, \
451 *abuf0 = (const int32_t *) _abuf0; \
452 uint16_t *dest = (uint16_t *) _dest; \
453 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
454 dstW, uvalpha, y, fmt); \
// Concrete gray16 output functions for both endiannesses.
457 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
458 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
// Emit one byte of packed 1-bit pixels; MONOBLACK branch shown, the
// MONOWHITE (inverted) branch is on lines not visible in this chunk.
460 #define output_pixel(pos, acc) \
461 if (target == PIX_FMT_MONOBLACK) { \
// Multi-tap vertical filter producing 1-bit monochrome: luma is
// filtered, dithered via dither_8x8_220, mapped through the gray LUT
// (g = table_gU[128] + table_gV[128]) and bit-packed 8 pixels/byte.
467 static av_always_inline void
468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469 const int16_t **lumSrc, int lumFilterSize,
470 const int16_t *chrFilter, const int16_t **chrUSrc,
471 const int16_t **chrVSrc, int chrFilterSize,
472 const int16_t **alpSrc, uint8_t *dest, int dstW,
473 int y, enum PixelFormat target)
475 const uint8_t * const d128=dither_8x8_220[y&7];
476 uint8_t *g = c->table_gU[128] + c->table_gV[128];
480 for (i = 0; i < dstW - 1; i += 2) {
485 for (j = 0; j < lumFilterSize; j++) {
486 Y1 += lumSrc[j][i] * lumFilter[j];
487 Y2 += lumSrc[j][i+1] * lumFilter[j];
491 if ((Y1 | Y2) & 0x100) { // clip only when either value left 0..255
492 Y1 = av_clip_uint8(Y1);
493 Y2 = av_clip_uint8(Y2);
495 acc += acc + g[Y1 + d128[(i + 0) & 7]]; // shift bit in from the right
496 acc += acc + g[Y2 + d128[(i + 1) & 7]];
498 output_pixel(*dest++, acc);
503 static av_always_inline void
504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505 const int16_t *ubuf[2], const int16_t *vbuf[2],
506 const int16_t *abuf[2], uint8_t *dest, int dstW,
507 int yalpha, int uvalpha, int y,
508 enum PixelFormat target)
510 const int16_t *buf0 = buf[0], *buf1 = buf[1];
511 const uint8_t * const d128 = dither_8x8_220[y & 7];
512 uint8_t *g = c->table_gU[128] + c->table_gV[128];
513 int yalpha1 = 4095 - yalpha;
516 for (i = 0; i < dstW - 7; i += 8) {
517 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
518 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525 output_pixel(*dest++, acc);
529 static av_always_inline void
530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531 const int16_t *ubuf[2], const int16_t *vbuf[2],
532 const int16_t *abuf0, uint8_t *dest, int dstW,
533 int uvalpha, int y, enum PixelFormat target)
535 const uint8_t * const d128 = dither_8x8_220[y & 7];
536 uint8_t *g = c->table_gU[128] + c->table_gV[128];
539 for (i = 0; i < dstW - 7; i += 8) {
540 int acc = g[(buf0[i ] >> 7) + d128[0]];
541 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548 output_pixel(*dest++, acc);
// Wrapper generator for 8-bit packed outputs: emits the _X (multi-tap),
// _2 (bilinear) and _1 (unscaled) entry points that forward to the
// shared templates with the target pixel format baked in.
554 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
555 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
556 const int16_t **lumSrc, int lumFilterSize, \
557 const int16_t *chrFilter, const int16_t **chrUSrc, \
558 const int16_t **chrVSrc, int chrFilterSize, \
559 const int16_t **alpSrc, uint8_t *dest, int dstW, \
562 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
563 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
564 alpSrc, dest, dstW, y, fmt); \
567 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
568 const int16_t *ubuf[2], const int16_t *vbuf[2], \
569 const int16_t *abuf[2], uint8_t *dest, int dstW, \
570 int yalpha, int uvalpha, int y) \
572 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
573 dest, dstW, yalpha, uvalpha, y, fmt); \
576 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
577 const int16_t *ubuf[2], const int16_t *vbuf[2], \
578 const int16_t *abuf0, uint8_t *dest, int dstW, \
579 int uvalpha, int y) \
581 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
582 abuf0, dest, dstW, uvalpha, \
// Concrete 1-bit monochrome output functions (white/black polarity).
586 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
587 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
// Write one 4-byte YUYV/UYVY group: YUYV puts luma at even offsets
// (Y1 U Y2 V), the else branch (UYVY) puts luma at odd offsets.
589 #define output_pixels(pos, Y1, U, Y2, V) \
590 if (target == PIX_FMT_YUYV422) { \
591 dest[pos + 0] = Y1; \
593 dest[pos + 2] = Y2; \
597 dest[pos + 1] = Y1; \
599 dest[pos + 3] = Y2; \
602 static av_always_inline void
603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604 const int16_t **lumSrc, int lumFilterSize,
605 const int16_t *chrFilter, const int16_t **chrUSrc,
606 const int16_t **chrVSrc, int chrFilterSize,
607 const int16_t **alpSrc, uint8_t *dest, int dstW,
608 int y, enum PixelFormat target)
612 for (i = 0; i < (dstW >> 1); i++) {
619 for (j = 0; j < lumFilterSize; j++) {
620 Y1 += lumSrc[j][i * 2] * lumFilter[j];
621 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623 for (j = 0; j < chrFilterSize; j++) {
624 U += chrUSrc[j][i] * chrFilter[j];
625 V += chrVSrc[j][i] * chrFilter[j];
631 if ((Y1 | Y2 | U | V) & 0x100) {
632 Y1 = av_clip_uint8(Y1);
633 Y2 = av_clip_uint8(Y2);
634 U = av_clip_uint8(U);
635 V = av_clip_uint8(V);
637 output_pixels(4*i, Y1, U, Y2, V);
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643 const int16_t *ubuf[2], const int16_t *vbuf[2],
644 const int16_t *abuf[2], uint8_t *dest, int dstW,
645 int yalpha, int uvalpha, int y,
646 enum PixelFormat target)
648 const int16_t *buf0 = buf[0], *buf1 = buf[1],
649 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651 int yalpha1 = 4095 - yalpha;
652 int uvalpha1 = 4095 - uvalpha;
655 for (i = 0; i < (dstW >> 1); i++) {
656 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
661 output_pixels(i * 4, Y1, U, Y2, V);
665 static av_always_inline void
666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667 const int16_t *ubuf[2], const int16_t *vbuf[2],
668 const int16_t *abuf0, uint8_t *dest, int dstW,
669 int uvalpha, int y, enum PixelFormat target)
671 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675 if (uvalpha < 2048) {
676 for (i = 0; i < (dstW >> 1); i++) {
677 int Y1 = buf0[i * 2] >> 7;
678 int Y2 = buf0[i * 2 + 1] >> 7;
679 int U = ubuf1[i] >> 7;
680 int V = vbuf1[i] >> 7;
682 output_pixels(i * 4, Y1, U, Y2, V);
685 for (i = 0; i < (dstW >> 1); i++) {
686 int Y1 = buf0[i * 2] >> 7;
687 int Y2 = buf0[i * 2 + 1] >> 7;
688 int U = (ubuf0[i] + ubuf1[i]) >> 8;
689 int V = (vbuf0[i] + vbuf1[i]) >> 8;
691 output_pixels(i * 4, Y1, U, Y2, V);
// Concrete packed 4:2:2 output functions.
698 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
699 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
// Channel-order helpers for 48-bit RGB/BGR: R_B/B_R swap red and blue
// depending on the target. output_pixel stores a 16-bit component with
// the target's endianness (isBE branch shown; LE lines not visible here).
701 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
702 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
703 #define output_pixel(pos, val) \
704 if (isBE(target)) { \
// Multi-tap vertical filter producing 48-bit RGB/BGR: filter luma with a
// signed-overflow-avoiding bias, filter chroma around the -128 center,
// then convert via the per-context yuv2rgb coefficients and store
// 16 bits per component (clip to 30 bits, >>14).
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712 const int32_t **lumSrc, int lumFilterSize,
713 const int16_t *chrFilter, const int32_t **chrUSrc,
714 const int32_t **chrVSrc, int chrFilterSize,
715 const int32_t **alpSrc, uint16_t *dest, int dstW,
716 int y, enum PixelFormat target)
720 for (i = 0; i < (dstW >> 1); i++) {
722 int Y1 = -0x40000000;
723 int Y2 = -0x40000000;
724 int U = -128 << 23; // 19
728 for (j = 0; j < lumFilterSize; j++) {
729 Y1 += lumSrc[j][i * 2] * lumFilter[j];
730 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732 for (j = 0; j < chrFilterSize; j++) {
733 U += chrUSrc[j][i] * chrFilter[j];
734 V += chrVSrc[j][i] * chrFilter[j];
737 // 8bit: 12+15=27; 16-bit: 12+19=31
745 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746 Y1 -= c->yuv2rgb_y_offset;
747 Y2 -= c->yuv2rgb_y_offset;
748 Y1 *= c->yuv2rgb_y_coeff;
749 Y2 *= c->yuv2rgb_y_coeff;
752 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754 R = V * c->yuv2rgb_v2r_coeff;
755 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756 B = U * c->yuv2rgb_u2b_coeff;
758 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
769 static av_always_inline void
770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771 const int32_t *ubuf[2], const int32_t *vbuf[2],
772 const int32_t *abuf[2], uint16_t *dest, int dstW,
773 int yalpha, int uvalpha, int y,
774 enum PixelFormat target)
776 const int32_t *buf0 = buf[0], *buf1 = buf[1],
777 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779 int yalpha1 = 4095 - yalpha;
780 int uvalpha1 = 4095 - uvalpha;
783 for (i = 0; i < (dstW >> 1); i++) {
784 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
790 Y1 -= c->yuv2rgb_y_offset;
791 Y2 -= c->yuv2rgb_y_offset;
792 Y1 *= c->yuv2rgb_y_coeff;
793 Y2 *= c->yuv2rgb_y_coeff;
797 R = V * c->yuv2rgb_v2r_coeff;
798 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799 B = U * c->yuv2rgb_u2b_coeff;
801 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
811 static av_always_inline void
812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813 const int32_t *ubuf[2], const int32_t *vbuf[2],
814 const int32_t *abuf0, uint16_t *dest, int dstW,
815 int uvalpha, int y, enum PixelFormat target)
817 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821 if (uvalpha < 2048) {
822 for (i = 0; i < (dstW >> 1); i++) {
823 int Y1 = (buf0[i * 2] ) >> 2;
824 int Y2 = (buf0[i * 2 + 1]) >> 2;
825 int U = (ubuf0[i] + (-128 << 11)) >> 2;
826 int V = (vbuf0[i] + (-128 << 11)) >> 2;
829 Y1 -= c->yuv2rgb_y_offset;
830 Y2 -= c->yuv2rgb_y_offset;
831 Y1 *= c->yuv2rgb_y_coeff;
832 Y2 *= c->yuv2rgb_y_coeff;
836 R = V * c->yuv2rgb_v2r_coeff;
837 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838 B = U * c->yuv2rgb_u2b_coeff;
840 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849 for (i = 0; i < (dstW >> 1); i++) {
850 int Y1 = (buf0[i * 2] ) >> 2;
851 int Y2 = (buf0[i * 2 + 1]) >> 2;
852 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856 Y1 -= c->yuv2rgb_y_offset;
857 Y2 -= c->yuv2rgb_y_offset;
858 Y1 *= c->yuv2rgb_y_coeff;
859 Y2 *= c->yuv2rgb_y_coeff;
863 R = V * c->yuv2rgb_v2r_coeff;
864 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865 B = U * c->yuv2rgb_u2b_coeff;
867 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Concrete 48-bit RGB/BGR output functions for both endiannesses.
882 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
// Write two horizontally adjacent RGB pixels using the context's
// precomputed per-component lookup tables (_r/_g/_b index by Y and
// already encode U/V): 32-bit RGBA-family, 24-bit RGB/BGR, dithered
// 16/15/12-bit packed, and dithered 8/4-bit paletted targets.
// 'target' is a compile-time constant, so dead branches fold away.
887 static av_always_inline void
888 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
889 unsigned U, unsigned V, unsigned A1, unsigned A2,
890 const void *_r, const void *_g, const void *_b, int y,
891 enum PixelFormat target, int hasAlpha)
893 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
894 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
895 uint32_t *dest = (uint32_t *) _dest;
896 const uint32_t *r = (const uint32_t *) _r;
897 const uint32_t *g = (const uint32_t *) _g;
898 const uint32_t *b = (const uint32_t *) _b;
901 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
903 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
904 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
907 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
909 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
910 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
912 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
913 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
916 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
917 uint8_t *dest = (uint8_t *) _dest;
918 const uint8_t *r = (const uint8_t *) _r;
919 const uint8_t *g = (const uint8_t *) _g;
920 const uint8_t *b = (const uint8_t *) _b;
922 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
923 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
924 dest[i * 6 + 0] = r_b[Y1];
925 dest[i * 6 + 1] = g[Y1];
926 dest[i * 6 + 2] = b_r[Y1];
927 dest[i * 6 + 3] = r_b[Y2];
928 dest[i * 6 + 4] = g[Y2];
929 dest[i * 6 + 5] = b_r[Y2];
932 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
933 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
934 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
935 uint16_t *dest = (uint16_t *) _dest;
936 const uint16_t *r = (const uint16_t *) _r;
937 const uint16_t *g = (const uint16_t *) _g;
938 const uint16_t *b = (const uint16_t *) _b;
939 int dr1, dg1, db1, dr2, dg2, db2;
941 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
942 dr1 = dither_2x2_8[ y & 1 ][0];
943 dg1 = dither_2x2_4[ y & 1 ][0];
944 db1 = dither_2x2_8[(y & 1) ^ 1][0];
945 dr2 = dither_2x2_8[ y & 1 ][1];
946 dg2 = dither_2x2_4[ y & 1 ][1];
947 db2 = dither_2x2_8[(y & 1) ^ 1][1];
948 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
949 dr1 = dither_2x2_8[ y & 1 ][0];
950 dg1 = dither_2x2_8[ y & 1 ][1];
951 db1 = dither_2x2_8[(y & 1) ^ 1][0];
952 dr2 = dither_2x2_8[ y & 1 ][1];
953 dg2 = dither_2x2_8[ y & 1 ][0];
954 db2 = dither_2x2_8[(y & 1) ^ 1][1];
956 dr1 = dither_4x4_16[ y & 3 ][0];
957 dg1 = dither_4x4_16[ y & 3 ][1];
958 db1 = dither_4x4_16[(y & 3) ^ 3][0];
959 dr2 = dither_4x4_16[ y & 3 ][1];
960 dg2 = dither_4x4_16[ y & 3 ][0];
961 db2 = dither_4x4_16[(y & 3) ^ 3][1];
964 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
965 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
966 } else /* 8/4-bit */ {
967 uint8_t *dest = (uint8_t *) _dest;
968 const uint8_t *r = (const uint8_t *) _r;
969 const uint8_t *g = (const uint8_t *) _g;
970 const uint8_t *b = (const uint8_t *) _b;
971 int dr1, dg1, db1, dr2, dg2, db2;
973 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
974 const uint8_t * const d64 = dither_8x8_73[y & 7];
975 const uint8_t * const d32 = dither_8x8_32[y & 7];
976 dr1 = dg1 = d32[(i * 2 + 0) & 7];
977 db1 = d64[(i * 2 + 0) & 7];
978 dr2 = dg2 = d32[(i * 2 + 1) & 7];
979 db2 = d64[(i * 2 + 1) & 7];
981 const uint8_t * const d64 = dither_8x8_73 [y & 7];
982 const uint8_t * const d128 = dither_8x8_220[y & 7];
983 dr1 = db1 = d128[(i * 2 + 0) & 7];
984 dg1 = d64[(i * 2 + 0) & 7];
985 dr2 = db2 = d128[(i * 2 + 1) & 7];
986 dg2 = d64[(i * 2 + 1) & 7];
989 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
990 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
991 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
993 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
994 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
999 static av_always_inline void
1000 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1001 const int16_t **lumSrc, int lumFilterSize,
1002 const int16_t *chrFilter, const int16_t **chrUSrc,
1003 const int16_t **chrVSrc, int chrFilterSize,
1004 const int16_t **alpSrc, uint8_t *dest, int dstW,
1005 int y, enum PixelFormat target, int hasAlpha)
1009 for (i = 0; i < (dstW >> 1); i++) {
1015 int av_unused A1, A2;
1016 const void *r, *g, *b;
1018 for (j = 0; j < lumFilterSize; j++) {
1019 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1020 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1022 for (j = 0; j < chrFilterSize; j++) {
1023 U += chrUSrc[j][i] * chrFilter[j];
1024 V += chrVSrc[j][i] * chrFilter[j];
1030 if ((Y1 | Y2 | U | V) & 0x100) {
1031 Y1 = av_clip_uint8(Y1);
1032 Y2 = av_clip_uint8(Y2);
1033 U = av_clip_uint8(U);
1034 V = av_clip_uint8(V);
1039 for (j = 0; j < lumFilterSize; j++) {
1040 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1041 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1045 if ((A1 | A2) & 0x100) {
1046 A1 = av_clip_uint8(A1);
1047 A2 = av_clip_uint8(A2);
1051 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1053 g = (c->table_gU[U] + c->table_gV[V]);
1056 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1057 r, g, b, y, target, hasAlpha);
1061 static av_always_inline void
1062 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1063 const int16_t *ubuf[2], const int16_t *vbuf[2],
1064 const int16_t *abuf[2], uint8_t *dest, int dstW,
1065 int yalpha, int uvalpha, int y,
1066 enum PixelFormat target, int hasAlpha)
1068 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1069 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1070 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1071 *abuf0 = hasAlpha ? abuf[0] : NULL,
1072 *abuf1 = hasAlpha ? abuf[1] : NULL;
1073 int yalpha1 = 4095 - yalpha;
1074 int uvalpha1 = 4095 - uvalpha;
1077 for (i = 0; i < (dstW >> 1); i++) {
1078 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1079 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1080 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1081 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1083 const void *r = c->table_rV[V],
1084 *g = (c->table_gU[U] + c->table_gV[V]),
1085 *b = c->table_bU[U];
1088 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1089 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1092 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1093 r, g, b, y, target, hasAlpha);
/*
 * Unscaled single-line output: no vertical filtering, just >>7 to drop the
 * fixed-point fraction. uvalpha < 2048 selects nearest-neighbour chroma
 * (ubuf1/vbuf1 only); otherwise the two chroma lines are averaged (>>8).
 * NOTE(review): elided listing -- the trailing `int hasAlpha` parameter and
 * several brace/declaration lines are missing (1101->1104 gap).
 */
1097 static av_always_inline void
1098 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1099 const int16_t *ubuf[2], const int16_t *vbuf[2],
1100 const int16_t *abuf0, uint8_t *dest, int dstW,
1101 int uvalpha, int y, enum PixelFormat target,
1104 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1105 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1108 if (uvalpha < 2048) {
1109 for (i = 0; i < (dstW >> 1); i++) {
1110 int Y1 = buf0[i * 2] >> 7;
1111 int Y2 = buf0[i * 2 + 1] >> 7;
1112 int U = ubuf1[i] >> 7;
1113 int V = vbuf1[i] >> 7;
1115 const void *r = c->table_rV[V],
1116 *g = (c->table_gU[U] + c->table_gV[V]),
1117 *b = c->table_bU[U];
1120 A1 = abuf0[i * 2 ] >> 7;
1121 A2 = abuf0[i * 2 + 1] >> 7;
1124 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1125 r, g, b, y, target, hasAlpha);
/* averaged-chroma path: (line0 + line1) / 2, folded into the >>8 */
1128 for (i = 0; i < (dstW >> 1); i++) {
1129 int Y1 = buf0[i * 2] >> 7;
1130 int Y2 = buf0[i * 2 + 1] >> 7;
1131 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1132 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1134 const void *r = c->table_rV[V],
1135 *g = (c->table_gU[U] + c->table_gV[V]),
1136 *b = c->table_bU[U];
1139 A1 = abuf0[i * 2 ] >> 7;
1140 A2 = abuf0[i * 2 + 1] >> 7;
1143 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1144 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX emits only the multi-tap (_X_c)
 * entry point; YUV2RGBWRAPPER additionally emits the 2-line (_2_c) and
 * 1-line (_1_c) entry points, all forwarding to the av_always_inline
 * templates above with a compile-time `fmt`/`hasAlpha` so each instance
 * is specialized by constant propagation.
 * NOTE(review): elided listing -- closing-brace lines of each generated
 * function are missing; do not insert comments between the continuation
 * (backslash) lines below.
 */
1149 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1150 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1151 const int16_t **lumSrc, int lumFilterSize, \
1152 const int16_t *chrFilter, const int16_t **chrUSrc, \
1153 const int16_t **chrVSrc, int chrFilterSize, \
1154 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1157 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1158 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1159 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1161 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1162 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1163 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1164 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1165 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1166 int yalpha, int uvalpha, int y) \
1168 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1169 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1172 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1173 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1174 const int16_t *abuf0, uint8_t *dest, int dstW, \
1175 int uvalpha, int y) \
1177 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1178 dstW, uvalpha, y, fmt, hasAlpha); \
/*
 * Instantiations: one specialized writer per packed output format.
 * In CONFIG_SMALL builds the 32-bit variants test c->alpPixBuf at run time;
 * otherwise separate alpha (a32*) and no-alpha (x32*) versions are built.
 * NOTE(review): elided listing -- the #if CONFIG_SMALL / #else / #endif
 * lines framing these instantiations are partly missing.
 */
1182 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1183 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1185 #if CONFIG_SWSCALE_ALPHA
1186 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1187 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1189 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1190 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1192 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1193 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1194 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1195 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1196 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1197 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1198 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1199 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-resolution variant: one chroma sample per output pixel
 * (no 2:1 sharing), converting arithmetically with the per-context
 * yuv2rgb_* coefficients instead of the LUTs. `step` is the per-pixel
 * byte stride of the destination (3 for 24-bit RGB/BGR, 4 for 32-bit).
 * NOTE(review): elided listing -- the R/G/B byte stores between the
 * alpha stores (embedded lines 1254..1266, 1271..1284) are missing.
 */
1201 static av_always_inline void
1202 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1203 const int16_t **lumSrc, int lumFilterSize,
1204 const int16_t *chrFilter, const int16_t **chrUSrc,
1205 const int16_t **chrVSrc, int chrFilterSize,
1206 const int16_t **alpSrc, uint8_t *dest,
1207 int dstW, int y, enum PixelFormat target, int hasAlpha)
1210 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1212 for (i = 0; i < dstW; i++) {
1220 for (j = 0; j < lumFilterSize; j++) {
1221 Y += lumSrc[j][i] * lumFilter[j];
1223 for (j = 0; j < chrFilterSize; j++) {
1224 U += chrUSrc[j][i] * chrFilter[j];
1225 V += chrVSrc[j][i] * chrFilter[j];
1232 for (j = 0; j < lumFilterSize; j++) {
1233 A += alpSrc[j][i] * lumFilter[j];
1237 A = av_clip_uint8(A);
/* remove black-level offset, then scale by the contrast coefficient */
1239 Y -= c->yuv2rgb_y_offset;
1240 Y *= c->yuv2rgb_y_coeff;
1242 R = Y + V*c->yuv2rgb_v2r_coeff;
1243 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1244 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when a component overflowed the 30-bit intermediate range */
1245 if ((R | G | B) & 0xC0000000) {
1246 R = av_clip_uintp2(R, 30);
1247 G = av_clip_uintp2(G, 30);
1248 B = av_clip_uintp2(B, 30);
1253 dest[0] = hasAlpha ? A : 255;
1267 dest[3] = hasAlpha ? A : 255;
1270 dest[0] = hasAlpha ? A : 255;
1285 dest[3] = hasAlpha ? A : 255;
/*
 * Full-chroma instantiations (multi-tap entry point only): BGRA/ABGR/
 * RGBA/ARGB with run-time alpha check in CONFIG_SMALL builds, otherwise
 * dedicated alpha and no-alpha (x-suffixed) versions, plus 24-bit RGB/BGR.
 * NOTE(review): elided listing -- surrounding #if CONFIG_SMALL framing
 * lines are partly missing.
 */
1293 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1294 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1295 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1296 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1298 #if CONFIG_SWSCALE_ALPHA
1299 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1300 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1305 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1306 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1307 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1309 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1310 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/*
 * Fill `height` rows of a plane (starting at row `y`) with a constant
 * byte value -- used e.g. for dummy alpha/chroma planes.
 * NOTE(review): elided listing -- the `uint8_t val, int y` parameter line
 * and the ptr += stride advance are missing here.
 */
1312 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1313 int width, int height,
1317 uint8_t *ptr = plane + stride*y;
1318 for (i=0; i<height; i++) {
1319 memset(ptr, val, width);
/* Helpers for the 16-bit-per-component readers below:
 * input_pixel reads one 16-bit component honoring the format's endianness;
 * r/b swap the red/blue roles for the BGR48 variants so one template
 * serves both component orders. */
1324 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1326 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1327 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * RGB48/BGR48 -> 16-bit luma. Uses the RY/GY/BY fixed-point coefficients;
 * the 0x2001<<(RGB2YUV_SHIFT-1) addend is rounding plus the 16-bit luma
 * black-level offset.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1329 static av_always_inline void
1330 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1331 enum PixelFormat origin)
1334 for (i = 0; i < width; i++) {
1335 unsigned int r_b = input_pixel(&src[i*3+0]);
1336 unsigned int g = input_pixel(&src[i*3+1]);
1337 unsigned int b_r = input_pixel(&src[i*3+2]);
1339 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * RGB48/BGR48 -> 16-bit chroma, one chroma sample per input pixel.
 * The 0x10001<<(RGB2YUV_SHIFT-1) addend combines rounding with the 16-bit
 * chroma bias (mid-gray offset). src2 is unused here -- presumably callers
 * pass src1 == src2; confirm against the caller contract.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1343 static av_always_inline void
1344 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1345 const uint16_t *src1, const uint16_t *src2,
1346 int width, enum PixelFormat origin)
1350 for (i = 0; i < width; i++) {
1351 int r_b = input_pixel(&src1[i*3+0]);
1352 int g = input_pixel(&src1[i*3+1]);
1353 int b_r = input_pixel(&src1[i*3+2]);
1355 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1356 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * RGB48/BGR48 -> 16-bit chroma at half horizontal resolution: each output
 * sample averages two adjacent input pixels (rounded, +1 >> 1) before the
 * coefficient multiply.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1360 static av_always_inline void
1361 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1362 const uint16_t *src1, const uint16_t *src2,
1363 int width, enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1369 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1370 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1372 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1373 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generator for the public uint8_t* entry points (ToY / ToUV / ToUV_half)
 * of one 48-bit format: casts the byte pointers to uint16_t* and forwards
 * to the templates above with the compile-time `origin` format.
 * NOTE(review): elided listing -- closing braces of the generated
 * functions are missing; no comments inserted between continuation lines.
 */
1381 #define rgb48funcs(pattern, BE_LE, origin) \
1382 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1383 int width, uint32_t *unused) \
1385 const uint16_t *src = (const uint16_t *) _src; \
1386 uint16_t *dst = (uint16_t *) _dst; \
1387 rgb48ToY_c_template(dst, src, width, origin); \
1390 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1391 const uint8_t *_src1, const uint8_t *_src2, \
1392 int width, uint32_t *unused) \
1394 const uint16_t *src1 = (const uint16_t *) _src1, \
1395 *src2 = (const uint16_t *) _src2; \
1396 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1397 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1400 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1401 const uint8_t *_src1, const uint8_t *_src2, \
1402 int width, uint32_t *unused) \
1404 const uint16_t *src1 = (const uint16_t *) _src1, \
1405 *src2 = (const uint16_t *) _src2; \
1406 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1407 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* Instantiate the four 48-bit variants, then redefine input_pixel for the
 * 16/32-bit packed readers below: 32-bit ARGB-family formats are read as a
 * native-aligned 32-bit word, everything else as an endian-correct 16-bit
 * word. (The previous input_pixel/r/b defines are presumably #undef'd in
 * elided lines -- confirm in the full source.) */
1410 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1411 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1412 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1413 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1415 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1416 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1417 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic 16/32-bit packed RGB -> 8-bit luma. Each component is isolated
 * with shift+mask (shr/shg/shb, maskr/maskg/maskb after an optional whole-
 * pixel shift shp), scaled by the coefficient shifted up by rsh/gsh/bsh,
 * and the sum reduced by S. All parameters are compile-time constants from
 * rgb16_32_wrapper, so each instance folds to straight-line code.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1419 static av_always_inline void
1420 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1421 int width, enum PixelFormat origin,
1422 int shr, int shg, int shb, int shp,
1423 int maskr, int maskg, int maskb,
1424 int rsh, int gsh, int bsh, int S)
1426 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
/* 33 << (S-1): rounding plus the luma black-level offset at scale S */
1427 const unsigned rnd = 33u << (S - 1);
1430 for (i = 0; i < width; i++) {
1431 int px = input_pixel(i) >> shp;
1432 int b = (px & maskb) >> shb;
1433 int g = (px & maskg) >> shg;
1434 int r = (px & maskr) >> shr;
1436 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/*
 * Generic 16/32-bit packed RGB -> 8-bit chroma, one sample per pixel.
 * Same shift/mask decomposition as the luma template; 257 << (S-1) is
 * rounding plus the chroma mid-gray bias at scale S.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1440 static av_always_inline void
1441 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1442 const uint8_t *src, int width,
1443 enum PixelFormat origin,
1444 int shr, int shg, int shb, int shp,
1445 int maskr, int maskg, int maskb,
1446 int rsh, int gsh, int bsh, int S)
1448 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1449 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1450 const unsigned rnd = 257u << (S - 1);
1453 for (i = 0; i < width; i++) {
1454 int px = input_pixel(i) >> shp;
1455 int b = (px & maskb) >> shb;
1456 int g = (px & maskg) >> shg;
1457 int r = (px & maskr) >> shr;
1459 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1460 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/*
 * Half-horizontal-resolution chroma from 16/32-bit packed RGB: sums two
 * adjacent pixels before masking. Trick: g is extracted via maskgx
 * (~(maskr|maskb)) and the r/b pair as px0+px1-g, so the two pixels are
 * added in one word; the masks are widened (mask |= mask << 1) because the
 * per-component sums need one extra bit. Output shift is S+1 to divide the
 * pixel sum by two.
 * NOTE(review): elided listing -- the carry-correction branch body around
 * embedded lines 1488-1491 is incomplete; confirm against full source.
 */
1464 static av_always_inline void
1465 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1466 const uint8_t *src, int width,
1467 enum PixelFormat origin,
1468 int shr, int shg, int shb, int shp,
1469 int maskr, int maskg, int maskb,
1470 int rsh, int gsh, int bsh, int S)
1472 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1473 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1474 maskgx = ~(maskr | maskb);
1475 const unsigned rnd = 257u << S;
1478 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1479 for (i = 0; i < width; i++) {
1480 int px0 = input_pixel(2 * i + 0) >> shp;
1481 int px1 = input_pixel(2 * i + 1) >> shp;
1482 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1483 int rb = px0 + px1 - g;
1485 b = (rb & maskb) >> shb;
1486 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1487 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1490 g = (g & maskg) >> shg;
1492 r = (rb & maskr) >> shr;
1494 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1495 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Generator for the public ToY/ToUV/ToUV_half entry points of one packed
 * 16/32-bit RGB format: forwards to the three templates above with all
 * shift/mask/coefficient parameters as compile-time constants.
 * NOTE(review): elided listing -- closing braces of the generated
 * functions are missing; no comments inserted between continuation lines.
 */
1501 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1502 maskg, maskb, rsh, gsh, bsh, S) \
1503 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1504 int width, uint32_t *unused) \
1506 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1507 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1510 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1511 const uint8_t *src, const uint8_t *dummy, \
1512 int width, uint32_t *unused) \
1514 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1515 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1518 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1519 const uint8_t *src, const uint8_t *dummy, \
1520 int width, uint32_t *unused) \
1522 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One instantiation per packed RGB layout. The masks/shifts describe the
 * component positions; S = RGB2YUV_SHIFT+8 for 8-bit and 6-bit components,
 * +7 for the 5-bit (555) formats so the result lands in 8 bits. */
1526 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1527 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1528 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1529 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1530 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1531 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1532 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1533 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1534 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1535 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1536 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1537 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/*
 * Simple input converters: alpha-plane extraction (abgr/rgba), palette
 * lookup (pal*), and 1-bit monochrome expansion (mono*2Y).
 * NOTE(review): heavily elided listing -- the per-pixel store statements
 * of abgrToA_c/rgbaToA_c and parts of the pal/mono loop bodies are
 * missing; only the surviving lines are annotated.
 */
1539 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1542 for (i=0; i<width; i++) {
1547 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1550 for (i=0; i<width; i++) {
/* palToY: luma is the low byte of the 32-bit palette entry */
1555 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1558 for (i=0; i<width; i++) {
1561 dst[i]= pal[d] & 0xFF;
1565 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1566 const uint8_t *src1, const uint8_t *src2,
1567 int width, uint32_t *pal)
1570 assert(src1 == src2);
1571 for (i=0; i<width; i++) {
1572 int p= pal[src1[i]];
/* monowhite/monoblack: expand each of 8 packed bits to a 0/255 byte,
 * MSB first; the two variants differ in bit polarity (elided here) */
1579 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1580 int width, uint32_t *unused)
1583 for (i=0; i<width/8; i++) {
1586 dst[8*i+j]= ((d>>(7-j))&1)*255;
1590 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1591 int width, uint32_t *unused)
1594 for (i=0; i<width/8; i++) {
1597 dst[8*i+j]= ((d>>(7-j))&1)*255;
/*
 * Packed-YUV and byte-swap input converters.
 * yuy2: Y at even bytes, U/V interleaved at odd bytes (Y0 U Y1 V).
 * uyvy: same data shifted by one byte (U Y0 V Y1) -- kept as a separate
 * function to avoid fully unaligned accesses.
 * bswap16*: endianness conversion for 16-bit-per-sample planes.
 * NOTE(review): elided listing -- brace/declaration lines and the Y-copy
 * statements of yuy2ToY_c/uyvyToY_c are missing.
 */
1601 //FIXME yuy2* can read up to 7 samples too much
1603 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1607 for (i=0; i<width; i++)
1611 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1612 const uint8_t *src2, int width, uint32_t *unused)
1615 for (i=0; i<width; i++) {
1616 dstU[i]= src1[4*i + 1];
1617 dstV[i]= src1[4*i + 3];
/* chroma is not subsampled vertically here, hence src1 must equal src2 */
1619 assert(src1 == src2);
1622 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1625 const uint16_t *src = (const uint16_t *) _src;
1626 uint16_t *dst = (uint16_t *) _dst;
1627 for (i=0; i<width; i++) {
1628 dst[i] = av_bswap16(src[i]);
1632 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1633 const uint8_t *_src2, int width, uint32_t *unused)
1636 const uint16_t *src1 = (const uint16_t *) _src1,
1637 *src2 = (const uint16_t *) _src2;
1638 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1639 for (i=0; i<width; i++) {
1640 dstU[i] = av_bswap16(src1[i]);
1641 dstV[i] = av_bswap16(src2[i]);
1645 /* This is almost identical to the previous, end exists only because
1646 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1647 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1651 for (i=0; i<width; i++)
1655 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1656 const uint8_t *src2, int width, uint32_t *unused)
1659 for (i=0; i<width; i++) {
1660 dstU[i]= src1[4*i + 0];
1661 dstV[i]= src1[4*i + 2];
1663 assert(src1 == src2);
/*
 * De-interleave a semi-planar chroma plane into two planes.
 * NV12 stores U first (U V U V ...), NV21 stores V first, so nv21ToUV_c
 * simply forwards with the destination pointers swapped.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1666 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1667 const uint8_t *src, int width)
1670 for (i = 0; i < width; i++) {
1671 dst1[i] = src[2*i+0];
1672 dst2[i] = src[2*i+1];
1676 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1677 const uint8_t *src1, const uint8_t *src2,
1678 int width, uint32_t *unused)
1680 nvXXtoUV_c(dstU, dstV, src1, width);
1683 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1684 const uint8_t *src1, const uint8_t *src2,
1685 int width, uint32_t *unused)
1687 nvXXtoUV_c(dstV, dstU, src1, width);
/*
 * 24-bit packed RGB/BGR input converters: ToY (full resolution), ToUV
 * (per-pixel chroma) and ToUV_half (two pixels averaged per chroma
 * sample; the +257<<SHIFT rounding and >>(SHIFT+1) fold the /2 in).
 * The bgr/rgb pairs differ only in component order within each triple.
 * NOTE(review): elided listing -- the component loads of the ToY loops
 * and brace/declaration lines are missing; the input_pixel redefinition
 * below appears unused by these byte-oriented readers -- confirm.
 */
1690 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1692 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1693 int width, uint32_t *unused)
1696 for (i=0; i<width; i++) {
1701 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1705 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1706 const uint8_t *src2, int width, uint32_t *unused)
1709 for (i=0; i<width; i++) {
1710 int b= src1[3*i + 0];
1711 int g= src1[3*i + 1];
1712 int r= src1[3*i + 2];
1714 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1715 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1717 assert(src1 == src2);
1720 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1721 const uint8_t *src2, int width, uint32_t *unused)
1724 for (i=0; i<width; i++) {
1725 int b= src1[6*i + 0] + src1[6*i + 3];
1726 int g= src1[6*i + 1] + src1[6*i + 4];
1727 int r= src1[6*i + 2] + src1[6*i + 5];
1729 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1730 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1732 assert(src1 == src2);
1735 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1739 for (i=0; i<width; i++) {
1744 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1748 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1749 const uint8_t *src2, int width, uint32_t *unused)
1753 for (i=0; i<width; i++) {
1754 int r= src1[3*i + 0];
1755 int g= src1[3*i + 1];
1756 int b= src1[3*i + 2];
1758 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1759 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1763 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1764 const uint8_t *src2, int width, uint32_t *unused)
1768 for (i=0; i<width; i++) {
1769 int r= src1[6*i + 0] + src1[6*i + 3];
1770 int g= src1[6*i + 1] + src1[6*i + 4];
1771 int b= src1[6*i + 2] + src1[6*i + 5];
1773 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1774 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/*
 * Planar RGB (GBR plane order: src[0]=G, src[1]=B, src[2]=R) to Y and UV,
 * in 8-bit and 16-bit little/big-endian variants. The 16-bit variants read
 * components with AV_RL16/AV_RB16 and write 16-bit outputs.
 * NOTE(review): elided listing -- the component loads of the 8-bit
 * variants and brace/declaration lines are missing.
 */
1778 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1781 for (i = 0; i < width; i++) {
1786 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1790 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1793 const uint16_t **src = (const uint16_t **) _src;
1794 uint16_t *dst = (uint16_t *) _dst;
1795 for (i = 0; i < width; i++) {
1796 int g = AV_RL16(src[0] + i);
1797 int b = AV_RL16(src[1] + i);
1798 int r = AV_RL16(src[2] + i);
1800 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1804 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1807 const uint16_t **src = (const uint16_t **) _src;
1808 uint16_t *dst = (uint16_t *) _dst;
1809 for (i = 0; i < width; i++) {
1810 int g = AV_RB16(src[0] + i);
1811 int b = AV_RB16(src[1] + i);
1812 int r = AV_RB16(src[2] + i);
1814 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1818 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1821 for (i = 0; i < width; i++) {
1826 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1827 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1831 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1834 const uint16_t **src = (const uint16_t **) _src;
1835 uint16_t *dstU = (uint16_t *) _dstU;
1836 uint16_t *dstV = (uint16_t *) _dstV;
1837 for (i = 0; i < width; i++) {
1838 int g = AV_RL16(src[0] + i);
1839 int b = AV_RL16(src[1] + i);
1840 int r = AV_RL16(src[2] + i);
1842 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1843 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1847 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1850 const uint16_t **src = (const uint16_t **) _src;
1851 uint16_t *dstU = (uint16_t *) _dstU;
1852 uint16_t *dstV = (uint16_t *) _dstV;
1853 for (i = 0; i < width; i++) {
1854 int g = AV_RB16(src[0] + i);
1855 int b = AV_RB16(src[1] + i);
1856 int r = AV_RB16(src[2] + i);
1858 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1859 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/*
 * Horizontal multi-tap scalers. Name scheme: hScale<in>To<out>_c, where
 * <out> is the intermediate precision (15-bit for 8-bit pipelines, 19-bit
 * for high-bit-depth). Each output sample is a dot product of filterSize
 * source samples with 14-bit filter coefficients, then shifted down and
 * clamped to the intermediate maximum.
 * NOTE(review): elided listing -- the derivation of `sh` in
 * hScale16To19_c (from `bits`, embedded lines 1871-1872) and several
 * brace/declaration lines are missing.
 */
1863 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1864 const int16_t *filter,
1865 const int16_t *filterPos, int filterSize)
1868 int32_t *dst = (int32_t *) _dst;
1869 const uint16_t *src = (const uint16_t *) _src;
1870 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1873 for (i = 0; i < dstW; i++) {
1875 int srcPos = filterPos[i];
1878 for (j = 0; j < filterSize; j++) {
1879 val += src[srcPos + j] * filter[filterSize * i + j];
1881 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1882 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1886 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1887 const int16_t *filter,
1888 const int16_t *filterPos, int filterSize)
1891 const uint16_t *src = (const uint16_t *) _src;
1892 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1894 for (i = 0; i < dstW; i++) {
1896 int srcPos = filterPos[i];
1899 for (j = 0; j < filterSize; j++) {
1900 val += src[srcPos + j] * filter[filterSize * i + j];
1902 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1903 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1907 // bilinear / bicubic scaling
1908 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1909 const int16_t *filter, const int16_t *filterPos,
1913 for (i=0; i<dstW; i++) {
1915 int srcPos= filterPos[i];
1917 for (j=0; j<filterSize; j++) {
1918 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1920 //filter += hFilterSize;
1921 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1926 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1927 const int16_t *filter, const int16_t *filterPos,
1931 int32_t *dst = (int32_t *) _dst;
1932 for (i=0; i<dstW; i++) {
1934 int srcPos= filterPos[i];
1936 for (j=0; j<filterSize; j++) {
1937 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1939 //filter += hFilterSize;
1940 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
/*
 * Limited-range (MPEG, 16..235/240) <-> full-range (JPEG, 0..255)
 * conversion of the fixed-point intermediates. The *16 variants operate
 * on the 19-bit/int32 pipeline, hence the constants scaled by <<4/<<2
 * relative to the 15-bit versions. FFMIN clamps guard against overflow in
 * the expanding (ToJpeg) direction.
 * NOTE(review): elided listing -- brace/declaration lines are missing.
 */
1945 //FIXME all pal and rgb srcFormats could do this convertion as well
1946 //FIXME all scalers more complex than bilinear could do half of this transform
1947 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1950 for (i = 0; i < width; i++) {
1951 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1952 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1955 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1958 for (i = 0; i < width; i++) {
1959 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1960 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1963 static void lumRangeToJpeg_c(int16_t *dst, int width)
1966 for (i = 0; i < width; i++)
1967 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1969 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1972 for (i = 0; i < width; i++)
1973 dst[i] = (dst[i]*14071 + 33561947)>>14;
1976 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1979 int32_t *dstU = (int32_t *) _dstU;
1980 int32_t *dstV = (int32_t *) _dstV;
1981 for (i = 0; i < width; i++) {
1982 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1983 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1986 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1989 int32_t *dstU = (int32_t *) _dstU;
1990 int32_t *dstV = (int32_t *) _dstV;
1991 for (i = 0; i < width; i++) {
1992 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1993 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
1996 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1999 int32_t *dst = (int32_t *) _dst;
2000 for (i = 0; i < width; i++)
2001 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2003 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2006 int32_t *dst = (int32_t *) _dst;
2007 for (i = 0; i < width; i++)
2008 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/*
 * Fast bilinear horizontal luma scaler: 16.16 fixed-point position `xpos`
 * advances by xInc per output sample; the fractional part (top 7 bits of
 * the low 16) blends the two neighbouring source pixels into a 15-bit
 * intermediate.
 * NOTE(review): elided listing -- the xpos += xInc advance and trailing
 * edge handling are on missing lines.
 */
2011 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2012 const uint8_t *src, int srcW, int xInc)
2015 unsigned int xpos=0;
2016 for (i=0;i<dstWidth;i++) {
2017 register unsigned int xx=xpos>>16;
2018 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2019 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2024 // *** horizontal scale Y line to temp buffer
/*
 * Horizontal scaling of one luma (or alpha, when isAlpha) line:
 * 1) optional input conversion to YV12-style samples (lumToYV12/alpToYV12
 *    or planar reader) into formatConvBuffer;
 * 2) either the generic filtered scaler (hyScale) or the fast bilinear
 *    path (hyscale_fast);
 * 3) optional range conversion (luma only -- alpha skips it).
 * NOTE(review): elided listing -- the toYV12/convertRange null checks
 * around lines 2036 and 2049-2050 are partly missing.
 */
2025 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2026 const uint8_t *src_in[4], int srcW, int xInc,
2027 const int16_t *hLumFilter,
2028 const int16_t *hLumFilterPos, int hLumFilterSize,
2029 uint8_t *formatConvBuffer,
2030 uint32_t *pal, int isAlpha)
2032 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2033 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* plane 3 is alpha, plane 0 is luma */
2034 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2037 toYV12(formatConvBuffer, src, srcW, pal);
2038 src= formatConvBuffer;
2039 } else if (c->readLumPlanar && !isAlpha) {
2040 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2041 src = formatConvBuffer;
2044 if (!c->hyscale_fast) {
2045 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2046 } else { // fast bilinear upscale / crap downscale
2047 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2051 convertRange(dst, dstWidth);
/*
 * Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c, applied to both chroma planes in lockstep. Blend uses
 * xalpha^127 as the complementary weight (127 - xalpha for 7-bit weights).
 * NOTE(review): elided listing -- the xpos += xInc advance is on a
 * missing line.
 */
2054 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2055 int dstWidth, const uint8_t *src1,
2056 const uint8_t *src2, int srcW, int xInc)
2059 unsigned int xpos=0;
2060 for (i=0;i<dstWidth;i++) {
2061 register unsigned int xx=xpos>>16;
2062 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2063 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2064 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Horizontal scaling of one chroma line pair (U and V): optional input
 * conversion (chrToYV12 or planar reader) into formatConvBuffer -- the
 * second plane goes into buf2, placed after the first at a 16-byte-aligned
 * offset sized by srcW and the source bit depth -- then generic filtered
 * or fast bilinear scaling, then optional chroma range conversion.
 * NOTE(review): elided listing -- the condition guarding chrToYV12 and
 * the src2 reassignment lines (2076, 2080, 2085-2086) are missing.
 */
2069 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2070 const uint8_t *src_in[4],
2071 int srcW, int xInc, const int16_t *hChrFilter,
2072 const int16_t *hChrFilterPos, int hChrFilterSize,
2073 uint8_t *formatConvBuffer, uint32_t *pal)
2075 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2077 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2078 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2079 src1= formatConvBuffer;
2081 } else if (c->readChrPlanar) {
2082 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2083 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2084 src1= formatConvBuffer;
2088 if (!c->hcscale_fast) {
2089 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2090 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2091 } else { // fast bilinear upscale / crap downscale
2092 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2095 if (c->chrConvertRange)
2096 c->chrConvertRange(dst1, dst2, dstWidth);
2099 static av_always_inline void
2100 find_c_packed_planar_out_funcs(SwsContext *c,
2101 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2102 yuv2interleavedX_fn *yuv2nv12cX,
2103 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2104 yuv2packedX_fn *yuv2packedX)
2106 enum PixelFormat dstFormat = c->dstFormat;
2108 if (is16BPS(dstFormat)) {
2109 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2110 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2111 } else if (is9_OR_10BPS(dstFormat)) {
2112 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2113 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2114 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2116 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2117 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2120 *yuv2plane1 = yuv2plane1_8_c;
2121 *yuv2planeX = yuv2planeX_8_c;
2122 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2123 *yuv2nv12cX = yuv2nv12cX_c;
2126 if(c->flags & SWS_FULL_CHR_H_INT) {
2127 switch (dstFormat) {
2130 *yuv2packedX = yuv2rgba32_full_X_c;
2132 #if CONFIG_SWSCALE_ALPHA
2134 *yuv2packedX = yuv2rgba32_full_X_c;
2136 #endif /* CONFIG_SWSCALE_ALPHA */
2138 *yuv2packedX = yuv2rgbx32_full_X_c;
2140 #endif /* !CONFIG_SMALL */
2144 *yuv2packedX = yuv2argb32_full_X_c;
2146 #if CONFIG_SWSCALE_ALPHA
2148 *yuv2packedX = yuv2argb32_full_X_c;
2150 #endif /* CONFIG_SWSCALE_ALPHA */
2152 *yuv2packedX = yuv2xrgb32_full_X_c;
2154 #endif /* !CONFIG_SMALL */
2158 *yuv2packedX = yuv2bgra32_full_X_c;
2160 #if CONFIG_SWSCALE_ALPHA
2162 *yuv2packedX = yuv2bgra32_full_X_c;
2164 #endif /* CONFIG_SWSCALE_ALPHA */
2166 *yuv2packedX = yuv2bgrx32_full_X_c;
2168 #endif /* !CONFIG_SMALL */
2172 *yuv2packedX = yuv2abgr32_full_X_c;
2174 #if CONFIG_SWSCALE_ALPHA
2176 *yuv2packedX = yuv2abgr32_full_X_c;
2178 #endif /* CONFIG_SWSCALE_ALPHA */
2180 *yuv2packedX = yuv2xbgr32_full_X_c;
2182 #endif /* !CONFIG_SMALL */
2185 *yuv2packedX = yuv2rgb24_full_X_c;
2188 *yuv2packedX = yuv2bgr24_full_X_c;
2192 switch (dstFormat) {
2193 case PIX_FMT_RGB48LE:
2194 *yuv2packed1 = yuv2rgb48le_1_c;
2195 *yuv2packed2 = yuv2rgb48le_2_c;
2196 *yuv2packedX = yuv2rgb48le_X_c;
2198 case PIX_FMT_RGB48BE:
2199 *yuv2packed1 = yuv2rgb48be_1_c;
2200 *yuv2packed2 = yuv2rgb48be_2_c;
2201 *yuv2packedX = yuv2rgb48be_X_c;
2203 case PIX_FMT_BGR48LE:
2204 *yuv2packed1 = yuv2bgr48le_1_c;
2205 *yuv2packed2 = yuv2bgr48le_2_c;
2206 *yuv2packedX = yuv2bgr48le_X_c;
2208 case PIX_FMT_BGR48BE:
2209 *yuv2packed1 = yuv2bgr48be_1_c;
2210 *yuv2packed2 = yuv2bgr48be_2_c;
2211 *yuv2packedX = yuv2bgr48be_X_c;
2216 *yuv2packed1 = yuv2rgb32_1_c;
2217 *yuv2packed2 = yuv2rgb32_2_c;
2218 *yuv2packedX = yuv2rgb32_X_c;
2220 #if CONFIG_SWSCALE_ALPHA
2222 *yuv2packed1 = yuv2rgba32_1_c;
2223 *yuv2packed2 = yuv2rgba32_2_c;
2224 *yuv2packedX = yuv2rgba32_X_c;
2226 #endif /* CONFIG_SWSCALE_ALPHA */
2228 *yuv2packed1 = yuv2rgbx32_1_c;
2229 *yuv2packed2 = yuv2rgbx32_2_c;
2230 *yuv2packedX = yuv2rgbx32_X_c;
2232 #endif /* !CONFIG_SMALL */
2234 case PIX_FMT_RGB32_1:
2235 case PIX_FMT_BGR32_1:
2237 *yuv2packed1 = yuv2rgb32_1_1_c;
2238 *yuv2packed2 = yuv2rgb32_1_2_c;
2239 *yuv2packedX = yuv2rgb32_1_X_c;
2241 #if CONFIG_SWSCALE_ALPHA
2243 *yuv2packed1 = yuv2rgba32_1_1_c;
2244 *yuv2packed2 = yuv2rgba32_1_2_c;
2245 *yuv2packedX = yuv2rgba32_1_X_c;
2247 #endif /* CONFIG_SWSCALE_ALPHA */
2249 *yuv2packed1 = yuv2rgbx32_1_1_c;
2250 *yuv2packed2 = yuv2rgbx32_1_2_c;
2251 *yuv2packedX = yuv2rgbx32_1_X_c;
2253 #endif /* !CONFIG_SMALL */
2256 *yuv2packed1 = yuv2rgb24_1_c;
2257 *yuv2packed2 = yuv2rgb24_2_c;
2258 *yuv2packedX = yuv2rgb24_X_c;
2261 *yuv2packed1 = yuv2bgr24_1_c;
2262 *yuv2packed2 = yuv2bgr24_2_c;
2263 *yuv2packedX = yuv2bgr24_X_c;
2265 case PIX_FMT_RGB565LE:
2266 case PIX_FMT_RGB565BE:
2267 case PIX_FMT_BGR565LE:
2268 case PIX_FMT_BGR565BE:
2269 *yuv2packed1 = yuv2rgb16_1_c;
2270 *yuv2packed2 = yuv2rgb16_2_c;
2271 *yuv2packedX = yuv2rgb16_X_c;
2273 case PIX_FMT_RGB555LE:
2274 case PIX_FMT_RGB555BE:
2275 case PIX_FMT_BGR555LE:
2276 case PIX_FMT_BGR555BE:
2277 *yuv2packed1 = yuv2rgb15_1_c;
2278 *yuv2packed2 = yuv2rgb15_2_c;
2279 *yuv2packedX = yuv2rgb15_X_c;
2281 case PIX_FMT_RGB444LE:
2282 case PIX_FMT_RGB444BE:
2283 case PIX_FMT_BGR444LE:
2284 case PIX_FMT_BGR444BE:
2285 *yuv2packed1 = yuv2rgb12_1_c;
2286 *yuv2packed2 = yuv2rgb12_2_c;
2287 *yuv2packedX = yuv2rgb12_X_c;
2291 *yuv2packed1 = yuv2rgb8_1_c;
2292 *yuv2packed2 = yuv2rgb8_2_c;
2293 *yuv2packedX = yuv2rgb8_X_c;
2297 *yuv2packed1 = yuv2rgb4_1_c;
2298 *yuv2packed2 = yuv2rgb4_2_c;
2299 *yuv2packedX = yuv2rgb4_X_c;
2301 case PIX_FMT_RGB4_BYTE:
2302 case PIX_FMT_BGR4_BYTE:
2303 *yuv2packed1 = yuv2rgb4b_1_c;
2304 *yuv2packed2 = yuv2rgb4b_2_c;
2305 *yuv2packedX = yuv2rgb4b_X_c;
2309 switch (dstFormat) {
2310 case PIX_FMT_GRAY16BE:
2311 *yuv2packed1 = yuv2gray16BE_1_c;
2312 *yuv2packed2 = yuv2gray16BE_2_c;
2313 *yuv2packedX = yuv2gray16BE_X_c;
2315 case PIX_FMT_GRAY16LE:
2316 *yuv2packed1 = yuv2gray16LE_1_c;
2317 *yuv2packed2 = yuv2gray16LE_2_c;
2318 *yuv2packedX = yuv2gray16LE_X_c;
2320 case PIX_FMT_MONOWHITE:
2321 *yuv2packed1 = yuv2monowhite_1_c;
2322 *yuv2packed2 = yuv2monowhite_2_c;
2323 *yuv2packedX = yuv2monowhite_X_c;
2325 case PIX_FMT_MONOBLACK:
2326 *yuv2packed1 = yuv2monoblack_1_c;
2327 *yuv2packed2 = yuv2monoblack_2_c;
2328 *yuv2packedX = yuv2monoblack_X_c;
2330 case PIX_FMT_YUYV422:
2331 *yuv2packed1 = yuv2yuyv422_1_c;
2332 *yuv2packed2 = yuv2yuyv422_2_c;
2333 *yuv2packedX = yuv2yuyv422_X_c;
2335 case PIX_FMT_UYVY422:
2336 *yuv2packed1 = yuv2uyvy422_1_c;
2337 *yuv2packed2 = yuv2uyvy422_2_c;
2338 *yuv2packedX = yuv2uyvy422_X_c;
/* Set to 1 to get verbose ring-buffer tracing from swScale() via av_log(). */
2343 #define DEBUG_SWSCALE_BUFFERS 0
/* Expands to a dead 'if (0)' when tracing is disabled, so the av_log()
 * call (and its argument evaluation) is compiled away. NOTE(review): an
 * 'if' without a do { } while (0) wrapper is fragile next to a dangling
 * 'else' at a call site -- kept as-is to preserve existing behavior. */
2344 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Scale one horizontal slice of the source image.
 *
 * Horizontally scales incoming source lines into per-line ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf), then, for each output
 * line whose vertical-filter inputs are all available, runs the vertical
 * scaler and writes the result through the yuv2plane1/yuv2planeX/
 * yuv2nv12cX/yuv2packed* output functions.
 *
 * Returns the number of output lines produced (dstY - lastDstY).
 *
 * NOTE(review): this excerpt is missing interior lines -- dstY, lastDstY
 * and enough_lines are used below without visible declarations, and
 * several closing braces / else branches are not visible. Comments below
 * describe only what the visible code demonstrates.
 */
2346 static int swScale(SwsContext *c, const uint8_t* src[],
2347 int srcStride[], int srcSliceY,
2348 int srcSliceH, uint8_t* dst[], int dstStride[])
2350 /* load a few things into local vars to make the code more readable? and faster */
2351 const int srcW= c->srcW;
2352 const int dstW= c->dstW;
2353 const int dstH= c->dstH;
2354 const int chrDstW= c->chrDstW;
2355 const int chrSrcW= c->chrSrcW;
2356 const int lumXInc= c->lumXInc;
2357 const int chrXInc= c->chrXInc;
2358 const enum PixelFormat dstFormat= c->dstFormat;
2359 const int flags= c->flags;
/* Per-line filter start positions and coefficient arrays for both the
 * vertical (v*) and horizontal (h*) scalers. */
2360 int16_t *vLumFilterPos= c->vLumFilterPos;
2361 int16_t *vChrFilterPos= c->vChrFilterPos;
2362 int16_t *hLumFilterPos= c->hLumFilterPos;
2363 int16_t *hChrFilterPos= c->hChrFilterPos;
2364 int16_t *vLumFilter= c->vLumFilter;
2365 int16_t *vChrFilter= c->vChrFilter;
2366 int16_t *hLumFilter= c->hLumFilter;
2367 int16_t *hChrFilter= c->hChrFilter;
2368 int32_t *lumMmxFilter= c->lumMmxFilter;
2369 int32_t *chrMmxFilter= c->chrMmxFilter;
2370 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2371 const int vLumFilterSize= c->vLumFilterSize;
2372 const int vChrFilterSize= c->vChrFilterSize;
2373 const int hLumFilterSize= c->hLumFilterSize;
2374 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally scaled lines, indexed modulo v*BufSize. */
2375 int16_t **lumPixBuf= c->lumPixBuf;
2376 int16_t **chrUPixBuf= c->chrUPixBuf;
2377 int16_t **chrVPixBuf= c->chrVPixBuf;
2378 int16_t **alpPixBuf= c->alpPixBuf;
2379 const int vLumBufSize= c->vLumBufSize;
2380 const int vChrBufSize= c->vChrBufSize;
2381 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma coordinates of this slice (vertical chroma subsampling applied;
 * the height uses a round-up shift via the double negation). */
2382 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2383 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2385 uint32_t *pal=c->pal_yuv;
2386 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2387 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2388 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2389 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2390 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2391 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Dither only when the source has more precision than 8 bits. */
2392 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2394 /* vars which will change and which we need to store back in the context */
2396 int lumBufIndex= c->lumBufIndex;
2397 int chrBufIndex= c->chrBufIndex;
2398 int lastInLumBuf= c->lastInLumBuf;
2399 int lastInChrBuf= c->lastInChrBuf;
/* Packed input: plane 3 mirrors plane 0's stride (further assignments
 * presumably elided from this excerpt -- confirm against full file). */
2401 if (isPacked(c->srcFormat)) {
2409 srcStride[3]= srcStride[0];
/* vChrDrop: widen chroma strides to skip (drop) chroma lines. */
2411 srcStride[1]<<= c->vChrDrop;
2412 srcStride[2]<<= c->vChrDrop;
2414 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2415 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2416 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2417 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2418 srcSliceY, srcSliceH, dstY, dstH);
2419 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2420 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* One-time warning when any destination stride is not 8-byte aligned. */
2422 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2423 static int warnedAlready=0; //FIXME move this into the context perhaps
2424 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2425 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2426 " ->cannot do aligned memory accesses anymore\n");
2431 /* Note the user might start scaling the picture in the middle so this
2432 will not get executed. This is not really intended but works
2433 currently, so people might do it. */
2434 if (srcSliceY ==0) {
2442 if (!should_dither) {
/* Flat (non-dithering) 8x64 table of 64s for 8-bit sources. */
2443 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one iteration per output line. */
2447 for (;dstY < dstH; dstY++) {
2448 const int chrDstY= dstY>>c->chrDstVSubSample;
2449 uint8_t *dest[4] = {
2450 dst[0] + dstStride[0] * dstY,
2451 dst[1] + dstStride[1] * chrDstY,
2452 dst[2] + dstStride[2] * chrDstY,
2453 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2456 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* firstLumSrcY2: first luma input line of the last output line sharing
 * this chroma row -- used below for the enough_lines decision. */
2457 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2458 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2460 // Last line needed as input
2461 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2462 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2463 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2466 //handle holes (FAST_BILINEAR & weird filters)
2467 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2468 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
/* The needed input window must still fit inside the ring buffers. */
2469 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2470 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2472 DEBUG_BUFFERS("dstY: %d\n", dstY);
2473 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2474 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2475 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2476 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2478 // Do we have enough lines in this slice to output the dstY line
2479 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2481 if (!enough_lines) {
/* Not enough input yet: clamp to slice end, buffer what we have and
 * break out below to wait for the next slice. */
2482 lastLumSrcY = srcSliceY + srcSliceH - 1;
2483 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2484 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2485 lastLumSrcY, lastChrSrcY);
2488 //Do horizontal scaling
2489 while(lastInLumBuf < lastLumSrcY) {
2490 const uint8_t *src1[4] = {
2491 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2492 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2493 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2494 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2497 assert(lumBufIndex < 2*vLumBufSize);
2498 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2499 assert(lastInLumBuf + 1 - srcSliceY >= 0);
/* Horizontally scale the next luma line into the ring buffer. */
2500 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2501 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha reuses the luma horizontal scaler (same geometry). */
2504 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2505 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2506 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2510 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2511 lumBufIndex, lastInLumBuf);
/* Same for chroma lines, in chroma-line coordinates. */
2513 while(lastInChrBuf < lastChrSrcY) {
2514 const uint8_t *src1[4] = {
2515 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2516 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2517 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2518 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2521 assert(chrBufIndex < 2*vChrBufSize);
2522 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2523 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2524 //FIXME replace parameters through context struct (some at least)
/* Skipped for gray/mono formats that carry no chroma (see
 * needs_hcscale selection in sws_init_swScale_c()). */
2526 if (c->needs_hcscale)
2527 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2528 chrDstW, src1, chrSrcW, chrXInc,
2529 hChrFilter, hChrFilterPos, hChrFilterSize,
2530 formatConvBuffer, pal);
2532 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2533 chrBufIndex, lastInChrBuf);
2535 //wrap buf index around to stay inside the ring buffer
2536 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2537 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2539 break; //we can't output a dstY line so let's try with the next slice
2542 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2544 if (should_dither) {
/* Per-row 8x8 dither pattern, cycling every 8 output lines. */
2545 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2546 c->lumDither8 = dither_8x8_128[dstY & 7];
2548 if (dstY >= dstH-2) {
2549 // hmm looks like we can't use MMX here without overwriting this array's tail
/* Last two lines: switch the local output-fn pointers back to the
 * plain C implementations. */
2550 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2551 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Build vertical-filter input pointer arrays from the ring buffers;
 * the second half of each buffer array duplicates the first so the
 * window never wraps mid-read. */
2555 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2556 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2557 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2558 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Filter window reaches outside the image: build a clipped pointer
 * list that replicates the first/last valid line at the edges. */
2560 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2561 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2562 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2563 for (i = 0; i < neg; i++)
2564 tmpY[i] = lumSrcPtr[neg];
2565 for ( ; i < end; i++)
2566 tmpY[i] = lumSrcPtr[i];
2567 for ( ; i < vLumFilterSize; i++)
2568 tmpY[i] = tmpY[i-1];
/* Same edge replication for alpha (guard presumably elided here). */
2572 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2573 for (i = 0; i < neg; i++)
2574 tmpA[i] = alpSrcPtr[neg];
2575 for ( ; i < end; i++)
2576 tmpA[i] = alpSrcPtr[i];
2577 for ( ; i < vLumFilterSize; i++)
2578 tmpA[i] = tmpA[i - 1];
/* Same edge replication for both chroma planes. */
2582 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2583 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2584 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2585 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2586 for (i = 0; i < neg; i++) {
2587 tmpU[i] = chrUSrcPtr[neg];
2588 tmpV[i] = chrVSrcPtr[neg];
2590 for ( ; i < end; i++) {
2591 tmpU[i] = chrUSrcPtr[i];
2592 tmpV[i] = chrVSrcPtr[i];
2594 for ( ; i < vChrFilterSize; i++) {
2595 tmpU[i] = tmpU[i - 1];
2596 tmpV[i] = tmpV[i - 1];
/* Planar YUV / gray output: vertical-scale each plane separately. */
2602 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2603 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* 1-tap filter degenerates to a copy-with-shift (yuv2plane1). */
2605 if (vLumFilterSize == 1) {
2606 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2608 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2609 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Emit chroma only on non-skipped rows and for non-gray output. */
2612 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2614 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2615 } else if (vChrFilterSize == 1) {
2616 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2617 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2619 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2620 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2621 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2622 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2626 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2627 if (vLumFilterSize == 1) {
2628 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2630 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2631 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed output: pick the cheapest writer the filter sizes allow. */
2635 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2636 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2637 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2638 int chrAlpha = vChrFilter[2 * dstY + 1];
2639 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2640 alpPixBuf ? *alpSrcPtr : NULL,
2641 dest[0], dstW, chrAlpha, dstY);
2642 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2643 int lumAlpha = vLumFilter[2 * dstY + 1];
2644 int chrAlpha = vChrFilter[2 * dstY + 1];
/* 0x10001 duplicates the 16-bit coefficient into both halves of a
 * 32-bit word for the MMX filter tables. */
2646 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2648 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2649 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2650 alpPixBuf ? alpSrcPtr : NULL,
2651 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2652 } else { //general RGB
2653 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2654 lumSrcPtr, vLumFilterSize,
2655 vChrFilter + dstY * vChrFilterSize,
2656 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2657 alpSrcPtr, dest[0], dstW, dstY);
/* Alpha-capable destination but no alpha source: write opaque (255). */
2663 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2664 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* x86: store fence so earlier (presumably non-temporal MMX2) stores
 * are globally visible before we return -- confirm against the SIMD
 * output functions. */
2667 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2668 __asm__ volatile("sfence":::"memory");
2672 /* store changed local vars back in the context */
2674 c->lumBufIndex= lumBufIndex;
2675 c->chrBufIndex= chrBufIndex;
2676 c->lastInLumBuf= lastInLumBuf;
2677 c->lastInChrBuf= lastInChrBuf;
/* Number of output lines written in this call. */
2679 return dstY - lastDstY;
/**
 * Initialize the C (non-SIMD) function pointers of a SwsContext:
 * output writers, per-source-format luma/chroma/alpha -> YV12 input
 * converters, horizontal scalers and range converters.
 *
 * NOTE(review): this excerpt has elided lines -- several 'switch'
 * headers, closing braces and (presumably) preprocessor guards are not
 * visible; comments describe only the visible dispatch logic.
 */
2682 static av_cold void sws_init_swScale_c(SwsContext *c)
2684 enum PixelFormat srcFormat = c->srcFormat;
/* Fill in the default C output functions (may be overridden later by
 * arch-specific init, see ff_getSwsFunc()). */
2686 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2687 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* ---- chroma input converter selection (by source format) ---- */
2690 c->chrToYV12 = NULL;
2692 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2693 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2694 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2695 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Paletted / low-bit RGB sources go through the palette reader. */
2699 case PIX_FMT_BGR4_BYTE:
2700 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar GBR sources use the readChrPlanar hook instead of chrToYV12. */
2701 case PIX_FMT_GBRP9LE:
2702 case PIX_FMT_GBRP10LE:
2703 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2704 case PIX_FMT_GBRP9BE:
2705 case PIX_FMT_GBRP10BE:
2706 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2707 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* NOTE(review): both the LE and BE groups below assign bswap16UV_c;
 * presumably one group is compiled out by an elided HAVE_BIGENDIAN
 * conditional so only the non-native endianness gets byteswapped --
 * confirm against the full file. */
2709 case PIX_FMT_YUV444P9LE:
2710 case PIX_FMT_YUV422P9LE:
2711 case PIX_FMT_YUV420P9LE:
2712 case PIX_FMT_YUV422P10LE:
2713 case PIX_FMT_YUV444P10LE:
2714 case PIX_FMT_YUV420P10LE:
2715 case PIX_FMT_YUV420P16LE:
2716 case PIX_FMT_YUV422P16LE:
2717 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2719 case PIX_FMT_YUV444P9BE:
2720 case PIX_FMT_YUV422P9BE:
2721 case PIX_FMT_YUV420P9BE:
2722 case PIX_FMT_YUV444P10BE:
2723 case PIX_FMT_YUV422P10BE:
2724 case PIX_FMT_YUV420P10BE:
2725 case PIX_FMT_YUV420P16BE:
2726 case PIX_FMT_YUV422P16BE:
2727 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* Horizontally subsampled chroma: use the *_half_c readers (presumably
 * averaging two source pixels per chroma sample -- confirm). */
2730 if (c->chrSrcHSubSample) {
2732 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2733 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2734 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2735 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2736 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2737 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2738 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2739 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2740 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2741 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2742 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2743 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2744 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2745 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2746 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2747 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2748 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2749 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Unsubsampled chroma: full-resolution readers. */
2753 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2754 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2755 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2756 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2757 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2758 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2759 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2760 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2761 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2762 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2763 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2764 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2765 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2766 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2767 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2768 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2769 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2770 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* ---- luma / alpha input converter selection ---- */
2774 c->lumToYV12 = NULL;
2775 c->alpToYV12 = NULL;
2776 switch (srcFormat) {
2777 case PIX_FMT_GBRP9LE:
2778 case PIX_FMT_GBRP10LE:
2779 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2780 case PIX_FMT_GBRP9BE:
2781 case PIX_FMT_GBRP10BE:
2782 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2783 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
/* Same LE/BE bswap16Y_c pattern as the chroma table above -- see the
 * NOTE(review) there. */
2785 case PIX_FMT_YUV444P9LE:
2786 case PIX_FMT_YUV422P9LE:
2787 case PIX_FMT_YUV420P9LE:
2788 case PIX_FMT_YUV444P10LE:
2789 case PIX_FMT_YUV422P10LE:
2790 case PIX_FMT_YUV420P10LE:
2791 case PIX_FMT_YUV420P16LE:
2792 case PIX_FMT_YUV422P16LE:
2793 case PIX_FMT_YUV444P16LE:
2794 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2796 case PIX_FMT_YUV444P9BE:
2797 case PIX_FMT_YUV422P9BE:
2798 case PIX_FMT_YUV420P9BE:
2799 case PIX_FMT_YUV444P10BE:
2800 case PIX_FMT_YUV422P10BE:
2801 case PIX_FMT_YUV420P10BE:
2802 case PIX_FMT_YUV420P16BE:
2803 case PIX_FMT_YUV422P16BE:
2804 case PIX_FMT_YUV444P16BE:
2805 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A shares the YUYV luma reader -- both have luma in every second
 * byte starting at offset 0 (presumably; confirm layout). */
2807 case PIX_FMT_YUYV422 :
2808 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2809 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2810 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2811 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2812 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2813 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2814 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2815 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2816 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2817 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2818 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2819 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2823 case PIX_FMT_BGR4_BYTE:
2824 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2825 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2826 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2827 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2828 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2829 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2830 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2831 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2832 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2833 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2834 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction (guard on "source has alpha" presumably elided). */
2837 switch (srcFormat) {
2839 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2841 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A alpha sits at odd byte offsets, so the UYVY luma reader
 * (every second byte, offset 1) is reused -- presumably intentional. */
2842 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* ---- horizontal scaler selection by input/output bit depth ---- */
2846 if (c->srcBpc == 8) {
2847 if (c->dstBpc <= 10) {
2848 c->hyScale = c->hcScale = hScale8To15_c;
/* FAST_BILINEAR gets dedicated unfiltered fast paths. */
2849 if (c->flags & SWS_FAST_BILINEAR) {
2850 c->hyscale_fast = hyscale_fast_c;
2851 c->hcscale_fast = hcscale_fast_c;
2854 c->hyScale = c->hcScale = hScale8To19_c;
2857 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* ---- full <-> limited range conversion (YUV output only) ---- */
2860 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2861 if (c->dstBpc <= 10) {
2863 c->lumConvertRange = lumRangeFromJpeg_c;
2864 c->chrConvertRange = chrRangeFromJpeg_c;
2866 c->lumConvertRange = lumRangeToJpeg_c;
2867 c->chrConvertRange = chrRangeToJpeg_c;
/* 16-bit intermediate variants for deep output. */
2871 c->lumConvertRange = lumRangeFromJpeg16_c;
2872 c->chrConvertRange = chrRangeFromJpeg16_c;
2874 c->lumConvertRange = lumRangeToJpeg16_c;
2875 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray/mono endpoints carry no chroma, so horizontal chroma scaling
 * can be skipped entirely (checked in swScale()). */
2880 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2881 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2882 c->needs_hcscale = 1;
2885 SwsFunc ff_getSwsFunc(SwsContext *c)
2887 sws_init_swScale_c(c);
2890 ff_sws_init_swScale_mmx(c);
2892 ff_sws_init_swScale_altivec(c);