/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients with RGB2YUV_SHIFT fractional
 * bits. The 219/255 (luma) and 224/255 (chroma) factors pre-scale for
 * limited-range ("MPEG") output levels; signs follow the BT.601 matrix. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* TODO
   Special versions: fast Y 1:1 scaling (no interpolation in y direction)

   more intelligent misalignment avoidance for the horizontal scaler
   write special vertical cubic upscale version
   optimize C code (YV12 / minmax)
   add support for packed pixel YUV input & output
   add support for Y8 output
   optimize BGR24 & BGR32
   add BGR4 output support
   write special BGR->BGR scaler
*/
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 { 1, 3, 1, 3, 1, 3, 1, 3, },
67 { 2, 0, 2, 0, 2, 0, 2, 0, },
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 { 6, 2, 6, 2, 6, 2, 6, 2, },
72 { 0, 4, 0, 4, 0, 4, 0, 4, },
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 { 8, 4, 11, 7, 8, 4, 11, 7, },
77 { 2, 14, 1, 13, 2, 14, 1, 13, },
78 { 10, 6, 9, 5, 10, 6, 9, 5, },
79 { 0, 12, 3, 15, 0, 12, 3, 15, },
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17, 9, 23, 15, 16, 8, 22, 14, },
84 { 5, 29, 3, 27, 4, 28, 2, 26, },
85 { 21, 13, 19, 11, 20, 12, 18, 10, },
86 { 0, 24, 6, 30, 1, 25, 7, 31, },
87 { 16, 8, 22, 14, 17, 9, 23, 15, },
88 { 4, 28, 2, 26, 5, 29, 3, 27, },
89 { 20, 12, 18, 10, 21, 13, 19, 11, },
90 { 1, 25, 7, 31, 0, 24, 6, 30, },
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 { 0, 55, 14, 68, 3, 58, 17, 72, },
95 { 37, 18, 50, 32, 40, 22, 54, 35, },
96 { 9, 64, 5, 59, 13, 67, 8, 63, },
97 { 46, 27, 41, 23, 49, 31, 44, 26, },
98 { 2, 57, 16, 71, 1, 56, 15, 70, },
99 { 39, 21, 52, 34, 38, 19, 51, 33, },
100 { 11, 66, 7, 62, 10, 65, 6, 60, },
101 { 48, 30, 43, 25, 47, 29, 42, 24, },
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117, 62, 158, 103, 113, 58, 155, 100, },
107 { 34, 199, 21, 186, 31, 196, 17, 182, },
108 {144, 89, 131, 76, 141, 86, 127, 72, },
109 { 0, 165, 41, 206, 10, 175, 52, 217, },
110 {110, 55, 151, 96, 120, 65, 162, 107, },
111 { 28, 193, 14, 179, 38, 203, 24, 189, },
112 {138, 83, 124, 69, 148, 93, 134, 79, },
113 { 7, 172, 48, 213, 3, 168, 45, 210, },
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 { 0, 143, 18, 200, 2, 156, 25, 215, },
119 { 78, 28, 125, 64, 89, 36, 138, 74, },
120 { 10, 180, 3, 161, 16, 195, 8, 175, },
121 {109, 51, 93, 38, 121, 60, 105, 47, },
122 { 1, 152, 23, 210, 0, 147, 20, 205, },
123 { 85, 33, 134, 71, 81, 30, 130, 67, },
124 { 14, 190, 6, 171, 12, 185, 5, 166, },
125 {117, 57, 101, 44, 113, 54, 97, 41, },
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 { 0, 124, 8, 193, 0, 140, 12, 213, },
131 { 55, 14, 104, 42, 66, 19, 119, 52, },
132 { 3, 168, 1, 145, 6, 187, 3, 162, },
133 { 86, 31, 70, 21, 99, 39, 82, 28, },
134 { 0, 134, 11, 206, 0, 129, 9, 200, },
135 { 62, 17, 114, 48, 58, 16, 109, 45, },
136 { 5, 181, 2, 157, 4, 175, 1, 151, },
137 { 95, 36, 78, 26, 90, 34, 74, 24, },
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 { 0, 107, 3, 187, 0, 125, 6, 212, },
143 { 39, 7, 86, 28, 49, 11, 102, 36, },
144 { 1, 158, 0, 131, 3, 180, 1, 151, },
145 { 68, 19, 52, 12, 81, 25, 64, 17, },
146 { 0, 119, 5, 203, 0, 113, 4, 195, },
147 { 45, 9, 96, 33, 42, 8, 91, 30, },
148 { 2, 172, 1, 144, 2, 165, 0, 137, },
149 { 77, 23, 60, 15, 72, 21, 56, 14, },
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 { 36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100, 4,124, 28, 98, 2,122, 26,},
155 { 52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 { 32, 64, 56, 88, 38, 70, 62, 94,},
158 { 96, 0,120, 24,102, 6,126, 30,},
159 { 48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104, 8,118, 22,110, 14,},
/* Exported 8-byte vector of the constant 64 — presumably a rounding bias for
 * SIMD code elsewhere in libswscale; TODO confirm against the asm users. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
/* Store one 16-bit sample: clip (signed or unsigned per 'signedness'), add
 * 'bias', and write in the requested endianness. 'shift' and 'big_endian'
 * are picked up from the enclosing function scope. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
172 static av_always_inline void
173 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174 int big_endian, int output_bits)
177 int shift = 19 - output_bits;
179 for (i = 0; i < dstW; i++) {
180 int val = src[i] + (1 << (shift - 1));
181 output_pixel(&dest[i], val, 0, uint);
185 static av_always_inline void
186 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187 const int32_t **src, uint16_t *dest, int dstW,
188 int big_endian, int output_bits)
191 int shift = 15 + 16 - output_bits;
193 for (i = 0; i < dstW; i++) {
194 int val = 1 << (30-output_bits);
197 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198 * filters (or anything with negative coeffs, the range can be slightly
199 * wider in both directions. To account for this overflow, we subtract
200 * a constant so it always fits in the signed range (assuming a
201 * reasonable filterSize), and re-add that at the end. */
203 for (j = 0; j < filterSize; j++)
204 val += src[j][i] * filter[j];
206 output_pixel(&dest[i], val, 0x8000, int);
#undef output_pixel

/* Store one 9/10-bit sample, clipped to 'output_bits' unsigned bits, in the
 * requested endianness. 'shift', 'output_bits' and 'big_endian' come from
 * the enclosing function scope. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
219 static av_always_inline void
220 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = src[i] + (1 << (shift - 1));
228 output_pixel(&dest[i], val);
232 static av_always_inline void
233 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234 const int16_t **src, uint16_t *dest, int dstW,
235 int big_endian, int output_bits)
238 int shift = 11 + 16 - output_bits;
240 for (i = 0; i < dstW; i++) {
241 int val = 1 << (26-output_bits);
244 for (j = 0; j < filterSize; j++)
245 val += src[j][i] * filter[j];
247 output_pixel(&dest[i], val);
/* Instantiate endian-specific yuv2plane1/yuv2planeX wrappers around the
 * 10/16-bit templates above, casting the generic int16_t interface to the
 * template's actual sample type. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
} \
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
/* Instantiate the 9-, 10- and 16-bit output functions for both endiannesses. */
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
276 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
277 const int16_t **src, uint8_t *dest, int dstW,
278 const uint8_t *dither, int offset)
281 for (i=0; i<dstW; i++) {
282 int val = dither[(i + offset) & 7] << 12;
284 for (j=0; j<filterSize; j++)
285 val += src[j][i] * filter[j];
287 dest[i]= av_clip_uint8(val>>19);
291 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
292 const uint8_t *dither, int offset)
295 for (i=0; i<dstW; i++) {
296 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
297 dest[i]= av_clip_uint8(val);
301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302 const int16_t **chrUSrc, const int16_t **chrVSrc,
303 uint8_t *dest, int chrDstW)
305 enum PixelFormat dstFormat = c->dstFormat;
306 const uint8_t *chrDither = c->chrDither8;
309 if (dstFormat == PIX_FMT_NV12)
310 for (i=0; i<chrDstW; i++) {
311 int u = chrDither[i & 7] << 12;
312 int v = chrDither[(i + 3) & 7] << 12;
314 for (j=0; j<chrFilterSize; j++) {
315 u += chrUSrc[j][i] * chrFilter[j];
316 v += chrVSrc[j][i] * chrFilter[j];
319 dest[2*i]= av_clip_uint8(u>>19);
320 dest[2*i+1]= av_clip_uint8(v>>19);
323 for (i=0; i<chrDstW; i++) {
324 int u = chrDither[i & 7] << 12;
325 int v = chrDither[(i + 3) & 7] << 12;
327 for (j=0; j<chrFilterSize; j++) {
328 u += chrUSrc[j][i] * chrFilter[j];
329 v += chrVSrc[j][i] * chrFilter[j];
332 dest[2*i]= av_clip_uint8(v>>19);
333 dest[2*i+1]= av_clip_uint8(u>>19);
#undef output_pixel

/* Store one 16-bit gray sample in the endianness implied by 'target'. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
344 static av_always_inline void
345 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346 const int32_t **lumSrc, int lumFilterSize,
347 const int16_t *chrFilter, const int32_t **chrUSrc,
348 const int32_t **chrVSrc, int chrFilterSize,
349 const int32_t **alpSrc, uint16_t *dest, int dstW,
350 int y, enum PixelFormat target)
354 for (i = 0; i < (dstW >> 1); i++) {
356 int Y1 = (1 << 14) - 0x40000000;
357 int Y2 = (1 << 14) - 0x40000000;
359 for (j = 0; j < lumFilterSize; j++) {
360 Y1 += lumSrc[j][i * 2] * lumFilter[j];
361 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
365 Y1 = av_clip_int16(Y1);
366 Y2 = av_clip_int16(Y2);
367 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
372 static av_always_inline void
373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374 const int32_t *ubuf[2], const int32_t *vbuf[2],
375 const int32_t *abuf[2], uint16_t *dest, int dstW,
376 int yalpha, int uvalpha, int y,
377 enum PixelFormat target)
379 int yalpha1 = 4095 - yalpha;
381 const int32_t *buf0 = buf[0], *buf1 = buf[1];
383 for (i = 0; i < (dstW >> 1); i++) {
384 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387 output_pixel(&dest[i * 2 + 0], Y1);
388 output_pixel(&dest[i * 2 + 1], Y2);
392 static av_always_inline void
393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394 const int32_t *ubuf[2], const int32_t *vbuf[2],
395 const int32_t *abuf0, uint16_t *dest, int dstW,
396 int uvalpha, int y, enum PixelFormat target)
400 for (i = 0; i < (dstW >> 1); i++) {
401 int Y1 = buf0[i * 2 ] << 1;
402 int Y2 = buf0[i * 2 + 1] << 1;
404 output_pixel(&dest[i * 2 + 0], Y1);
405 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X full filter, _2 bilinear,
 * _1 unfiltered) for a 16-bit packed output template, casting the generic
 * int16_t buffer interface to the int32_t samples the templates expect. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}
/* Instantiate 16-bit gray output for both endiannesses. */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
#undef output_pixel

/* Flush 8 accumulated 1-bit pixels; MONOWHITE is the bit-inverse of
 * MONOBLACK. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
467 static av_always_inline void
468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469 const int16_t **lumSrc, int lumFilterSize,
470 const int16_t *chrFilter, const int16_t **chrUSrc,
471 const int16_t **chrVSrc, int chrFilterSize,
472 const int16_t **alpSrc, uint8_t *dest, int dstW,
473 int y, enum PixelFormat target)
475 const uint8_t * const d128=dither_8x8_220[y&7];
476 uint8_t *g = c->table_gU[128] + c->table_gV[128];
480 for (i = 0; i < dstW - 1; i += 2) {
485 for (j = 0; j < lumFilterSize; j++) {
486 Y1 += lumSrc[j][i] * lumFilter[j];
487 Y2 += lumSrc[j][i+1] * lumFilter[j];
491 if ((Y1 | Y2) & 0x100) {
492 Y1 = av_clip_uint8(Y1);
493 Y2 = av_clip_uint8(Y2);
495 acc += acc + g[Y1 + d128[(i + 0) & 7]];
496 acc += acc + g[Y2 + d128[(i + 1) & 7]];
498 output_pixel(*dest++, acc);
503 static av_always_inline void
504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505 const int16_t *ubuf[2], const int16_t *vbuf[2],
506 const int16_t *abuf[2], uint8_t *dest, int dstW,
507 int yalpha, int uvalpha, int y,
508 enum PixelFormat target)
510 const int16_t *buf0 = buf[0], *buf1 = buf[1];
511 const uint8_t * const d128 = dither_8x8_220[y & 7];
512 uint8_t *g = c->table_gU[128] + c->table_gV[128];
513 int yalpha1 = 4095 - yalpha;
516 for (i = 0; i < dstW - 7; i += 8) {
517 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
518 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525 output_pixel(*dest++, acc);
529 static av_always_inline void
530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531 const int16_t *ubuf[2], const int16_t *vbuf[2],
532 const int16_t *abuf0, uint8_t *dest, int dstW,
533 int uvalpha, int y, enum PixelFormat target)
535 const uint8_t * const d128 = dither_8x8_220[y & 7];
536 uint8_t *g = c->table_gU[128] + c->table_gV[128];
539 for (i = 0; i < dstW - 7; i += 8) {
540 int acc = g[(buf0[i ] >> 7) + d128[0]];
541 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548 output_pixel(*dest++, acc);
/* Generate the three public entry points (_X full filter, _2 bilinear,
 * _1 unfiltered) for an 8-bit packed output template. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **chrUSrc, \
                        const int16_t **chrVSrc, int chrFilterSize, \
                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
                        int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf0, uint8_t *dest, int dstW, \
                        int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}
/* Instantiate 1-bpp output for both mono polarities. */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Store a macropixel pair: YUYV order for YUYV422, UYVY order otherwise. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
602 static av_always_inline void
603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604 const int16_t **lumSrc, int lumFilterSize,
605 const int16_t *chrFilter, const int16_t **chrUSrc,
606 const int16_t **chrVSrc, int chrFilterSize,
607 const int16_t **alpSrc, uint8_t *dest, int dstW,
608 int y, enum PixelFormat target)
612 for (i = 0; i < (dstW >> 1); i++) {
619 for (j = 0; j < lumFilterSize; j++) {
620 Y1 += lumSrc[j][i * 2] * lumFilter[j];
621 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623 for (j = 0; j < chrFilterSize; j++) {
624 U += chrUSrc[j][i] * chrFilter[j];
625 V += chrVSrc[j][i] * chrFilter[j];
631 if ((Y1 | Y2 | U | V) & 0x100) {
632 Y1 = av_clip_uint8(Y1);
633 Y2 = av_clip_uint8(Y2);
634 U = av_clip_uint8(U);
635 V = av_clip_uint8(V);
637 output_pixels(4*i, Y1, U, Y2, V);
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643 const int16_t *ubuf[2], const int16_t *vbuf[2],
644 const int16_t *abuf[2], uint8_t *dest, int dstW,
645 int yalpha, int uvalpha, int y,
646 enum PixelFormat target)
648 const int16_t *buf0 = buf[0], *buf1 = buf[1],
649 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651 int yalpha1 = 4095 - yalpha;
652 int uvalpha1 = 4095 - uvalpha;
655 for (i = 0; i < (dstW >> 1); i++) {
656 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
661 output_pixels(i * 4, Y1, U, Y2, V);
665 static av_always_inline void
666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667 const int16_t *ubuf[2], const int16_t *vbuf[2],
668 const int16_t *abuf0, uint8_t *dest, int dstW,
669 int uvalpha, int y, enum PixelFormat target)
671 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675 if (uvalpha < 2048) {
676 for (i = 0; i < (dstW >> 1); i++) {
677 int Y1 = buf0[i * 2] >> 7;
678 int Y2 = buf0[i * 2 + 1] >> 7;
679 int U = ubuf1[i] >> 7;
680 int V = vbuf1[i] >> 7;
682 output_pixels(i * 4, Y1, U, Y2, V);
685 for (i = 0; i < (dstW >> 1); i++) {
686 int Y1 = buf0[i * 2] >> 7;
687 int Y2 = buf0[i * 2 + 1] >> 7;
688 int U = (ubuf0[i] + ubuf1[i]) >> 8;
689 int V = (vbuf0[i] + vbuf1[i]) >> 8;
691 output_pixels(i * 4, Y1, U, Y2, V);
/* Instantiate packed 4:2:2 output in both component orders. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* RGB48 stores R first, BGR48 stores B first; R_B/B_R pick the right
 * component for slot 0/2. output_pixel honours the target's endianness. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712 const int32_t **lumSrc, int lumFilterSize,
713 const int16_t *chrFilter, const int32_t **chrUSrc,
714 const int32_t **chrVSrc, int chrFilterSize,
715 const int32_t **alpSrc, uint16_t *dest, int dstW,
716 int y, enum PixelFormat target)
720 for (i = 0; i < (dstW >> 1); i++) {
722 int Y1 = -0x40000000;
723 int Y2 = -0x40000000;
724 int U = -128 << 23; // 19
728 for (j = 0; j < lumFilterSize; j++) {
729 Y1 += lumSrc[j][i * 2] * lumFilter[j];
730 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732 for (j = 0; j < chrFilterSize; j++) {
733 U += chrUSrc[j][i] * chrFilter[j];
734 V += chrVSrc[j][i] * chrFilter[j];
737 // 8bit: 12+15=27; 16-bit: 12+19=31
745 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746 Y1 -= c->yuv2rgb_y_offset;
747 Y2 -= c->yuv2rgb_y_offset;
748 Y1 *= c->yuv2rgb_y_coeff;
749 Y2 *= c->yuv2rgb_y_coeff;
752 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754 R = V * c->yuv2rgb_v2r_coeff;
755 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756 B = U * c->yuv2rgb_u2b_coeff;
758 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
769 static av_always_inline void
770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771 const int32_t *ubuf[2], const int32_t *vbuf[2],
772 const int32_t *abuf[2], uint16_t *dest, int dstW,
773 int yalpha, int uvalpha, int y,
774 enum PixelFormat target)
776 const int32_t *buf0 = buf[0], *buf1 = buf[1],
777 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779 int yalpha1 = 4095 - yalpha;
780 int uvalpha1 = 4095 - uvalpha;
783 for (i = 0; i < (dstW >> 1); i++) {
784 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
790 Y1 -= c->yuv2rgb_y_offset;
791 Y2 -= c->yuv2rgb_y_offset;
792 Y1 *= c->yuv2rgb_y_coeff;
793 Y2 *= c->yuv2rgb_y_coeff;
797 R = V * c->yuv2rgb_v2r_coeff;
798 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799 B = U * c->yuv2rgb_u2b_coeff;
801 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
811 static av_always_inline void
812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813 const int32_t *ubuf[2], const int32_t *vbuf[2],
814 const int32_t *abuf0, uint16_t *dest, int dstW,
815 int uvalpha, int y, enum PixelFormat target)
817 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821 if (uvalpha < 2048) {
822 for (i = 0; i < (dstW >> 1); i++) {
823 int Y1 = (buf0[i * 2] ) >> 2;
824 int Y2 = (buf0[i * 2 + 1]) >> 2;
825 int U = (ubuf0[i] + (-128 << 11)) >> 2;
826 int V = (vbuf0[i] + (-128 << 11)) >> 2;
829 Y1 -= c->yuv2rgb_y_offset;
830 Y2 -= c->yuv2rgb_y_offset;
831 Y1 *= c->yuv2rgb_y_coeff;
832 Y2 *= c->yuv2rgb_y_coeff;
836 R = V * c->yuv2rgb_v2r_coeff;
837 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838 B = U * c->yuv2rgb_u2b_coeff;
840 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849 for (i = 0; i < (dstW >> 1); i++) {
850 int Y1 = (buf0[i * 2] ) >> 2;
851 int Y2 = (buf0[i * 2 + 1]) >> 2;
852 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856 Y1 -= c->yuv2rgb_y_offset;
857 Y2 -= c->yuv2rgb_y_offset;
858 Y1 *= c->yuv2rgb_y_coeff;
859 Y2 *= c->yuv2rgb_y_coeff;
863 R = V * c->yuv2rgb_v2r_coeff;
864 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865 B = U * c->yuv2rgb_u2b_coeff;
867 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate 48-bit RGB and BGR output for both endiannesses. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/**
 * Write out 2 RGB pixels in the target pixel format. This function takes a
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 */
895 static av_always_inline void
896 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897 unsigned A1, unsigned A2,
898 const void *_r, const void *_g, const void *_b, int y,
899 enum PixelFormat target, int hasAlpha)
901 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903 uint32_t *dest = (uint32_t *) _dest;
904 const uint32_t *r = (const uint32_t *) _r;
905 const uint32_t *g = (const uint32_t *) _g;
906 const uint32_t *b = (const uint32_t *) _b;
909 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
911 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
915 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
917 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
920 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
924 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925 uint8_t *dest = (uint8_t *) _dest;
926 const uint8_t *r = (const uint8_t *) _r;
927 const uint8_t *g = (const uint8_t *) _g;
928 const uint8_t *b = (const uint8_t *) _b;
930 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
931 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
932 dest[i * 6 + 0] = r_b[Y1];
933 dest[i * 6 + 1] = g[Y1];
934 dest[i * 6 + 2] = b_r[Y1];
935 dest[i * 6 + 3] = r_b[Y2];
936 dest[i * 6 + 4] = g[Y2];
937 dest[i * 6 + 5] = b_r[Y2];
940 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943 uint16_t *dest = (uint16_t *) _dest;
944 const uint16_t *r = (const uint16_t *) _r;
945 const uint16_t *g = (const uint16_t *) _g;
946 const uint16_t *b = (const uint16_t *) _b;
947 int dr1, dg1, db1, dr2, dg2, db2;
949 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950 dr1 = dither_2x2_8[ y & 1 ][0];
951 dg1 = dither_2x2_4[ y & 1 ][0];
952 db1 = dither_2x2_8[(y & 1) ^ 1][0];
953 dr2 = dither_2x2_8[ y & 1 ][1];
954 dg2 = dither_2x2_4[ y & 1 ][1];
955 db2 = dither_2x2_8[(y & 1) ^ 1][1];
956 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957 dr1 = dither_2x2_8[ y & 1 ][0];
958 dg1 = dither_2x2_8[ y & 1 ][1];
959 db1 = dither_2x2_8[(y & 1) ^ 1][0];
960 dr2 = dither_2x2_8[ y & 1 ][1];
961 dg2 = dither_2x2_8[ y & 1 ][0];
962 db2 = dither_2x2_8[(y & 1) ^ 1][1];
964 dr1 = dither_4x4_16[ y & 3 ][0];
965 dg1 = dither_4x4_16[ y & 3 ][1];
966 db1 = dither_4x4_16[(y & 3) ^ 3][0];
967 dr2 = dither_4x4_16[ y & 3 ][1];
968 dg2 = dither_4x4_16[ y & 3 ][0];
969 db2 = dither_4x4_16[(y & 3) ^ 3][1];
972 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
974 } else /* 8/4-bit */ {
975 uint8_t *dest = (uint8_t *) _dest;
976 const uint8_t *r = (const uint8_t *) _r;
977 const uint8_t *g = (const uint8_t *) _g;
978 const uint8_t *b = (const uint8_t *) _b;
979 int dr1, dg1, db1, dr2, dg2, db2;
981 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982 const uint8_t * const d64 = dither_8x8_73[y & 7];
983 const uint8_t * const d32 = dither_8x8_32[y & 7];
984 dr1 = dg1 = d32[(i * 2 + 0) & 7];
985 db1 = d64[(i * 2 + 0) & 7];
986 dr2 = dg2 = d32[(i * 2 + 1) & 7];
987 db2 = d64[(i * 2 + 1) & 7];
989 const uint8_t * const d64 = dither_8x8_73 [y & 7];
990 const uint8_t * const d128 = dither_8x8_220[y & 7];
991 dr1 = db1 = d128[(i * 2 + 0) & 7];
992 dg1 = d64[(i * 2 + 0) & 7];
993 dr2 = db2 = d128[(i * 2 + 1) & 7];
994 dg2 = d64[(i * 2 + 1) & 7];
997 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1001 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1007 static av_always_inline void
1008 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009 const int16_t **lumSrc, int lumFilterSize,
1010 const int16_t *chrFilter, const int16_t **chrUSrc,
1011 const int16_t **chrVSrc, int chrFilterSize,
1012 const int16_t **alpSrc, uint8_t *dest, int dstW,
1013 int y, enum PixelFormat target, int hasAlpha)
1017 for (i = 0; i < (dstW >> 1); i++) {
1023 int av_unused A1, A2;
1024 const void *r, *g, *b;
1026 for (j = 0; j < lumFilterSize; j++) {
1027 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1028 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1030 for (j = 0; j < chrFilterSize; j++) {
1031 U += chrUSrc[j][i] * chrFilter[j];
1032 V += chrVSrc[j][i] * chrFilter[j];
1038 if ((Y1 | Y2 | U | V) & 0x100) {
1039 Y1 = av_clip_uint8(Y1);
1040 Y2 = av_clip_uint8(Y2);
1041 U = av_clip_uint8(U);
1042 V = av_clip_uint8(V);
1047 for (j = 0; j < lumFilterSize; j++) {
1048 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1049 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1053 if ((A1 | A2) & 0x100) {
1054 A1 = av_clip_uint8(A1);
1055 A2 = av_clip_uint8(A2);
1059 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1061 g = (c->table_gU[U] + c->table_gV[V]);
1064 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065 r, g, b, y, target, hasAlpha);
/* Two-line (bilinear) vertical interpolation variant of the packed-RGB
 * writer: blends buf[0]/buf[1] (and ubuf/vbuf, abuf) with 12-bit weights
 * yalpha/uvalpha (weight pairs sum to 4095), then emits pixels through
 * the same lookup tables and yuv2rgb_write() as the X variant. */
1069 static av_always_inline void
1070 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1071 const int16_t *ubuf[2], const int16_t *vbuf[2],
1072 const int16_t *abuf[2], uint8_t *dest, int dstW,
1073 int yalpha, int uvalpha, int y,
1074 enum PixelFormat target, int hasAlpha)
1076 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1077 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1078 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1079 *abuf0 = hasAlpha ? abuf[0] : NULL,
1080 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* Complementary weights: w0 + w1 == 4095 (12-bit blend factors). */
1081 int yalpha1 = 4095 - yalpha;
1082 int uvalpha1 = 4095 - uvalpha;
1085 for (i = 0; i < (dstW >> 1); i++) {
/* 15-bit samples * 12-bit weights, >> 19 yields 8-bit output values. */
1086 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1087 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1088 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1089 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1091 const void *r = c->table_rV[V],
1092 *g = (c->table_gU[U] + c->table_gV[V]),
1093 *b = c->table_bU[U];
1096 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1097 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1100 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1101 r, g, b, y, target, hasAlpha);
1105 static av_always_inline void
1106 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1107 const int16_t *ubuf[2], const int16_t *vbuf[2],
1108 const int16_t *abuf0, uint8_t *dest, int dstW,
1109 int uvalpha, int y, enum PixelFormat target,
1112 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1113 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1116 if (uvalpha < 2048) {
1117 for (i = 0; i < (dstW >> 1); i++) {
1118 int Y1 = buf0[i * 2] >> 7;
1119 int Y2 = buf0[i * 2 + 1] >> 7;
1120 int U = ubuf1[i] >> 7;
1121 int V = vbuf1[i] >> 7;
1123 const void *r = c->table_rV[V],
1124 *g = (c->table_gU[U] + c->table_gV[V]),
1125 *b = c->table_bU[U];
1128 A1 = abuf0[i * 2 ] >> 7;
1129 A2 = abuf0[i * 2 + 1] >> 7;
1132 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1133 r, g, b, y, target, hasAlpha);
1136 for (i = 0; i < (dstW >> 1); i++) {
1137 int Y1 = buf0[i * 2] >> 7;
1138 int Y2 = buf0[i * 2 + 1] >> 7;
1139 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1140 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1142 const void *r = c->table_rV[V],
1143 *g = (c->table_gU[U] + c->table_gV[V]),
1144 *b = c->table_bU[U];
1147 A1 = abuf0[i * 2 ] >> 7;
1148 A2 = abuf0[i * 2 + 1] >> 7;
1151 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1152 r, g, b, y, target, hasAlpha);
/* Wrapper generators: YUV2RGBWRAPPERX emits only the _X_c (multi-tap)
 * entry point for a given output format; YUV2RGBWRAPPER additionally
 * emits the _2_c (two-line blend) and _1_c (single-line) entry points.
 * Each generated function just forwards to the corresponding
 * *_c_template above with the format/hasAlpha parameters baked in, so
 * the av_always_inline templates get specialized per format. */
1157 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1159 const int16_t **lumSrc, int lumFilterSize, \
1160 const int16_t *chrFilter, const int16_t **chrUSrc, \
1161 const int16_t **chrVSrc, int chrFilterSize, \
1162 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1165 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1166 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1167 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1169 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1170 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1171 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1172 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1173 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1174 int yalpha, int uvalpha, int y) \
1176 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1177 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1180 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1181 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1182 const int16_t *abuf0, uint8_t *dest, int dstW, \
1183 int uvalpha, int y) \
1185 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1186 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiations: 32-bit variants choose alpha handling at runtime
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) or are specialized with/without
 * alpha under #if CONFIG_SWSCALE_ALPHA; the rest are alpha-less. */
1190 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1191 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1193 #if CONFIG_SWSCALE_ALPHA
1194 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1195 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1197 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1198 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1200 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1201 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1202 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1203 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1204 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1205 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1206 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1207 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/* Full-chroma-resolution RGB writer: one chroma sample per output pixel
 * (no 2:1 subsampling in the writer).  Instead of lookup tables it
 * computes R/G/B arithmetically from the context's yuv2rgb_* coefficients
 * and clips to 30 bits before the (elided) per-format store; 'step' is
 * the output bytes per pixel (3 for 24-bit RGB/BGR, else 4). */
1209 static av_always_inline void
1210 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1211 const int16_t **lumSrc, int lumFilterSize,
1212 const int16_t *chrFilter, const int16_t **chrUSrc,
1213 const int16_t **chrVSrc, int chrFilterSize,
1214 const int16_t **alpSrc, uint8_t *dest,
1215 int dstW, int y, enum PixelFormat target, int hasAlpha)
1218 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1220 for (i = 0; i < dstW; i++) {
/* Vertical filtering of luma, chroma and (optionally) alpha. */
1228 for (j = 0; j < lumFilterSize; j++) {
1229 Y += lumSrc[j][i] * lumFilter[j];
1231 for (j = 0; j < chrFilterSize; j++) {
1232 U += chrUSrc[j][i] * chrFilter[j];
1233 V += chrVSrc[j][i] * chrFilter[j];
1240 for (j = 0; j < lumFilterSize; j++) {
1241 A += alpSrc[j][i] * lumFilter[j];
1245 A = av_clip_uint8(A);
/* Arithmetic YUV->RGB using the per-context coefficients. */
1247 Y -= c->yuv2rgb_y_offset;
1248 Y *= c->yuv2rgb_y_coeff;
1250 R = Y + V*c->yuv2rgb_v2r_coeff;
1251 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Clip only when a component leaves the 30-bit range. */
1253 if ((R | G | B) & 0xC0000000) {
1254 R = av_clip_uintp2(R, 30);
1255 G = av_clip_uintp2(G, 30);
1256 B = av_clip_uintp2(B, 30);
/* Per-format stores (elided here): alpha byte is 255 when no alpha. */
1261 dest[0] = hasAlpha ? A : 255;
1275 dest[3] = hasAlpha ? A : 255;
1278 dest[0] = hasAlpha ? A : 255;
1293 dest[3] = hasAlpha ? A : 255;
/* Full-chroma writer instantiations (X variant only): runtime alpha
 * selection, then alpha/no-alpha specializations under
 * CONFIG_SWSCALE_ALPHA, plus 24-bit packed formats. */
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1303 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1306 #if CONFIG_SWSCALE_ALPHA
1307 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1308 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1309 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1310 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1312 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1313 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1314 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1315 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1317 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1318 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/* Fill a rectangular region of a plane with a constant byte value,
 * starting 'y' rows into the plane; memset()s 'width' bytes per row for
 * 'height' rows, advancing by 'stride' (pointer advance elided here). */
1320 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1321 int width, int height,
1325 uint8_t *ptr = plane + stride*y;
1326 for (i=0; i<height; i++) {
1327 memset(ptr, val, width);
/* Helper macros for the 48-bit RGB readers: input_pixel reads one 16-bit
 * component honoring the format's endianness; r/b swap the red/blue roles
 * for the BGR48 layouts so the same template serves RGB48 and BGR48. */
1332 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1334 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1335 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* 48-bit RGB/BGR -> 16-bit luma, using the BT.601 RY/GY/BY coefficients
 * with rounding folded into the 0x2001 offset. */
1337 static av_always_inline void
1338 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1339 enum PixelFormat origin)
1342 for (i = 0; i < width; i++) {
1343 unsigned int r_b = input_pixel(&src[i*3+0]);
1344 unsigned int g = input_pixel(&src[i*3+1]);
1345 unsigned int b_r = input_pixel(&src[i*3+2]);
1347 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> 16-bit chroma at full horizontal resolution (one
 * U/V sample per input pixel); src2 is unused by the visible code. */
1351 static av_always_inline void
1352 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1353 const uint16_t *src1, const uint16_t *src2,
1354 int width, enum PixelFormat origin)
1358 for (i = 0; i < width; i++) {
1359 int r_b = input_pixel(&src1[i*3+0]);
1360 int g = input_pixel(&src1[i*3+1]);
1361 int b_r = input_pixel(&src1[i*3+2]);
1363 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1364 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same, but horizontally averaging each pair of adjacent pixels first
 * (rounded (a + b + 1) >> 1) to produce 2:1 subsampled chroma. */
1368 static av_always_inline void
1369 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1370 const uint16_t *src1, const uint16_t *src2,
1371 int width, enum PixelFormat origin)
1375 for (i = 0; i < width; i++) {
1376 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1377 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1378 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1380 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1381 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the public uint8_t*-typed entry points for each 48-bit format:
 * they cast the byte pointers to uint16_t* and dispatch to the templates
 * above with the concrete pixel format baked in. */
1389 #define rgb48funcs(pattern, BE_LE, origin) \
1390 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1391 int width, uint32_t *unused) \
1393 const uint16_t *src = (const uint16_t *) _src; \
1394 uint16_t *dst = (uint16_t *) _dst; \
1395 rgb48ToY_c_template(dst, src, width, origin); \
1398 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1399 const uint8_t *_src1, const uint8_t *_src2, \
1400 int width, uint32_t *unused) \
1402 const uint16_t *src1 = (const uint16_t *) _src1, \
1403 *src2 = (const uint16_t *) _src2; \
1404 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1405 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1408 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1409 const uint8_t *_src1, const uint8_t *_src2, \
1410 int width, uint32_t *unused) \
1412 const uint16_t *src1 = (const uint16_t *) _src1, \
1413 *src2 = (const uint16_t *) _src2; \
1414 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1415 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1418 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1419 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1420 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1421 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* input_pixel for the 16/32-bit packed readers: 32-bit formats are read
 * as a native-aligned 32-bit word, 16-bit formats as an endian-correct
 * 16-bit load. */
1423 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1424 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1425 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed RGB (16/32 bpp) -> 8-bit luma.  The component layout is
 * fully parameterized: shp pre-shifts the pixel, mask*/sh* extract each
 * component, rsh/gsh/bsh scale the BT.601 coefficients, S is the final
 * shift; rnd centers the rounding. */
1427 static av_always_inline void
1428 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1429 int width, enum PixelFormat origin,
1430 int shr, int shg, int shb, int shp,
1431 int maskr, int maskg, int maskb,
1432 int rsh, int gsh, int bsh, int S)
1434 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1435 const unsigned rnd = 33u << (S - 1);
1438 for (i = 0; i < width; i++) {
1439 int px = input_pixel(i) >> shp;
1440 int b = (px & maskb) >> shb;
1441 int g = (px & maskg) >> shg;
1442 int r = (px & maskr) >> shr;
1444 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* Same parameterization, producing full-resolution U and V planes. */
1448 static av_always_inline void
1449 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1450 const uint8_t *src, int width,
1451 enum PixelFormat origin,
1452 int shr, int shg, int shb, int shp,
1453 int maskr, int maskg, int maskb,
1454 int rsh, int gsh, int bsh, int S)
1456 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1457 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1458 const unsigned rnd = 257u << (S - 1);
1461 for (i = 0; i < width; i++) {
1462 int px = input_pixel(i) >> shp;
1463 int b = (px & maskb) >> shb;
1464 int g = (px & maskg) >> shg;
1465 int r = (px & maskr) >> shr;
1467 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1468 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* 2:1 horizontally subsampled U/V: sums two adjacent pixels before the
 * coefficient multiply.  maskgx isolates green by complement so the R+B
 * sum can be recovered as px0 + px1 - g; masks are widened one bit to
 * hold the two-pixel sums, and the final shift is S + 1 to divide by 2. */
1472 static av_always_inline void
1473 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1474 const uint8_t *src, int width,
1475 enum PixelFormat origin,
1476 int shr, int shg, int shb, int shp,
1477 int maskr, int maskg, int maskb,
1478 int rsh, int gsh, int bsh, int S)
1480 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1481 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1482 maskgx = ~(maskr | maskb);
1483 const unsigned rnd = 257u << S;
/* Widen masks by one bit so summed pairs do not overflow them. */
1486 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1487 for (i = 0; i < width; i++) {
1488 int px0 = input_pixel(2 * i + 0) >> shp;
1489 int px1 = input_pixel(2 * i + 1) >> shp;
1490 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1491 int rb = px0 + px1 - g;
1493 b = (rb & maskb) >> shb;
/* 565-style layouts need a different green extraction (detail elided). */
1494 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1495 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1498 g = (g & maskg) >> shg;
1500 r = (rb & maskr) >> shr;
1502 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1503 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generate the ToY/ToUV/ToUV_half entry points for one packed RGB
 * layout, forwarding to the templates with the mask/shift parameters
 * baked in so the compiler can constant-fold them per format. */
1509 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1510 maskg, maskb, rsh, gsh, bsh, S) \
1511 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1512 int width, uint32_t *unused) \
1514 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1515 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1518 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1519 const uint8_t *src, const uint8_t *dummy, \
1520 int width, uint32_t *unused) \
1522 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1526 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1527 const uint8_t *src, const uint8_t *dummy, \
1528 int width, uint32_t *unused) \
1530 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1531 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One instantiation per supported 32/16/15/12-bit packed layout. */
1534 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1535 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1536 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1537 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1538 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1539 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1540 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1541 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1542 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1543 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1544 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1545 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1546 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1547 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1548 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1549 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
/* Extract the alpha channel from packed ABGR input (body elided). */
1551 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1554 for (i=0; i<width; i++) {
/* Extract the alpha channel from packed RGBA input (body elided). */
1559 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1562 for (i=0; i<width; i++) {
/* Paletted input -> luma: look each index up in 'pal' and take the low
 * byte of the palette entry. */
1567 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1570 for (i=0; i<width; i++) {
1573 dst[i]= pal[d] & 0xFF;
/* Paletted input -> chroma: both chroma sources must alias the same
 * index plane (asserted); U/V extraction from 'p' is elided here. */
1577 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1578 const uint8_t *src1, const uint8_t *src2,
1579 int width, uint32_t *pal)
1582 assert(src1 == src2);
1583 for (i=0; i<width; i++) {
1584 int p= pal[src1[i]];
/* 1 bpp monochrome (white = 1 after the elided inversion step) -> 8-bit
 * luma: expand each of the 8 bits of a byte to 0 or 255. */
1591 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1592 int width, uint32_t *unused)
1595 for (i=0; i<width/8; i++) {
1598 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp monochrome (black = 0) -> 8-bit luma, same bit expansion. */
1602 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1603 int width, uint32_t *unused)
1606 for (i=0; i<width/8; i++) {
1609 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* YUYV packed -> luma plane (every other byte; extraction elided). */
1613 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1617 for (i=0; i<width; i++)
/* YUYV packed -> U/V planes: U at byte 1 and V at byte 3 of each
 * 4-byte Y0 U Y1 V group; both inputs must alias (asserted). */
1621 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1622 const uint8_t *src2, int width, uint32_t *unused)
1625 for (i=0; i<width; i++) {
1626 dstU[i]= src1[4*i + 1];
1627 dstV[i]= src1[4*i + 3];
1629 assert(src1 == src2);
/* Byte-swap a 16-bit luma line (converts between LE and BE layouts). */
1632 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1635 const uint16_t *src = (const uint16_t *) _src;
1636 uint16_t *dst = (uint16_t *) _dst;
1637 for (i=0; i<width; i++) {
1638 dst[i] = av_bswap16(src[i]);
/* Byte-swap a pair of 16-bit chroma lines (U from src1, V from src2). */
1642 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1643 const uint8_t *_src2, int width, uint32_t *unused)
1646 const uint16_t *src1 = (const uint16_t *) _src1,
1647 *src2 = (const uint16_t *) _src2;
1648 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1649 for (i=0; i<width; i++) {
1650 dstU[i] = av_bswap16(src1[i]);
1651 dstV[i] = av_bswap16(src2[i]);
1655 /* This is almost identical to the previous, and exists only because
1656 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY packed -> luma plane (extraction elided; Y sits at odd bytes). */
1657 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1661 for (i=0; i<width; i++)
/* UYVY packed -> U/V planes: U at byte 0 and V at byte 2 of each
 * 4-byte U Y0 V Y1 group; both inputs must alias (asserted). */
1665 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1666 const uint8_t *src2, int width, uint32_t *unused)
1669 for (i=0; i<width; i++) {
1670 dstU[i]= src1[4*i + 0];
1671 dstV[i]= src1[4*i + 2];
1673 assert(src1 == src2);
/* De-interleave a semi-planar chroma line into two separate planes;
 * the order of dst1/dst2 decides NV12 vs NV21. */
1676 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1677 const uint8_t *src, int width)
1680 for (i = 0; i < width; i++) {
1681 dst1[i] = src[2*i+0];
1682 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V,U,V... */
1686 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1687 const uint8_t *src1, const uint8_t *src2,
1688 int width, uint32_t *unused)
1690 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U,V,U... — destinations swapped. */
1693 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1694 const uint8_t *src1, const uint8_t *src2,
1695 int width, uint32_t *unused)
1697 nvXXtoUV_c(dstV, dstU, src1, width);
/* Re-defined for the following readers: endian-correct 16-bit load. */
1700 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> luma using the BT.601 RY/GY/BY coefficients with rounding. */
1702 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1703 int width, uint32_t *unused)
1706 for (i=0; i<width; i++) {
1711 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> full-resolution U/V; both inputs must alias (asserted). */
1715 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1716 const uint8_t *src2, int width, uint32_t *unused)
1719 for (i=0; i<width; i++) {
1720 int b= src1[3*i + 0];
1721 int g= src1[3*i + 1];
1722 int r= src1[3*i + 2];
1724 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1725 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1727 assert(src1 == src2);
/* BGR24 -> 2:1 subsampled U/V: sums adjacent pixel pairs, final shift
 * is RGB2YUV_SHIFT+1 to fold in the divide by 2. */
1730 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1731 const uint8_t *src2, int width, uint32_t *unused)
1734 for (i=0; i<width; i++) {
1735 int b= src1[6*i + 0] + src1[6*i + 3];
1736 int g= src1[6*i + 1] + src1[6*i + 4];
1737 int r= src1[6*i + 2] + src1[6*i + 5];
1739 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1740 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1742 assert(src1 == src2);
/* RGB24 variants of the three readers above (component order reversed). */
1745 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1749 for (i=0; i<width; i++) {
1754 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1758 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1759 const uint8_t *src2, int width, uint32_t *unused)
1763 for (i=0; i<width; i++) {
1764 int r= src1[3*i + 0];
1765 int g= src1[3*i + 1];
1766 int b= src1[3*i + 2];
1768 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1769 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1773 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1774 const uint8_t *src2, int width, uint32_t *unused)
1778 for (i=0; i<width; i++) {
1779 int r= src1[6*i + 0] + src1[6*i + 3];
1780 int g= src1[6*i + 1] + src1[6*i + 4];
1781 int b= src1[6*i + 2] + src1[6*i + 5];
1783 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1784 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Planar 8-bit RGB (separate G/B/R planes in src[0..2]) -> luma. */
1788 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1791 for (i = 0; i < width; i++) {
1796 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 16-bit little-endian RGB -> 16-bit luma.  Plane order is
 * G, B, R (src[0] = green, src[1] = blue, src[2] = red). */
1800 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1803 const uint16_t **src = (const uint16_t **) _src;
1804 uint16_t *dst = (uint16_t *) _dst;
1805 for (i = 0; i < width; i++) {
1806 int g = AV_RL16(src[0] + i);
1807 int b = AV_RL16(src[1] + i);
1808 int r = AV_RL16(src[2] + i);
1810 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Big-endian counterpart of the function above. */
1814 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1817 const uint16_t **src = (const uint16_t **) _src;
1818 uint16_t *dst = (uint16_t *) _dst;
1819 for (i = 0; i < width; i++) {
1820 int g = AV_RB16(src[0] + i);
1821 int b = AV_RB16(src[1] + i);
1822 int r = AV_RB16(src[2] + i);
1824 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 8-bit RGB -> U/V (full horizontal resolution). */
1828 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1831 for (i = 0; i < width; i++) {
1836 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1837 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar 16-bit LE RGB -> 16-bit U/V, same G/B/R plane order. */
1841 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1844 const uint16_t **src = (const uint16_t **) _src;
1845 uint16_t *dstU = (uint16_t *) _dstU;
1846 uint16_t *dstV = (uint16_t *) _dstV;
1847 for (i = 0; i < width; i++) {
1848 int g = AV_RL16(src[0] + i);
1849 int b = AV_RL16(src[1] + i);
1850 int r = AV_RL16(src[2] + i);
1852 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1853 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Big-endian counterpart of the function above. */
1857 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1860 const uint16_t **src = (const uint16_t **) _src;
1861 uint16_t *dstU = (uint16_t *) _dstU;
1862 uint16_t *dstV = (uint16_t *) _dstV;
1863 for (i = 0; i < width; i++) {
1864 int g = AV_RB16(src[0] + i);
1865 int b = AV_RB16(src[1] + i);
1866 int r = AV_RB16(src[2] + i);
1868 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1869 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Horizontal FIR scaling of a >8-bit input line into 19-bit intermediates
 * (int32_t output).  The shift 'sh' (derived from the source bit depth;
 * derivation partly elided) normalizes the 14-bit-filter * 16-bit-sample
 * products; results are saturated to 19 bits. */
1873 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1874 const int16_t *filter,
1875 const int16_t *filterPos, int filterSize)
1878 int32_t *dst = (int32_t *) _dst;
1879 const uint16_t *src = (const uint16_t *) _src;
1880 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1883 for (i = 0; i < dstW; i++) {
1885 int srcPos = filterPos[i];
1888 for (j = 0; j < filterSize; j++) {
1889 val += src[srcPos + j] * filter[filterSize * i + j];
1891 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1892 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Same as above but producing 15-bit intermediates in int16_t output;
 * 'sh' comes directly from the source depth. */
1896 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1897 const int16_t *filter,
1898 const int16_t *filterPos, int filterSize)
1901 const uint16_t *src = (const uint16_t *) _src;
1902 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1904 for (i = 0; i < dstW; i++) {
1906 int srcPos = filterPos[i];
1909 for (j = 0; j < filterSize; j++) {
1910 val += src[srcPos + j] * filter[filterSize * i + j];
1912 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1913 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1917 // bilinear / bicubic scaling
/* Horizontal FIR scaling of an 8-bit line into 15-bit int16_t
 * intermediates: per output pixel, accumulate filterSize taps starting
 * at filterPos[i], shift down by 7 and clamp (the cubic filter can
 * overshoot the representable range). */
1918 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1919 const int16_t *filter, const int16_t *filterPos,
1923 for (i=0; i<dstW; i++) {
1925 int srcPos= filterPos[i];
1927 for (j=0; j<filterSize; j++) {
1928 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1930 //filter += hFilterSize;
1931 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 8-bit input variant producing 19-bit int32_t intermediates (>> 3). */
1936 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1937 const int16_t *filter, const int16_t *filterPos,
1941 int32_t *dst = (int32_t *) _dst;
1942 for (i=0; i<dstW; i++) {
1944 int srcPos= filterPos[i];
1946 for (j=0; j<filterSize; j++) {
1947 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1949 //filter += hFilterSize;
1950 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1955 //FIXME all pal and rgb srcFormats could do this convertion as well
1956 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range -> full-range (JPEG) chroma conversion on 15-bit
 * intermediates; the FFMIN guards against multiply overflow. */
1957 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1960 for (i = 0; i < width; i++) {
1961 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1962 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Full-range (JPEG) -> limited-range chroma conversion. */
1965 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1968 for (i = 0; i < width; i++) {
1969 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1970 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Limited-range -> full-range luma conversion on 15-bit intermediates. */
1973 static void lumRangeToJpeg_c(int16_t *dst, int width)
1976 for (i = 0; i < width; i++)
1977 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Full-range -> limited-range luma conversion. */
1979 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1982 for (i = 0; i < width; i++)
1983 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 19-bit (int32_t) variants of the four converters above; constants are
 * scaled by <<4 (and <<2 / adjusted coefficients) for the wider range. */
1986 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1989 int32_t *dstU = (int32_t *) _dstU;
1990 int32_t *dstV = (int32_t *) _dstV;
1991 for (i = 0; i < width; i++) {
1992 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1993 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1996 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1999 int32_t *dstU = (int32_t *) _dstU;
2000 int32_t *dstV = (int32_t *) _dstV;
2001 for (i = 0; i < width; i++) {
2002 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2003 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2006 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2009 int32_t *dst = (int32_t *) _dst;
2010 for (i = 0; i < width; i++)
2011 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2013 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2016 int32_t *dst = (int32_t *) _dst;
2017 for (i = 0; i < width; i++)
2018 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scaler: walks the source with a 16.16
 * fixed-point position (xpos, stepped by xInc — step elided) and
 * linearly interpolates adjacent samples; output is 15-bit (<<7). */
2021 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2022 const uint8_t *src, int srcW, int xInc)
2025 unsigned int xpos=0;
2026 for (i=0;i<dstWidth;i++) {
2027 register unsigned int xx=xpos>>16;
/* 7-bit blend weight from the fractional part of the position. */
2028 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2029 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2034 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, if isAlpha) line: first convert
 * the input to planar YV12-style samples via lumToYV12/alpToYV12 or
 * readLumPlanar when the format needs it, then run either the generic
 * FIR scaler (hyScale) or the fast bilinear path, and finally apply the
 * luma range conversion (skipped for alpha). */
2035 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2036 const uint8_t *src_in[4], int srcW, int xInc,
2037 const int16_t *hLumFilter,
2038 const int16_t *hLumFilterPos, int hLumFilterSize,
2039 uint8_t *formatConvBuffer,
2040 uint32_t *pal, int isAlpha)
2042 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2043 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* Alpha lives in plane 3, luma in plane 0. */
2044 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2047 toYV12(formatConvBuffer, src, srcW, pal);
2048 src= formatConvBuffer;
2049 } else if (c->readLumPlanar && !isAlpha) {
2050 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2051 src = formatConvBuffer;
2054 if (!c->hyscale_fast) {
2055 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2056 } else { // fast bilinear upscale / crap downscale
2057 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2061 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c, applied to both chroma planes at once.  The weights
 * (xalpha ^ 127, xalpha) sum to 127, so the output is 15-bit scaled. */
2064 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2065 int dstWidth, const uint8_t *src1,
2066 const uint8_t *src2, int srcW, int xInc)
2069 unsigned int xpos=0;
2070 for (i=0;i<dstWidth;i++) {
2071 register unsigned int xx=xpos>>16;
2072 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2073 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2074 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Horizontally scale one pair of chroma lines: convert packed/planar
 * input into two contiguous planes in formatConvBuffer when needed
 * (chrToYV12 / readChrPlanar; buf2 is placed after the first plane,
 * 16-byte aligned, sized by srcW and the source bit depth), then run
 * either the generic FIR scaler or the fast bilinear path, and finally
 * the chroma range conversion if configured. */
2079 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2080 const uint8_t *src_in[4],
2081 int srcW, int xInc, const int16_t *hChrFilter,
2082 const int16_t *hChrFilterPos, int hChrFilterSize,
2083 uint8_t *formatConvBuffer, uint32_t *pal)
2085 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2087 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2088 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2089 src1= formatConvBuffer;
2091 } else if (c->readChrPlanar) {
2092 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2093 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2094 src1= formatConvBuffer;
2098 if (!c->hcscale_fast) {
2099 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2100 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2101 } else { // fast bilinear upscale / crap downscale
2102 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2105 if (c->chrConvertRange)
2106 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * NOTE(review): this listing carries the original file's line numbers baked
 * into the text, and several intermediate lines (case labels, break
 * statements, closing braces) are absent, so the excerpt is not compilable
 * as-is. The comments below annotate only the statements that are visible.
 *
 * Select the C (non-SIMD) output-stage functions matching c->dstFormat:
 *   - *yuv2plane1 / *yuv2planeX : planar writers for a 1-tap resp. N-tap
 *     vertical filter, chosen by destination bit depth and endianness
 *   - *yuv2nv12cX               : interleaved-chroma writer (NV12/NV21 only)
 *   - *yuv2packed1/2/X          : packed-pixel writers for 1-, 2- and N-tap
 *     vertical filters
 */
2109 static av_always_inline void
2110 find_c_packed_planar_out_funcs(SwsContext *c,
2111 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2112 yuv2interleavedX_fn *yuv2nv12cX,
2113 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2114 yuv2packedX_fn *yuv2packedX)
2116 enum PixelFormat dstFormat = c->dstFormat;
/* 16-bit planar destination: pick the writer by endianness. */
2118 if (is16BPS(dstFormat)) {
2119 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2120 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
/* 9/10-bit planar destination: depth_minus1 == 8 means 9 bits per
 * component, otherwise the 10-bit writers are used. */
2121 } else if (is9_OR_10BPS(dstFormat)) {
2122 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2123 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2124 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2126 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2127 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
/* Default 8-bit planar writers; NV12/NV21 additionally get the
 * interleaved-chroma writer. */
2130 *yuv2plane1 = yuv2plane1_8_c;
2131 *yuv2planeX = yuv2planeX_8_c;
2132 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2133 *yuv2nv12cX = yuv2nv12cX_c;
/* Full horizontal chroma interpolation requested: only the N-tap
 * (*_full_X_c) packed writers are assigned in the visible code.
 * NOTE(review): the case labels and the CONFIG_SMALL / alpha-format
 * branch structure between these assignments are missing from this
 * listing — confirm against the upstream source. */
2136 if(c->flags & SWS_FULL_CHR_H_INT) {
2137 switch (dstFormat) {
2140 *yuv2packedX = yuv2rgba32_full_X_c;
2142 #if CONFIG_SWSCALE_ALPHA
2144 *yuv2packedX = yuv2rgba32_full_X_c;
2146 #endif /* CONFIG_SWSCALE_ALPHA */
2148 *yuv2packedX = yuv2rgbx32_full_X_c;
2150 #endif /* !CONFIG_SMALL */
2154 *yuv2packedX = yuv2argb32_full_X_c;
2156 #if CONFIG_SWSCALE_ALPHA
2158 *yuv2packedX = yuv2argb32_full_X_c;
2160 #endif /* CONFIG_SWSCALE_ALPHA */
2162 *yuv2packedX = yuv2xrgb32_full_X_c;
2164 #endif /* !CONFIG_SMALL */
2168 *yuv2packedX = yuv2bgra32_full_X_c;
2170 #if CONFIG_SWSCALE_ALPHA
2172 *yuv2packedX = yuv2bgra32_full_X_c;
2174 #endif /* CONFIG_SWSCALE_ALPHA */
2176 *yuv2packedX = yuv2bgrx32_full_X_c;
2178 #endif /* !CONFIG_SMALL */
2182 *yuv2packedX = yuv2abgr32_full_X_c;
2184 #if CONFIG_SWSCALE_ALPHA
2186 *yuv2packedX = yuv2abgr32_full_X_c;
2188 #endif /* CONFIG_SWSCALE_ALPHA */
2190 *yuv2packedX = yuv2xbgr32_full_X_c;
2192 #endif /* !CONFIG_SMALL */
2195 *yuv2packedX = yuv2rgb24_full_X_c;
2198 *yuv2packedX = yuv2bgr24_full_X_c;
/* Non-full-chroma path: per-format 1-, 2- and N-tap packed RGB writers. */
2202 switch (dstFormat) {
2203 case PIX_FMT_RGB48LE:
2204 *yuv2packed1 = yuv2rgb48le_1_c;
2205 *yuv2packed2 = yuv2rgb48le_2_c;
2206 *yuv2packedX = yuv2rgb48le_X_c;
2208 case PIX_FMT_RGB48BE:
2209 *yuv2packed1 = yuv2rgb48be_1_c;
2210 *yuv2packed2 = yuv2rgb48be_2_c;
2211 *yuv2packedX = yuv2rgb48be_X_c;
2213 case PIX_FMT_BGR48LE:
2214 *yuv2packed1 = yuv2bgr48le_1_c;
2215 *yuv2packed2 = yuv2bgr48le_2_c;
2216 *yuv2packedX = yuv2bgr48le_X_c;
2218 case PIX_FMT_BGR48BE:
2219 *yuv2packed1 = yuv2bgr48be_1_c;
2220 *yuv2packed2 = yuv2bgr48be_2_c;
2221 *yuv2packedX = yuv2bgr48be_X_c;
/* 32-bit RGB: alpha-aware writers only when CONFIG_SWSCALE_ALPHA;
 * "x" variants write an opaque fourth byte. */
2226 *yuv2packed1 = yuv2rgb32_1_c;
2227 *yuv2packed2 = yuv2rgb32_2_c;
2228 *yuv2packedX = yuv2rgb32_X_c;
2230 #if CONFIG_SWSCALE_ALPHA
2232 *yuv2packed1 = yuv2rgba32_1_c;
2233 *yuv2packed2 = yuv2rgba32_2_c;
2234 *yuv2packedX = yuv2rgba32_X_c;
2236 #endif /* CONFIG_SWSCALE_ALPHA */
2238 *yuv2packed1 = yuv2rgbx32_1_c;
2239 *yuv2packed2 = yuv2rgbx32_2_c;
2240 *yuv2packedX = yuv2rgbx32_X_c;
2242 #endif /* !CONFIG_SMALL */
2244 case PIX_FMT_RGB32_1:
2245 case PIX_FMT_BGR32_1:
2247 *yuv2packed1 = yuv2rgb32_1_1_c;
2248 *yuv2packed2 = yuv2rgb32_1_2_c;
2249 *yuv2packedX = yuv2rgb32_1_X_c;
2251 #if CONFIG_SWSCALE_ALPHA
2253 *yuv2packed1 = yuv2rgba32_1_1_c;
2254 *yuv2packed2 = yuv2rgba32_1_2_c;
2255 *yuv2packedX = yuv2rgba32_1_X_c;
2257 #endif /* CONFIG_SWSCALE_ALPHA */
2259 *yuv2packed1 = yuv2rgbx32_1_1_c;
2260 *yuv2packed2 = yuv2rgbx32_1_2_c;
2261 *yuv2packedX = yuv2rgbx32_1_X_c;
2263 #endif /* !CONFIG_SMALL */
2266 *yuv2packed1 = yuv2rgb24_1_c;
2267 *yuv2packed2 = yuv2rgb24_2_c;
2268 *yuv2packedX = yuv2rgb24_X_c;
2271 *yuv2packed1 = yuv2bgr24_1_c;
2272 *yuv2packed2 = yuv2bgr24_2_c;
2273 *yuv2packedX = yuv2bgr24_X_c;
/* 16/15/12-bit packed RGB: one writer set shared by the RGB and BGR,
 * LE and BE variants (component order handled inside the writers). */
2275 case PIX_FMT_RGB565LE:
2276 case PIX_FMT_RGB565BE:
2277 case PIX_FMT_BGR565LE:
2278 case PIX_FMT_BGR565BE:
2279 *yuv2packed1 = yuv2rgb16_1_c;
2280 *yuv2packed2 = yuv2rgb16_2_c;
2281 *yuv2packedX = yuv2rgb16_X_c;
2283 case PIX_FMT_RGB555LE:
2284 case PIX_FMT_RGB555BE:
2285 case PIX_FMT_BGR555LE:
2286 case PIX_FMT_BGR555BE:
2287 *yuv2packed1 = yuv2rgb15_1_c;
2288 *yuv2packed2 = yuv2rgb15_2_c;
2289 *yuv2packedX = yuv2rgb15_X_c;
2291 case PIX_FMT_RGB444LE:
2292 case PIX_FMT_RGB444BE:
2293 case PIX_FMT_BGR444LE:
2294 case PIX_FMT_BGR444BE:
2295 *yuv2packed1 = yuv2rgb12_1_c;
2296 *yuv2packed2 = yuv2rgb12_2_c;
2297 *yuv2packedX = yuv2rgb12_X_c;
2301 *yuv2packed1 = yuv2rgb8_1_c;
2302 *yuv2packed2 = yuv2rgb8_2_c;
2303 *yuv2packedX = yuv2rgb8_X_c;
2307 *yuv2packed1 = yuv2rgb4_1_c;
2308 *yuv2packed2 = yuv2rgb4_2_c;
2309 *yuv2packedX = yuv2rgb4_X_c;
2311 case PIX_FMT_RGB4_BYTE:
2312 case PIX_FMT_BGR4_BYTE:
2313 *yuv2packed1 = yuv2rgb4b_1_c;
2314 *yuv2packed2 = yuv2rgb4b_2_c;
2315 *yuv2packedX = yuv2rgb4b_X_c;
/* Non-RGB packed destinations: gray16, 1-bit mono, and the packed
 * YUV 4:2:2 layouts. */
2319 switch (dstFormat) {
2320 case PIX_FMT_GRAY16BE:
2321 *yuv2packed1 = yuv2gray16BE_1_c;
2322 *yuv2packed2 = yuv2gray16BE_2_c;
2323 *yuv2packedX = yuv2gray16BE_X_c;
2325 case PIX_FMT_GRAY16LE:
2326 *yuv2packed1 = yuv2gray16LE_1_c;
2327 *yuv2packed2 = yuv2gray16LE_2_c;
2328 *yuv2packedX = yuv2gray16LE_X_c;
2330 case PIX_FMT_MONOWHITE:
2331 *yuv2packed1 = yuv2monowhite_1_c;
2332 *yuv2packed2 = yuv2monowhite_2_c;
2333 *yuv2packedX = yuv2monowhite_X_c;
2335 case PIX_FMT_MONOBLACK:
2336 *yuv2packed1 = yuv2monoblack_1_c;
2337 *yuv2packed2 = yuv2monoblack_2_c;
2338 *yuv2packedX = yuv2monoblack_X_c;
2340 case PIX_FMT_YUYV422:
2341 *yuv2packed1 = yuv2yuyv422_1_c;
2342 *yuv2packed2 = yuv2yuyv422_2_c;
2343 *yuv2packedX = yuv2yuyv422_X_c;
2345 case PIX_FMT_UYVY422:
2346 *yuv2packed1 = yuv2uyvy422_1_c;
2347 *yuv2packed2 = yuv2uyvy422_2_c;
2348 *yuv2packedX = yuv2uyvy422_X_c;
/* Compile-time switch for buffer-tracing log output; DEBUG_BUFFERS is a
 * no-op unless DEBUG_SWSCALE_BUFFERS is set to 1. */
2353 #define DEBUG_SWSCALE_BUFFERS 0
2354 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main C scaling loop. For each destination line it first horizontally
 * scales the source lines it needs into per-plane ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf), then vertically
 * filters those buffered lines and writes the output line via the
 * yuv2plane*/yuv2packed* function pointers. Operates on a horizontal
 * slice [srcSliceY, srcSliceY + srcSliceH) of the source picture and
 * returns the number of destination lines produced (dstY - lastDstY).
 *
 * NOTE(review): this listing has the original line numbers baked into the
 * text and is missing lines — e.g. the declarations of dstY, lastDstY and
 * enough_lines, several closing braces, and parts of the packed-input
 * stride fixup are not visible. Comments describe visible code only.
 */
2356 static int swScale(SwsContext *c, const uint8_t* src[],
2357 int srcStride[], int srcSliceY,
2358 int srcSliceH, uint8_t* dst[], int dstStride[])
2360 /* load a few things into local vars to make the code more readable? and faster */
2361 const int srcW= c->srcW;
2362 const int dstW= c->dstW;
2363 const int dstH= c->dstH;
2364 const int chrDstW= c->chrDstW;
2365 const int chrSrcW= c->chrSrcW;
2366 const int lumXInc= c->lumXInc;
2367 const int chrXInc= c->chrXInc;
2368 const enum PixelFormat dstFormat= c->dstFormat;
2369 const int flags= c->flags;
/* Horizontal/vertical filter coefficient tables and tap positions. */
2370 int16_t *vLumFilterPos= c->vLumFilterPos;
2371 int16_t *vChrFilterPos= c->vChrFilterPos;
2372 int16_t *hLumFilterPos= c->hLumFilterPos;
2373 int16_t *hChrFilterPos= c->hChrFilterPos;
2374 int16_t *vLumFilter= c->vLumFilter;
2375 int16_t *vChrFilter= c->vChrFilter;
2376 int16_t *hLumFilter= c->hLumFilter;
2377 int16_t *hChrFilter= c->hChrFilter;
2378 int32_t *lumMmxFilter= c->lumMmxFilter;
2379 int32_t *chrMmxFilter= c->chrMmxFilter;
2380 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2381 const int vLumFilterSize= c->vLumFilterSize;
2382 const int vChrFilterSize= c->vChrFilterSize;
2383 const int hLumFilterSize= c->hLumFilterSize;
2384 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally-scaled lines awaiting vertical filtering. */
2385 int16_t **lumPixBuf= c->lumPixBuf;
2386 int16_t **chrUPixBuf= c->chrUPixBuf;
2387 int16_t **chrVPixBuf= c->chrVPixBuf;
2388 int16_t **alpPixBuf= c->alpPixBuf;
2389 const int vLumBufSize= c->vLumBufSize;
2390 const int vChrBufSize= c->vChrBufSize;
2391 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Slice bounds in chroma-line units (chrSrcSliceH rounds up). */
2392 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2393 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2395 uint32_t *pal=c->pal_yuv;
2396 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2397 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2398 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2399 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2400 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2401 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Dither only for >8-bit sources (9/10/16 bps). */
2402 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2404 /* vars which will change and which we need to store back in the context */
2406 int lumBufIndex= c->lumBufIndex;
2407 int chrBufIndex= c->chrBufIndex;
2408 int lastInLumBuf= c->lastInLumBuf;
2409 int lastInChrBuf= c->lastInChrBuf;
/* Packed input: all planes alias plane 0 (only the stride[3] assignment
 * is visible here; upstream also copies src/stride for planes 1 and 2). */
2411 if (isPacked(c->srcFormat)) {
2419 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma lines by widening the chroma stride. */
2421 srcStride[1]<<= c->vChrDrop;
2422 srcStride[2]<<= c->vChrDrop;
2424 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2425 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2426 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2427 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2428 srcSliceY, srcSliceH, dstY, dstH);
2429 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2430 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Warn (once) when destination strides are not 8-byte aligned: SIMD
 * paths then cannot use aligned accesses. */
2432 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2433 static int warnedAlready=0; //FIXME move this into the context perhaps
2434 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2435 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2436 " ->cannot do aligned memory accesses anymore\n");
2441 /* Note the user might start scaling the picture in the middle so this
2442 will not get executed. This is not really intended but works
2443 currently, so people might do it. */
/* First slice of the picture: reset ring-buffer state (reset statements
 * themselves are not visible in this listing). */
2444 if (srcSliceY ==0) {
2452 if (!should_dither) {
2453 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: produce destination lines until we run out of input. */
2457 for (;dstY < dstH; dstY++) {
2458 const int chrDstY= dstY>>c->chrDstVSubSample;
2459 uint8_t *dest[4] = {
2460 dst[0] + dstStride[0] * dstY,
2461 dst[1] + dstStride[1] * chrDstY,
2462 dst[2] + dstStride[2] * chrDstY,
2463 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* First/last source lines needed to output this destination line.
 * firstLumSrcY2 looks ahead to the last luma row of the current
 * chroma-subsampled group so chroma has matching luma input. */
2466 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2467 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2468 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2470 // Last line needed as input
2471 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2472 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2473 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2476 //handle holes (FAST_BILINEAR & weird filters)
2477 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2478 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2479 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2480 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2482 DEBUG_BUFFERS("dstY: %d\n", dstY);
2483 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2484 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2485 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2486 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2488 // Do we have enough lines in this slice to output the dstY line
2489 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input in this slice: buffer what we have and bail out of
 * the loop after the horizontal pass below. */
2491 if (!enough_lines) {
2492 lastLumSrcY = srcSliceY + srcSliceH - 1;
2493 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2494 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2495 lastLumSrcY, lastChrSrcY);
2498 //Do horizontal scaling
/* Horizontally scale each still-missing luma (and alpha) source line
 * into the ring buffer. */
2499 while(lastInLumBuf < lastLumSrcY) {
2500 const uint8_t *src1[4] = {
2501 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2502 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2503 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2504 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2507 assert(lumBufIndex < 2*vLumBufSize);
2508 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2509 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2510 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2511 hLumFilter, hLumFilterPos, hLumFilterSize,
2514 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2515 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2516 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2520 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2521 lumBufIndex, lastInLumBuf);
/* Same for the chroma planes, addressed in chroma-line units. */
2523 while(lastInChrBuf < lastChrSrcY) {
2524 const uint8_t *src1[4] = {
2525 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2526 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2527 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2528 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2531 assert(chrBufIndex < 2*vChrBufSize);
2532 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2533 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2534 //FIXME replace parameters through context struct (some at least)
/* Skipped for gray/mono formats (needs_hcscale set in init). */
2536 if (c->needs_hcscale)
2537 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2538 chrDstW, src1, chrSrcW, chrXInc,
2539 hChrFilter, hChrFilterPos, hChrFilterSize,
2540 formatConvBuffer, pal);
2542 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2543 chrBufIndex, lastInChrBuf);
2545 //wrap buf index around to stay inside the ring buffer
2546 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2547 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2549 break; //we can't output a dstY line so let's try with the next slice
2552 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Per-line 8x8 ordered-dither row selection for >8-bit sources. */
2554 if (should_dither) {
2555 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2556 c->lumDither8 = dither_8x8_128[dstY & 7];
2558 if (dstY >= dstH-2) {
2559 // hmm looks like we can't use MMX here without overwriting this array's tail
/* Last two lines: fall back to the C output functions so SIMD writers
 * cannot overrun the destination buffer tail. */
2560 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2561 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Windows into the ring buffers covering the vertical filter taps for
 * this output line. */
2565 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2566 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2567 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2568 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Picture edge: replicate the first/last available luma line into the
 * spare tail of the pointer array so the filter never reads outside. */
2570 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2571 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2572 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2573 for (i = 0; i < neg; i++)
2574 tmpY[i] = lumSrcPtr[neg];
2575 for ( ; i < end; i++)
2576 tmpY[i] = lumSrcPtr[i];
2577 for ( ; i < vLumFilterSize; i++)
2578 tmpY[i] = tmpY[i-1];
/* Same edge replication for the alpha plane (guarding if-line is not
 * visible in this listing). */
2582 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2583 for (i = 0; i < neg; i++)
2584 tmpA[i] = alpSrcPtr[neg];
2585 for ( ; i < end; i++)
2586 tmpA[i] = alpSrcPtr[i];
2587 for ( ; i < vLumFilterSize; i++)
2588 tmpA[i] = tmpA[i - 1];
/* Edge replication for both chroma planes. */
2592 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2593 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2594 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2595 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2596 for (i = 0; i < neg; i++) {
2597 tmpU[i] = chrUSrcPtr[neg];
2598 tmpV[i] = chrVSrcPtr[neg];
2600 for ( ; i < end; i++) {
2601 tmpU[i] = chrUSrcPtr[i];
2602 tmpV[i] = chrVSrcPtr[i];
2604 for ( ; i < vChrFilterSize; i++) {
2605 tmpU[i] = tmpU[i - 1];
2606 tmpV[i] = tmpV[i - 1];
/* Planar YUV (or gray8) destination: vertical filter each plane;
 * 1-tap filters use the cheap yuv2plane1 path. */
2612 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2613 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2615 if (vLumFilterSize == 1) {
2616 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2618 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2619 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Chroma is written only on lines that survive vertical subsampling. */
2622 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2624 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2625 } else if (vChrFilterSize == 1) {
2626 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2627 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2629 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2630 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2631 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2632 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2636 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2637 if (vLumFilterSize == 1) {
2638 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2640 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2641 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed destination: pick 1-tap / 2-tap / general N-tap writer. */
2645 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2646 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2647 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2648 int chrAlpha = vChrFilter[2 * dstY + 1];
2649 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2650 alpPixBuf ? *alpSrcPtr : NULL,
2651 dest[0], dstW, chrAlpha, dstY);
2652 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2653 int lumAlpha = vLumFilter[2 * dstY + 1];
2654 int chrAlpha = vChrFilter[2 * dstY + 1];
/* Duplicate the 16-bit coefficients into both halves of the 32-bit
 * MMX filter slots. */
2656 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2658 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2659 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2660 alpPixBuf ? alpSrcPtr : NULL,
2661 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2662 } else { //general RGB
2663 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2664 lumSrcPtr, vLumFilterSize,
2665 vChrFilter + dstY * vChrFilterSize,
2666 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2667 alpSrcPtr, dest[0], dstW, dstY);
/* Destination has an alpha plane but no source alpha: fill it opaque. */
2673 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
2674 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush non-temporal stores issued by MMX2 writers. */
2677 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2678 __asm__ volatile("sfence":::"memory");
2682 /* store changed local vars back in the context */
2684 c->lumBufIndex= lumBufIndex;
2685 c->chrBufIndex= chrBufIndex;
2686 c->lastInLumBuf= lastInLumBuf;
2687 c->lastInChrBuf= lastInChrBuf;
2689 return dstY - lastDstY;
/*
 * Initialize the C code paths of the SwsContext: output-stage writers,
 * per-source-format input converters (chrToYV12 / lumToYV12 / alpToYV12,
 * readLumPlanar / readChrPlanar), horizontal scalers, optional range
 * conversion, and the needs_hcscale flag consumed by swScale().
 *
 * NOTE(review): this listing has original line numbers baked in and is
 * missing lines — several case labels, break statements and preprocessor
 * guards are absent; comments describe the visible statements only.
 */
2692 static av_cold void sws_init_swScale_c(SwsContext *c)
2694 enum PixelFormat srcFormat = c->srcFormat;
2696 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2697 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converter: extract U/V (as planar YV12-style data) from
 * the source format. NULL means no conversion needed. */
2700 c->chrToYV12 = NULL;
2702 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2703 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2704 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2705 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* Palette-indexed formats go through the palette lookup. */
2709 case PIX_FMT_BGR4_BYTE:
2710 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar RGB (GBR planes): read via readChrPlanar instead of chrToYV12. */
2711 case PIX_FMT_GBRP9LE:
2712 case PIX_FMT_GBRP10LE:
2713 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2714 case PIX_FMT_GBRP9BE:
2715 case PIX_FMT_GBRP10BE:
2716 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2717 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* NOTE(review): both the LE and the BE >8-bit YUV groups below map to
 * bswap16UV_c; upstream these two groups are separated by
 * HAVE_BIGENDIAN preprocessor conditionals (only native-endian input
 * skips the swap) which are not visible in this listing — confirm. */
2719 case PIX_FMT_YUV444P9LE:
2720 case PIX_FMT_YUV422P9LE:
2721 case PIX_FMT_YUV420P9LE:
2722 case PIX_FMT_YUV422P10LE:
2723 case PIX_FMT_YUV444P10LE:
2724 case PIX_FMT_YUV420P10LE:
2725 case PIX_FMT_YUV420P16LE:
2726 case PIX_FMT_YUV422P16LE:
2727 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2729 case PIX_FMT_YUV444P9BE:
2730 case PIX_FMT_YUV422P9BE:
2731 case PIX_FMT_YUV420P9BE:
2732 case PIX_FMT_YUV444P10BE:
2733 case PIX_FMT_YUV422P10BE:
2734 case PIX_FMT_YUV420P10BE:
2735 case PIX_FMT_YUV420P16BE:
2736 case PIX_FMT_YUV422P16BE:
2737 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* When chroma is horizontally subsampled, use the *_half_c converters
 * that average two source pixels per chroma sample. */
2740 if (c->chrSrcHSubSample) {
2742 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2743 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2744 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2745 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2746 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2747 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2748 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2749 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2750 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2751 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2752 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2753 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
2754 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
2755 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2756 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2757 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2758 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2759 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2760 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2761 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2762 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
2763 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
/* No horizontal subsampling: one chroma sample per source pixel. */
2767 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2768 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2769 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2770 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2771 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2772 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2773 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2774 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2775 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2776 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2777 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2778 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
2779 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
2780 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2781 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2782 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2783 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2784 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2785 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2786 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2787 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
2788 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
/* Luma (and alpha) input converters, same scheme as chroma above. */
2792 c->lumToYV12 = NULL;
2793 c->alpToYV12 = NULL;
2794 switch (srcFormat) {
2795 case PIX_FMT_GBRP9LE:
2796 case PIX_FMT_GBRP10LE:
2797 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2798 case PIX_FMT_GBRP9BE:
2799 case PIX_FMT_GBRP10BE:
2800 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2801 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
/* NOTE(review): as for chroma, both the LE and BE groups map to
 * bswap16Y_c here; upstream guards them with HAVE_BIGENDIAN — the
 * guards are not visible in this listing. */
2803 case PIX_FMT_YUV444P9LE:
2804 case PIX_FMT_YUV422P9LE:
2805 case PIX_FMT_YUV420P9LE:
2806 case PIX_FMT_YUV444P10LE:
2807 case PIX_FMT_YUV422P10LE:
2808 case PIX_FMT_YUV420P10LE:
2809 case PIX_FMT_YUV420P16LE:
2810 case PIX_FMT_YUV422P16LE:
2811 case PIX_FMT_YUV444P16LE:
2812 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2814 case PIX_FMT_YUV444P9BE:
2815 case PIX_FMT_YUV422P9BE:
2816 case PIX_FMT_YUV420P9BE:
2817 case PIX_FMT_YUV444P10BE:
2818 case PIX_FMT_YUV422P10BE:
2819 case PIX_FMT_YUV420P10BE:
2820 case PIX_FMT_YUV420P16BE:
2821 case PIX_FMT_YUV422P16BE:
2822 case PIX_FMT_YUV444P16BE:
2823 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A (gray + alpha) shares yuy2ToY: both have luma at even offsets. */
2825 case PIX_FMT_YUYV422 :
2826 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2827 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2828 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2829 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2830 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2831 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2832 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2833 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
2834 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
2835 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2836 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2837 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2838 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2839 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2840 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
2841 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
2845 case PIX_FMT_BGR4_BYTE:
2846 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2847 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2848 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2849 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2850 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2851 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2852 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2853 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2854 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2855 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2856 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for source formats carrying an alpha channel
 * (presumably only when the destination wants alpha — the guarding
 * condition is not visible in this listing). */
2859 switch (srcFormat) {
2861 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2863 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2864 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scaler selection by source/destination bit depth; the
 * fast (approximate) bilinear path only exists for 8-bit input with
 * <=10-bit intermediates. */
2868 if (c->srcBpc == 8) {
2869 if (c->dstBpc <= 10) {
2870 c->hyScale = c->hcScale = hScale8To15_c;
2871 if (c->flags & SWS_FAST_BILINEAR) {
2872 c->hyscale_fast = hyscale_fast_c;
2873 c->hcscale_fast = hcscale_fast_c;
2876 c->hyScale = c->hcScale = hScale8To19_c;
2879 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Limited<->full range conversion, only for YUV destinations (RGB
 * output handles range in the yuv2rgb tables). 16-bit variants are
 * used for >10-bit intermediates. */
2882 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2883 if (c->dstBpc <= 10) {
2885 c->lumConvertRange = lumRangeFromJpeg_c;
2886 c->chrConvertRange = chrRangeFromJpeg_c;
2888 c->lumConvertRange = lumRangeToJpeg_c;
2889 c->chrConvertRange = chrRangeToJpeg_c;
2893 c->lumConvertRange = lumRangeFromJpeg16_c;
2894 c->chrConvertRange = chrRangeFromJpeg16_c;
2896 c->lumConvertRange = lumRangeToJpeg16_c;
2897 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray and 1-bit mono formats have no chroma to scale horizontally. */
2902 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2903 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2904 c->needs_hcscale = 1;
2907 SwsFunc ff_getSwsFunc(SwsContext *c)
2909 sws_init_swScale_c(c);
2912 ff_sws_init_swScale_mmx(c);
2914 ff_sws_init_swScale_altivec(c);