/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
/* Fixed-point (Q15) RGB->YUV conversion coefficients.
 * Luma weights 0.299/0.587/0.114 are scaled by 219/255 (limited-range luma),
 * chroma weights by 224/255 (limited-range chroma); +0.5 rounds to nearest. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
  Special versions: fast Y 1:1 scaling (no interpolation in y direction)

  TODO
  more intelligent misalignment avoidance for the horizontal scaler
  write special vertical cubic upscale version
  optimize C code (YV12 / minmax)
  add support for packed pixel YUV input & output
  add support for Y8 output
  optimize BGR24 & BGR32
  add BGR4 output support
  write special BGR->BGR scaler
*/
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 { 1, 3, 1, 3, 1, 3, 1, 3, },
67 { 2, 0, 2, 0, 2, 0, 2, 0, },
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 { 6, 2, 6, 2, 6, 2, 6, 2, },
72 { 0, 4, 0, 4, 0, 4, 0, 4, },
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 { 8, 4, 11, 7, 8, 4, 11, 7, },
77 { 2, 14, 1, 13, 2, 14, 1, 13, },
78 { 10, 6, 9, 5, 10, 6, 9, 5, },
79 { 0, 12, 3, 15, 0, 12, 3, 15, },
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17, 9, 23, 15, 16, 8, 22, 14, },
84 { 5, 29, 3, 27, 4, 28, 2, 26, },
85 { 21, 13, 19, 11, 20, 12, 18, 10, },
86 { 0, 24, 6, 30, 1, 25, 7, 31, },
87 { 16, 8, 22, 14, 17, 9, 23, 15, },
88 { 4, 28, 2, 26, 5, 29, 3, 27, },
89 { 20, 12, 18, 10, 21, 13, 19, 11, },
90 { 1, 25, 7, 31, 0, 24, 6, 30, },
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 { 0, 55, 14, 68, 3, 58, 17, 72, },
95 { 37, 18, 50, 32, 40, 22, 54, 35, },
96 { 9, 64, 5, 59, 13, 67, 8, 63, },
97 { 46, 27, 41, 23, 49, 31, 44, 26, },
98 { 2, 57, 16, 71, 1, 56, 15, 70, },
99 { 39, 21, 52, 34, 38, 19, 51, 33, },
100 { 11, 66, 7, 62, 10, 65, 6, 60, },
101 { 48, 30, 43, 25, 47, 29, 42, 24, },
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117, 62, 158, 103, 113, 58, 155, 100, },
107 { 34, 199, 21, 186, 31, 196, 17, 182, },
108 {144, 89, 131, 76, 141, 86, 127, 72, },
109 { 0, 165, 41, 206, 10, 175, 52, 217, },
110 {110, 55, 151, 96, 120, 65, 162, 107, },
111 { 28, 193, 14, 179, 38, 203, 24, 189, },
112 {138, 83, 124, 69, 148, 93, 134, 79, },
113 { 7, 172, 48, 213, 3, 168, 45, 210, },
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 { 0, 143, 18, 200, 2, 156, 25, 215, },
119 { 78, 28, 125, 64, 89, 36, 138, 74, },
120 { 10, 180, 3, 161, 16, 195, 8, 175, },
121 {109, 51, 93, 38, 121, 60, 105, 47, },
122 { 1, 152, 23, 210, 0, 147, 20, 205, },
123 { 85, 33, 134, 71, 81, 30, 130, 67, },
124 { 14, 190, 6, 171, 12, 185, 5, 166, },
125 {117, 57, 101, 44, 113, 54, 97, 41, },
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 { 0, 124, 8, 193, 0, 140, 12, 213, },
131 { 55, 14, 104, 42, 66, 19, 119, 52, },
132 { 3, 168, 1, 145, 6, 187, 3, 162, },
133 { 86, 31, 70, 21, 99, 39, 82, 28, },
134 { 0, 134, 11, 206, 0, 129, 9, 200, },
135 { 62, 17, 114, 48, 58, 16, 109, 45, },
136 { 5, 181, 2, 157, 4, 175, 1, 151, },
137 { 95, 36, 78, 26, 90, 34, 74, 24, },
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 { 0, 107, 3, 187, 0, 125, 6, 212, },
143 { 39, 7, 86, 28, 49, 11, 102, 36, },
144 { 1, 158, 0, 131, 3, 180, 1, 151, },
145 { 68, 19, 52, 12, 81, 25, 64, 17, },
146 { 0, 119, 5, 203, 0, 113, 4, 195, },
147 { 45, 9, 96, 33, 42, 8, 91, 30, },
148 { 2, 172, 1, 144, 2, 165, 0, 137, },
149 { 77, 23, 60, 15, 72, 21, 56, 14, },
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 { 36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100, 4,124, 28, 98, 2,122, 26,},
155 { 52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 { 32, 64, 56, 88, 38, 70, 62, 94,},
158 { 96, 0,120, 24,102, 6,126, 30,},
159 { 48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104, 8,118, 22,110, 14,},
/* constant 64 in every byte; used as a rounding bias (shared with SIMD code,
 * hence the ff_ prefix and external linkage) */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
/* Store one 16-bit sample: shift down by the enclosing scope's `shift`,
 * clip (signed or unsigned per `signedness`), add `bias`, and write in the
 * byte order selected by the enclosing scope's `big_endian`.
 * The if/else skeleton lost in this copy is restored. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
172 static av_always_inline void
173 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174 int big_endian, int output_bits)
177 int shift = 19 - output_bits;
179 for (i = 0; i < dstW; i++) {
180 int val = src[i] + (1 << (shift - 1));
181 output_pixel(&dest[i], val, 0, uint);
185 static av_always_inline void
186 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187 const int32_t **src, uint16_t *dest, int dstW,
188 int big_endian, int output_bits)
191 int shift = 15 + 16 - output_bits;
193 for (i = 0; i < dstW; i++) {
194 int val = 1 << (30-output_bits);
197 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198 * filters (or anything with negative coeffs, the range can be slightly
199 * wider in both directions. To account for this overflow, we subtract
200 * a constant so it always fits in the signed range (assuming a
201 * reasonable filterSize), and re-add that at the end. */
203 for (j = 0; j < filterSize; j++)
204 val += src[j][i] * filter[j];
206 output_pixel(&dest[i], val, 0x8000, int);
/* Store one 9/10-bit sample: shift down, clip to output_bits with
 * av_clip_uintp2, write with the enclosing scope's endianness.
 * The if/else skeleton lost in this copy is restored. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
219 static av_always_inline void
220 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = src[i] + (1 << (shift - 1));
228 output_pixel(&dest[i], val);
232 static av_always_inline void
233 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234 const int16_t **src, uint16_t *dest, int dstW,
235 int big_endian, int output_bits)
238 int shift = 11 + 16 - output_bits;
240 for (i = 0; i < dstW; i++) {
241 int val = 1 << (26-output_bits);
244 for (j = 0; j < filterSize; j++)
245 val += src[j][i] * filter[j];
247 output_pixel(&dest[i], val);
/* Instantiate a yuv2plane1/yuv2planeX pair for a given bit depth and
 * endianness by delegating to the matching _c_template above.
 * (Restored the `{`/`}` lines of both generated functions, lost in this copy.) */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
/* Instantiate 9-, 10- and 16-bit planar writers for both endiannesses.
 * 9/10-bit share the 10-bit template (int16_t samples); 16-bit uses the
 * 16-bit template (int32_t intermediates). */
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
/**
 * Vertically scale one row to 8-bit planar output: dither (<<12 to match
 * the 19-bit accumulator scale), accumulate weighted source rows, then
 * shift down and clip to 8 bits.
 * (Restored `{`, `int i;`, `int j;` and closing braces lost in this copy.)
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
/**
 * Output one unscaled row of 8-bit planar samples from 15-bit (after >>7)
 * intermediates, with per-pixel dither added before the shift.
 * (Restored `{`, `int i;` and closing braces lost in this copy.)
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302 const int16_t **chrUSrc, const int16_t **chrVSrc,
303 uint8_t *dest, int chrDstW)
305 enum PixelFormat dstFormat = c->dstFormat;
306 const uint8_t *chrDither = c->chrDither8;
309 if (dstFormat == PIX_FMT_NV12)
310 for (i=0; i<chrDstW; i++) {
311 int u = chrDither[i & 7] << 12;
312 int v = chrDither[(i + 3) & 7] << 12;
314 for (j=0; j<chrFilterSize; j++) {
315 u += chrUSrc[j][i] * chrFilter[j];
316 v += chrVSrc[j][i] * chrFilter[j];
319 dest[2*i]= av_clip_uint8(u>>19);
320 dest[2*i+1]= av_clip_uint8(v>>19);
323 for (i=0; i<chrDstW; i++) {
324 int u = chrDither[i & 7] << 12;
325 int v = chrDither[(i + 3) & 7] << 12;
327 for (j=0; j<chrFilterSize; j++) {
328 u += chrUSrc[j][i] * chrFilter[j];
329 v += chrVSrc[j][i] * chrFilter[j];
332 dest[2*i]= av_clip_uint8(v>>19);
333 dest[2*i+1]= av_clip_uint8(u>>19);
/* Store one gray16 sample with the endianness implied by `target`.
 * The if/else skeleton lost in this copy is restored. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
344 static av_always_inline void
345 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346 const int32_t **lumSrc, int lumFilterSize,
347 const int16_t *chrFilter, const int32_t **chrUSrc,
348 const int32_t **chrVSrc, int chrFilterSize,
349 const int32_t **alpSrc, uint16_t *dest, int dstW,
350 int y, enum PixelFormat target)
354 for (i = 0; i < (dstW >> 1); i++) {
356 int Y1 = (1 << 14) - 0x40000000;
357 int Y2 = (1 << 14) - 0x40000000;
359 for (j = 0; j < lumFilterSize; j++) {
360 Y1 += lumSrc[j][i * 2] * lumFilter[j];
361 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
365 Y1 = av_clip_int16(Y1);
366 Y2 = av_clip_int16(Y2);
367 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
372 static av_always_inline void
373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374 const int32_t *ubuf[2], const int32_t *vbuf[2],
375 const int32_t *abuf[2], uint16_t *dest, int dstW,
376 int yalpha, int uvalpha, int y,
377 enum PixelFormat target)
379 int yalpha1 = 4095 - yalpha;
381 const int32_t *buf0 = buf[0], *buf1 = buf[1];
383 for (i = 0; i < (dstW >> 1); i++) {
384 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
385 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387 output_pixel(&dest[i * 2 + 0], Y1);
388 output_pixel(&dest[i * 2 + 1], Y2);
392 static av_always_inline void
393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394 const int32_t *ubuf[2], const int32_t *vbuf[2],
395 const int32_t *abuf0, uint16_t *dest, int dstW,
396 int uvalpha, int y, enum PixelFormat target)
400 for (i = 0; i < (dstW >> 1); i++) {
401 int Y1 = buf0[i * 2 ] << 1;
402 int Y2 = buf0[i * 2 + 1] << 1;
404 output_pixel(&dest[i * 2 + 0], Y1);
405 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X full filter, _2 bilinear,
 * _1 single-row) for a 16-bit-per-component packed output format by
 * casting the generic int16_t pointers to the int32_t intermediates the
 * 16-bit templates expect.
 * (Restored the `int y)` parameter lines and `{`/`}` lines lost in this copy.) */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}
/* Little- and big-endian 16-bit grayscale output entry points. */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* Store one packed byte of 8 mono pixels; MONOWHITE is the bit-inverse
 * of MONOBLACK. The if/else skeleton lost in this copy is restored. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
467 static av_always_inline void
468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469 const int16_t **lumSrc, int lumFilterSize,
470 const int16_t *chrFilter, const int16_t **chrUSrc,
471 const int16_t **chrVSrc, int chrFilterSize,
472 const int16_t **alpSrc, uint8_t *dest, int dstW,
473 int y, enum PixelFormat target)
475 const uint8_t * const d128=dither_8x8_220[y&7];
476 uint8_t *g = c->table_gU[128] + c->table_gV[128];
480 for (i = 0; i < dstW - 1; i += 2) {
485 for (j = 0; j < lumFilterSize; j++) {
486 Y1 += lumSrc[j][i] * lumFilter[j];
487 Y2 += lumSrc[j][i+1] * lumFilter[j];
491 if ((Y1 | Y2) & 0x100) {
492 Y1 = av_clip_uint8(Y1);
493 Y2 = av_clip_uint8(Y2);
495 acc += acc + g[Y1 + d128[(i + 0) & 7]];
496 acc += acc + g[Y2 + d128[(i + 1) & 7]];
498 output_pixel(*dest++, acc);
503 static av_always_inline void
504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505 const int16_t *ubuf[2], const int16_t *vbuf[2],
506 const int16_t *abuf[2], uint8_t *dest, int dstW,
507 int yalpha, int uvalpha, int y,
508 enum PixelFormat target)
510 const int16_t *buf0 = buf[0], *buf1 = buf[1];
511 const uint8_t * const d128 = dither_8x8_220[y & 7];
512 uint8_t *g = c->table_gU[128] + c->table_gV[128];
513 int yalpha1 = 4095 - yalpha;
516 for (i = 0; i < dstW - 7; i += 8) {
517 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
518 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525 output_pixel(*dest++, acc);
529 static av_always_inline void
530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531 const int16_t *ubuf[2], const int16_t *vbuf[2],
532 const int16_t *abuf0, uint8_t *dest, int dstW,
533 int uvalpha, int y, enum PixelFormat target)
535 const uint8_t * const d128 = dither_8x8_220[y & 7];
536 uint8_t *g = c->table_gU[128] + c->table_gV[128];
539 for (i = 0; i < dstW - 7; i += 8) {
540 int acc = g[(buf0[i ] >> 7) + d128[0]];
541 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548 output_pixel(*dest++, acc);
/* Generate the three public entry points (_X full filter, _2 bilinear,
 * _1 single-row) for an 8-bit packed output format by forwarding to the
 * matching _c_template with the pixel format baked in.
 * (Restored the `int y)` parameter lines, `{`/`}` lines and the final
 * `y, fmt);` argument line lost in this copy.) */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}
/* Monochrome output entry points (white = inverted bits, black = direct). */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Store two luma and one chroma pair in YUYV or UYVY byte order.
 * The U/V store lines and else skeleton lost in this copy are restored. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
602 static av_always_inline void
603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604 const int16_t **lumSrc, int lumFilterSize,
605 const int16_t *chrFilter, const int16_t **chrUSrc,
606 const int16_t **chrVSrc, int chrFilterSize,
607 const int16_t **alpSrc, uint8_t *dest, int dstW,
608 int y, enum PixelFormat target)
612 for (i = 0; i < (dstW >> 1); i++) {
619 for (j = 0; j < lumFilterSize; j++) {
620 Y1 += lumSrc[j][i * 2] * lumFilter[j];
621 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623 for (j = 0; j < chrFilterSize; j++) {
624 U += chrUSrc[j][i] * chrFilter[j];
625 V += chrVSrc[j][i] * chrFilter[j];
631 if ((Y1 | Y2 | U | V) & 0x100) {
632 Y1 = av_clip_uint8(Y1);
633 Y2 = av_clip_uint8(Y2);
634 U = av_clip_uint8(U);
635 V = av_clip_uint8(V);
637 output_pixels(4*i, Y1, U, Y2, V);
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643 const int16_t *ubuf[2], const int16_t *vbuf[2],
644 const int16_t *abuf[2], uint8_t *dest, int dstW,
645 int yalpha, int uvalpha, int y,
646 enum PixelFormat target)
648 const int16_t *buf0 = buf[0], *buf1 = buf[1],
649 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651 int yalpha1 = 4095 - yalpha;
652 int uvalpha1 = 4095 - uvalpha;
655 for (i = 0; i < (dstW >> 1); i++) {
656 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
661 output_pixels(i * 4, Y1, U, Y2, V);
665 static av_always_inline void
666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667 const int16_t *ubuf[2], const int16_t *vbuf[2],
668 const int16_t *abuf0, uint8_t *dest, int dstW,
669 int uvalpha, int y, enum PixelFormat target)
671 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675 if (uvalpha < 2048) {
676 for (i = 0; i < (dstW >> 1); i++) {
677 int Y1 = buf0[i * 2] >> 7;
678 int Y2 = buf0[i * 2 + 1] >> 7;
679 int U = ubuf1[i] >> 7;
680 int V = vbuf1[i] >> 7;
682 output_pixels(i * 4, Y1, U, Y2, V);
685 for (i = 0; i < (dstW >> 1); i++) {
686 int Y1 = buf0[i * 2] >> 7;
687 int Y2 = buf0[i * 2 + 1] >> 7;
688 int U = (ubuf0[i] + ubuf1[i]) >> 8;
689 int V = (vbuf0[i] + vbuf1[i]) >> 8;
691 output_pixels(i * 4, Y1, U, Y2, V);
/* Packed 4:2:2 output entry points. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* R_B/B_R select component order: RGB48 stores R first, BGR48 stores B first. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
/* Store one 16-bit component with the endianness of `target`.
 * The if/else skeleton lost in this copy is restored. */
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712 const int32_t **lumSrc, int lumFilterSize,
713 const int16_t *chrFilter, const int32_t **chrUSrc,
714 const int32_t **chrVSrc, int chrFilterSize,
715 const int32_t **alpSrc, uint16_t *dest, int dstW,
716 int y, enum PixelFormat target)
720 for (i = 0; i < (dstW >> 1); i++) {
722 int Y1 = -0x40000000;
723 int Y2 = -0x40000000;
724 int U = -128 << 23; // 19
728 for (j = 0; j < lumFilterSize; j++) {
729 Y1 += lumSrc[j][i * 2] * lumFilter[j];
730 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732 for (j = 0; j < chrFilterSize; j++) {
733 U += chrUSrc[j][i] * chrFilter[j];
734 V += chrVSrc[j][i] * chrFilter[j];
737 // 8bit: 12+15=27; 16-bit: 12+19=31
745 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746 Y1 -= c->yuv2rgb_y_offset;
747 Y2 -= c->yuv2rgb_y_offset;
748 Y1 *= c->yuv2rgb_y_coeff;
749 Y2 *= c->yuv2rgb_y_coeff;
752 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754 R = V * c->yuv2rgb_v2r_coeff;
755 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756 B = U * c->yuv2rgb_u2b_coeff;
758 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
769 static av_always_inline void
770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771 const int32_t *ubuf[2], const int32_t *vbuf[2],
772 const int32_t *abuf[2], uint16_t *dest, int dstW,
773 int yalpha, int uvalpha, int y,
774 enum PixelFormat target)
776 const int32_t *buf0 = buf[0], *buf1 = buf[1],
777 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779 int yalpha1 = 4095 - yalpha;
780 int uvalpha1 = 4095 - uvalpha;
783 for (i = 0; i < (dstW >> 1); i++) {
784 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
790 Y1 -= c->yuv2rgb_y_offset;
791 Y2 -= c->yuv2rgb_y_offset;
792 Y1 *= c->yuv2rgb_y_coeff;
793 Y2 *= c->yuv2rgb_y_coeff;
797 R = V * c->yuv2rgb_v2r_coeff;
798 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799 B = U * c->yuv2rgb_u2b_coeff;
801 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
811 static av_always_inline void
812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813 const int32_t *ubuf[2], const int32_t *vbuf[2],
814 const int32_t *abuf0, uint16_t *dest, int dstW,
815 int uvalpha, int y, enum PixelFormat target)
817 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821 if (uvalpha < 2048) {
822 for (i = 0; i < (dstW >> 1); i++) {
823 int Y1 = (buf0[i * 2] ) >> 2;
824 int Y2 = (buf0[i * 2 + 1]) >> 2;
825 int U = (ubuf0[i] + (-128 << 11)) >> 2;
826 int V = (vbuf0[i] + (-128 << 11)) >> 2;
829 Y1 -= c->yuv2rgb_y_offset;
830 Y2 -= c->yuv2rgb_y_offset;
831 Y1 *= c->yuv2rgb_y_coeff;
832 Y2 *= c->yuv2rgb_y_coeff;
836 R = V * c->yuv2rgb_v2r_coeff;
837 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838 B = U * c->yuv2rgb_u2b_coeff;
840 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849 for (i = 0; i < (dstW >> 1); i++) {
850 int Y1 = (buf0[i * 2] ) >> 2;
851 int Y2 = (buf0[i * 2 + 1]) >> 2;
852 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856 Y1 -= c->yuv2rgb_y_offset;
857 Y2 -= c->yuv2rgb_y_offset;
858 Y1 *= c->yuv2rgb_y_coeff;
859 Y2 *= c->yuv2rgb_y_coeff;
863 R = V * c->yuv2rgb_v2r_coeff;
864 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865 B = U * c->yuv2rgb_u2b_coeff;
867 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit RGB/BGR output entry points, both endiannesses. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/**
 * Write out 2 RGB pixels in the target pixel format. This function takes a
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 */
895 static av_always_inline void
896 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897 unsigned A1, unsigned A2,
898 const void *_r, const void *_g, const void *_b, int y,
899 enum PixelFormat target, int hasAlpha)
901 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903 uint32_t *dest = (uint32_t *) _dest;
904 const uint32_t *r = (const uint32_t *) _r;
905 const uint32_t *g = (const uint32_t *) _g;
906 const uint32_t *b = (const uint32_t *) _b;
909 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
911 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
915 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
917 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
920 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
924 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925 uint8_t *dest = (uint8_t *) _dest;
926 const uint8_t *r = (const uint8_t *) _r;
927 const uint8_t *g = (const uint8_t *) _g;
928 const uint8_t *b = (const uint8_t *) _b;
930 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
931 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
932 dest[i * 6 + 0] = r_b[Y1];
933 dest[i * 6 + 1] = g[Y1];
934 dest[i * 6 + 2] = b_r[Y1];
935 dest[i * 6 + 3] = r_b[Y2];
936 dest[i * 6 + 4] = g[Y2];
937 dest[i * 6 + 5] = b_r[Y2];
940 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943 uint16_t *dest = (uint16_t *) _dest;
944 const uint16_t *r = (const uint16_t *) _r;
945 const uint16_t *g = (const uint16_t *) _g;
946 const uint16_t *b = (const uint16_t *) _b;
947 int dr1, dg1, db1, dr2, dg2, db2;
949 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950 dr1 = dither_2x2_8[ y & 1 ][0];
951 dg1 = dither_2x2_4[ y & 1 ][0];
952 db1 = dither_2x2_8[(y & 1) ^ 1][0];
953 dr2 = dither_2x2_8[ y & 1 ][1];
954 dg2 = dither_2x2_4[ y & 1 ][1];
955 db2 = dither_2x2_8[(y & 1) ^ 1][1];
956 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957 dr1 = dither_2x2_8[ y & 1 ][0];
958 dg1 = dither_2x2_8[ y & 1 ][1];
959 db1 = dither_2x2_8[(y & 1) ^ 1][0];
960 dr2 = dither_2x2_8[ y & 1 ][1];
961 dg2 = dither_2x2_8[ y & 1 ][0];
962 db2 = dither_2x2_8[(y & 1) ^ 1][1];
964 dr1 = dither_4x4_16[ y & 3 ][0];
965 dg1 = dither_4x4_16[ y & 3 ][1];
966 db1 = dither_4x4_16[(y & 3) ^ 3][0];
967 dr2 = dither_4x4_16[ y & 3 ][1];
968 dg2 = dither_4x4_16[ y & 3 ][0];
969 db2 = dither_4x4_16[(y & 3) ^ 3][1];
972 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
974 } else /* 8/4-bit */ {
975 uint8_t *dest = (uint8_t *) _dest;
976 const uint8_t *r = (const uint8_t *) _r;
977 const uint8_t *g = (const uint8_t *) _g;
978 const uint8_t *b = (const uint8_t *) _b;
979 int dr1, dg1, db1, dr2, dg2, db2;
981 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982 const uint8_t * const d64 = dither_8x8_73[y & 7];
983 const uint8_t * const d32 = dither_8x8_32[y & 7];
984 dr1 = dg1 = d32[(i * 2 + 0) & 7];
985 db1 = d64[(i * 2 + 0) & 7];
986 dr2 = dg2 = d32[(i * 2 + 1) & 7];
987 db2 = d64[(i * 2 + 1) & 7];
989 const uint8_t * const d64 = dither_8x8_73 [y & 7];
990 const uint8_t * const d128 = dither_8x8_220[y & 7];
991 dr1 = db1 = d128[(i * 2 + 0) & 7];
992 dg1 = d64[(i * 2 + 0) & 7];
993 dr2 = db2 = d128[(i * 2 + 1) & 7];
994 dg2 = d64[(i * 2 + 1) & 7];
997 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1001 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1007 static av_always_inline void
1008 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009 const int16_t **lumSrc, int lumFilterSize,
1010 const int16_t *chrFilter, const int16_t **chrUSrc,
1011 const int16_t **chrVSrc, int chrFilterSize,
1012 const int16_t **alpSrc, uint8_t *dest, int dstW,
1013 int y, enum PixelFormat target, int hasAlpha)
1017 for (i = 0; i < (dstW >> 1); i++) {
1023 int av_unused A1, A2;
1024 const void *r, *g, *b;
1026 for (j = 0; j < lumFilterSize; j++) {
1027 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1028 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1030 for (j = 0; j < chrFilterSize; j++) {
1031 U += chrUSrc[j][i] * chrFilter[j];
1032 V += chrVSrc[j][i] * chrFilter[j];
1038 if ((Y1 | Y2 | U | V) & 0x100) {
1039 Y1 = av_clip_uint8(Y1);
1040 Y2 = av_clip_uint8(Y2);
1041 U = av_clip_uint8(U);
1042 V = av_clip_uint8(V);
1047 for (j = 0; j < lumFilterSize; j++) {
1048 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1049 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1053 if ((A1 | A2) & 0x100) {
1054 A1 = av_clip_uint8(A1);
1055 A2 = av_clip_uint8(A2);
1059 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1061 g = (c->table_gU[U] + c->table_gV[V]);
1064 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065 r, g, b, y, target, hasAlpha);
/**
 * Blend two source lines (vertical bilinear interpolation with 12-bit
 * weights yalpha/uvalpha) and convert to packed RGB via the context
 * lookup tables.
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    /* complementary weights: line 0 gets 4095-alpha, line 1 gets alpha */
    int yalpha1  = 4095 - yalpha;
    int uvalpha1 = 4095 - uvalpha;
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        const void *r =  c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b =  c->table_bU[U];
            /* alpha is blended with the same luma weights */
            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/**
 * Single-source-line (unscaled) YUV -> packed RGB conversion.
 * uvalpha < 2048 selects nearest-neighbour chroma (second chroma line),
 * otherwise the two chroma lines are averaged.
 */
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target,
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    if (uvalpha < 2048) {
        /* nearest-neighbour chroma: use the second chroma line only */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = ubuf1[i] >> 7;
            int V  = vbuf1[i] >> 7;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        /* averaged chroma from the two chroma input lines */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
/* Generate a thin, format-specialized wrapper around the generic _X_c
 * (arbitrary vertical filter) template; fmt and hasAlpha become
 * compile-time constants so the compiler folds all format branches away. */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
/* As YUV2RGBWRAPPERX, but additionally generate the _2_c (two-line blend)
 * and _1_c (single-line) wrappers for the same output format. */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiate the packed-RGB output writers. The plain *32 variants decide
 * at run time whether an alpha plane is present (c->alpPixBuf); the a32/x32
 * variants bake that decision in at compile time. */
YUV2RGBWRAPPER(yuv2rgb,, 32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPER(yuv2rgb,, 32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1,   1)
YUV2RGBWRAPPER(yuv2rgb,, a32,   PIX_FMT_RGB32,     1)
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1,   0)
YUV2RGBWRAPPER(yuv2rgb,, x32,   PIX_FMT_RGB32,     0)
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,    0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,    0)
YUV2RGBWRAPPER(yuv2rgb,, 16,    PIX_FMT_RGB565,    0)
YUV2RGBWRAPPER(yuv2rgb,, 15,    PIX_FMT_RGB555,    0)
YUV2RGBWRAPPER(yuv2rgb,, 12,    PIX_FMT_RGB444,    0)
YUV2RGBWRAPPER(yuv2rgb,, 8,     PIX_FMT_RGB8,      0)
YUV2RGBWRAPPER(yuv2rgb,, 4,     PIX_FMT_RGB4,      0)
YUV2RGBWRAPPER(yuv2rgb,, 4b,    PIX_FMT_RGB4_BYTE, 0)
/**
 * Full-chroma-interpolation output: one chroma sample per output pixel
 * (no 2:1 horizontal chroma sharing). Uses the context's fixed-point
 * YUV->RGB coefficients directly instead of lookup tables.
 * step is 3 for 24-bit and 4 for 32-bit packed output.
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
    for (i = 0; i < dstW; i++) {
        /* vertical FIR over luma and chroma taps */
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        for (j = 0; j < lumFilterSize; j++) {
            A += alpSrc[j][i] * lumFilter[j];
            A = av_clip_uint8(A);
        /* fixed-point matrix multiply: Y offset/gain, then R/G/B terms */
        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        R = Y + V*c->yuv2rgb_v2r_coeff;
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
        B = Y +                          U*c->yuv2rgb_u2b_coeff;
        /* clip to 30 bits only when some component overflowed */
        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);
            dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
            dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
/* Instantiate the full-chroma-interpolation writers (X variant only). */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
/* Fill 'height' rows of 'plane' (starting at row y) with the constant
 * byte value; 'stride' is the distance between rows in bytes. */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;  /* first row to fill */
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
/* Read one 16-bit component, honouring the source endianness. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* For BGR48 input the first/last components are swapped relative to RGB48,
 * so r/b resolve r_b and b_r according to the origin format. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/**
 * Convert 48-bit RGB/BGR (16 bits per component) to 16-bit luma
 * (BT.601 coefficients, limited range, with rounding offset).
 */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g   = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);
        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * Convert 48-bit RGB/BGR to 16-bit chroma, one chroma sample per pixel
 * (no horizontal subsampling; only src1 is read).
 */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g   = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);
        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * As rgb48ToUV_c_template, but averages each horizontal pair of input
 * pixels first (2:1 chroma subsampling, round-to-nearest).
 */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g   = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the uint8_t-typed reader entry points (ToY / ToUV / ToUV_half)
 * for one 48-bit format; they only cast the byte pointers to uint16_t and
 * forward to the templates above. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* One reader set per 48-bit pixel format. */
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Load one pixel of a packed 16/32-bit RGB format: aligned 32-bit load for
 * the 4-byte-per-pixel formats, endian-aware 16-bit load otherwise. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/**
 * Generic packed 16/32-bit RGB -> 8-bit luma converter.
 * shp shifts the whole pixel first, mask{r,g,b}/sh{r,g,b} extract the
 * components, and {r,g,b}sh pre-scale the BT.601 coefficients so one
 * rounded right-shift by S produces the result.
 */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
    const unsigned rnd = 33u << (S - 1);
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/**
 * Generic packed 16/32-bit RGB -> 8-bit chroma converter, one chroma
 * sample per pixel; parameters as in rgb16_32ToY_c_template.
 */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
    const unsigned rnd = 257u << (S - 1);
    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/**
 * As rgb16_32ToUV_c_template but averages each horizontal pixel pair
 * (2:1 chroma subsampling). Green is accumulated through maskgx
 * (everything that is not red or blue) so the pairwise add cannot carry
 * between packed components; masks are widened by one bit to hold the sum.
 */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              maskgx = ~(maskr | maskb);
    const unsigned rnd = 257u << S;
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;  /* summed red+blue with green removed */
        b = (rb & maskb) >> shb;
        /* 565-style formats keep an extra green bit of headroom */
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;
        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Generate the ToY / ToUV / ToUV_half reader entry points for one packed
 * 16/32-bit RGB format by binding all layout parameters as constants. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Reader sets for all supported packed 16/32-bit RGB layouts. */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
/* Extract the alpha channel from packed ABGR input. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
/* Extract the alpha channel from packed RGBA input. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
/* Paletted (pal8) input to luma: the low byte of the palette entry is Y. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
    for (i=0; i<width; i++) {
        dst[i]= pal[d] & 0xFF;
/* Paletted input to chroma; src1 and src2 must be the same line. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];
/* Expand a 1 bpp mono line (monowhite: 0 = white) to 8-bit luma,
 * MSB first, 8 output pixels per input byte. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*255;
/* Expand a 1 bpp mono line (monoblack: 0 = black) to 8-bit luma,
 * MSB first, 8 output pixels per input byte. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*255;
1609 //FIXME yuy2* can read up to 7 samples too much
/**
 * Extract the luma (Y) component from packed YUYV 4:2:2 input.
 * In YUYV, Y occupies every even byte (Y0 U Y1 V ...).
 *
 * @param dst    output luma line, one byte per sample
 * @param src    packed YUYV input line
 * @param width  number of luma samples to produce
 * @param unused palette placeholder; ignored, kept for the common
 *               reader function-pointer signature
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/**
 * Extract the chroma (U/V) components from packed YUYV 4:2:2 input:
 * U is byte 1 and V is byte 3 of every 4-byte group.
 * src1 and src2 must point to the same line (asserted).
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/* Byte-swap a 16-bit luma line (LE <-> BE). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i=0; i<width; i++) {
        dst[i] = av_bswap16(src[i]);
/* Byte-swap a pair of 16-bit chroma lines (LE <-> BE). */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    for (i=0; i<width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Extract the luma (Y) component from packed UYVY 4:2:2 input.
 * In UYVY, Y occupies every odd byte (U Y0 V Y1 ...).
 *
 * @param dst    output luma line, one byte per sample
 * @param src    packed UYVY input line
 * @param width  number of luma samples to produce
 * @param unused palette placeholder; ignored, kept for the common
 *               reader function-pointer signature
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i+1];
}
/**
 * Extract the chroma (U/V) components from packed UYVY 4:2:2 input:
 * U is byte 0 and V is byte 2 of every 4-byte group.
 * src1 and src2 must point to the same line (asserted).
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
/* De-interleave an NV12/NV21-style packed chroma line into two planes. */
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                                        const uint8_t *src, int width)
    for (i = 0; i < width; i++) {
        dst1[i] = src[2*i+0];
        dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is stored U,V -> forward as (dstU, dstV). */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is stored V,U -> swap the output planes. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
/* Endian-aware 16-bit component load for the templates below. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed 24-bit BGR to 8-bit luma (BT.601 coefficients, limited range). */
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed 24-bit BGR to chroma, no horizontal subsampling
 * (src1 and src2 must be the same line, asserted). */
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
    assert(src1 == src2);
/* As bgr24ToUV_c, but averages horizontal pixel pairs (2:1 subsampling);
 * the extra factor of two is absorbed by shifting down one bit more. */
static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
    assert(src1 == src2);
/* Packed 24-bit RGB to 8-bit luma (BT.601 coefficients, limited range). */
static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed 24-bit RGB to chroma, no horizontal subsampling. */
static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* As rgb24ToUV_c, but averages horizontal pixel pairs (2:1 subsampling). */
static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Planar 8-bit RGB to luma; plane order presumably G,B,R as in the
 * 16-bit variants below — confirm against the full file. */
static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
    for (i = 0; i < width; i++) {
        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 16-bit little-endian RGB (planes: G, B, R) to 16-bit luma. */
static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);
        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 16-bit big-endian RGB (planes: G, B, R) to 16-bit luma. */
static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);
        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 8-bit RGB to chroma, no horizontal subsampling. */
static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
    for (i = 0; i < width; i++) {
        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar 16-bit little-endian RGB (planes: G, B, R) to 16-bit chroma. */
static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);
        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar 16-bit big-endian RGB (planes: G, B, R) to 16-bit chroma. */
static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);
        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/**
 * Horizontal FIR scaler for >8-bit input: 14-bit filter taps applied at
 * each output position, result clipped into 19 bits (int32 output).
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/**
 * Horizontal FIR scaler for >8-bit input with 15-bit (int16) output.
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
// bilinear / bicubic scaling
/* Horizontal FIR scaler for 8-bit input, 15-bit output. */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Horizontal FIR scaler for 8-bit input, 19-bit (int32) output. */
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int16_t *filterPos,
    int32_t *dst = (int32_t *) _dst;
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full range (JPEG) in place,
 * 15-bit samples; input is clamped so the ~255/224 gain cannot overflow. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/**
 * Compress full-range (JPEG) chroma to limited range (MPEG) in place.
 * 15-bit fixed point: c' = c * 224/255 centred on neutral chroma, computed
 * as (c*1799 + 4081085) >> 11; 16384 (128<<7, neutral) maps to itself.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/* Expand limited-range luma (16..235) to full range (0..255) in place,
 * 15-bit samples; gain 19077/16384 ~= 255/219, input clamped to avoid
 * overflow. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/**
 * Compress full-range (JPEG) luma to limited range (MPEG) in place.
 * 15-bit fixed point: y' = y * 219/255 + 16<<7, computed as
 * (y*14071 + 33561947) >> 14, so 0 -> 16<<7 and 255<<7 -> 235<<7.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/* 19-bit (int32 sample) variant of chrRangeToJpeg_c: limited -> full
 * range chroma; constants are the 15-bit ones scaled by 16. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit (int32 sample) variant of chrRangeFromJpeg_c: full -> limited
 * range chroma. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/* 19-bit (int32 sample) variant of lumRangeToJpeg_c: limited -> full
 * range luma. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/* 19-bit (int32 sample) variant of lumRangeFromJpeg_c: full -> limited
 * range luma. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scaler: xpos is a 16.16 fixed-point source
 * position; a 7-bit interpolation weight is derived from its fraction. */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// *** horizontal scale Y line to temp buffer
/**
 * Horizontally scale one luma (or alpha, if isAlpha) line into dst,
 * converting the input to planar YV12-style samples first if necessary,
 * then applying either the generic FIR scaler or the fast bilinear path,
 * and finally the optional range conversion.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src_in[4], int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    /* alpha lives in plane 3, luma in plane 0 */
    const uint8_t *src = src_in[isAlpha ? 3 : 0];
        /* input needs format conversion before scaling */
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    } else if (c->readLumPlanar && !isAlpha) {
        c->readLumPlanar(formatConvBuffer, src_in, srcW);
        src = formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: scales U and V in lockstep with
 * a shared 16.16 fixed-point position and 7-bit interpolation weight. */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Horizontally scale one pair of chroma lines (U into dst1, V into dst2),
 * converting packed/planar input into two contiguous chroma buffers first
 * if needed, then scaling and applying the optional range conversion.
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
        /* second chroma buffer placed after the (aligned) first one */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    } else if (c->readChrPlanar) {
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
2107 static av_always_inline void
2108 find_c_packed_planar_out_funcs(SwsContext *c,
2109 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2110 yuv2interleavedX_fn *yuv2nv12cX,
2111 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2112 yuv2packedX_fn *yuv2packedX)
2114 enum PixelFormat dstFormat = c->dstFormat;
2116 if (is16BPS(dstFormat)) {
2117 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2118 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2119 } else if (is9_OR_10BPS(dstFormat)) {
2120 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2121 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2122 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2124 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2125 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2128 *yuv2plane1 = yuv2plane1_8_c;
2129 *yuv2planeX = yuv2planeX_8_c;
2130 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2131 *yuv2nv12cX = yuv2nv12cX_c;
2134 if(c->flags & SWS_FULL_CHR_H_INT) {
2135 switch (dstFormat) {
2138 *yuv2packedX = yuv2rgba32_full_X_c;
2140 #if CONFIG_SWSCALE_ALPHA
2142 *yuv2packedX = yuv2rgba32_full_X_c;
2144 #endif /* CONFIG_SWSCALE_ALPHA */
2146 *yuv2packedX = yuv2rgbx32_full_X_c;
2148 #endif /* !CONFIG_SMALL */
2152 *yuv2packedX = yuv2argb32_full_X_c;
2154 #if CONFIG_SWSCALE_ALPHA
2156 *yuv2packedX = yuv2argb32_full_X_c;
2158 #endif /* CONFIG_SWSCALE_ALPHA */
2160 *yuv2packedX = yuv2xrgb32_full_X_c;
2162 #endif /* !CONFIG_SMALL */
2166 *yuv2packedX = yuv2bgra32_full_X_c;
2168 #if CONFIG_SWSCALE_ALPHA
2170 *yuv2packedX = yuv2bgra32_full_X_c;
2172 #endif /* CONFIG_SWSCALE_ALPHA */
2174 *yuv2packedX = yuv2bgrx32_full_X_c;
2176 #endif /* !CONFIG_SMALL */
2180 *yuv2packedX = yuv2abgr32_full_X_c;
2182 #if CONFIG_SWSCALE_ALPHA
2184 *yuv2packedX = yuv2abgr32_full_X_c;
2186 #endif /* CONFIG_SWSCALE_ALPHA */
2188 *yuv2packedX = yuv2xbgr32_full_X_c;
2190 #endif /* !CONFIG_SMALL */
2193 *yuv2packedX = yuv2rgb24_full_X_c;
2196 *yuv2packedX = yuv2bgr24_full_X_c;
2200 switch (dstFormat) {
2201 case PIX_FMT_RGB48LE:
2202 *yuv2packed1 = yuv2rgb48le_1_c;
2203 *yuv2packed2 = yuv2rgb48le_2_c;
2204 *yuv2packedX = yuv2rgb48le_X_c;
2206 case PIX_FMT_RGB48BE:
2207 *yuv2packed1 = yuv2rgb48be_1_c;
2208 *yuv2packed2 = yuv2rgb48be_2_c;
2209 *yuv2packedX = yuv2rgb48be_X_c;
2211 case PIX_FMT_BGR48LE:
2212 *yuv2packed1 = yuv2bgr48le_1_c;
2213 *yuv2packed2 = yuv2bgr48le_2_c;
2214 *yuv2packedX = yuv2bgr48le_X_c;
2216 case PIX_FMT_BGR48BE:
2217 *yuv2packed1 = yuv2bgr48be_1_c;
2218 *yuv2packed2 = yuv2bgr48be_2_c;
2219 *yuv2packedX = yuv2bgr48be_X_c;
2224 *yuv2packed1 = yuv2rgb32_1_c;
2225 *yuv2packed2 = yuv2rgb32_2_c;
2226 *yuv2packedX = yuv2rgb32_X_c;
2228 #if CONFIG_SWSCALE_ALPHA
2230 *yuv2packed1 = yuv2rgba32_1_c;
2231 *yuv2packed2 = yuv2rgba32_2_c;
2232 *yuv2packedX = yuv2rgba32_X_c;
2234 #endif /* CONFIG_SWSCALE_ALPHA */
2236 *yuv2packed1 = yuv2rgbx32_1_c;
2237 *yuv2packed2 = yuv2rgbx32_2_c;
2238 *yuv2packedX = yuv2rgbx32_X_c;
2240 #endif /* !CONFIG_SMALL */
2242 case PIX_FMT_RGB32_1:
2243 case PIX_FMT_BGR32_1:
2245 *yuv2packed1 = yuv2rgb32_1_1_c;
2246 *yuv2packed2 = yuv2rgb32_1_2_c;
2247 *yuv2packedX = yuv2rgb32_1_X_c;
2249 #if CONFIG_SWSCALE_ALPHA
2251 *yuv2packed1 = yuv2rgba32_1_1_c;
2252 *yuv2packed2 = yuv2rgba32_1_2_c;
2253 *yuv2packedX = yuv2rgba32_1_X_c;
2255 #endif /* CONFIG_SWSCALE_ALPHA */
2257 *yuv2packed1 = yuv2rgbx32_1_1_c;
2258 *yuv2packed2 = yuv2rgbx32_1_2_c;
2259 *yuv2packedX = yuv2rgbx32_1_X_c;
2261 #endif /* !CONFIG_SMALL */
2264 *yuv2packed1 = yuv2rgb24_1_c;
2265 *yuv2packed2 = yuv2rgb24_2_c;
2266 *yuv2packedX = yuv2rgb24_X_c;
2269 *yuv2packed1 = yuv2bgr24_1_c;
2270 *yuv2packed2 = yuv2bgr24_2_c;
2271 *yuv2packedX = yuv2bgr24_X_c;
2273 case PIX_FMT_RGB565LE:
2274 case PIX_FMT_RGB565BE:
2275 case PIX_FMT_BGR565LE:
2276 case PIX_FMT_BGR565BE:
2277 *yuv2packed1 = yuv2rgb16_1_c;
2278 *yuv2packed2 = yuv2rgb16_2_c;
2279 *yuv2packedX = yuv2rgb16_X_c;
2281 case PIX_FMT_RGB555LE:
2282 case PIX_FMT_RGB555BE:
2283 case PIX_FMT_BGR555LE:
2284 case PIX_FMT_BGR555BE:
2285 *yuv2packed1 = yuv2rgb15_1_c;
2286 *yuv2packed2 = yuv2rgb15_2_c;
2287 *yuv2packedX = yuv2rgb15_X_c;
2289 case PIX_FMT_RGB444LE:
2290 case PIX_FMT_RGB444BE:
2291 case PIX_FMT_BGR444LE:
2292 case PIX_FMT_BGR444BE:
2293 *yuv2packed1 = yuv2rgb12_1_c;
2294 *yuv2packed2 = yuv2rgb12_2_c;
2295 *yuv2packedX = yuv2rgb12_X_c;
2299 *yuv2packed1 = yuv2rgb8_1_c;
2300 *yuv2packed2 = yuv2rgb8_2_c;
2301 *yuv2packedX = yuv2rgb8_X_c;
2305 *yuv2packed1 = yuv2rgb4_1_c;
2306 *yuv2packed2 = yuv2rgb4_2_c;
2307 *yuv2packedX = yuv2rgb4_X_c;
2309 case PIX_FMT_RGB4_BYTE:
2310 case PIX_FMT_BGR4_BYTE:
2311 *yuv2packed1 = yuv2rgb4b_1_c;
2312 *yuv2packed2 = yuv2rgb4b_2_c;
2313 *yuv2packedX = yuv2rgb4b_X_c;
2317 switch (dstFormat) {
2318 case PIX_FMT_GRAY16BE:
2319 *yuv2packed1 = yuv2gray16BE_1_c;
2320 *yuv2packed2 = yuv2gray16BE_2_c;
2321 *yuv2packedX = yuv2gray16BE_X_c;
2323 case PIX_FMT_GRAY16LE:
2324 *yuv2packed1 = yuv2gray16LE_1_c;
2325 *yuv2packed2 = yuv2gray16LE_2_c;
2326 *yuv2packedX = yuv2gray16LE_X_c;
2328 case PIX_FMT_MONOWHITE:
2329 *yuv2packed1 = yuv2monowhite_1_c;
2330 *yuv2packed2 = yuv2monowhite_2_c;
2331 *yuv2packedX = yuv2monowhite_X_c;
2333 case PIX_FMT_MONOBLACK:
2334 *yuv2packed1 = yuv2monoblack_1_c;
2335 *yuv2packed2 = yuv2monoblack_2_c;
2336 *yuv2packedX = yuv2monoblack_X_c;
2338 case PIX_FMT_YUYV422:
2339 *yuv2packed1 = yuv2yuyv422_1_c;
2340 *yuv2packed2 = yuv2yuyv422_2_c;
2341 *yuv2packedX = yuv2yuyv422_X_c;
2343 case PIX_FMT_UYVY422:
2344 *yuv2packed1 = yuv2uyvy422_1_c;
2345 *yuv2packed2 = yuv2uyvy422_2_c;
2346 *yuv2packedX = yuv2uyvy422_X_c;
/* Compile-time switch for ring-buffer tracing in swScale(); set to 1 to enable. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Trace helper used throughout swScale(). Wrapped in do { } while (0) so the
 * macro expands to exactly one statement and stays safe inside unbraced
 * if/else bodies (CERT PRE10-C); the original bare-`if` form was a
 * dangling-else hazard. Relies on a variable `c` (SwsContext *) being in
 * scope at every call site, which swScale() provides. */
#define DEBUG_BUFFERS(...) do { if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
/*
 * swScale(): scale one horizontal slice of the source picture into dst.
 * The caller may deliver the picture in consecutive slices; the ring
 * buffers of horizontally scaled lines and the "last input line seen"
 * counters live in the SwsContext so state carries across calls.
 * Returns the number of destination lines produced for this slice
 * (dstY - lastDstY at the end of the visible body).
 *
 * NOTE(review): this excerpt elides a number of lines (declarations of
 * dstY, lastDstY, enough_lines, several braces and #if guards); the
 * comments below describe only what the visible code establishes.
 */
2354 static int swScale(SwsContext *c, const uint8_t* src[],
2355 int srcStride[], int srcSliceY,
2356 int srcSliceH, uint8_t* dst[], int dstStride[])
2358 /* load a few things into local vars to make the code more readable? and faster */
2359 const int srcW= c->srcW;
2360 const int dstW= c->dstW;
2361 const int dstH= c->dstH;
2362 const int chrDstW= c->chrDstW;
2363 const int chrSrcW= c->chrSrcW;
2364 const int lumXInc= c->lumXInc;
2365 const int chrXInc= c->chrXInc;
2366 const enum PixelFormat dstFormat= c->dstFormat;
2367 const int flags= c->flags;
/* vertical/horizontal filter coefficient tables and per-line source offsets */
2368 int16_t *vLumFilterPos= c->vLumFilterPos;
2369 int16_t *vChrFilterPos= c->vChrFilterPos;
2370 int16_t *hLumFilterPos= c->hLumFilterPos;
2371 int16_t *hChrFilterPos= c->hChrFilterPos;
2372 int16_t *vLumFilter= c->vLumFilter;
2373 int16_t *vChrFilter= c->vChrFilter;
2374 int16_t *hLumFilter= c->hLumFilter;
2375 int16_t *hChrFilter= c->hChrFilter;
2376 int32_t *lumMmxFilter= c->lumMmxFilter;
2377 int32_t *chrMmxFilter= c->chrMmxFilter;
2378 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2379 const int vLumFilterSize= c->vLumFilterSize;
2380 const int vChrFilterSize= c->vChrFilterSize;
2381 const int hLumFilterSize= c->hLumFilterSize;
2382 const int hChrFilterSize= c->hChrFilterSize;
/* ring buffers holding horizontally scaled lines awaiting the vertical pass */
2383 int16_t **lumPixBuf= c->lumPixBuf;
2384 int16_t **chrUPixBuf= c->chrUPixBuf;
2385 int16_t **chrVPixBuf= c->chrVPixBuf;
2386 int16_t **alpPixBuf= c->alpPixBuf;
2387 const int vLumBufSize= c->vLumBufSize;
2388 const int vChrBufSize= c->vChrBufSize;
2389 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* chroma-resolution view of the slice; H uses -((-x)>>s) i.e. a ceiling shift */
2390 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2391 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2393 uint32_t *pal=c->pal_yuv;
/* output-stage function pointers; re-selected near the picture bottom, see below */
2394 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2395 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2396 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2397 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2398 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2399 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* sources deeper than 8 bits get real dither tables; 8-bit uses a flat table */
2400 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2402 /* vars which will change and which we need to store back in the context */
2404 int lumBufIndex= c->lumBufIndex;
2405 int chrBufIndex= c->chrBufIndex;
2406 int lastInLumBuf= c->lastInLumBuf;
2407 int lastInChrBuf= c->lastInChrBuf;
/* packed input: all plane slots point at plane 0 (leading assignments elided) */
2409 if (isPacked(c->srcFormat)) {
2417 srcStride[3]= srcStride[0];
/* vChrDrop discards chroma lines by widening the chroma input strides */
2419 srcStride[1]<<= c->vChrDrop;
2420 srcStride[2]<<= c->vChrDrop;
2422 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2423 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2424 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2425 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2426 srcSliceY, srcSliceH, dstY, dstH);
2427 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2428 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn once if any destination stride breaks 8-byte alignment */
2430 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2431 static int warnedAlready=0; //FIXME move this into the context perhaps
2432 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2433 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2434 " ->cannot do aligned memory accesses anymore\n");
2439 /* Note the user might start scaling the picture in the middle so this
2440 will not get executed. This is not really intended but works
2441 currently, so people might do it. */
2442 if (srcSliceY ==0) {
/* 8-bit sources: use the constant 64-value table, i.e. effectively no dithering */
2450 if (!should_dither) {
2451 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* main loop: one destination line per iteration (dstY init elided above) */
2455 for (;dstY < dstH; dstY++) {
2456 const int chrDstY= dstY>>c->chrDstVSubSample;
2457 uint8_t *dest[4] = {
2458 dst[0] + dstStride[0] * dstY,
2459 dst[1] + dstStride[1] * chrDstY,
2460 dst[2] + dstStride[2] * chrDstY,
2461 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2464 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2465 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2466 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2468 // Last line needed as input
2469 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2470 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2471 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2474 //handle holes (FAST_BILINEAR & weird filters)
2475 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2476 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
/* the needed window must still fit inside the ring buffers */
2477 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2478 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2480 DEBUG_BUFFERS("dstY: %d\n", dstY);
2481 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2482 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2483 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2484 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2486 // Do we have enough lines in this slice to output the dstY line
2487 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input yet: only buffer what this slice provides, then break below */
2489 if (!enough_lines) {
2490 lastLumSrcY = srcSliceY + srcSliceH - 1;
2491 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2492 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2493 lastLumSrcY, lastChrSrcY);
2496 //Do horizontal scaling
2497 while(lastInLumBuf < lastLumSrcY) {
/* per-plane pointers to the next unprocessed luma input line of this slice */
2498 const uint8_t *src1[4] = {
2499 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2500 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2501 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2502 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2505 assert(lumBufIndex < 2*vLumBufSize);
2506 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2507 assert(lastInLumBuf + 1 - srcSliceY >= 0);
/* horizontally scale one luma line into the ring buffer (trailing args elided) */
2508 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2509 hLumFilter, hLumFilterPos, hLumFilterSize,
2512 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2513 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2514 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2518 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2519 lumBufIndex, lastInLumBuf);
/* same as above for chroma lines */
2521 while(lastInChrBuf < lastChrSrcY) {
2522 const uint8_t *src1[4] = {
2523 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2524 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2525 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2526 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2529 assert(chrBufIndex < 2*vChrBufSize);
2530 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2531 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2532 //FIXME replace parameters through context struct (some at least)
/* needs_hcscale is 0 for pure gray/mono conversions (see sws_init_swScale_c) */
2534 if (c->needs_hcscale)
2535 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2536 chrDstW, src1, chrSrcW, chrXInc,
2537 hChrFilter, hChrFilterPos, hChrFilterSize,
2538 formatConvBuffer, pal);
2540 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2541 chrBufIndex, lastInChrBuf);
2543 //wrap buf index around to stay inside the ring buffer
2544 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2545 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2547 break; //we can't output a dstY line so let's try with the next slice
2550 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* deep (>8-bit) sources: pick the dither row matching this output line */
2552 if (should_dither) {
2553 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2554 c->lumDither8 = dither_8x8_128[dstY & 7];
2556 if (dstY >= dstH-2) {
2557 // hmm looks like we can't use MMX here without overwriting this array's tail
2558 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2559 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* position vertical-filter source pointers inside the ring buffers; the
 * +vBufSize bias compensates for indices that wrapped around above */
2563 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2564 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2565 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2566 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* picture edges: build a temporary pointer list that clamps (replicates)
 * the first/last available line where the filter window leaves the image */
2568 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2569 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2570 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2571 for (i = 0; i < neg; i++)
2572 tmpY[i] = lumSrcPtr[neg];
2573 for ( ; i < end; i++)
2574 tmpY[i] = lumSrcPtr[i];
2575 for ( ; i < vLumFilterSize; i++)
2576 tmpY[i] = tmpY[i-1];
/* same edge clamping for the alpha plane (guarding if elided in excerpt) */
2580 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2581 for (i = 0; i < neg; i++)
2582 tmpA[i] = alpSrcPtr[neg];
2583 for ( ; i < end; i++)
2584 tmpA[i] = alpSrcPtr[i];
2585 for ( ; i < vLumFilterSize; i++)
2586 tmpA[i] = tmpA[i - 1];
/* and for both chroma planes */
2590 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2591 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2592 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2593 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2594 for (i = 0; i < neg; i++) {
2595 tmpU[i] = chrUSrcPtr[neg];
2596 tmpV[i] = chrVSrcPtr[neg];
2598 for ( ; i < end; i++) {
2599 tmpU[i] = chrUSrcPtr[i];
2600 tmpV[i] = chrVSrcPtr[i];
2602 for ( ; i < vChrFilterSize; i++) {
2603 tmpU[i] = tmpU[i - 1];
2604 tmpV[i] = tmpV[i - 1];
/* ---- vertical scaling + output stage ---- */
2610 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2611 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* 1-tap filter: plain copy/dither; otherwise full vertical convolution */
2613 if (vLumFilterSize == 1) {
2614 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2616 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2617 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* emit chroma only on non-skipped lines and non-gray targets */
2620 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2622 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2623 } else if (vChrFilterSize == 1) {
2624 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2625 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2627 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2628 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2629 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2630 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2634 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2635 if (vLumFilterSize == 1) {
2636 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2638 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2639 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* packed output path: pick the cheapest writer the filter sizes allow */
2643 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2644 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2645 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2646 int chrAlpha = vChrFilter[2 * dstY + 1];
2647 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2648 alpPixBuf ? *alpSrcPtr : NULL,
2649 dest[0], dstW, chrAlpha, dstY);
2650 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2651 int lumAlpha = vLumFilter[2 * dstY + 1];
2652 int chrAlpha = vChrFilter[2 * dstY + 1];
/* replicate the 16-bit coefficient into both halves of a 32-bit word
 * for the MMX filter tables (surrounding assignments elided) */
2654 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2656 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2657 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2658 alpPixBuf ? alpSrcPtr : NULL,
2659 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2660 } else { //general RGB
2661 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2662 lumSrcPtr, vLumFilterSize,
2663 vChrFilter + dstY * vChrFilterSize,
2664 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2665 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA target with no alpha input: fill the alpha plane with opaque 255 */
2671 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2672 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* flush write-combined/non-temporal stores issued by MMX2 output code */
2675 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2676 __asm__ volatile("sfence":::"memory");
2680 /* store changed local vars back in the context */
2682 c->lumBufIndex= lumBufIndex;
2683 c->chrBufIndex= chrBufIndex;
2684 c->lastInLumBuf= lastInLumBuf;
2685 c->lastInChrBuf= lastInChrBuf;
/* number of output lines written this call (lastDstY declaration elided) */
2687 return dstY - lastDstY;
/*
 * sws_init_swScale_c(): populate the SwsContext function pointers with the
 * plain-C implementations — output writers, per-format input unpackers
 * (to YV12-style planes), horizontal scalers and range converters.
 * Architecture-specific init (MMX/AltiVec, see ff_getSwsFunc) may later
 * override some of these.
 *
 * NOTE(review): this excerpt elides several lines (switch headers, break
 * statements and, presumably, #if HAVE_BIGENDIAN guards around the
 * duplicate LE/BE -> bswap16 groups — TODO confirm against full source).
 */
2690 static av_cold void sws_init_swScale_c(SwsContext *c)
2692 enum PixelFormat srcFormat = c->srcFormat;
/* select the C output-stage writers for the destination format */
2694 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2695 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* ---- chroma input unpackers: convert src chroma to planar U/V ---- */
2698 c->chrToYV12 = NULL;
2700 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2701 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2702 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2703 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* palettized formats share the palette-lookup unpacker */
2707 case PIX_FMT_BGR4_BYTE:
2708 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* planar GBR formats read chroma via readChrPlanar instead of chrToYV12 */
2709 case PIX_FMT_GBRP9LE:
2710 case PIX_FMT_GBRP10LE:
2711 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2712 case PIX_FMT_GBRP9BE:
2713 case PIX_FMT_GBRP10BE:
2714 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2715 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* >8-bit YUV: byteswap when source endianness is not native; only one of
 * the next two groups is compiled (guard elided in this excerpt) */
2717 case PIX_FMT_YUV444P9LE:
2718 case PIX_FMT_YUV422P9LE:
2719 case PIX_FMT_YUV420P9LE:
2720 case PIX_FMT_YUV422P10LE:
2721 case PIX_FMT_YUV444P10LE:
2722 case PIX_FMT_YUV420P10LE:
2723 case PIX_FMT_YUV420P16LE:
2724 case PIX_FMT_YUV422P16LE:
2725 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2727 case PIX_FMT_YUV444P9BE:
2728 case PIX_FMT_YUV422P9BE:
2729 case PIX_FMT_YUV420P9BE:
2730 case PIX_FMT_YUV444P10BE:
2731 case PIX_FMT_YUV422P10BE:
2732 case PIX_FMT_YUV420P10BE:
2733 case PIX_FMT_YUV420P16BE:
2734 case PIX_FMT_YUV422P16BE:
2735 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB sources with horizontally subsampled chroma output average pairs of
 * pixels ("_half" variants); otherwise use the full-resolution converters */
2738 if (c->chrSrcHSubSample) {
2740 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2741 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2742 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2743 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2744 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2745 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2746 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2747 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2748 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2749 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2750 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2751 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2752 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2753 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2754 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2755 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2756 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2757 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2761 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2762 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2763 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2764 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2765 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2766 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2767 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2768 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2769 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2770 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2771 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2772 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2773 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2774 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2775 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2776 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2777 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2778 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* ---- luma (and alpha) input unpackers ---- */
2782 c->lumToYV12 = NULL;
2783 c->alpToYV12 = NULL;
2784 switch (srcFormat) {
2785 case PIX_FMT_GBRP9LE:
2786 case PIX_FMT_GBRP10LE:
2787 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2788 case PIX_FMT_GBRP9BE:
2789 case PIX_FMT_GBRP10BE:
2790 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2791 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
/* >8-bit luma byteswap; as above, one group per target endianness
 * (guard elided in this excerpt) */
2793 case PIX_FMT_YUV444P9LE:
2794 case PIX_FMT_YUV422P9LE:
2795 case PIX_FMT_YUV420P9LE:
2796 case PIX_FMT_YUV444P10LE:
2797 case PIX_FMT_YUV422P10LE:
2798 case PIX_FMT_YUV420P10LE:
2799 case PIX_FMT_YUV420P16LE:
2800 case PIX_FMT_YUV422P16LE:
2801 case PIX_FMT_YUV444P16LE:
2802 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2804 case PIX_FMT_YUV444P9BE:
2805 case PIX_FMT_YUV422P9BE:
2806 case PIX_FMT_YUV420P9BE:
2807 case PIX_FMT_YUV444P10BE:
2808 case PIX_FMT_YUV422P10BE:
2809 case PIX_FMT_YUV420P10BE:
2810 case PIX_FMT_YUV420P16BE:
2811 case PIX_FMT_YUV422P16BE:
2812 case PIX_FMT_YUV444P16BE:
2813 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A interleaves gray+alpha, so the YUYV luma extractor applies */
2815 case PIX_FMT_YUYV422 :
2816 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2817 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2818 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2819 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2820 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2821 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2822 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2823 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2824 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2825 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2826 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2827 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2831 case PIX_FMT_BGR4_BYTE:
2832 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2833 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2834 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2835 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2836 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2837 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2838 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2839 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2840 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2841 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2842 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* alpha extractors (the enclosing condition is elided in this excerpt) */
2845 switch (srcFormat) {
2847 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2849 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A alpha sits at odd byte offsets, matching the UYVY luma extractor */
2850 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* ---- horizontal scalers, chosen by source/destination bit depth ---- */
2854 if (c->srcBpc == 8) {
2855 if (c->dstBpc <= 10) {
2856 c->hyScale = c->hcScale = hScale8To15_c;
/* fast-bilinear path gets dedicated unfiltered scalers */
2857 if (c->flags & SWS_FAST_BILINEAR) {
2858 c->hyscale_fast = hyscale_fast_c;
2859 c->hcscale_fast = hcscale_fast_c;
2862 c->hyScale = c->hcScale = hScale8To19_c;
2865 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* ---- full/limited range conversion (YUV targets only) ---- */
2868 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2869 if (c->dstBpc <= 10) {
2871 c->lumConvertRange = lumRangeFromJpeg_c;
2872 c->chrConvertRange = chrRangeFromJpeg_c;
2874 c->lumConvertRange = lumRangeToJpeg_c;
2875 c->chrConvertRange = chrRangeToJpeg_c;
2879 c->lumConvertRange = lumRangeFromJpeg16_c;
2880 c->chrConvertRange = chrRangeFromJpeg16_c;
2882 c->lumConvertRange = lumRangeToJpeg16_c;
2883 c->chrConvertRange = chrRangeToJpeg16_c;
/* gray/mono conversions carry no chroma, so swScale() may skip hcscale() */
2888 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2889 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2890 c->needs_hcscale = 1;
2893 SwsFunc ff_getSwsFunc(SwsContext *c)
2895 sws_init_swScale_c(c);
2898 ff_sws_init_swScale_mmx(c);
2900 ff_sws_init_swScale_altivec(c);