2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients (BT.601), fixed-point scaled by
 * 1 << RGB2YUV_SHIFT. Luma (·Y) terms use the 219-step limited luma range,
 * chroma (·U/·V) terms the 224-step chroma range; negative macros carry the
 * sign of the corresponding matrix entry. */
39 #define RGB2YUV_SHIFT 15
40 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
41 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
42 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
43 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
44 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
45 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
46 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
47 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
48 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
52 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
55 more intelligent misalignment avoidance for the horizontal scaler
56 write special vertical cubic upscale version
57 optimize C code (YV12 / minmax)
58 add support for packed pixel YUV input & output
59 add support for Y8 output
60 optimize BGR24 & BGR32
61 add BGR4 output support
62 write special BGR->BGR scaler
/* Ordered-dither (Bayer-style) matrices used when reducing bit depth on
 * output. Each table's suffix is its dither amplitude. 8-byte alignment
 * allows the SIMD code paths to load whole rows at once.
 * NOTE(review): this listing has gaps (closing braces and preprocessor
 * lines are missing); table contents below are unchanged. */
/* 2x2 matrix, amplitude 4 — used for the 2-bit (4-level) channels. */
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 {  1,   3,   1,   3,   1,   3,   1,   3, },
67 {  2,   0,   2,   0,   2,   0,   2,   0, },
/* 2x2 matrix, amplitude 8 — used for the 3-bit (8-level) channels. */
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 {  6,   2,   6,   2,   6,   2,   6,   2, },
72 {  0,   4,   0,   4,   0,   4,   0,   4, },
/* 4x4 matrix, amplitude 16 — used for 4-bit (16-level) channels. */
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 {  8,   4,  11,   7,   8,   4,  11,   7, },
77 {  2,  14,   1,  13,   2,  14,   1,  13, },
78 { 10,   6,   9,   5,  10,   6,   9,   5, },
79 {  0,  12,   3,  15,   0,  12,   3,  15, },
/* 8x8 matrix, amplitude 32 — used for 5-bit channels. */
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17,   9,  23,  15,  16,   8,  22,  14, },
84 {  5,  29,   3,  27,   4,  28,   2,  26, },
85 { 21,  13,  19,  11,  20,  12,  18,  10, },
86 {  0,  24,   6,  30,   1,  25,   7,  31, },
87 { 16,   8,  22,  14,  17,   9,  23,  15, },
88 {  4,  28,   2,  26,   5,  29,   3,  27, },
89 { 20,  12,  18,  10,  21,  13,  19,  11, },
90 {  1,  25,   7,  31,   0,  24,   6,  30, },
/* 8x8 matrix, amplitude 73 — used for RGB8/BGR8 green channel. */
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 {  0,  55,  14,  68,   3,  58,  17,  72, },
95 { 37,  18,  50,  32,  40,  22,  54,  35, },
96 {  9,  64,   5,  59,  13,  67,   8,  63, },
97 { 46,  27,  41,  23,  49,  31,  44,  26, },
98 {  2,  57,  16,  71,   1,  56,  15,  70, },
99 { 39,  21,  52,  34,  38,  19,  51,  33, },
100 { 11,  66,   7,  62,  10,  65,   6,  60, },
101 { 48,  30,  43,  25,  47,  29,  42,  24, },
/* NOTE(review): the four tables below all declare the name dither_8x8_220.
 * In the upstream source these are mutually exclusive alternatives selected
 * by #if/#elif/#else preprocessor blocks (no correction, and gamma 1.5/2.0/
 * 2.5 corrected variants); those directive lines appear to be missing from
 * this listing — confirm against upstream before editing. */
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117,  62, 158, 103, 113,  58, 155, 100, },
107 { 34, 199,  21, 186,  31, 196,  17, 182, },
108 {144,  89, 131,  76, 141,  86, 127,  72, },
109 {  0, 165,  41, 206,  10, 175,  52, 217, },
110 {110,  55, 151,  96, 120,  65, 162, 107, },
111 { 28, 193,  14, 179,  38, 203,  24, 189, },
112 {138,  83, 124,  69, 148,  93, 134,  79, },
113 {  7, 172,  48, 213,   3, 168,  45, 210, },
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 {  0, 143,  18, 200,   2, 156,  25, 215, },
119 { 78,  28, 125,  64,  89,  36, 138,  74, },
120 { 10, 180,   3, 161,  16, 195,   8, 175, },
121 {109,  51,  93,  38, 121,  60, 105,  47, },
122 {  1, 152,  23, 210,   0, 147,  20, 205, },
123 { 85,  33, 134,  71,  81,  30, 130,  67, },
124 { 14, 190,   6, 171,  12, 185,   5, 166, },
125 {117,  57, 101,  44, 113,  54,  97,  41, },
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 {  0, 124,   8, 193,   0, 140,  12, 213, },
131 { 55,  14, 104,  42,  66,  19, 119,  52, },
132 {  3, 168,   1, 145,   6, 187,   3, 162, },
133 { 86,  31,  70,  21,  99,  39,  82,  28, },
134 {  0, 134,  11, 206,   0, 129,   9, 200, },
135 { 62,  17, 114,  48,  58,  16, 109,  45, },
136 {  5, 181,   2, 157,   4, 175,   1, 151, },
137 { 95,  36,  78,  26,  90,  34,  74,  24, },
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 {  0, 107,   3, 187,   0, 125,   6, 212, },
143 { 39,   7,  86,  28,  49,  11, 102,  36, },
144 {  1, 158,   0, 131,   3, 180,   1, 151, },
145 { 68,  19,  52,  12,  81,  25,  64,  17, },
146 {  0, 119,   5, 203,   0, 113,   4, 195, },
147 { 45,   9,  96,  33,  42,   8,  91,  30, },
148 {  2, 172,   1, 144,   2, 165,   0, 137, },
149 { 77,  23,  60,  15,  72,  21,  56,  14, },
/* 8x8 matrix, amplitude 128 — used for 1-bit-per-channel output. */
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 {  36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100,  4,124, 28, 98,  2,122, 26,},
155 {  52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 {  32, 64, 56, 88, 38, 70, 62, 94,},
158 {  96,  0,120, 24,102,  6,126, 30,},
159 {  48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104,  8,118, 22,110, 14,},
/* Constant 64 vector, exported for SIMD code (ff_ prefix = public symbol). */
162 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
163 {  64, 64, 64, 64, 64, 64, 64, 64 };
/* Store one 16-bit sample, clipped and biased, honoring endianness.
 * NOTE(review): the #if big_endian / #else / #endif lines appear to be
 * missing from this listing — only one of the two stores below is active
 * per expansion; confirm against upstream. `shift` is taken from the
 * expanding function's scope. */
165 #define output_pixel(pos, val, bias, signedness) \
167         AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
169         AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/* Unscaled (1:1 vertical) conversion of one 19-bit int32 plane to 16-bit
 * output: round (add half an LSB) then shift down and store via
 * output_pixel() with the requested endianness. */
172 static av_always_inline void
173 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174                         int big_endian, int output_bits)
177     int shift = 19 - output_bits;
179     for (i = 0; i < dstW; i++) {
180         int val = src[i] + (1 << (shift - 1));
181         output_pixel(&dest[i], val, 0, uint);
/* Vertically filter int32 source rows into one 16-bit output plane.
 * The accumulator is pre-biased by -0x8000<<(shift) (via the initial
 * 1 << (30-output_bits) and the 0x8000 re-add in output_pixel) so the
 * 31-bit intermediate fits signed int even with negative filter taps. */
185 static av_always_inline void
186 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187                          const int32_t **src, uint16_t *dest, int dstW,
188                          int big_endian, int output_bits)
191     int shift = 15 + 16 - output_bits;
193     for (i = 0; i < dstW; i++) {
194         int val = 1 << (30-output_bits);
197         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198          * filters (or anything with negative coeffs, the range can be slightly
199          * wider in both directions. To account for this overflow, we subtract
200          * a constant so it always fits in the signed range (assuming a
201          * reasonable filterSize), and re-add that at the end. */
203         for (j = 0; j < filterSize; j++)
204             val += src[j][i] * filter[j];
206         output_pixel(&dest[i], val, 0x8000, int);
/* Store one 9/10-bit sample, clipped to output_bits, honoring endianness.
 * NOTE(review): the #if big_endian conditional lines are missing from this
 * listing — only one store is active per expansion; confirm upstream. */
212 #define output_pixel(pos, val) \
214         AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
216         AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Unscaled (1:1 vertical) conversion of one 15-bit int16 plane to 9/10-bit
 * output with rounding; packing/endianness handled by output_pixel(). */
219 static av_always_inline void
220 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221                         int big_endian, int output_bits)
224     int shift = 15 - output_bits;
226     for (i = 0; i < dstW; i++) {
227         int val = src[i] + (1 << (shift - 1));
228         output_pixel(&dest[i], val);
/* Vertically filter int16 source rows into one 9/10-bit output plane.
 * Initial 1 << (26-output_bits) is the rounding constant for the
 * 11+16-output_bits downshift. */
232 static av_always_inline void
233 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234                         const int16_t **src, uint16_t *dest, int dstW,
235                         int big_endian, int output_bits)
238     int shift = 11 + 16 - output_bits;
240     for (i = 0; i < dstW; i++) {
241         int val = 1 << (26-output_bits);
244         for (j = 0; j < filterSize; j++)
245             val += src[j][i] * filter[j];
247         output_pixel(&dest[i], val);
/* Instantiate concrete yuv2plane1/yuv2planeX entry points for a given bit
 * depth and byte order by delegating to the 10- or 16-bit templates above.
 * `is_be` selects big-endian stores; `typeX_t` is the intermediate sample
 * type the template expects (int16_t for <=10 bit, int32_t for 16 bit). */
253 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
254 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
255                               uint8_t *dest, int dstW, \
256                               const uint8_t *dither, int offset)\
258     yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
259                          (uint16_t *) dest, dstW, is_be, bits); \
261 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
262                               const int16_t **src, uint8_t *dest, int dstW, \
263                               const uint8_t *dither, int offset)\
265     yuv2planeX_## template_size ## _c_template(filter, \
266                          filterSize, (const typeX_t **) src, \
267                          (uint16_t *) dest, dstW, is_be, bits); \
/* Concrete variants: 9/10-bit (both endiannesses) share the 10-bit
 * template; 16-bit uses the int32 template. */
269 yuv2NBPS( 9, BE, 1, 10, int16_t)
270 yuv2NBPS( 9, LE, 0, 10, int16_t)
271 yuv2NBPS(10, BE, 1, 10, int16_t)
272 yuv2NBPS(10, LE, 0, 10, int16_t)
273 yuv2NBPS(16, BE, 1, 16, int32_t)
274 yuv2NBPS(16, LE, 0, 16, int32_t)
/* Vertically filter int16 rows into one 8-bit plane; `dither` (an 8-entry
 * row, indexed cyclically with `offset`) is pre-added at 12-bit precision
 * before the >>19 that drops back to 8 bits. */
276 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
277                            const int16_t **src, uint8_t *dest, int dstW,
278                            const uint8_t *dither, int offset)
281     for (i=0; i<dstW; i++) {
282         int val = dither[(i + offset) & 7] << 12;
284         for (j=0; j<filterSize; j++)
285             val += src[j][i] * filter[j];
287         dest[i]= av_clip_uint8(val>>19);
/* Unscaled conversion of one 15-bit int16 plane to 8-bit output, with
 * per-pixel ordered dithering instead of plain rounding. */
291 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
292                            const uint8_t *dither, int offset)
295     for (i=0; i<dstW; i++) {
296         int val = (src[i] + dither[(i + offset) & 7]) >> 7;
297         dest[i]= av_clip_uint8(val);
/* Vertically filter chroma into a semiplanar (interleaved UV) destination.
 * NV12 stores U first then V per pair; the second loop (the NV21 path —
 * its `else` line is missing from this listing, confirm upstream) stores
 * V first. chrDither8 offsets U and V by different table positions to
 * decorrelate the dither between the two channels. */
301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302                         const int16_t **chrUSrc, const int16_t **chrVSrc,
303                         uint8_t *dest, int chrDstW)
305     enum PixelFormat dstFormat = c->dstFormat;
306     const uint8_t *chrDither = c->chrDither8;
309     if (dstFormat == PIX_FMT_NV12)
310         for (i=0; i<chrDstW; i++) {
311             int u = chrDither[i & 7] << 12;
312             int v = chrDither[(i + 3) & 7] << 12;
314             for (j=0; j<chrFilterSize; j++) {
315                 u += chrUSrc[j][i] * chrFilter[j];
316                 v += chrVSrc[j][i] * chrFilter[j];
319             dest[2*i]= av_clip_uint8(u>>19);
320             dest[2*i+1]= av_clip_uint8(v>>19);
323         for (i=0; i<chrDstW; i++) {
324             int u = chrDither[i & 7] << 12;
325             int v = chrDither[(i + 3) & 7] << 12;
327             for (j=0; j<chrFilterSize; j++) {
328                 u += chrUSrc[j][i] * chrFilter[j];
329                 v += chrVSrc[j][i] * chrFilter[j];
332             dest[2*i]= av_clip_uint8(v>>19);
333             dest[2*i+1]= av_clip_uint8(u>>19);
/* Store one gray16 sample with the endianness implied by `target`.
 * NOTE(review): the macro body after the BE test (the AV_WB16/AV_WL16
 * stores and the #endif-equivalent lines) is missing from this listing. */
337 #define output_pixel(pos, val) \
338     if (target == PIX_FMT_GRAY16BE) { \
/* Full vertical-filter path to 16-bit grayscale, two pixels per iteration.
 * Accumulators start at (1<<14) - 0x40000000: the subtracted constant keeps
 * the sum in signed-int range with negative filter taps; the matching
 * 0x8000 is re-added when storing (see the comment pattern in
 * yuv2planeX_16_c_template). Chroma/alpha inputs are ignored for gray. */
344 static av_always_inline void
345 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346                         const int32_t **lumSrc, int lumFilterSize,
347                         const int16_t *chrFilter, const int32_t **chrUSrc,
348                         const int32_t **chrVSrc, int chrFilterSize,
349                         const int32_t **alpSrc, uint16_t *dest, int dstW,
350                         int y, enum PixelFormat target)
354     for (i = 0; i < (dstW >> 1); i++) {
356         int Y1 = (1 << 14) - 0x40000000;
357         int Y2 = (1 << 14) - 0x40000000;
359         for (j = 0; j < lumFilterSize; j++) {
360             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
361             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* NOTE(review): the >> shift between accumulation and clipping appears to
 * be on lines missing from this listing — confirm against upstream. */
365         Y1 = av_clip_int16(Y1);
366         Y2 = av_clip_int16(Y2);
367         output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368         output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
/* Bilinear (two-row blend) path to 16-bit grayscale: each output sample is
 * a yalpha-weighted mix of the two buffered luma rows (weights sum to
 * 4095+... ~ 1<<12, hence the >>15 after the 19-bit inputs). */
372 static av_always_inline void
373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374                         const int32_t *ubuf[2], const int32_t *vbuf[2],
375                         const int32_t *abuf[2], uint16_t *dest, int dstW,
376                         int yalpha, int uvalpha, int y,
377                         enum PixelFormat target)
379     int  yalpha1 = 4095 - yalpha;
381     const int32_t *buf0 = buf[0], *buf1 = buf[1];
383     for (i = 0; i < (dstW >> 1); i++) {
384         int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
385         int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
387         output_pixel(&dest[i * 2 + 0], Y1);
388         output_pixel(&dest[i * 2 + 1], Y2);
/* Fast 1:1 (no vertical interpolation) path to 16-bit grayscale: the
 * buffered row is already at output precision minus one bit, so a simple
 * << 1 rescales it before the store. */
392 static av_always_inline void
393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394                         const int32_t *ubuf[2], const int32_t *vbuf[2],
395                         const int32_t *abuf0, uint16_t *dest, int dstW,
396                         int uvalpha, int y, enum PixelFormat target)
400     for (i = 0; i < (dstW >> 1); i++) {
401         int Y1 = buf0[i * 2    ] << 1;
402         int Y2 = buf0[i * 2 + 1] << 1;
404         output_pixel(&dest[i * 2 + 0], Y1);
405         output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X full filter, _2 bilinear,
 * _1 unscaled) for a 16-bit packed output template. The public signatures
 * use int16_t pointers (the generic vtable type); the wrappers only cast
 * them to the int32_t/uint16_t types the >8-bit templates actually use. */
411 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
412 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
413                         const int16_t **_lumSrc, int lumFilterSize, \
414                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
415                         const int16_t **_chrVSrc, int chrFilterSize, \
416                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
419     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
420                   **chrUSrc = (const int32_t **) _chrUSrc, \
421                   **chrVSrc = (const int32_t **) _chrVSrc, \
422                   **alpSrc  = (const int32_t **) _alpSrc; \
423     uint16_t *dest = (uint16_t *) _dest; \
424     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
425                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
426                           alpSrc, dest, dstW, y, fmt); \
429 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
430                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
431                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
432                         int yalpha, int uvalpha, int y) \
434     const int32_t **buf  = (const int32_t **) _buf, \
435                   **ubuf = (const int32_t **) _ubuf, \
436                   **vbuf = (const int32_t **) _vbuf, \
437                   **abuf = (const int32_t **) _abuf; \
438     uint16_t *dest = (uint16_t *) _dest; \
439     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
440                           dest, dstW, yalpha, uvalpha, y, fmt); \
443 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
444                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
445                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
446                         int uvalpha, int y) \
448     const int32_t *buf0  = (const int32_t *)  _buf0, \
449                  **ubuf  = (const int32_t **) _ubuf, \
450                  **vbuf  = (const int32_t **) _vbuf, \
451                   *abuf0 = (const int32_t *)  _abuf0; \
452     uint16_t *dest = (uint16_t *) _dest; \
453     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
454                                   dstW, uvalpha, y, fmt); \
/* gray16 output, both endiannesses. */
457 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
458 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* Store one packed monochrome byte; MONOBLACK vs MONOWHITE selects whether
 * the accumulated bits are written as-is or inverted.
 * NOTE(review): the macro body after the target test is missing from this
 * listing — confirm the store/invert lines against upstream. */
460 #define output_pixel(pos, acc) \
461     if (target == PIX_FMT_MONOBLACK) { \
/* Full vertical-filter path to 1-bit monochrome. Luma is filtered two
 * pixels per iteration, dithered with the amplitude-220 matrix, thresholded
 * through the gray LUT `g` (table_gU/gV at neutral chroma = 128), and the
 * resulting bits are shifted into `acc`; output_pixel flushes each full
 * byte. The `(Y1 | Y2) & 0x100` test clips only when some value actually
 * left the 8-bit range. */
467 static av_always_inline void
468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469                       const int16_t **lumSrc, int lumFilterSize,
470                       const int16_t *chrFilter, const int16_t **chrUSrc,
471                       const int16_t **chrVSrc, int chrFilterSize,
472                       const int16_t **alpSrc, uint8_t *dest, int dstW,
473                       int y, enum PixelFormat target)
475     const uint8_t * const d128=dither_8x8_220[y&7];
476     uint8_t *g = c->table_gU[128] + c->table_gV[128];
480     for (i = 0; i < dstW - 1; i += 2) {
485         for (j = 0; j < lumFilterSize; j++) {
486             Y1 += lumSrc[j][i]   * lumFilter[j];
487             Y2 += lumSrc[j][i+1] * lumFilter[j];
491         if ((Y1 | Y2) & 0x100) {
492             Y1 = av_clip_uint8(Y1);
493             Y2 = av_clip_uint8(Y2);
495         acc += acc + g[Y1 + d128[(i + 0) & 7]];
496         acc += acc + g[Y2 + d128[(i + 1) & 7]];
498             output_pixel(*dest++, acc);
/* Bilinear (two-row blend) path to 1-bit monochrome: blends eight luma
 * pairs per iteration, dithers each through `g`, and packs the eight
 * resulting bits MSB-first into one output byte via repeated
 * acc += acc + bit. */
503 static av_always_inline void
504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505                       const int16_t *ubuf[2], const int16_t *vbuf[2],
506                       const int16_t *abuf[2], uint8_t *dest, int dstW,
507                       int yalpha, int uvalpha, int y,
508                       enum PixelFormat target)
510     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
511     const uint8_t * const d128 = dither_8x8_220[y & 7];
512     uint8_t *g = c->table_gU[128] + c->table_gV[128];
513     int  yalpha1 = 4095 - yalpha;
516     for (i = 0; i < dstW - 7; i += 8) {
517         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
518         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525         output_pixel(*dest++, acc);
/* Fast 1:1 path to 1-bit monochrome: same bit-packing scheme as the _2
 * variant, but the single buffered row is used directly (>>7 back to
 * 8-bit) with no vertical blend. */
529 static av_always_inline void
530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531                       const int16_t *ubuf[2], const int16_t *vbuf[2],
532                       const int16_t *abuf0, uint8_t *dest, int dstW,
533                       int uvalpha, int y, enum PixelFormat target)
535     const uint8_t * const d128 = dither_8x8_220[y & 7];
536     uint8_t *g = c->table_gU[128] + c->table_gV[128];
539     for (i = 0; i < dstW - 7; i += 8) {
540         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
541         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548         output_pixel(*dest++, acc);
/* Generate the three public entry points (_X/_2/_1) for an 8-bit packed
 * output template — same structure as YUV2PACKED16WRAPPER but with no
 * pointer-type casting needed. */
554 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
555 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
556                                 const int16_t **lumSrc, int lumFilterSize, \
557                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
558                                 const int16_t **chrVSrc, int chrFilterSize, \
559                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
562     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
563                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
564                                   alpSrc, dest, dstW, y, fmt); \
567 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
568                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
569                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
570                                 int yalpha, int uvalpha, int y) \
572     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
573                                   dest, dstW, yalpha, uvalpha, y, fmt); \
576 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
577                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
578                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
579                                 int uvalpha, int y) \
581     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
582                                   abuf0, dest, dstW, uvalpha, \
/* Monochrome output, both polarities. */
586 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
587 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Write one 4:2:2 macro-pixel. YUYV stores Y at even bytes and chroma at
 * odd bytes; the second branch (UYVY — its else/U/V store lines are missing
 * from this listing, confirm upstream) shifts everything by one byte. */
589 #define output_pixels(pos, Y1, U, Y2, V) \
590     if (target == PIX_FMT_YUYV422) { \
591         dest[pos + 0] = Y1; \
593         dest[pos + 2] = Y2; \
597         dest[pos + 1] = Y1; \
599         dest[pos + 3] = Y2; \
/* Full vertical-filter path to packed 4:2:2 (YUYV/UYVY): two luma and one
 * chroma pair are filtered per iteration, clipped only when some value
 * escaped 8-bit range, and written as one 4-byte macro-pixel. */
602 static av_always_inline void
603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604                      const int16_t **lumSrc, int lumFilterSize,
605                      const int16_t *chrFilter, const int16_t **chrUSrc,
606                      const int16_t **chrVSrc, int chrFilterSize,
607                      const int16_t **alpSrc, uint8_t *dest, int dstW,
608                      int y, enum PixelFormat target)
612     for (i = 0; i < (dstW >> 1); i++) {
619         for (j = 0; j < lumFilterSize; j++) {
620             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
621             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
623         for (j = 0; j < chrFilterSize; j++) {
624             U += chrUSrc[j][i] * chrFilter[j];
625             V += chrVSrc[j][i] * chrFilter[j];
631         if ((Y1 | Y2 | U | V) & 0x100) {
632             Y1 = av_clip_uint8(Y1);
633             Y2 = av_clip_uint8(Y2);
634             U  = av_clip_uint8(U);
635             V  = av_clip_uint8(V);
637         output_pixels(4*i, Y1, U, Y2, V);
/* Bilinear (two-row blend) path to packed 4:2:2: luma and chroma are each
 * blended from their two buffered rows with the 12-bit alpha weights, then
 * reduced to 8 bits (>>19) and written as macro-pixels. */
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643                      const int16_t *ubuf[2], const int16_t *vbuf[2],
644                      const int16_t *abuf[2], uint8_t *dest, int dstW,
645                      int yalpha, int uvalpha, int y,
646                      enum PixelFormat target)
648     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
649                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651     int  yalpha1 = 4095 - yalpha;
652     int uvalpha1 = 4095 - uvalpha;
655     for (i = 0; i < (dstW >> 1); i++) {
656         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
657         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
658         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
659         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
661         output_pixels(i * 4, Y1, U, Y2, V);
/* Fast 1:1 path to packed 4:2:2. When uvalpha < 2048 the chroma row is
 * used as-is (ubuf1/vbuf1); otherwise the two chroma rows are averaged
 * (>>8 = >>7 for scale plus /2 for the average). Luma always comes
 * straight from buf0. */
665 static av_always_inline void
666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667                      const int16_t *ubuf[2], const int16_t *vbuf[2],
668                      const int16_t *abuf0, uint8_t *dest, int dstW,
669                      int uvalpha, int y, enum PixelFormat target)
671     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
675     if (uvalpha < 2048) {
676         for (i = 0; i < (dstW >> 1); i++) {
677             int Y1 = buf0[i * 2]     >> 7;
678             int Y2 = buf0[i * 2 + 1] >> 7;
679             int U  = ubuf1[i]        >> 7;
680             int V  = vbuf1[i]        >> 7;
682             output_pixels(i * 4, Y1, U, Y2, V);
685         for (i = 0; i < (dstW >> 1); i++) {
686             int Y1 =  buf0[i * 2]          >> 7;
687             int Y2 =  buf0[i * 2 + 1]      >> 7;
688             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
689             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
691             output_pixels(i * 4, Y1, U, Y2, V);
/* Packed 4:2:2 output, both component orders. */
698 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
699 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* For 48-bit packed RGB: R_B/B_R swap the red and blue accumulators so the
 * same template emits both RGB48 and BGR48 byte orders; output_pixel stores
 * one 16-bit component with the endianness of `target`.
 * NOTE(review): the AV_WB16/AV_WL16 store lines of output_pixel are missing
 * from this listing — confirm against upstream. */
701 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
702 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
703 #define output_pixel(pos, val) \
704     if (isBE(target)) { \
/* Full vertical-filter path to 48-bit RGB/BGR. Luma accumulators are
 * pre-biased by -0x40000000 to survive negative filter taps (re-centered
 * later); chroma is pre-biased by -128 in its fixed-point scale. After
 * filtering, Y/U/V are run through the context's fixed-point YUV->RGB
 * coefficients, clipped to 30 bits and reduced to 16 bits per component. */
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712                        const int32_t **lumSrc, int lumFilterSize,
713                        const int16_t *chrFilter, const int32_t **chrUSrc,
714                        const int32_t **chrVSrc, int chrFilterSize,
715                        const int32_t **alpSrc, uint16_t *dest, int dstW,
716                        int y, enum PixelFormat target)
720     for (i = 0; i < (dstW >> 1); i++) {
722         int Y1 = -0x40000000;
723         int Y2 = -0x40000000;
724         int U  = -128 << 23; // 19
728         for (j = 0; j < lumFilterSize; j++) {
729             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
730             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
732         for (j = 0; j < chrFilterSize; j++) {
733             U += chrUSrc[j][i] * chrFilter[j];
734             V += chrVSrc[j][i] * chrFilter[j];
737         // 8bit: 12+15=27; 16-bit: 12+19=31
745         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746         Y1 -= c->yuv2rgb_y_offset;
747         Y2 -= c->yuv2rgb_y_offset;
748         Y1 *= c->yuv2rgb_y_coeff;
749         Y2 *= c->yuv2rgb_y_coeff;
752         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
754         R = V * c->yuv2rgb_v2r_coeff;
755         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756         B = U * c->yuv2rgb_u2b_coeff;
758         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
761         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
764         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Bilinear (two-row blend) path to 48-bit RGB/BGR: blend the two buffered
 * rows (>>14 keeps extra precision for the 16-bit output), then apply the
 * same fixed-point YUV->RGB matrix and 30-bit clip as the _X variant. */
769 static av_always_inline void
770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771                        const int32_t *ubuf[2], const int32_t *vbuf[2],
772                        const int32_t *abuf[2], uint16_t *dest, int dstW,
773                        int yalpha, int uvalpha, int y,
774                        enum PixelFormat target)
776     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
777                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779     int  yalpha1 = 4095 - yalpha;
780     int uvalpha1 = 4095 - uvalpha;
783     for (i = 0; i < (dstW >> 1); i++) {
784         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
785         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
786         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
787         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
790         Y1 -= c->yuv2rgb_y_offset;
791         Y2 -= c->yuv2rgb_y_offset;
792         Y1 *= c->yuv2rgb_y_coeff;
793         Y2 *= c->yuv2rgb_y_coeff;
797         R = V * c->yuv2rgb_v2r_coeff;
798         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799         B = U * c->yuv2rgb_u2b_coeff;
801         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
803         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
806         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Fast 1:1 path to 48-bit RGB/BGR. First branch (uvalpha < 2048) uses one
 * chroma row directly; second branch averages the two chroma rows (>>3 =
 * >>2 scale plus /2). Both then run the fixed-point YUV->RGB matrix and
 * 30-bit clip as in the other rgb48 variants. */
811 static av_always_inline void
812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813                        const int32_t *ubuf[2], const int32_t *vbuf[2],
814                        const int32_t *abuf0, uint16_t *dest, int dstW,
815                        int uvalpha, int y, enum PixelFormat target)
817     const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
821     if (uvalpha < 2048) {
822         for (i = 0; i < (dstW >> 1); i++) {
823             int Y1 = (buf0[i * 2]    ) >> 2;
824             int Y2 = (buf0[i * 2 + 1]) >> 2;
825             int U  = (ubuf0[i] + (-128 << 11)) >> 2;
826             int V  = (vbuf0[i] + (-128 << 11)) >> 2;
829             Y1 -= c->yuv2rgb_y_offset;
830             Y2 -= c->yuv2rgb_y_offset;
831             Y1 *= c->yuv2rgb_y_coeff;
832             Y2 *= c->yuv2rgb_y_coeff;
836             R = V * c->yuv2rgb_v2r_coeff;
837             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838             B = U * c->yuv2rgb_u2b_coeff;
840             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
842             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
845             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
849         for (i = 0; i < (dstW >> 1); i++) {
850             int Y1 = (buf0[i * 2]    ) >> 2;
851             int Y2 = (buf0[i * 2 + 1]) >> 2;
852             int U  = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853             int V  = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
856             Y1 -= c->yuv2rgb_y_offset;
857             Y2 -= c->yuv2rgb_y_offset;
858             Y1 *= c->yuv2rgb_y_coeff;
859             Y2 *= c->yuv2rgb_y_coeff;
863             R = V * c->yuv2rgb_v2r_coeff;
864             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865             B = U * c->yuv2rgb_u2b_coeff;
867             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
869             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
872             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit output: RGB and BGR, both endiannesses, from one template. */
882 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
888  * Write out 2 RGB pixels in the target pixel format. This function takes a
889  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
890  * things like endianness conversion and shifting. The caller takes care of
891  * setting the correct offset in these tables from the chroma (U/V) values.
892  * This function then uses the luminance (Y1/Y2) values to write out the
893  * correct RGB values into the destination buffer.
895 static av_always_inline void
896 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897               unsigned A1, unsigned A2,
898               const void *_r, const void *_g, const void *_b, int y,
899               enum PixelFormat target, int hasAlpha)
    /* 32-bit RGBA-family: per-channel LUTs already hold the component in
     * its final bit position, so a pixel is just r[Y]+g[Y]+b[Y] (+alpha). */
901     if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902         target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903         uint32_t *dest = (uint32_t *) _dest;
904         const uint32_t *r = (const uint32_t *) _r;
905         const uint32_t *g = (const uint32_t *) _g;
906         const uint32_t *b = (const uint32_t *) _b;
        /* Alpha goes in bits 24..31 except for the *32_1 layouts, where it
         * occupies the low byte (sh == 0). */
909         int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
911         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
915         int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
917             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
920             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    /* 24-bit packed RGB/BGR: three separate byte stores per pixel; r_b/b_r
     * swap red and blue for the BGR byte order. */
924     } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925         uint8_t *dest = (uint8_t *) _dest;
926         const uint8_t *r = (const uint8_t *) _r;
927         const uint8_t *g = (const uint8_t *) _g;
928         const uint8_t *b = (const uint8_t *) _b;
930 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
931 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
932         dest[i * 6 + 0] = r_b[Y1];
933         dest[i * 6 + 1] =   g[Y1];
934         dest[i * 6 + 2] = b_r[Y1];
935         dest[i * 6 + 3] = r_b[Y2];
936         dest[i * 6 + 4] =   g[Y2];
937         dest[i * 6 + 5] = b_r[Y2];
    /* 16/15/12-bit packed formats: LUT lookups are offset by per-channel
     * ordered-dither values chosen per scanline (y) and pixel parity. */
940     } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941                target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942                target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943         uint16_t *dest = (uint16_t *) _dest;
944         const uint16_t *r = (const uint16_t *) _r;
945         const uint16_t *g = (const uint16_t *) _g;
946         const uint16_t *b = (const uint16_t *) _b;
947         int dr1, dg1, db1, dr2, dg2, db2;
949         if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950             dr1 = dither_2x2_8[ y & 1     ][0];
951             dg1 = dither_2x2_4[ y & 1     ][0];
952             db1 = dither_2x2_8[(y & 1) ^ 1][0];
953             dr2 = dither_2x2_8[ y & 1     ][1];
954             dg2 = dither_2x2_4[ y & 1     ][1];
955             db2 = dither_2x2_8[(y & 1) ^ 1][1];
956         } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957             dr1 = dither_2x2_8[ y & 1     ][0];
958             dg1 = dither_2x2_8[ y & 1     ][1];
959             db1 = dither_2x2_8[(y & 1) ^ 1][0];
960             dr2 = dither_2x2_8[ y & 1     ][1];
961             dg2 = dither_2x2_8[ y & 1     ][0];
962             db2 = dither_2x2_8[(y & 1) ^ 1][1];
964             dr1 = dither_4x4_16[ y & 3     ][0];
965             dg1 = dither_4x4_16[ y & 3     ][1];
966             db1 = dither_4x4_16[(y & 3) ^ 3][0];
967             dr2 = dither_4x4_16[ y & 3     ][1];
968             dg2 = dither_4x4_16[ y & 3     ][0];
969             db2 = dither_4x4_16[(y & 3) ^ 3][1];
972         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    /* 8-bit and 4-bit palettized-style formats, with heavier dithering;
     * the 4-bit packed case stores two pixels per byte. */
974     } else /* 8/4-bit */ {
975         uint8_t *dest = (uint8_t *) _dest;
976         const uint8_t *r = (const uint8_t *) _r;
977         const uint8_t *g = (const uint8_t *) _g;
978         const uint8_t *b = (const uint8_t *) _b;
979         int dr1, dg1, db1, dr2, dg2, db2;
981         if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982             const uint8_t * const d64 = dither_8x8_73[y & 7];
983             const uint8_t * const d32 = dither_8x8_32[y & 7];
984             dr1 = dg1 = d32[(i * 2 + 0) & 7];
985             db1 =       d64[(i * 2 + 0) & 7];
986             dr2 = dg2 = d32[(i * 2 + 1) & 7];
987             db2 =       d64[(i * 2 + 1) & 7];
989             const uint8_t * const d64  = dither_8x8_73 [y & 7];
990             const uint8_t * const d128 = dither_8x8_220[y & 7];
991             dr1 = db1 = d128[(i * 2 + 0) & 7];
992                   dg1 =  d64[(i * 2 + 0) & 7];
993             dr2 = db2 = d128[(i * 2 + 1) & 7];
994                   dg2 =  d64[(i * 2 + 1) & 7];
997         if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1001             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Full vertical-filter path for LUT-based RGB output: filter Y/U/V (and
 * alpha when hasAlpha), clip only when some value left 8-bit range, index
 * the per-channel LUTs by chroma, and delegate the store to
 * yuv2rgb_write(). */
1007 static av_always_inline void
1008 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009                      const int16_t **lumSrc, int lumFilterSize,
1010                      const int16_t *chrFilter, const int16_t **chrUSrc,
1011                      const int16_t **chrVSrc, int chrFilterSize,
1012                      const int16_t **alpSrc, uint8_t *dest, int dstW,
1013                      int y, enum PixelFormat target, int hasAlpha)
1017     for (i = 0; i < (dstW >> 1); i++) {
1023         int av_unused A1, A2;
1024         const void *r, *g, *b;
1026         for (j = 0; j < lumFilterSize; j++) {
1027             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
1028             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1030         for (j = 0; j < chrFilterSize; j++) {
1031             U += chrUSrc[j][i] * chrFilter[j];
1032             V += chrVSrc[j][i] * chrFilter[j];
1038         if ((Y1 | Y2 | U | V) & 0x100) {
1039             Y1 = av_clip_uint8(Y1);
1040             Y2 = av_clip_uint8(Y2);
1041             U  = av_clip_uint8(U);
1042             V  = av_clip_uint8(V);
1047             for (j = 0; j < lumFilterSize; j++) {
1048                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
1049                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1053             if ((A1 | A2) & 0x100) {
1054                 A1 = av_clip_uint8(A1);
1055                 A2 = av_clip_uint8(A2);
1059         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1061         g = (c->table_gU[U] + c->table_gV[V]);
1064         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065                       r, g, b, y, target, hasAlpha);
/* Bilinear (two-row blend) path for LUT-based RGB output: blend the two
 * buffered rows of each component back to 8 bits (>>19), look up the
 * per-channel LUTs by chroma, and delegate the store to yuv2rgb_write(). */
1069 static av_always_inline void
1070 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1071                      const int16_t *ubuf[2], const int16_t *vbuf[2],
1072                      const int16_t *abuf[2], uint8_t *dest, int dstW,
1073                      int yalpha, int uvalpha, int y,
1074                      enum PixelFormat target, int hasAlpha)
1076     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
1077                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1078                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1079                   *abuf0 = hasAlpha ? abuf[0] : NULL,
1080                   *abuf1 = hasAlpha ? abuf[1] : NULL;
1081     int  yalpha1 = 4095 - yalpha;
1082     int uvalpha1 = 4095 - uvalpha;
1085     for (i = 0; i < (dstW >> 1); i++) {
1086         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
1087         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
1088         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
1089         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
1091         const void *r =  c->table_rV[V],
1092                    *g = (c->table_gU[U] + c->table_gV[V]),
1093                    *b =  c->table_bU[U];
1096             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
1097             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1100         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1101                       r, g, b, y, target, hasAlpha);
/*
 * Single-input-line packed RGB output (the "1" variant, unscaled y).
 * With uvalpha < 2048 the nearest chroma line (ubuf1/vbuf1) is used as-is;
 * otherwise the two chroma lines are averaged (the extra >> 8 vs >> 7).
 * NOTE(review): some brace/declaration lines are missing from this excerpt.
 */
1105 static av_always_inline void
1106 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1107 const int16_t *ubuf[2], const int16_t *vbuf[2],
1108 const int16_t *abuf0, uint8_t *dest, int dstW,
1109 int uvalpha, int y, enum PixelFormat target,
1112 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1113 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1116 if (uvalpha < 2048) {
1117 for (i = 0; i < (dstW >> 1); i++) {
1118 int Y1 = buf0[i * 2] >> 7;
1119 int Y2 = buf0[i * 2 + 1] >> 7;
1120 int U = ubuf1[i] >> 7;
1121 int V = vbuf1[i] >> 7;
1123 const void *r = c->table_rV[V],
1124 *g = (c->table_gU[U] + c->table_gV[V]),
1125 *b = c->table_bU[U];
1128 A1 = abuf0[i * 2 ] >> 7;
1129 A2 = abuf0[i * 2 + 1] >> 7;
1132 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1133 r, g, b, y, target, hasAlpha);
/* chroma from the average of the two neighbouring lines */
1136 for (i = 0; i < (dstW >> 1); i++) {
1137 int Y1 = buf0[i * 2] >> 7;
1138 int Y2 = buf0[i * 2 + 1] >> 7;
1139 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1140 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1142 const void *r = c->table_rV[V],
1143 *g = (c->table_gU[U] + c->table_gV[V]),
1144 *b = c->table_bU[U];
1147 A1 = abuf0[i * 2 ] >> 7;
1148 A2 = abuf0[i * 2 + 1] >> 7;
1151 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1152 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: stamp out non-inline _X_c / _2_c / _1_c entry points
 * that call the shared av_always_inline templates with a compile-time
 * 'fmt'/'hasAlpha', so each output format gets specialized code.
 * YUV2RGBWRAPPERX emits only the _X variant; YUV2RGBWRAPPER emits all three.
 */
1157 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1159 const int16_t **lumSrc, int lumFilterSize, \
1160 const int16_t *chrFilter, const int16_t **chrUSrc, \
1161 const int16_t **chrVSrc, int chrFilterSize, \
1162 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1165 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1166 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1167 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1169 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1170 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1171 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1172 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1173 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1174 int yalpha, int uvalpha, int y) \
1176 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1177 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1180 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1181 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1182 const int16_t *abuf0, uint8_t *dest, int dstW, \
1183 int uvalpha, int y) \
1185 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1186 dstW, uvalpha, y, fmt, hasAlpha); \
/*
 * Instantiations: one set of output functions per packed RGB destination.
 * The 32-bit variants with the runtime hasAlpha expression are the generic
 * ones; a32/x32 are the compile-time-specialized alpha / no-alpha versions.
 */
1190 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1191 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1193 #if CONFIG_SWSCALE_ALPHA
1194 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1195 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1197 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1198 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1200 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1201 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1202 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1203 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1204 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1205 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1206 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1207 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-interpolation variant: one chroma sample per output pixel
 * (no 2:1 sharing), computing R/G/B with the per-context coefficients
 * instead of lookup tables.  'step' is the bytes-per-pixel of the target.
 * R/G/B are kept at 30-bit precision and range-checked via the 0xC0000000
 * mask before the (not shown here) byte writes.
 * NOTE(review): the per-format store code between the dest[0]/dest[3]
 * lines is missing from this excerpt; visible code kept byte-identical.
 */
1209 static av_always_inline void
1210 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1211 const int16_t **lumSrc, int lumFilterSize,
1212 const int16_t *chrFilter, const int16_t **chrUSrc,
1213 const int16_t **chrVSrc, int chrFilterSize,
1214 const int16_t **alpSrc, uint8_t *dest,
1215 int dstW, int y, enum PixelFormat target, int hasAlpha)
1218 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1220 for (i = 0; i < dstW; i++) {
1228 for (j = 0; j < lumFilterSize; j++) {
1229 Y += lumSrc[j][i] * lumFilter[j];
1231 for (j = 0; j < chrFilterSize; j++) {
1232 U += chrUSrc[j][i] * chrFilter[j];
1233 V += chrVSrc[j][i] * chrFilter[j];
1240 for (j = 0; j < lumFilterSize; j++) {
1241 A += alpSrc[j][i] * lumFilter[j];
1245 A = av_clip_uint8(A);
1247 Y -= c->yuv2rgb_y_offset;
1248 Y *= c->yuv2rgb_y_coeff;
1250 R = Y + V*c->yuv2rgb_v2r_coeff;
1251 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only if a component left the 30-bit range */
1253 if ((R | G | B) & 0xC0000000) {
1254 R = av_clip_uintp2(R, 30);
1255 G = av_clip_uintp2(G, 30);
1256 B = av_clip_uintp2(B, 30);
1261 dest[0] = hasAlpha ? A : 255;
1275 dest[3] = hasAlpha ? A : 255;
1278 dest[0] = hasAlpha ? A : 255;
1293 dest[3] = hasAlpha ? A : 255;
/* Full-chroma instantiations (X variant only) for the 32-bit and 24-bit
 * packed RGB orders; a.../x... pairs are the alpha / no-alpha specializations. */
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1303 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1306 #if CONFIG_SWSCALE_ALPHA
1307 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1308 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1309 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1310 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1312 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1313 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1314 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1315 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1317 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1318 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/*
 * Fill a width x height rectangle of an 8-bit plane with a constant value,
 * starting at output row 'y'.  'stride' is the plane's line size in bytes.
 */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
                                       int y, uint8_t val)
{
    uint8_t *row = plane + stride * y;
    int line;

    for (line = 0; line < height; line++) {
        memset(row, val, width);
        row += stride;
    }
}
/* Endian-aware 16-bit component load, plus r/b aliases that swap the
 * red/blue accumulators for the BGR48 layouts so one template serves both. */
1332 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1334 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1335 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * Convert one line of 16-bit-per-component RGB/BGR to 16-bit luma using
 * the BT.601 coefficients (RY/GY/BY); rounding term keeps the limited range.
 */
1337 static av_always_inline void
1338 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1339 enum PixelFormat origin)
1342 for (i = 0; i < width; i++) {
1343 unsigned int r_b = input_pixel(&src[i*3+0]);
1344 unsigned int g = input_pixel(&src[i*3+1]);
1345 unsigned int b_r = input_pixel(&src[i*3+2]);
1347 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Convert one line of 48-bit RGB/BGR to 16-bit chroma (no subsampling).
 * Only src1 is read; src2 is presumably required equal — TODO confirm
 * against the callers (the 8-bit paths assert src1 == src2).
 */
1351 static av_always_inline void
1352 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1353 const uint16_t *src1, const uint16_t *src2,
1354 int width, enum PixelFormat origin)
1358 for (i = 0; i < width; i++) {
1359 int r_b = input_pixel(&src1[i*3+0]);
1360 int g = input_pixel(&src1[i*3+1]);
1361 int b_r = input_pixel(&src1[i*3+2]);
1363 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1364 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * As rgb48ToUV_c_template, but with 2:1 horizontal chroma subsampling:
 * each output sample averages two adjacent input pixels (rounded).
 */
1368 static av_always_inline void
1369 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1370 const uint16_t *src1, const uint16_t *src2,
1371 int width, enum PixelFormat origin)
1375 for (i = 0; i < width; i++) {
1376 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1377 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1378 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1380 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1381 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Stamp out the public uint8_t* entry points for each 48-bit format:
 * ToY / ToUV / ToUV_half, casting the byte pointers to uint16_t* and
 * forwarding to the shared templates with a compile-time 'origin'.
 */
1389 #define rgb48funcs(pattern, BE_LE, origin) \
1390 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1391 int width, uint32_t *unused) \
1393 const uint16_t *src = (const uint16_t *) _src; \
1394 uint16_t *dst = (uint16_t *) _dst; \
1395 rgb48ToY_c_template(dst, src, width, origin); \
1398 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1399 const uint8_t *_src1, const uint8_t *_src2, \
1400 int width, uint32_t *unused) \
1402 const uint16_t *src1 = (const uint16_t *) _src1, \
1403 *src2 = (const uint16_t *) _src2; \
1404 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1405 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1408 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1409 const uint8_t *_src1, const uint8_t *_src2, \
1410 int width, uint32_t *unused) \
1412 const uint16_t *src1 = (const uint16_t *) _src1, \
1413 *src2 = (const uint16_t *) _src2; \
1414 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1415 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* one triple of functions per RGB48/BGR48 endianness */
1418 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1419 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1420 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1421 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Pixel load for the 16/32-bit packed formats: native 32-bit read for the
 * 4-byte-per-pixel layouts, endian-aware 16-bit read otherwise. */
1423 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1424 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1425 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic packed-RGB (16/32 bpp) to 8-bit luma.  The shr/shg/shb/shp
 * shifts and mask parameters describe the component layout; rsh/gsh/bsh
 * pre-scale the BT.601 coefficients so that every layout uses the same
 * final shift S.
 */
1427 static av_always_inline void
1428 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1429 int width, enum PixelFormat origin,
1430 int shr, int shg, int shb, int shp,
1431 int maskr, int maskg, int maskb,
1432 int rsh, int gsh, int bsh, int S)
1434 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1435 const unsigned rnd = 33u << (S - 1);
1438 for (i = 0; i < width; i++) {
1439 int px = input_pixel(i) >> shp;
1440 int b = (px & maskb) >> shb;
1441 int g = (px & maskg) >> shg;
1442 int r = (px & maskr) >> shr;
1444 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/*
 * Generic packed-RGB (16/32 bpp) to 8-bit chroma, one sample per pixel
 * (no subsampling).  Layout parameters as in rgb16_32ToY_c_template.
 */
1448 static av_always_inline void
1449 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1450 const uint8_t *src, int width,
1451 enum PixelFormat origin,
1452 int shr, int shg, int shb, int shp,
1453 int maskr, int maskg, int maskb,
1454 int rsh, int gsh, int bsh, int S)
1456 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1457 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1458 const unsigned rnd = 257u << (S - 1);
1461 for (i = 0; i < width; i++) {
1462 int px = input_pixel(i) >> shp;
1463 int b = (px & maskb) >> shb;
1464 int g = (px & maskg) >> shg;
1465 int r = (px & maskr) >> shr;
1467 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1468 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/*
 * Packed-RGB to chroma with 2:1 horizontal subsampling.  Two adjacent
 * pixels are summed before masking: red+blue are extracted from the pair
 * sum 'rb' using widened masks (each mask ORed with itself shifted by 1),
 * green via the maskgx complement.  Final shift is S + 1 to divide by 2.
 */
1472 static av_always_inline void
1473 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1474 const uint8_t *src, int width,
1475 enum PixelFormat origin,
1476 int shr, int shg, int shb, int shp,
1477 int maskr, int maskg, int maskb,
1478 int rsh, int gsh, int bsh, int S)
1480 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1481 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1482 maskgx = ~(maskr | maskb);
1483 const unsigned rnd = 257u << S;
/* widen the masks so the pairwise sums do not overflow into neighbours */
1486 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1487 for (i = 0; i < width; i++) {
1488 int px0 = input_pixel(2 * i + 0) >> shp;
1489 int px1 = input_pixel(2 * i + 1) >> shp;
1490 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1491 int rb = px0 + px1 - g;
1493 b = (rb & maskb) >> shb;
/* 565-style layouts need different green handling than 555/444 */
1494 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1495 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1498 g = (g & maskg) >> shg;
1500 r = (rb & maskr) >> shr;
1502 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1503 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Stamp out ToY / ToUV / ToUV_half entry points for one packed layout,
 * binding the shift/mask/coefficient parameters at compile time.
 */
1509 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1510 maskg, maskb, rsh, gsh, bsh, S) \
1511 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1512 int width, uint32_t *unused) \
1514 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1515 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1518 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1519 const uint8_t *src, const uint8_t *dummy, \
1520 int width, uint32_t *unused) \
1522 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1526 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1527 const uint8_t *src, const uint8_t *dummy, \
1528 int width, uint32_t *unused) \
1530 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1531 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* one wrapper set per supported 32/16/15/12-bit packed layout */
1534 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1535 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1536 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1537 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1538 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1539 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1540 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1541 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1542 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1543 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1544 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1545 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1546 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1547 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1548 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1549 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
/* Extract the alpha channel from packed ABGR / RGBA lines.
 * NOTE(review): the loop bodies are missing from this excerpt; only the
 * signatures and loop headers are visible. */
1551 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1554 for (i=0; i<width; i++) {
1559 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1562 for (i=0; i<width; i++) {
/* Palette (PAL8) input: look each index up in 'pal' and split the packed
 * YUV entry — low byte is luma; palToUV reads the U/V bytes (the extracting
 * lines are missing from this excerpt).  src1 and src2 must be equal. */
1567 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1570 for (i=0; i<width; i++) {
1573 dst[i]= pal[d] & 0xFF;
1577 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1578 const uint8_t *src1, const uint8_t *src2,
1579 int width, uint32_t *pal)
1582 assert(src1 == src2);
1583 for (i=0; i<width; i++) {
1584 int p= pal[src1[i]];
/* Expand 1-bit-per-pixel input to 8-bit luma, 8 pixels per source byte.
 * NOTE(review): the lines deriving 'd' from src[i] are missing here; the
 * two variants presumably differ only in whether the byte is inverted
 * (monowhite: bit 0 = white) — confirm against the full source. */
1591 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1592 int width, uint32_t *unused)
1595 for (i=0; i<width/8; i++) {
1598 dst[8*i+j]= ((d>>(7-j))&1)*255;
1602 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1603 int width, uint32_t *unused)
1606 for (i=0; i<width/8; i++) {
1609 dst[8*i+j]= ((d>>(7-j))&1)*255;
1613 //FIXME yuy2* can read up to 7 samples too much
/* YUYV packed input: luma sits at even byte positions (the dst assignment
 * line is missing from this excerpt). */
1615 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1619 for (i=0; i<width; i++)
/*
 * Extract the chroma samples from one packed YUYV (YUY2) line: U is at
 * byte offset 1 and V at offset 3 of every 4-byte group.  Both source
 * pointers must reference the same line (asserted); 'unused' keeps the
 * common input-function signature.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;

        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
/* Byte-swap 16-bit plane data (endianness conversion) for luma and for a
 * U/V plane pair; the uint8_t* parameters are reinterpreted as uint16_t*. */
1634 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1637 const uint16_t *src = (const uint16_t *) _src;
1638 uint16_t *dst = (uint16_t *) _dst;
1639 for (i=0; i<width; i++) {
1640 dst[i] = av_bswap16(src[i]);
1644 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1645 const uint8_t *_src2, int width, uint32_t *unused)
1648 const uint16_t *src1 = (const uint16_t *) _src1,
1649 *src2 = (const uint16_t *) _src2;
1650 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1651 for (i=0; i<width; i++) {
1652 dstU[i] = av_bswap16(src1[i]);
1653 dstV[i] = av_bswap16(src2[i]);
1657 /* This is almost identical to the previous, end exists only because
1658 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY packed input: luma at odd byte positions (the dst assignment line
 * is missing from this excerpt). */
1659 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1663 for (i=0; i<width; i++)
/*
 * Extract the chroma samples from one packed UYVY line: U is at byte
 * offset 0 and V at offset 2 of every 4-byte group.  Both source pointers
 * must reference the same line (asserted).
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;

        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/*
 * De-interleave one line of two-component packed chroma (NV12/NV21 style):
 * even source bytes go to dst1, odd bytes to dst2.
 */
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                                        const uint8_t *src, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dst1[n] = src[2 * n];
        dst2[n] = src[2 * n + 1];
    }
}
/* NV12 stores UVUV..., NV21 stores VUVU...; both forward to nvXXtoUV_c
 * with the destination order swapped accordingly.  src2 is unused. */
1688 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1689 const uint8_t *src1, const uint8_t *src2,
1690 int width, uint32_t *unused)
1692 nvXXtoUV_c(dstU, dstV, src1, width);
1695 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1696 const uint8_t *src1, const uint8_t *src2,
1697 int width, uint32_t *unused)
1699 nvXXtoUV_c(dstV, dstU, src1, width);
/* Re-established endian-aware 16-bit load for the following converters. */
1702 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* 24-bit BGR to 8-bit luma.  NOTE(review): the b/g/r byte reads are
 * missing from this excerpt. */
1704 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1705 int width, uint32_t *unused)
1708 for (i=0; i<width; i++) {
1713 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* 24-bit BGR to 8-bit chroma, one sample per pixel.  Only src1 is read;
 * src1 == src2 is asserted. */
1717 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1718 const uint8_t *src2, int width, uint32_t *unused)
1721 for (i=0; i<width; i++) {
1722 int b= src1[3*i + 0];
1723 int g= src1[3*i + 1];
1724 int r= src1[3*i + 2];
1726 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1727 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1729 assert(src1 == src2);
/* 24-bit BGR to chroma with 2:1 horizontal subsampling: each output sample
 * sums two adjacent pixels, compensated by the extra final shift (+1). */
1732 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1733 const uint8_t *src2, int width, uint32_t *unused)
1736 for (i=0; i<width; i++) {
1737 int b= src1[6*i + 0] + src1[6*i + 3];
1738 int g= src1[6*i + 1] + src1[6*i + 4];
1739 int r= src1[6*i + 2] + src1[6*i + 5];
1741 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1742 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1744 assert(src1 == src2);
/* 24-bit RGB to 8-bit luma.  NOTE(review): the r/g/b byte reads are
 * missing from this excerpt. */
1747 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1751 for (i=0; i<width; i++) {
1756 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* 24-bit RGB to 8-bit chroma, one sample per pixel (RGB byte order is the
 * mirror of bgr24ToUV_c). */
1760 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1761 const uint8_t *src2, int width, uint32_t *unused)
1765 for (i=0; i<width; i++) {
1766 int r= src1[3*i + 0];
1767 int g= src1[3*i + 1];
1768 int b= src1[3*i + 2];
1770 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1771 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* 24-bit RGB to chroma with 2:1 horizontal subsampling (pairwise sums,
 * extra final shift). */
1775 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1776 const uint8_t *src2, int width, uint32_t *unused)
1780 for (i=0; i<width; i++) {
1781 int r= src1[6*i + 0] + src1[6*i + 3];
1782 int g= src1[6*i + 1] + src1[6*i + 4];
1783 int b= src1[6*i + 2] + src1[6*i + 5];
1785 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1786 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Planar 8-bit RGB (GBR plane order) to luma.  NOTE(review): the plane
 * reads are missing from this excerpt. */
1790 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1793 for (i = 0; i < width; i++) {
1798 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 16-bit little-endian RGB (plane order G, B, R) to 16-bit luma. */
1802 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1805 const uint16_t **src = (const uint16_t **) _src;
1806 uint16_t *dst = (uint16_t *) _dst;
1807 for (i = 0; i < width; i++) {
1808 int g = AV_RL16(src[0] + i);
1809 int b = AV_RL16(src[1] + i);
1810 int r = AV_RL16(src[2] + i);
1812 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Big-endian twin of planar_rgb16le_to_y. */
1816 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1819 const uint16_t **src = (const uint16_t **) _src;
1820 uint16_t *dst = (uint16_t *) _dst;
1821 for (i = 0; i < width; i++) {
1822 int g = AV_RB16(src[0] + i);
1823 int b = AV_RB16(src[1] + i);
1824 int r = AV_RB16(src[2] + i);
1826 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 8-bit RGB to chroma.  NOTE(review): the plane reads are missing
 * from this excerpt. */
1830 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1833 for (i = 0; i < width; i++) {
1838 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1839 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar 16-bit little-endian RGB (G, B, R plane order) to 16-bit chroma. */
1843 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1846 const uint16_t **src = (const uint16_t **) _src;
1847 uint16_t *dstU = (uint16_t *) _dstU;
1848 uint16_t *dstV = (uint16_t *) _dstV;
1849 for (i = 0; i < width; i++) {
1850 int g = AV_RL16(src[0] + i);
1851 int b = AV_RL16(src[1] + i);
1852 int r = AV_RL16(src[2] + i);
1854 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1855 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Big-endian twin of planar_rgb16le_to_uv. */
1859 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1862 const uint16_t **src = (const uint16_t **) _src;
1863 uint16_t *dstU = (uint16_t *) _dstU;
1864 uint16_t *dstV = (uint16_t *) _dstV;
1865 for (i = 0; i < width; i++) {
1866 int g = AV_RB16(src[0] + i);
1867 int b = AV_RB16(src[1] + i);
1868 int r = AV_RB16(src[2] + i);
1870 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1871 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/*
 * Horizontal FIR scaling of >8-bit input into 19-bit intermediates
 * (int32_t destination).  NOTE(review): the line computing 'sh' from
 * 'bits' is missing from this excerpt — confirm against the full source.
 */
1875 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1876 const int16_t *filter,
1877 const int16_t *filterPos, int filterSize)
1880 int32_t *dst = (int32_t *) _dst;
1881 const uint16_t *src = (const uint16_t *) _src;
1882 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1885 for (i = 0; i < dstW; i++) {
1887 int srcPos = filterPos[i];
1890 for (j = 0; j < filterSize; j++) {
1891 val += src[srcPos + j] * filter[filterSize * i + j];
1893 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1894 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/*
 * Horizontal FIR scaling of >8-bit input into the 15-bit intermediate
 * format.  'sh' is derived from the source bit depth; any adjustments to
 * it from the full source are not visible in this excerpt.
 */
1898 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1899 const int16_t *filter,
1900 const int16_t *filterPos, int filterSize)
1903 const uint16_t *src = (const uint16_t *) _src;
1904 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1906 for (i = 0; i < dstW; i++) {
1908 int srcPos = filterPos[i];
1911 for (j = 0; j < filterSize; j++) {
1912 val += src[srcPos + j] * filter[filterSize * i + j];
1914 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1915 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1919 // bilinear / bicubic scaling
/* Horizontal FIR scaling of 8-bit input into the 15-bit intermediate
 * format; the FFMIN clamp guards against filter overshoot. */
1920 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1921 const int16_t *filter, const int16_t *filterPos,
1925 for (i=0; i<dstW; i++) {
1927 int srcPos= filterPos[i];
1929 for (j=0; j<filterSize; j++) {
1930 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1932 //filter += hFilterSize;
1933 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* As hScale8To15_c, but producing 19-bit intermediates in an int32_t
 * destination (>> 3 instead of >> 7). */
1938 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1939 const int16_t *filter, const int16_t *filterPos,
1943 int32_t *dst = (int32_t *) _dst;
1944 for (i=0; i<dstW; i++) {
1946 int srcPos= filterPos[i];
1948 for (j=0; j<filterSize; j++) {
1949 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1951 //filter += hFilterSize;
1952 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/*
 * Expand limited-range chroma to full (JPEG) range, in place, on 15-bit
 * intermediates.  The input clamp (equivalent to FFMIN with 30775) keeps
 * the 32-bit product from overflowing.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        if (u > 30775)
            u = 30775;
        if (v > 30775)
            v = 30775;
        dstU[n] = (u * 4663 - 9289992) >> 12; // -264
        dstV[n] = (v * 4663 - 9289992) >> 12; // -264
    }
}
/*
 * Compress full (JPEG) range chroma back to limited range, in place,
 * on 15-bit intermediates.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        dstU[n] = (u * 1799 + 4081085) >> 11; // 1469
        dstV[n] = (v * 1799 + 4081085) >> 11; // 1469
    }
}
/*
 * Expand limited-range luma to full (JPEG) range, in place, on 15-bit
 * intermediates; the clamp (equivalent to FFMIN with 30189) prevents
 * 32-bit overflow in the multiply.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int y = dst[n] < 30189 ? dst[n] : 30189;

        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
/*
 * Compress full (JPEG) range luma back to limited range, in place,
 * on 15-bit intermediates.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    const int16_t *end = dst + width;

    while (p < end) {
        *p = (*p * 14071 + 33561947) >> 14;
        p++;
    }
}
/*
 * High-bit-depth twin of chrRangeToJpeg_c: the int16_t* buffers actually
 * hold 19-bit values in int32_t, so all constants are scaled by 16.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        if (u > (30775 << 4))
            u = 30775 << 4;
        if (v > (30775 << 4))
            v = 30775 << 4;
        dstU[n] = (u * 4663 - (9289992 << 4)) >> 12; // -264
        dstV[n] = (v * 4663 - (9289992 << 4)) >> 12; // -264
    }
}
/*
 * High-bit-depth twin of chrRangeFromJpeg_c (19-bit values in int32_t,
 * constants scaled by 16).
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        int u = dstU[n];
        int v = dstV[n];

        dstU[n] = (u * 1799 + (4081085 << 4)) >> 11; // 1469
        dstV[n] = (v * 1799 + (4081085 << 4)) >> 11; // 1469
    }
}
/*
 * High-bit-depth twin of lumRangeToJpeg_c (19-bit values in int32_t).
 * The clamp is equivalent to FFMIN with 30189 << 4.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++) {
        int y = dst[n] < (30189 << 4) ? dst[n] : 30189 << 4;

        dst[n] = (y * 4769 - (39057361 << 2)) >> 12;
    }
}
/*
 * High-bit-depth twin of lumRangeFromJpeg_c (19-bit values in int32_t,
 * offset scaled by 16).
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int n;

    for (n = 0; n < width; n++) {
        int y = dst[n];

        dst[n] = (y * 14071 + (33561947 << 4)) >> 14;
    }
}
/*
 * Fast bilinear horizontal luma scaler: 16.16 fixed-point position,
 * 7-bit blend weight; output is the 15-bit intermediate format.
 * NOTE(review): the xpos increment line is missing from this excerpt.
 */
2023 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2024 const uint8_t *src, int srcW, int xInc)
2027 unsigned int xpos=0;
2028 for (i=0;i<dstWidth;i++) {
2029 register unsigned int xx=xpos>>16;
2030 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2031 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2036 // *** horizontal scale Y line to temp buffer
/*
 * Front end for horizontal luma/alpha scaling: first convert the input
 * line to YV12-style 8-bit (via c->lumToYV12/alpToYV12 or readLumPlanar
 * into formatConvBuffer), then run either the generic FIR (c->hyScale) or
 * the fast bilinear path, and finally apply the optional range conversion.
 */
2037 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2038 const uint8_t *src_in[4], int srcW, int xInc,
2039 const int16_t *hLumFilter,
2040 const int16_t *hLumFilterPos, int hLumFilterSize,
2041 uint8_t *formatConvBuffer,
2042 uint32_t *pal, int isAlpha)
2044 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2045 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2046 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2049 toYV12(formatConvBuffer, src, srcW, pal);
2050 src= formatConvBuffer;
2051 } else if (c->readLumPlanar && !isAlpha) {
2052 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2053 src = formatConvBuffer;
2056 if (!c->hyscale_fast) {
2057 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2058 } else { // fast bilinear upscale / crap downscale
2059 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2063 convertRange(dst, dstWidth);
/*
 * Fast bilinear horizontal chroma scaler for both planes at once.
 * (xalpha^127) is the complementary weight of the 7-bit blend factor.
 * NOTE(review): the xpos increment line is missing from this excerpt.
 */
2066 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2067 int dstWidth, const uint8_t *src1,
2068 const uint8_t *src2, int srcW, int xInc)
2071 unsigned int xpos=0;
2072 for (i=0;i<dstWidth;i++) {
2073 register unsigned int xx=xpos>>16;
2074 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2075 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2076 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Front end for horizontal chroma scaling, mirroring hyscale(): optional
 * input conversion into formatConvBuffer (the second plane goes to buf2,
 * offset by the 16-byte-aligned size of one converted line), then the
 * generic FIR or fast bilinear path, then the optional range conversion.
 */
2081 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2082 const uint8_t *src_in[4],
2083 int srcW, int xInc, const int16_t *hChrFilter,
2084 const int16_t *hChrFilterPos, int hChrFilterSize,
2085 uint8_t *formatConvBuffer, uint32_t *pal)
2087 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2089 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2090 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2091 src1= formatConvBuffer;
2093 } else if (c->readChrPlanar) {
2094 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2095 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2096 src1= formatConvBuffer;
2100 if (!c->hcscale_fast) {
2101 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2102 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2103 } else { // fast bilinear upscale / crap downscale
2104 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2107 if (c->chrConvertRange)
2108 c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C (unoptimized) output functions that write scaled data to the
 * destination frame: the planar 1-tap/X-tap plane writers, the interleaved
 * NV12/NV21 chroma writer, and the packed 1/2/X-tap writers.  Selection is
 * driven entirely by c->dstFormat (plus SWS_FULL_CHR_H_INT for the
 * full-horizontal-chroma RGB path).  Out-params not matching the destination
 * format are left untouched. */
2111 static av_always_inline void
2112 find_c_packed_planar_out_funcs(SwsContext *c,
2113 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2114 yuv2interleavedX_fn *yuv2nv12cX,
2115 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2116 yuv2packedX_fn *yuv2packedX)
2118 enum PixelFormat dstFormat = c->dstFormat;
/* Planar destinations: pick the plane writers by component depth and
 * endianness. */
2120 if (is16BPS(dstFormat)) {
2121 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2122 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2123 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 identifies the 9-bit formats; the remaining
 * 9_OR_10BPS formats are 10-bit. */
2124 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2125 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2126 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2128 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2129 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
/* 8-bit planar default; NV12/NV21 additionally need the interleaved
 * (semi-planar) chroma writer. */
2132 *yuv2plane1 = yuv2plane1_8_c;
2133 *yuv2planeX = yuv2planeX_8_c;
2134 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2135 *yuv2nv12cX = yuv2nv12cX_c;
/* Packed RGB with full horizontal chroma resolution: only the X-tap
 * (general vertical filter) variant exists for these formats.  The
 * CONFIG_SWSCALE_ALPHA / CONFIG_SMALL conditionals select between the
 * alpha-aware, alpha-less and size-optimized implementations. */
2138 if(c->flags & SWS_FULL_CHR_H_INT) {
2139 switch (dstFormat) {
2142 *yuv2packedX = yuv2rgba32_full_X_c;
2144 #if CONFIG_SWSCALE_ALPHA
2146 *yuv2packedX = yuv2rgba32_full_X_c;
2148 #endif /* CONFIG_SWSCALE_ALPHA */
2150 *yuv2packedX = yuv2rgbx32_full_X_c;
2152 #endif /* !CONFIG_SMALL */
2156 *yuv2packedX = yuv2argb32_full_X_c;
2158 #if CONFIG_SWSCALE_ALPHA
2160 *yuv2packedX = yuv2argb32_full_X_c;
2162 #endif /* CONFIG_SWSCALE_ALPHA */
2164 *yuv2packedX = yuv2xrgb32_full_X_c;
2166 #endif /* !CONFIG_SMALL */
2170 *yuv2packedX = yuv2bgra32_full_X_c;
2172 #if CONFIG_SWSCALE_ALPHA
2174 *yuv2packedX = yuv2bgra32_full_X_c;
2176 #endif /* CONFIG_SWSCALE_ALPHA */
2178 *yuv2packedX = yuv2bgrx32_full_X_c;
2180 #endif /* !CONFIG_SMALL */
2184 *yuv2packedX = yuv2abgr32_full_X_c;
2186 #if CONFIG_SWSCALE_ALPHA
2188 *yuv2packedX = yuv2abgr32_full_X_c;
2190 #endif /* CONFIG_SWSCALE_ALPHA */
2192 *yuv2packedX = yuv2xbgr32_full_X_c;
2194 #endif /* !CONFIG_SMALL */
2197 *yuv2packedX = yuv2rgb24_full_X_c;
2200 *yuv2packedX = yuv2bgr24_full_X_c;
/* Packed output on the normal (horizontally subsampled chroma) path:
 * 1-tap, 2-tap and X-tap variants are all provided so swScale() can pick
 * the cheapest one matching the vertical filter sizes. */
2204 switch (dstFormat) {
2205 case PIX_FMT_RGB48LE:
2206 *yuv2packed1 = yuv2rgb48le_1_c;
2207 *yuv2packed2 = yuv2rgb48le_2_c;
2208 *yuv2packedX = yuv2rgb48le_X_c;
2210 case PIX_FMT_RGB48BE:
2211 *yuv2packed1 = yuv2rgb48be_1_c;
2212 *yuv2packed2 = yuv2rgb48be_2_c;
2213 *yuv2packedX = yuv2rgb48be_X_c;
2215 case PIX_FMT_BGR48LE:
2216 *yuv2packed1 = yuv2bgr48le_1_c;
2217 *yuv2packed2 = yuv2bgr48le_2_c;
2218 *yuv2packedX = yuv2bgr48le_X_c;
2220 case PIX_FMT_BGR48BE:
2221 *yuv2packed1 = yuv2bgr48be_1_c;
2222 *yuv2packed2 = yuv2bgr48be_2_c;
2223 *yuv2packedX = yuv2bgr48be_X_c;
/* 32-bit RGB/BGR with alpha at the low end (RGB32/BGR32). */
2228 *yuv2packed1 = yuv2rgb32_1_c;
2229 *yuv2packed2 = yuv2rgb32_2_c;
2230 *yuv2packedX = yuv2rgb32_X_c;
2232 #if CONFIG_SWSCALE_ALPHA
2234 *yuv2packed1 = yuv2rgba32_1_c;
2235 *yuv2packed2 = yuv2rgba32_2_c;
2236 *yuv2packedX = yuv2rgba32_X_c;
2238 #endif /* CONFIG_SWSCALE_ALPHA */
2240 *yuv2packed1 = yuv2rgbx32_1_c;
2241 *yuv2packed2 = yuv2rgbx32_2_c;
2242 *yuv2packedX = yuv2rgbx32_X_c;
2244 #endif /* !CONFIG_SMALL */
/* 32-bit RGB/BGR with alpha at the high end ("_1" variants). */
2246 case PIX_FMT_RGB32_1:
2247 case PIX_FMT_BGR32_1:
2249 *yuv2packed1 = yuv2rgb32_1_1_c;
2250 *yuv2packed2 = yuv2rgb32_1_2_c;
2251 *yuv2packedX = yuv2rgb32_1_X_c;
2253 #if CONFIG_SWSCALE_ALPHA
2255 *yuv2packed1 = yuv2rgba32_1_1_c;
2256 *yuv2packed2 = yuv2rgba32_1_2_c;
2257 *yuv2packedX = yuv2rgba32_1_X_c;
2259 #endif /* CONFIG_SWSCALE_ALPHA */
2261 *yuv2packed1 = yuv2rgbx32_1_1_c;
2262 *yuv2packed2 = yuv2rgbx32_1_2_c;
2263 *yuv2packedX = yuv2rgbx32_1_X_c;
2265 #endif /* !CONFIG_SMALL */
2268 *yuv2packed1 = yuv2rgb24_1_c;
2269 *yuv2packed2 = yuv2rgb24_2_c;
2270 *yuv2packedX = yuv2rgb24_X_c;
2273 *yuv2packed1 = yuv2bgr24_1_c;
2274 *yuv2packed2 = yuv2bgr24_2_c;
2275 *yuv2packedX = yuv2bgr24_X_c;
/* 16/15/12-bit packed RGB: one writer handles both RGB and BGR orders
 * and both endiannesses (distinguished via context state, not here). */
2277 case PIX_FMT_RGB565LE:
2278 case PIX_FMT_RGB565BE:
2279 case PIX_FMT_BGR565LE:
2280 case PIX_FMT_BGR565BE:
2281 *yuv2packed1 = yuv2rgb16_1_c;
2282 *yuv2packed2 = yuv2rgb16_2_c;
2283 *yuv2packedX = yuv2rgb16_X_c;
2285 case PIX_FMT_RGB555LE:
2286 case PIX_FMT_RGB555BE:
2287 case PIX_FMT_BGR555LE:
2288 case PIX_FMT_BGR555BE:
2289 *yuv2packed1 = yuv2rgb15_1_c;
2290 *yuv2packed2 = yuv2rgb15_2_c;
2291 *yuv2packedX = yuv2rgb15_X_c;
2293 case PIX_FMT_RGB444LE:
2294 case PIX_FMT_RGB444BE:
2295 case PIX_FMT_BGR444LE:
2296 case PIX_FMT_BGR444BE:
2297 *yuv2packed1 = yuv2rgb12_1_c;
2298 *yuv2packed2 = yuv2rgb12_2_c;
2299 *yuv2packedX = yuv2rgb12_X_c;
2303 *yuv2packed1 = yuv2rgb8_1_c;
2304 *yuv2packed2 = yuv2rgb8_2_c;
2305 *yuv2packedX = yuv2rgb8_X_c;
2309 *yuv2packed1 = yuv2rgb4_1_c;
2310 *yuv2packed2 = yuv2rgb4_2_c;
2311 *yuv2packedX = yuv2rgb4_X_c;
2313 case PIX_FMT_RGB4_BYTE:
2314 case PIX_FMT_BGR4_BYTE:
2315 *yuv2packed1 = yuv2rgb4b_1_c;
2316 *yuv2packed2 = yuv2rgb4b_2_c;
2317 *yuv2packedX = yuv2rgb4b_X_c;
/* Non-RGB packed destinations: gray16, 1-bit mono, and packed 4:2:2. */
2321 switch (dstFormat) {
2322 case PIX_FMT_GRAY16BE:
2323 *yuv2packed1 = yuv2gray16BE_1_c;
2324 *yuv2packed2 = yuv2gray16BE_2_c;
2325 *yuv2packedX = yuv2gray16BE_X_c;
2327 case PIX_FMT_GRAY16LE:
2328 *yuv2packed1 = yuv2gray16LE_1_c;
2329 *yuv2packed2 = yuv2gray16LE_2_c;
2330 *yuv2packedX = yuv2gray16LE_X_c;
2332 case PIX_FMT_MONOWHITE:
2333 *yuv2packed1 = yuv2monowhite_1_c;
2334 *yuv2packed2 = yuv2monowhite_2_c;
2335 *yuv2packedX = yuv2monowhite_X_c;
2337 case PIX_FMT_MONOBLACK:
2338 *yuv2packed1 = yuv2monoblack_1_c;
2339 *yuv2packed2 = yuv2monoblack_2_c;
2340 *yuv2packedX = yuv2monoblack_X_c;
2342 case PIX_FMT_YUYV422:
2343 *yuv2packed1 = yuv2yuyv422_1_c;
2344 *yuv2packed2 = yuv2yuyv422_2_c;
2345 *yuv2packedX = yuv2yuyv422_X_c;
2347 case PIX_FMT_UYVY422:
2348 *yuv2packed1 = yuv2uyvy422_1_c;
2349 *yuv2packed2 = yuv2uyvy422_2_c;
2350 *yuv2packedX = yuv2uyvy422_X_c;
/* Compile-time switch for the ring-buffer trace logging used by swScale().
 * Set DEBUG_SWSCALE_BUFFERS to 1 to get per-line buffer-state logs; the
 * DEBUG_BUFFERS() macro expects a SwsContext named 'c' in scope. */
2355 #define DEBUG_SWSCALE_BUFFERS 0
2356 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Core C scaling loop.  Horizontally scales source slice lines into the
 * luma/chroma/alpha ring buffers (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf),
 * then, once enough input lines are buffered, vertically scales and writes
 * each destination line via the yuv2* output functions.  Processes the source
 * slice [srcSliceY, srcSliceY + srcSliceH) and returns the number of
 * destination lines produced (dstY - lastDstY).
 * NOTE(review): some locals (dstY, lastDstY, enough_lines) are declared on
 * lines elided from this excerpt. */
2358 static int swScale(SwsContext *c, const uint8_t* src[],
2359 int srcStride[], int srcSliceY,
2360 int srcSliceH, uint8_t* dst[], int dstStride[])
2362 /* load a few things into local vars to make the code more readable? and faster */
2363 const int srcW= c->srcW;
2364 const int dstW= c->dstW;
2365 const int dstH= c->dstH;
2366 const int chrDstW= c->chrDstW;
2367 const int chrSrcW= c->chrSrcW;
2368 const int lumXInc= c->lumXInc;
2369 const int chrXInc= c->chrXInc;
2370 const enum PixelFormat dstFormat= c->dstFormat;
2371 const int flags= c->flags;
2372 int16_t *vLumFilterPos= c->vLumFilterPos;
2373 int16_t *vChrFilterPos= c->vChrFilterPos;
2374 int16_t *hLumFilterPos= c->hLumFilterPos;
2375 int16_t *hChrFilterPos= c->hChrFilterPos;
2376 int16_t *vLumFilter= c->vLumFilter;
2377 int16_t *vChrFilter= c->vChrFilter;
2378 int16_t *hLumFilter= c->hLumFilter;
2379 int16_t *hChrFilter= c->hChrFilter;
2380 int32_t *lumMmxFilter= c->lumMmxFilter;
2381 int32_t *chrMmxFilter= c->chrMmxFilter;
2382 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2383 const int vLumFilterSize= c->vLumFilterSize;
2384 const int vChrFilterSize= c->vChrFilterSize;
2385 const int hLumFilterSize= c->hLumFilterSize;
2386 const int hChrFilterSize= c->hChrFilterSize;
2387 int16_t **lumPixBuf= c->lumPixBuf;
2388 int16_t **chrUPixBuf= c->chrUPixBuf;
2389 int16_t **chrVPixBuf= c->chrVPixBuf;
2390 int16_t **alpPixBuf= c->alpPixBuf;
2391 const int vLumBufSize= c->vLumBufSize;
2392 const int vChrBufSize= c->vChrBufSize;
2393 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Slice position/height in chroma lines; the height is rounded up to
 * cover partially-subsampled slices. */
2394 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2395 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2397 uint32_t *pal=c->pal_yuv;
2398 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2399 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2400 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2401 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2402 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2403 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Deep (>8-bit) sources carry extra precision, so dithering the output
 * is worthwhile; 8-bit sources use a flat dither table instead. */
2404 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2406 /* vars which will change and which we need to store back in the context */
2408 int lumBufIndex= c->lumBufIndex;
2409 int chrBufIndex= c->chrBufIndex;
2410 int lastInLumBuf= c->lastInLumBuf;
2411 int lastInChrBuf= c->lastInChrBuf;
/* Packed input has a single data plane; replicate plane 0 so the
 * per-plane pointer arithmetic below stays uniform. */
2413 if (isPacked(c->srcFormat)) {
2421 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma lines by enlarging the chroma stride. */
2423 srcStride[1]<<= c->vChrDrop;
2424 srcStride[2]<<= c->vChrDrop;
2426 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2427 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2428 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2429 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2430 srcSliceY, srcSliceH, dstY, dstH);
2431 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2432 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2434 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2435 static int warnedAlready=0; //FIXME move this into the context perhaps
2436 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2437 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2438 " ->cannot do aligned memory accesses anymore\n");
2443 /* Note the user might start scaling the picture in the middle so this
2444 will not get executed. This is not really intended but works
2445 currently, so people might do it. */
2446 if (srcSliceY ==0) {
/* 8-bit sources: use the flat 0x40 dither table (i.e. no real dither). */
2454 if (!should_dither) {
2455 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one iteration per destination line. */
2459 for (;dstY < dstH; dstY++) {
2460 const int chrDstY= dstY>>c->chrDstVSubSample;
2461 uint8_t *dest[4] = {
2462 dst[0] + dstStride[0] * dstY,
2463 dst[1] + dstStride[1] * chrDstY,
2464 dst[2] + dstStride[2] * chrDstY,
2465 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2468 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* First luma line needed for the last output line of this chroma group. */
2469 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2470 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2472 // Last line needed as input
2473 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2474 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2475 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2478 //handle holes (FAST_BILINEAR & weird filters)
2479 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2480 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2481 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2482 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2484 DEBUG_BUFFERS("dstY: %d\n", dstY);
2485 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2486 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2487 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2488 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2490 // Do we have enough lines in this slice to output the dstY line
2491 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: just buffer what this slice provides and bail
 * out of the loop below; output resumes on a later slice. */
2493 if (!enough_lines) {
2494 lastLumSrcY = srcSliceY + srcSliceH - 1;
2495 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2496 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2497 lastLumSrcY, lastChrSrcY);
2500 //Do horizontal scaling
2501 while(lastInLumBuf < lastLumSrcY) {
2502 const uint8_t *src1[4] = {
2503 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2504 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2505 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2506 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2509 assert(lumBufIndex < 2*vLumBufSize);
2510 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2511 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2512 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2513 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha shares the luma ring-buffer index and horizontal filter. */
2516 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2517 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2518 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2522 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2523 lumBufIndex, lastInLumBuf);
2525 while(lastInChrBuf < lastChrSrcY) {
2526 const uint8_t *src1[4] = {
2527 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2528 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2529 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2530 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2533 assert(chrBufIndex < 2*vChrBufSize);
2534 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2535 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2536 //FIXME replace parameters through context struct (some at least)
/* Gray/mono destinations skip horizontal chroma scaling entirely. */
2538 if (c->needs_hcscale)
2539 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2540 chrDstW, src1, chrSrcW, chrXInc,
2541 hChrFilter, hChrFilterPos, hChrFilterSize,
2542 formatConvBuffer, pal);
2544 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2545 chrBufIndex, lastInChrBuf);
2547 //wrap buf index around to stay inside the ring buffer
2548 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2549 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2551 break; //we can't output a dstY line so let's try with the next slice
2554 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Deep sources: rotate through the 8x8 dither matrix per output line. */
2556 if (should_dither) {
2557 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2558 c->lumDither8 = dither_8x8_128[dstY & 7];
2560 if (dstY >= dstH-2) {
2561 // hmm looks like we can't use MMX here without overwriting this array's tail
2562 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2563 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Build pointer windows into the ring buffers covering the input lines
 * the vertical filter needs for this output line. */
2567 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2568 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2569 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2570 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Filter taps that reach outside the picture: build a temporary pointer
 * list that clamps by duplicating the first/last valid line (luma). */
2572 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2573 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2574 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2575 for (i = 0; i < neg; i++)
2576 tmpY[i] = lumSrcPtr[neg];
2577 for ( ; i < end; i++)
2578 tmpY[i] = lumSrcPtr[i];
2579 for ( ; i < vLumFilterSize; i++)
2580 tmpY[i] = tmpY[i-1];
/* Same edge clamping for the alpha plane. */
2584 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2585 for (i = 0; i < neg; i++)
2586 tmpA[i] = alpSrcPtr[neg];
2587 for ( ; i < end; i++)
2588 tmpA[i] = alpSrcPtr[i];
2589 for ( ; i < vLumFilterSize; i++)
2590 tmpA[i] = tmpA[i - 1];
/* Same edge clamping for both chroma planes. */
2594 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2595 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2596 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2597 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2598 for (i = 0; i < neg; i++) {
2599 tmpU[i] = chrUSrcPtr[neg];
2600 tmpV[i] = chrVSrcPtr[neg];
2602 for ( ; i < end; i++) {
2603 tmpU[i] = chrUSrcPtr[i];
2604 tmpV[i] = chrVSrcPtr[i];
2606 for ( ; i < vChrFilterSize; i++) {
2607 tmpU[i] = tmpU[i - 1];
2608 tmpV[i] = tmpV[i - 1];
/* Planar YUV / gray output: write each plane; 1-tap filters take the
 * cheap single-source path. */
2614 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2615 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2617 if (vLumFilterSize == 1) {
2618 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2620 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2621 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Only emit chroma on lines that are not skipped by vertical
 * subsampling, and never for gray output. */
2624 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2626 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2627 } else if (vChrFilterSize == 1) {
2628 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2629 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2631 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2632 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2633 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2634 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2638 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2639 if (vLumFilterSize == 1) {
2640 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2642 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2643 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed output: pick the cheapest writer the filter sizes allow. */
2647 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2648 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2649 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2650 int chrAlpha = vChrFilter[2 * dstY + 1];
2651 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2652 alpPixBuf ? *alpSrcPtr : NULL,
2653 dest[0], dstW, chrAlpha, dstY);
2654 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2655 int lumAlpha = vLumFilter[2 * dstY + 1];
2656 int chrAlpha = vChrFilter[2 * dstY + 1];
2658 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2660 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2661 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2662 alpPixBuf ? alpSrcPtr : NULL,
2663 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2664 } else { //general RGB
2665 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2666 lumSrcPtr, vLumFilterSize,
2667 vChrFilter + dstY * vChrFilterSize,
2668 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2669 alpSrcPtr, dest[0], dstW, dstY);
/* Destination has an alpha plane but the source provided none: fill
 * the produced lines with opaque alpha (255). */
2675 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
2676 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Make non-temporal (MMX2) stores visible before returning. */
2679 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2680 __asm__ volatile("sfence":::"memory");
2684 /* store changed local vars back in the context */
2686 c->lumBufIndex= lumBufIndex;
2687 c->chrBufIndex= chrBufIndex;
2688 c->lastInLumBuf= lastInLumBuf;
2689 c->lastInChrBuf= lastInChrBuf;
2691 return dstY - lastDstY;
/* Initialize the C function pointers of a SwsContext: output writers,
 * input (to-YV12) converters for luma/chroma/alpha, the horizontal scalers
 * matching the source/destination bit depths, and the optional range
 * (full<->limited) converters.  Called once at context setup. */
2694 static av_cold void sws_init_swScale_c(SwsContext *c)
2696 enum PixelFormat srcFormat = c->srcFormat;
2698 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2699 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converters: turn the source format's chroma into the
 * internal planar representation. */
2702 c->chrToYV12 = NULL;
2704 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2705 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2706 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2707 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2711 case PIX_FMT_BGR4_BYTE:
2712 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar GBR sources use the readChrPlanar callback instead. */
2713 case PIX_FMT_GBRP9LE:
2714 case PIX_FMT_GBRP10LE:
2715 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2716 case PIX_FMT_GBRP9BE:
2717 case PIX_FMT_GBRP10BE:
2718 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2719 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* Deep planar YUV whose endianness differs from the host: byteswap on
 * read (bswap16UV_c is registered for the non-native-endian set). */
2721 case PIX_FMT_YUV444P9LE:
2722 case PIX_FMT_YUV422P9LE:
2723 case PIX_FMT_YUV420P9LE:
2724 case PIX_FMT_YUV422P10LE:
2725 case PIX_FMT_YUV444P10LE:
2726 case PIX_FMT_YUV420P10LE:
2727 case PIX_FMT_YUV420P16LE:
2728 case PIX_FMT_YUV422P16LE:
2729 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2731 case PIX_FMT_YUV444P9BE:
2732 case PIX_FMT_YUV422P9BE:
2733 case PIX_FMT_YUV420P9BE:
2734 case PIX_FMT_YUV444P10BE:
2735 case PIX_FMT_YUV422P10BE:
2736 case PIX_FMT_YUV420P10BE:
2737 case PIX_FMT_YUV420P16BE:
2738 case PIX_FMT_YUV422P16BE:
2739 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB chroma converters: the *_half_c variants average horizontal pixel
 * pairs when chroma is horizontally subsampled. */
2742 if (c->chrSrcHSubSample) {
2744 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2745 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2746 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2747 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2748 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2749 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2750 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2751 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2752 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2753 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2754 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2755 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
2756 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
2757 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2758 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2759 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2760 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2761 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2762 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2763 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2764 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
2765 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
/* Full-resolution chroma: per-pixel converters. */
2769 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2770 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2771 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2772 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2773 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2774 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2775 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2776 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2777 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2778 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2779 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2780 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
2781 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
2782 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2783 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2784 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2785 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2786 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2787 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2788 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2789 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
2790 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
/* Luma and alpha input converters, mirroring the chroma table above. */
2794 c->lumToYV12 = NULL;
2795 c->alpToYV12 = NULL;
2796 switch (srcFormat) {
2797 case PIX_FMT_GBRP9LE:
2798 case PIX_FMT_GBRP10LE:
2799 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2800 case PIX_FMT_GBRP9BE:
2801 case PIX_FMT_GBRP10BE:
2802 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2803 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2805 case PIX_FMT_YUV444P9LE:
2806 case PIX_FMT_YUV422P9LE:
2807 case PIX_FMT_YUV420P9LE:
2808 case PIX_FMT_YUV444P10LE:
2809 case PIX_FMT_YUV422P10LE:
2810 case PIX_FMT_YUV420P10LE:
2811 case PIX_FMT_YUV420P16LE:
2812 case PIX_FMT_YUV422P16LE:
2813 case PIX_FMT_YUV444P16LE:
2814 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2816 case PIX_FMT_YUV444P9BE:
2817 case PIX_FMT_YUV422P9BE:
2818 case PIX_FMT_YUV420P9BE:
2819 case PIX_FMT_YUV444P10BE:
2820 case PIX_FMT_YUV422P10BE:
2821 case PIX_FMT_YUV420P10BE:
2822 case PIX_FMT_YUV420P16BE:
2823 case PIX_FMT_YUV422P16BE:
2824 case PIX_FMT_YUV444P16BE:
2825 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2827 case PIX_FMT_YUYV422 :
2828 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2829 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2830 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2831 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2832 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2833 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2834 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2835 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
2836 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
2837 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2838 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2839 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2840 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2841 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2842 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
2843 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
2847 case PIX_FMT_BGR4_BYTE:
2848 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2849 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2850 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2851 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2852 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2853 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2854 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2855 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2856 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2857 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2858 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for sources that carry an alpha channel. */
2861 switch (srcFormat) {
2863 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2865 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2866 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scalers: chosen by source/destination bit depth; the
 * fast-bilinear path only exists for the 8-bit -> <=10-bit case. */
2870 if (c->srcBpc == 8) {
2871 if (c->dstBpc <= 10) {
2872 c->hyScale = c->hcScale = hScale8To15_c;
2873 if (c->flags & SWS_FAST_BILINEAR) {
2874 c->hyscale_fast = hyscale_fast_c;
2875 c->hcscale_fast = hcscale_fast_c;
2878 c->hyScale = c->hcScale = hScale8To19_c;
2881 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range conversion (JPEG/full <-> MPEG/limited) only applies to YUV
 * destinations; RGB output handles range in the yuv2rgb tables. */
2884 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2885 if (c->dstBpc <= 10) {
2887 c->lumConvertRange = lumRangeFromJpeg_c;
2888 c->chrConvertRange = chrRangeFromJpeg_c;
2890 c->lumConvertRange = lumRangeToJpeg_c;
2891 c->chrConvertRange = chrRangeToJpeg_c;
2895 c->lumConvertRange = lumRangeFromJpeg16_c;
2896 c->chrConvertRange = chrRangeFromJpeg16_c;
2898 c->lumConvertRange = lumRangeToJpeg16_c;
2899 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray and 1-bit mono formats have no chroma worth scaling. */
2904 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2905 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2906 c->needs_hcscale = 1;
2909 SwsFunc ff_getSwsFunc(SwsContext *c)
2911 sws_init_swScale_c(c);
2914 ff_sws_init_swScale_mmx(c);
2916 ff_sws_init_swScale_altivec(c);