2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/*
 * Fixed-point RGB -> YUV conversion coefficients.
 * Each constant is a floating-point weight multiplied by
 * (1 << RGB2YUV_SHIFT) and rounded by adding 0.5 before the cast to int;
 * negative weights are negated after the rounding. The *Y constants carry
 * a 219/255 factor, the *U and *V constants a 224/255 factor.
 * NOTE(review): the names presumably read <input channel><output component>
 * (e.g. BY = contribution of blue to Y), and the 219/224 factors presumably
 * target limited-range output -- confirm against the users of these macros.
 */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/*
 * File-local dither offsets in the range 0..3: two rows of eight entries,
 * each row repeating a two-entry pattern four times.
 * yuv2rgb_write() selects a row with (y & 1) and uses entries [0] and [1]
 * for the green channel of RGB565/BGR565 output.
 */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/*
 * File-local dither offsets in the range 0..6: two rows of eight entries,
 * each row repeating a two-entry pattern four times.
 * yuv2rgb_write() uses it for red and blue of RGB565/BGR565 and for all
 * three channels of RGB555/BGR555; blue reads the opposite row,
 * (y & 1) ^ 1.
 */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/*
 * Dither offsets in the range 0..15: four rows of eight entries, each row
 * repeating a four-entry pattern twice. Declared without "static".
 * yuv2rgb_write() indexes it with (y & 3), and (y & 3) ^ 3 for blue, in
 * the last of its 16-bit branches.
 */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/*
 * 8x8 dither offsets in the range 0..31. Declared without "static".
 * yuv2rgb_write() selects a row with (y & 7) and a column with the pixel
 * index & 7 for the red and green channels of RGB8/BGR8 output.
 */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/*
 * 8x8 dither offsets in the range 0..72. Declared without "static".
 * yuv2rgb_write() selects a row with (y & 7); the table supplies the blue
 * offset for RGB8/BGR8 output and the green offset in the other 8/4-bit
 * branch.
 */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/*
 * 8x8 dither offsets in the range 0..217. Declared without "static".
 * The yuv2mono_*_c_template() functions and the 4-bit path of
 * yuv2rgb_write() select a row with (y & 7).
 * NOTE(review): three more definitions of dither_8x8_220 follow this one;
 * presumably preprocessor conditionals on lines missing from this view
 * select exactly one of the four -- confirm.
 */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/*
 * Alternative contents for dither_8x8_220 (values 0..215).
 * NOTE(review): the same identifier is defined several times in this file;
 * presumably a preprocessor conditional on lines missing from this view
 * decides whether this variant is compiled -- confirm.
 */
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
/*
 * Alternative contents for dither_8x8_220 (values 0..213).
 * NOTE(review): the same identifier is defined several times in this file;
 * presumably a preprocessor conditional on lines missing from this view
 * decides whether this variant is compiled -- confirm.
 */
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
/*
 * Alternative contents for dither_8x8_220 (values 0..212).
 * NOTE(review): the same identifier is defined several times in this file;
 * presumably a preprocessor conditional on lines missing from this view
 * decides whether this variant is compiled -- confirm.
 */
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/*
 * 8x8 dither table holding even values in the range 0..126.
 * Declared without "static"; nothing in the portion of the file visible
 * here references it, so its consumers are presumably elsewhere.
 */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/*
 * Eight bytes, each holding the value 64, declared through
 * DECLARE_ALIGNED(8, ...). Not referenced in the portion of the file
 * visible here.
 */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/*
 * output_pixel(pos, val, bias, signedness) for the 16-bit planar writers.
 * Shifts val right by the caller's local "shift", clips the result with
 * av_clip_int16() or av_clip_uint16() (chosen by token-pasting
 * "signedness"), adds "bias" and stores the sum at pos through AV_WB16 or
 * AV_WL16.
 * NOTE(review): the condition choosing between the two stores is on lines
 * missing from this view; presumably it tests the caller's big_endian.
 */
198 #define output_pixel(pos, val, bias, signedness) \
200 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
202 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/*
 * Write one line of a plane to 16-bit output without vertical filtering.
 *
 * src         - input samples, one int32_t per output pixel
 * dest        - output line, dstW 16-bit elements
 * dstW        - number of pixels to write
 * big_endian  - not referenced on the lines visible here; presumably
 *               tested inside output_pixel() to pick the byte order
 * output_bits - sets the right shift applied to each sample
 *               (19 - output_bits)
 */
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
210 int shift = 19 - output_bits;
212 for (i = 0; i < dstW; i++) {
/* adding half of (1 << shift) makes the later ">> shift" round to nearest */
213 int val = src[i] + (1 << (shift - 1));
/* no bias, unsigned 16-bit clip */
214 output_pixel(&dest[i], val, 0, uint);
/*
 * Vertically filter filterSize input lines into one 16-bit output line.
 *
 * filter      - filterSize coefficients
 * filterSize  - number of input lines combined per output pixel
 * src         - filterSize pointers to int32_t input lines
 * dest        - output line, dstW 16-bit elements
 * dstW        - number of pixels to write
 * big_endian  - not referenced on the lines visible here; presumably
 *               tested inside output_pixel() to pick the byte order
 * output_bits - sets the right shift applied to each sum
 *               (15 + 16 - output_bits)
 */
218 static av_always_inline void
219 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
220 const int32_t **src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 + 16 - output_bits;
226 for (i = 0; i < dstW; i++) {
/* start from the rounding term: 1 << (30 - output_bits) == 1 << (shift - 1) */
227 int val = 1 << (30-output_bits);
230 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
 * filters (or anything with negative coeffs), the range can be slightly
 * wider in both directions. To account for this overflow, we subtract
 * a constant so it always fits in the signed range (assuming a
 * reasonable filterSize), and re-add that at the end.
 * NOTE(review): the subtraction itself is not among the lines visible
 * here; the 0x8000 bias passed to output_pixel() below is presumably
 * the matching re-add after the shift. */
236 for (j = 0; j < filterSize; j++)
237 val += src[j][i] * filter[j];
/* signed 16-bit clip of (val >> shift), then add 0x8000 */
239 output_pixel(&dest[i], val, 0x8000, int);
/*
 * output_pixel(pos, val) for the 9/10-bit planar writers.
 * Shifts val right by the caller's local "shift", clips the result to
 * output_bits bits with av_clip_uintp2() and stores it at pos through
 * AV_WB16 or AV_WL16.
 * NOTE(review): the condition choosing between the two stores is on lines
 * missing from this view; presumably it tests the caller's big_endian.
 */
245 #define output_pixel(pos, val) \
247 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
249 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/*
 * Write one line of a plane to 9/10-bit output (stored in 16-bit
 * elements) without vertical filtering.
 *
 * src         - input samples, one int16_t per output pixel
 * dest        - output line, dstW 16-bit elements
 * dstW        - number of pixels to write
 * big_endian  - not referenced on the lines visible here; presumably
 *               tested inside output_pixel() to pick the byte order
 * output_bits - output bit depth; used for the shift (15 - output_bits)
 *               and as the clip width inside output_pixel()
 */
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
257 int shift = 15 - output_bits;
259 for (i = 0; i < dstW; i++) {
/* adding half of (1 << shift) makes the later ">> shift" round to nearest */
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
/*
 * Vertically filter filterSize input lines into one 9/10-bit output line
 * (stored in 16-bit elements).
 *
 * filter      - filterSize coefficients
 * filterSize  - number of input lines combined per output pixel
 * src         - filterSize pointers to int16_t input lines
 * dest        - output line, dstW 16-bit elements
 * dstW        - number of pixels to write
 * big_endian  - not referenced on the lines visible here; presumably
 *               tested inside output_pixel() to pick the byte order
 * output_bits - output bit depth; used for the shift
 *               (11 + 16 - output_bits) and as the clip width
 */
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
271 int shift = 11 + 16 - output_bits;
273 for (i = 0; i < dstW; i++) {
/* start from the rounding term: 1 << (26 - output_bits) == 1 << (shift - 1) */
274 int val = 1 << (26-output_bits);
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
280 output_pixel(&dest[i], val);
/*
 * yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) defines two static
 * functions with int16_t/uint8_t pointer signatures:
 *   yuv2plane1_<bits><BE_LE>_c() and yuv2planeX_<bits><BE_LE>_c().
 * Each casts its source pointer(s) to typeX_t and its destination to
 * uint16_t *, then forwards to yuv2plane1_<template_size>_c_template() or
 * yuv2planeX_<template_size>_c_template() with is_be and bits as the last
 * two arguments. The dither and offset parameters are accepted but not
 * forwarded.
 * The instantiations below cover 9 and 10 bits (template 10, int16_t
 * input) and 16 bits (template 16, int32_t input), each in BE and LE.
 */
286 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
287 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
288 uint8_t *dest, int dstW, \
289 const uint8_t *dither, int offset)\
291 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
292 (uint16_t *) dest, dstW, is_be, bits); \
294 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
295 const int16_t **src, uint8_t *dest, int dstW, \
296 const uint8_t *dither, int offset)\
298 yuv2planeX_## template_size ## _c_template(filter, \
299 filterSize, (const typeX_t **) src, \
300 (uint16_t *) dest, dstW, is_be, bits); \
302 yuv2NBPS( 9, BE, 1, 10, int16_t)
303 yuv2NBPS( 9, LE, 0, 10, int16_t)
304 yuv2NBPS(10, BE, 1, 10, int16_t)
305 yuv2NBPS(10, LE, 0, 10, int16_t)
306 yuv2NBPS(16, BE, 1, 16, int32_t)
307 yuv2NBPS(16, LE, 0, 16, int32_t)
/*
 * Vertically filter filterSize input lines into one 8-bit output line,
 * with ordered dithering.
 *
 * filter     - filterSize coefficients
 * filterSize - number of input lines combined per output pixel
 * src        - filterSize pointers to int16_t input lines
 * dest       - output line, dstW bytes
 * dstW       - number of pixels to write
 * dither     - dither values, indexed modulo 8
 * offset     - added to the pixel index before the modulo-8 dither lookup
 */
309 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
310 const int16_t **src, uint8_t *dest, int dstW,
311 const uint8_t *dither, int offset)
314 for (i=0; i<dstW; i++) {
/* the dither value is pre-scaled by 12 bits; the sum is shifted down by 19 */
315 int val = dither[(i + offset) & 7] << 12;
317 for (j=0; j<filterSize; j++)
318 val += src[j][i] * filter[j];
320 dest[i]= av_clip_uint8(val>>19);
/*
 * Write one line of a plane to 8-bit output without vertical filtering,
 * with ordered dithering.
 *
 * src    - input samples, one int16_t per output pixel
 * dest   - output line, dstW bytes
 * dstW   - number of pixels to write
 * dither - dither values, indexed modulo 8
 * offset - added to the pixel index before the modulo-8 dither lookup
 */
324 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
325 const uint8_t *dither, int offset)
328 for (i=0; i<dstW; i++) {
/* the dither value is added before the 7-bit down-shift */
329 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
330 dest[i]= av_clip_uint8(val);
/*
 * Vertically filter the U and V planes and write them interleaved into a
 * single 8-bit chroma line.
 *
 * c             - context; dstFormat and chrDither8 are read from it
 * chrFilter     - chrFilterSize coefficients, shared by U and V
 * chrFilterSize - number of input lines combined per output sample
 * chrUSrc       - chrFilterSize pointers to U input lines
 * chrVSrc       - chrFilterSize pointers to V input lines
 * dest          - output line, 2 * chrDstW bytes
 * chrDstW       - number of chroma sample pairs to write
 */
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
/* NV12: U is stored first, then V */
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
/* V reads the dither table three entries ahead of U */
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
/*
 * Same computation with the byte order swapped: V first, then U.
 * NOTE(review): presumably the "else" arm of the test above; the line
 * carrying the "else" is not visible in this view.
 */
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
/*
 * output_pixel(pos, val) for the gray16 writers: branches on
 * target == PIX_FMT_GRAY16BE.
 * NOTE(review): both branch bodies are on lines missing from this view;
 * presumably a big-endian store for GRAY16BE and a little-endian store
 * otherwise -- confirm.
 */
370 #define output_pixel(pos, val) \
371 if (target == PIX_FMT_GRAY16BE) { \
/*
 * Vertically filter luma into one line of 16-bit gray output, two pixels
 * per loop iteration (an odd trailing pixel is not written by this loop).
 *
 * lumFilter / lumSrc / lumFilterSize - luma coefficients and input lines
 * dest   - output line of 16-bit elements
 * dstW   - output width in pixels
 * target - output pixel format, consumed by output_pixel()
 * c, chrFilter, chrUSrc, chrVSrc, chrFilterSize, alpSrc and y are not
 * referenced on the lines visible here.
 * NOTE(review): the initial values of Y1/Y2 and any scaling applied after
 * the accumulation are on lines missing from this view.
 */
377 static av_always_inline void
378 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
379 const int32_t **lumSrc, int lumFilterSize,
380 const int16_t *chrFilter, const int32_t **chrUSrc,
381 const int32_t **chrVSrc, int chrFilterSize,
382 const int32_t **alpSrc, uint16_t *dest, int dstW,
383 int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
392 for (j = 0; j < lumFilterSize; j++) {
393 Y1 += lumSrc[j][i * 2] * lumFilter[j];
394 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* clip only when bit 16 is set in either value, keeping the common path cheap */
398 if ((Y1 | Y2) & 0x10000) {
399 Y1 = av_clip_uint16(Y1);
400 Y2 = av_clip_uint16(Y2);
402 output_pixel(&dest[i * 2 + 0], Y1);
403 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * Blend two luma lines into one line of 16-bit gray output, two pixels
 * per loop iteration (an odd trailing pixel is not written by this loop).
 *
 * buf    - the two luma input lines
 * dest   - output line of 16-bit elements
 * dstW   - output width in pixels
 * yalpha - weight of buf[1]; buf[0] is weighted by 4095 - yalpha
 * target - output pixel format, consumed by output_pixel()
 * c, ubuf, vbuf, abuf, uvalpha and y are not referenced on the lines
 * visible here.
 */
407 static av_always_inline void
408 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
409 const int32_t *ubuf[2], const int32_t *vbuf[2],
410 const int32_t *abuf[2], uint16_t *dest, int dstW,
411 int yalpha, int uvalpha, int y,
412 enum PixelFormat target)
414 int yalpha1 = 4095 - yalpha;
416 const int32_t *buf0 = buf[0], *buf1 = buf[1];
418 for (i = 0; i < (dstW >> 1); i++) {
/* weighted sum of the two lines, scaled down by 15 bits */
419 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
420 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
422 output_pixel(&dest[i * 2 + 0], Y1);
423 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * Convert a single luma line to 16-bit gray output, two pixels per loop
 * iteration (an odd trailing pixel is not written by this loop).
 *
 * buf0   - luma input line
 * dest   - output line of 16-bit elements
 * dstW   - output width in pixels
 * target - output pixel format, consumed by output_pixel()
 * c, ubuf, vbuf, abuf0, uvalpha and y are not referenced on the lines
 * visible here.
 */
427 static av_always_inline void
428 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
429 const int32_t *ubuf[2], const int32_t *vbuf[2],
430 const int32_t *abuf0, uint16_t *dest, int dstW,
431 int uvalpha, int y, enum PixelFormat target)
435 for (i = 0; i < (dstW >> 1); i++) {
/* each sample is doubled before being stored */
436 int Y1 = buf0[i * 2 ] << 1;
437 int Y2 = buf0[i * 2 + 1] << 1;
439 output_pixel(&dest[i * 2 + 0], Y1);
440 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * YUV2PACKED16WRAPPER(name, base, ext, fmt) defines three static entry
 * points, <name><ext>_X_c(), <name><ext>_2_c() and <name><ext>_1_c(),
 * whose signatures use int16_t input pointers and a uint8_t destination.
 * Each reinterprets the inputs as int32_t pointers and the destination as
 * uint16_t *, then forwards to <name><base>_X_c_template(),
 * <name><base>_2_c_template() or <name><base>_1_c_template() with fmt as
 * the trailing target argument.
 * NOTE(review): the _X_c wrapper passes "y" to its template, but the
 * parameter line declaring it is not among the lines visible here.
 * The two instantiations below generate the yuv2gray16LE_* and
 * yuv2gray16BE_* functions (empty "base").
 */
446 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
447 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
448 const int16_t **_lumSrc, int lumFilterSize, \
449 const int16_t *chrFilter, const int16_t **_chrUSrc, \
450 const int16_t **_chrVSrc, int chrFilterSize, \
451 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
454 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
455 **chrUSrc = (const int32_t **) _chrUSrc, \
456 **chrVSrc = (const int32_t **) _chrVSrc, \
457 **alpSrc = (const int32_t **) _alpSrc; \
458 uint16_t *dest = (uint16_t *) _dest; \
459 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
460 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
461 alpSrc, dest, dstW, y, fmt); \
464 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
465 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
466 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
467 int yalpha, int uvalpha, int y) \
469 const int32_t **buf = (const int32_t **) _buf, \
470 **ubuf = (const int32_t **) _ubuf, \
471 **vbuf = (const int32_t **) _vbuf, \
472 **abuf = (const int32_t **) _abuf; \
473 uint16_t *dest = (uint16_t *) _dest; \
474 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
475 dest, dstW, yalpha, uvalpha, y, fmt); \
478 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
479 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
480 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
481 int uvalpha, int y) \
483 const int32_t *buf0 = (const int32_t *) _buf0, \
484 **ubuf = (const int32_t **) _ubuf, \
485 **vbuf = (const int32_t **) _vbuf, \
486 *abuf0 = (const int32_t *) _abuf0; \
487 uint16_t *dest = (uint16_t *) _dest; \
488 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
489 dstW, uvalpha, y, fmt); \
492 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
493 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/*
 * output_pixel(pos, acc) for the 1 bit-per-pixel writers: branches on
 * target == PIX_FMT_MONOBLACK.
 * NOTE(review): both branch bodies are on lines missing from this view;
 * presumably one stores acc as is and the other stores it inverted --
 * confirm.
 */
495 #define output_pixel(pos, acc) \
496 if (target == PIX_FMT_MONOBLACK) { \
/*
 * Vertically filter luma and emit dithered 1 bit-per-pixel output.
 *
 * c      - context; table_gU[128] and table_gV[128] are combined into the
 *          lookup table "g"
 * lumFilter / lumSrc / lumFilterSize - luma coefficients and input lines
 * dest   - output bytes, advanced by one on each output_pixel() call
 * dstW   - output width in pixels
 * y      - output line number; (y & 7) selects the dither row
 * target - output pixel format, consumed by output_pixel()
 * chrFilter, chrUSrc, chrVSrc, chrFilterSize and alpSrc are not
 * referenced on the lines visible here.
 * NOTE(review): the initial values of Y1/Y2/acc, the scaling applied after
 * the accumulation, and the condition guarding the output_pixel() call are
 * on lines missing from this view.
 */
502 static av_always_inline void
503 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
504 const int16_t **lumSrc, int lumFilterSize,
505 const int16_t *chrFilter, const int16_t **chrUSrc,
506 const int16_t **chrVSrc, int chrFilterSize,
507 const int16_t **alpSrc, uint8_t *dest, int dstW,
508 int y, enum PixelFormat target)
510 const uint8_t * const d128=dither_8x8_220[y&7];
511 uint8_t *g = c->table_gU[128] + c->table_gV[128];
/* two pixels per iteration; an odd trailing pixel is not visited */
515 for (i = 0; i < dstW - 1; i += 2) {
520 for (j = 0; j < lumFilterSize; j++) {
521 Y1 += lumSrc[j][i] * lumFilter[j];
522 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* clip only when bit 8 is set in either value */
526 if ((Y1 | Y2) & 0x100) {
527 Y1 = av_clip_uint8(Y1);
528 Y2 = av_clip_uint8(Y2);
/* acc = 2 * acc + g[...]: earlier lookups move up one bit per pixel */
530 acc += acc + g[Y1 + d128[(i + 0) & 7]];
531 acc += acc + g[Y2 + d128[(i + 1) & 7]];
533 output_pixel(*dest++, acc);
/*
 * Blend two luma lines and emit dithered 1 bit-per-pixel output, eight
 * pixels (one output_pixel() call) per loop iteration. Up to seven
 * trailing pixels that do not fill a group of eight are not visited.
 *
 * c      - context; table_gU[128] and table_gV[128] are combined into the
 *          lookup table "g"
 * buf    - the two luma input lines
 * dest   - output bytes, advanced by one per iteration
 * dstW   - output width in pixels
 * yalpha - weight of buf[1]; buf[0] is weighted by 4095 - yalpha
 * y      - output line number; (y & 7) selects the dither row
 * target - output pixel format, consumed by output_pixel()
 * ubuf, vbuf, abuf and uvalpha are not referenced on the lines visible
 * here.
 */
538 static av_always_inline void
539 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
540 const int16_t *ubuf[2], const int16_t *vbuf[2],
541 const int16_t *abuf[2], uint8_t *dest, int dstW,
542 int yalpha, int uvalpha, int y,
543 enum PixelFormat target)
545 const int16_t *buf0 = buf[0], *buf1 = buf[1];
546 const uint8_t * const d128 = dither_8x8_220[y & 7];
547 uint8_t *g = c->table_gU[128] + c->table_gV[128];
548 int yalpha1 = 4095 - yalpha;
551 for (i = 0; i < dstW - 7; i += 8) {
/*
 * Each step blends one pixel (>> 19), adds the dither value of its column
 * and looks the sum up in g; "acc += acc + x" doubles acc and adds x, so
 * the first pixel of the group ends up in the highest position.
 */
552 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
553 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
554 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
555 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
556 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
557 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
558 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
559 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
560 output_pixel(*dest++, acc);
/*
 * Convert a single luma line to dithered 1 bit-per-pixel output, eight
 * pixels (one output_pixel() call) per loop iteration. Up to seven
 * trailing pixels that do not fill a group of eight are not visited.
 *
 * c      - context; table_gU[128] and table_gV[128] are combined into the
 *          lookup table "g"
 * buf0   - luma input line
 * dest   - output bytes, advanced by one per iteration
 * dstW   - output width in pixels
 * y      - output line number; (y & 7) selects the dither row
 * target - output pixel format, consumed by output_pixel()
 * ubuf, vbuf, abuf0 and uvalpha are not referenced on the lines visible
 * here.
 */
564 static av_always_inline void
565 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
566 const int16_t *ubuf[2], const int16_t *vbuf[2],
567 const int16_t *abuf0, uint8_t *dest, int dstW,
568 int uvalpha, int y, enum PixelFormat target)
570 const uint8_t * const d128 = dither_8x8_220[y & 7];
571 uint8_t *g = c->table_gU[128] + c->table_gV[128];
574 for (i = 0; i < dstW - 7; i += 8) {
/*
 * Each step scales one sample down by 7 bits, adds the dither value of its
 * column and looks the sum up in g; "acc += acc + x" doubles acc and adds
 * x, so the first pixel of the group ends up in the highest position.
 */
575 int acc = g[(buf0[i ] >> 7) + d128[0]];
576 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
577 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
578 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
579 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
580 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
581 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
582 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
583 output_pixel(*dest++, acc);
/*
 * YUV2PACKEDWRAPPER(name, base, ext, fmt) defines three static entry
 * points, <name><ext>_X_c(), <name><ext>_2_c() and <name><ext>_1_c(),
 * that forward their arguments unchanged to <name><base>_X_c_template(),
 * <name><base>_2_c_template() and <name><base>_1_c_template(), with fmt
 * appended as the target argument of the _X and _2 templates.
 * NOTE(review): the _X_c wrapper passes "y" to its template, but the
 * parameter line declaring it is not among the lines visible here; the
 * tail of the _1_c forwarding call is likewise missing.
 * The two instantiations below generate the yuv2monowhite_* and
 * yuv2monoblack_* functions (empty "base").
 */
589 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
590 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
591 const int16_t **lumSrc, int lumFilterSize, \
592 const int16_t *chrFilter, const int16_t **chrUSrc, \
593 const int16_t **chrVSrc, int chrFilterSize, \
594 const int16_t **alpSrc, uint8_t *dest, int dstW, \
597 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
598 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
599 alpSrc, dest, dstW, y, fmt); \
602 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
603 const int16_t *ubuf[2], const int16_t *vbuf[2], \
604 const int16_t *abuf[2], uint8_t *dest, int dstW, \
605 int yalpha, int uvalpha, int y) \
607 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
608 dest, dstW, yalpha, uvalpha, y, fmt); \
611 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
612 const int16_t *ubuf[2], const int16_t *vbuf[2], \
613 const int16_t *abuf0, uint8_t *dest, int dstW, \
614 int uvalpha, int y) \
616 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
617 abuf0, dest, dstW, uvalpha, \
621 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
622 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/*
 * output_pixels(pos, Y1, U, Y2, V) for packed 4:2:2 output.
 * When target is PIX_FMT_YUYV422 the two luma samples go to byte offsets
 * pos + 0 and pos + 2; otherwise they go to pos + 1 and pos + 3.
 * NOTE(review): the stores of U and V are on lines missing from this
 * view; presumably they fill the two remaining offsets of each group of
 * four bytes.
 */
624 #define output_pixels(pos, Y1, U, Y2, V) \
625 if (target == PIX_FMT_YUYV422) { \
626 dest[pos + 0] = Y1; \
628 dest[pos + 2] = Y2; \
632 dest[pos + 1] = Y1; \
634 dest[pos + 3] = Y2; \
/*
 * Vertically filter luma and chroma into one line of packed 4:2:2 output,
 * two pixels (four bytes) per loop iteration; an odd trailing pixel is
 * not written by this loop.
 *
 * lumFilter / lumSrc / lumFilterSize - luma coefficients and input lines
 * chrFilter / chrUSrc / chrVSrc / chrFilterSize - chroma coefficients and
 *          input lines; one chroma sample pair per two luma samples
 * dest   - output line, four bytes per pixel pair
 * dstW   - output width in pixels
 * target - output pixel format, consumed by output_pixels()
 * c, alpSrc and y are not referenced on the lines visible here.
 * NOTE(review): the initial values of Y1/Y2/U/V and the scaling applied
 * after the accumulation are on lines missing from this view.
 */
637 static av_always_inline void
638 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
639 const int16_t **lumSrc, int lumFilterSize,
640 const int16_t *chrFilter, const int16_t **chrUSrc,
641 const int16_t **chrVSrc, int chrFilterSize,
642 const int16_t **alpSrc, uint8_t *dest, int dstW,
643 int y, enum PixelFormat target)
647 for (i = 0; i < (dstW >> 1); i++) {
654 for (j = 0; j < lumFilterSize; j++) {
655 Y1 += lumSrc[j][i * 2] * lumFilter[j];
656 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
658 for (j = 0; j < chrFilterSize; j++) {
659 U += chrUSrc[j][i] * chrFilter[j];
660 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when bit 8 is set in any of the four values */
666 if ((Y1 | Y2 | U | V) & 0x100) {
667 Y1 = av_clip_uint8(Y1);
668 Y2 = av_clip_uint8(Y2);
669 U = av_clip_uint8(U);
670 V = av_clip_uint8(V);
672 output_pixels(4*i, Y1, U, Y2, V);
/*
 * Blend two luma lines and two chroma lines into one line of packed 4:2:2
 * output, two pixels (four bytes) per loop iteration; an odd trailing
 * pixel is not written by this loop.
 *
 * buf / ubuf / vbuf - the two input lines for luma, U and V
 * dest    - output line, four bytes per pixel pair
 * dstW    - output width in pixels
 * yalpha  - weight of buf[1]; buf[0] is weighted by 4095 - yalpha
 * uvalpha - weight of ubuf[1]/vbuf[1]; line 0 is weighted by
 *           4095 - uvalpha
 * target  - output pixel format, consumed by output_pixels()
 * c, abuf and y are not referenced on the lines visible here. The
 * blended values are passed to output_pixels() without clipping.
 */
676 static av_always_inline void
677 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
678 const int16_t *ubuf[2], const int16_t *vbuf[2],
679 const int16_t *abuf[2], uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
683 const int16_t *buf0 = buf[0], *buf1 = buf[1],
684 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
685 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
686 int yalpha1 = 4095 - yalpha;
687 int uvalpha1 = 4095 - uvalpha;
690 for (i = 0; i < (dstW >> 1); i++) {
/* weighted sums of the two lines, scaled down by 19 bits */
691 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
692 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
693 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
694 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
696 output_pixels(i * 4, Y1, U, Y2, V);
/*
 * Convert a single luma line plus chroma to packed 4:2:2 output, two
 * pixels (four bytes) per loop iteration; an odd trailing pixel is not
 * written by this loop.
 *
 * buf0    - luma input line
 * ubuf / vbuf - the two chroma input lines for U and V
 * dest    - output line, four bytes per pixel pair
 * dstW    - output width in pixels
 * uvalpha - below 2048 a single chroma line is used, otherwise the two
 *           chroma lines are averaged
 * target  - output pixel format, consumed by output_pixels()
 * c, abuf0 and y are not referenced on the lines visible here.
 */
700 static av_always_inline void
701 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
702 const int16_t *ubuf[2], const int16_t *vbuf[2],
703 const int16_t *abuf0, uint8_t *dest, int dstW,
704 int uvalpha, int y, enum PixelFormat target)
706 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
707 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
710 if (uvalpha < 2048) {
711 for (i = 0; i < (dstW >> 1); i++) {
712 int Y1 = buf0[i * 2] >> 7;
713 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): this branch reads chroma line 1 (ubuf1/vbuf1), not line 0 -- confirm intended */
714 int U = ubuf1[i] >> 7;
715 int V = vbuf1[i] >> 7;
717 output_pixels(i * 4, Y1, U, Y2, V);
/*
 * Averaging variant: the two chroma lines are summed and shifted by 8.
 * NOTE(review): presumably the "else" arm of the uvalpha test; the line
 * carrying the "else" is not visible in this view.
 */
720 for (i = 0; i < (dstW >> 1); i++) {
721 int Y1 = buf0[i * 2] >> 7;
722 int Y2 = buf0[i * 2 + 1] >> 7;
723 int U = (ubuf0[i] + ubuf1[i]) >> 8;
724 int V = (vbuf0[i] + vbuf1[i]) >> 8;
726 output_pixels(i * 4, Y1, U, Y2, V);
/*
 * Instantiate the packed 4:2:2 entry points from the yuv2422_*_c_template
 * functions: yuv2yuyv422_{X,2,1}_c for PIX_FMT_YUYV422 and
 * yuv2uyvy422_{X,2,1}_c for PIX_FMT_UYVY422.
 */
733 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
734 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/*
 * Helpers for the 48-bit RGB writers.
 * R_B evaluates to the caller's local R when target is PIX_FMT_RGB48LE or
 * PIX_FMT_RGB48BE and to B otherwise; B_R is the opposite selection. This
 * lets one template emit both component orders.
 * output_pixel(pos, val) branches on isBE(target).
 * NOTE(review): the bodies of both output_pixel branches are on lines
 * missing from this view; presumably a big-endian and a little-endian
 * 16-bit store -- confirm.
 */
736 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
737 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
738 #define output_pixel(pos, val) \
739 if (isBE(target)) { \
/*
 * Vertically filter luma and chroma and convert to 48-bit RGB/BGR, two
 * pixels (six 16-bit components) per loop iteration; an odd trailing
 * pixel is not written by this loop.
 *
 * c      - context; yuv2rgb_y_offset, yuv2rgb_y_coeff and the
 *          yuv2rgb_{v2r,v2g,u2g,u2b}_coeff fields are read from it
 * lumFilter / lumSrc / lumFilterSize - luma coefficients and input lines
 * chrFilter / chrUSrc / chrVSrc / chrFilterSize - chroma coefficients and
 *          input lines; one chroma sample pair per two luma samples
 * dest   - output line of 16-bit components
 * dstW   - output width in pixels
 * target - output pixel format; selects component order (R_B/B_R) and
 *          byte order (output_pixel)
 * alpSrc and y are not referenced on the lines visible here.
 * NOTE(review): the initial values of Y1/Y2/V, the down-shifts between the
 * accumulation and the matrix step, and the advance of dest after each
 * pair are on lines missing from this view.
 */
745 static av_always_inline void
746 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
747 const int32_t **lumSrc, int lumFilterSize,
748 const int16_t *chrFilter, const int32_t **chrUSrc,
749 const int32_t **chrVSrc, int chrFilterSize,
750 const int32_t **alpSrc, uint16_t *dest, int dstW,
751 int y, enum PixelFormat target)
755 for (i = 0; i < (dstW >> 1); i++) {
/* NOTE(review): left-shifting a negative value is undefined behaviour in ISO C */
759 int U = -128 << 23; // 19
763 for (j = 0; j < lumFilterSize; j++) {
764 Y1 += lumSrc[j][i * 2] * lumFilter[j];
765 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
767 for (j = 0; j < chrFilterSize; j++) {
768 U += chrUSrc[j][i] * chrFilter[j];
769 V += chrVSrc[j][i] * chrFilter[j];
772 // 8bit: 12+15=27; 16-bit: 12+19=31
778 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
779 Y1 -= c->yuv2rgb_y_offset;
780 Y2 -= c->yuv2rgb_y_offset;
781 Y1 *= c->yuv2rgb_y_coeff;
782 Y2 *= c->yuv2rgb_y_coeff;
785 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
/* chroma contributions; they are shared by both pixels of the pair */
787 R = V * c->yuv2rgb_v2r_coeff;
788 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
789 B = U * c->yuv2rgb_u2b_coeff;
791 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
/* each component is clipped to 30 bits, then reduced to 16 bits */
792 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
793 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
794 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
795 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
796 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
797 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Blend two luma lines and two chroma lines and convert to 48-bit
 * RGB/BGR, two pixels (six 16-bit components) per loop iteration; an odd
 * trailing pixel is not written by this loop.
 *
 * c       - context; yuv2rgb_y_offset, yuv2rgb_y_coeff and the
 *           yuv2rgb_{v2r,v2g,u2g,u2b}_coeff fields are read from it
 * buf / ubuf / vbuf - the two input lines for luma, U and V
 * dest    - output line of 16-bit components
 * dstW    - output width in pixels
 * yalpha  - weight of buf[1]; buf[0] is weighted by 4095 - yalpha
 * uvalpha - weight of ubuf[1]/vbuf[1]; line 0 is weighted by
 *           4095 - uvalpha
 * target  - output pixel format; selects component order and byte order
 * abuf and y are not referenced on the lines visible here.
 * NOTE(review): the declarations of R/G/B and the advance of dest after
 * each pair are on lines missing from this view.
 */
802 static av_always_inline void
803 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
804 const int32_t *ubuf[2], const int32_t *vbuf[2],
805 const int32_t *abuf[2], uint16_t *dest, int dstW,
806 int yalpha, int uvalpha, int y,
807 enum PixelFormat target)
809 const int32_t *buf0 = buf[0], *buf1 = buf[1],
810 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
811 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 int yalpha1 = 4095 - yalpha;
813 int uvalpha1 = 4095 - uvalpha;
816 for (i = 0; i < (dstW >> 1); i++) {
817 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
818 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
/*
 * The chroma bias is added before the shift.
 * NOTE(review): left-shifting a negative value is undefined behaviour in
 * ISO C.
 */
819 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
820 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
823 Y1 -= c->yuv2rgb_y_offset;
824 Y2 -= c->yuv2rgb_y_offset;
825 Y1 *= c->yuv2rgb_y_coeff;
826 Y2 *= c->yuv2rgb_y_coeff;
/* chroma contributions; they are shared by both pixels of the pair */
830 R = V * c->yuv2rgb_v2r_coeff;
831 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
832 B = U * c->yuv2rgb_u2b_coeff;
/* each component is clipped to 30 bits, then reduced to 16 bits */
834 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
835 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
836 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
837 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
838 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
839 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Convert a single luma line plus chroma to 48-bit RGB/BGR, two pixels
 * (six 16-bit components) per loop iteration; an odd trailing pixel is
 * not written by this loop.
 *
 * c       - context; yuv2rgb_y_offset, yuv2rgb_y_coeff and the
 *           yuv2rgb_{v2r,v2g,u2g,u2b}_coeff fields are read from it
 * buf0    - luma input line
 * ubuf / vbuf - the two chroma input lines for U and V
 * dest    - output line of 16-bit components
 * dstW    - output width in pixels
 * uvalpha - below 2048 only chroma line 0 is used, otherwise the two
 *           chroma lines are averaged
 * target  - output pixel format; selects component order and byte order
 * abuf0 and y are not referenced on the lines visible here.
 * NOTE(review): the declarations of R/G/B and the advance of dest after
 * each pair are on lines missing from this view.
 */
844 static av_always_inline void
845 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
846 const int32_t *ubuf[2], const int32_t *vbuf[2],
847 const int32_t *abuf0, uint16_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
850 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = (buf0[i * 2] ) >> 2;
857 int Y2 = (buf0[i * 2 + 1]) >> 2;
/* NOTE(review): left-shifting a negative value is undefined behaviour in ISO C */
858 int U = (ubuf0[i] + (-128 << 11)) >> 2;
859 int V = (vbuf0[i] + (-128 << 11)) >> 2;
862 Y1 -= c->yuv2rgb_y_offset;
863 Y2 -= c->yuv2rgb_y_offset;
864 Y1 *= c->yuv2rgb_y_coeff;
865 Y2 *= c->yuv2rgb_y_coeff;
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
/* each component is clipped to 30 bits, then reduced to 16 bits */
873 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
874 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
875 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
876 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
877 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
878 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Averaging variant: the two chroma lines are summed and shifted by 3
 * instead of 2.
 * NOTE(review): presumably the "else" arm of the uvalpha test; the line
 * carrying the "else" is not visible in this view.
 */
882 for (i = 0; i < (dstW >> 1); i++) {
883 int Y1 = (buf0[i * 2] ) >> 2;
884 int Y2 = (buf0[i * 2 + 1]) >> 2;
/*
 * NOTE(review): two samples are summed here, yet the bias is still
 * -128 << 11 as in the single-line branch; after ">> 3" the bias is half
 * of what that branch applies. Confirm whether -128 << 12 was intended.
 */
885 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
886 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
889 Y1 -= c->yuv2rgb_y_offset;
890 Y2 -= c->yuv2rgb_y_offset;
891 Y1 *= c->yuv2rgb_y_coeff;
892 Y2 *= c->yuv2rgb_y_coeff;
896 R = V * c->yuv2rgb_v2r_coeff;
897 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
898 B = U * c->yuv2rgb_u2b_coeff;
900 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
901 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
902 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
903 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
904 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
905 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Instantiate the 48-bit entry points from the yuv2rgb48_*_c_template
 * functions: yuv2rgb48be_*, yuv2rgb48le_*, yuv2bgr48be_* and
 * yuv2bgr48le_*, each in _X_c, _2_c and _1_c form. All four share the same
 * templates; the pixel format argument selects component and byte order.
 */
915 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
916 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
917 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
918 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/*
 * Write one pair of horizontally adjacent pixels to a packed RGB line by
 * summing three per-component table lookups.
 *
 * _dest      - start of the output line
 * i          - index of the pixel pair within the line
 * Y1, Y2     - luma of the two pixels; used as table indices
 * U, V       - not referenced on the lines visible here (the chroma
 *              dependence is carried by the table pointers)
 * A1, A2     - alpha of the two pixels, used by the 32-bit formats
 * _r, _g, _b - lookup tables for the three components; reinterpreted as
 *              uint32_t, uint16_t or uint8_t entries depending on target
 * y          - output line number; selects the dither row
 * target     - output pixel format
 * hasAlpha   - whether A1/A2 are merged into 32-bit output
 */
920 static av_always_inline void
921 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
922 int U, int V, int A1, int A2,
923 const void *_r, const void *_g, const void *_b, int y,
924 enum PixelFormat target, int hasAlpha)
/* 32 bits per pixel: one uint32_t per pixel, tables hold uint32_t entries */
926 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
927 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
928 uint32_t *dest = (uint32_t *) _dest;
929 const uint32_t *r = (const uint32_t *) _r;
930 const uint32_t *g = (const uint32_t *) _g;
931 const uint32_t *b = (const uint32_t *) _b;
/*
 * NOTE(review): three alternative store sequences follow. The
 * preprocessor/if lines that choose between them are missing from this
 * view; presumably the first is a compact build variant and the other two
 * are the with-alpha and without-alpha arms of a runtime test.
 * In both variants alpha is shifted by 0 for the *32_1 formats and by 24
 * otherwise.
 */
934 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
936 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
937 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
940 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
942 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
943 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
945 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
946 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24 bits per pixel: three bytes per pixel, six per pair */
949 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
950 uint8_t *dest = (uint8_t *) _dest;
951 const uint8_t *r = (const uint8_t *) _r;
952 const uint8_t *g = (const uint8_t *) _g;
953 const uint8_t *b = (const uint8_t *) _b;
/* r_b / b_r pick the table for the first / third byte from the component order */
955 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
956 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
957 dest[i * 6 + 0] = r_b[Y1];
958 dest[i * 6 + 1] = g[Y1];
959 dest[i * 6 + 2] = b_r[Y1];
960 dest[i * 6 + 3] = r_b[Y2];
961 dest[i * 6 + 4] = g[Y2];
962 dest[i * 6 + 5] = b_r[Y2];
/* 16 bits per pixel: per-component dither offsets are added to the luma index */
965 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
966 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
967 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
968 uint16_t *dest = (uint16_t *) _dest;
969 const uint16_t *r = (const uint16_t *) _r;
970 const uint16_t *g = (const uint16_t *) _g;
971 const uint16_t *b = (const uint16_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
/* 565: green uses dither_2x2_4; blue reads the opposite dither row */
974 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
975 dr1 = dither_2x2_8[ y & 1 ][0];
976 dg1 = dither_2x2_4[ y & 1 ][0];
977 db1 = dither_2x2_8[(y & 1) ^ 1][0];
978 dr2 = dither_2x2_8[ y & 1 ][1];
979 dg2 = dither_2x2_4[ y & 1 ][1];
980 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* 555: all components use dither_2x2_8; green swaps the two columns */
981 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
982 dr1 = dither_2x2_8[ y & 1 ][0];
983 dg1 = dither_2x2_8[ y & 1 ][1];
984 db1 = dither_2x2_8[(y & 1) ^ 1][0];
985 dr2 = dither_2x2_8[ y & 1 ][1];
986 dg2 = dither_2x2_8[ y & 1 ][0];
987 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/*
 * Remaining 16-bit formats use dither_4x4_16.
 * NOTE(review): presumably the final "else" arm (RGB444/BGR444); the line
 * carrying the "else" is not visible in this view.
 */
989 dr1 = dither_4x4_16[ y & 3 ][0];
990 dg1 = dither_4x4_16[ y & 3 ][1];
991 db1 = dither_4x4_16[(y & 3) ^ 3][0];
992 dr2 = dither_4x4_16[ y & 3 ][1];
993 dg2 = dither_4x4_16[ y & 3 ][0];
994 db2 = dither_4x4_16[(y & 3) ^ 3][1];
997 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
998 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
999 } else /* 8/4-bit */ {
1000 uint8_t *dest = (uint8_t *) _dest;
1001 const uint8_t *r = (const uint8_t *) _r;
1002 const uint8_t *g = (const uint8_t *) _g;
1003 const uint8_t *b = (const uint8_t *) _b;
1004 int dr1, dg1, db1, dr2, dg2, db2;
/* RGB8/BGR8: red and green share dither_8x8_32, blue uses dither_8x8_73 */
1006 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1007 const uint8_t * const d64 = dither_8x8_73[y & 7];
1008 const uint8_t * const d32 = dither_8x8_32[y & 7];
1009 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1010 db1 = d64[(i * 2 + 0) & 7];
1011 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1012 db2 = d64[(i * 2 + 1) & 7];
/*
 * Other formats of this branch: red and blue share dither_8x8_220, green
 * uses dither_8x8_73.
 * NOTE(review): presumably the "else" arm of the RGB8/BGR8 test; the line
 * carrying the "else" is not visible in this view.
 */
1014 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1015 const uint8_t * const d128 = dither_8x8_220[y & 7];
1016 dr1 = db1 = d128[(i * 2 + 0) & 7];
1017 dg1 = d64[(i * 2 + 0) & 7];
1018 dr2 = db2 = d128[(i * 2 + 1) & 7];
1019 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 pack both pixels into one byte, the second pixel in the high nibble */
1022 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1023 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1024 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
/* otherwise one byte per pixel */
1026 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1027 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Vertically filter luma, chroma and (optionally) alpha, then write packed
 * RGB through yuv2rgb_write(), two pixels per loop iteration; an odd
 * trailing pixel is not written by this loop.
 *
 * c        - context; table_gU[] and table_gV[] are read on the lines
 *            visible here
 * lumFilter / lumSrc / lumFilterSize - luma coefficients and input lines
 * chrFilter / chrUSrc / chrVSrc / chrFilterSize - chroma coefficients and
 *            input lines; one chroma sample pair per two luma samples
 * alpSrc   - alpha input lines, filtered with lumFilter
 * dest     - output line
 * dstW     - output width in pixels
 * y        - output line number, forwarded for dithering
 * target   - output pixel format
 * hasAlpha - when zero, 0 is passed as alpha to yuv2rgb_write()
 * NOTE(review): the initial values of Y1/Y2/U/V/A1/A2, the scaling applied
 * after each accumulation, the condition guarding the alpha loop and the
 * assignments of r and b are on lines missing from this view.
 */
1032 static av_always_inline void
1033 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1034 const int16_t **lumSrc, int lumFilterSize,
1035 const int16_t *chrFilter, const int16_t **chrUSrc,
1036 const int16_t **chrVSrc, int chrFilterSize,
1037 const int16_t **alpSrc, uint8_t *dest, int dstW,
1038 int y, enum PixelFormat target, int hasAlpha)
1042 for (i = 0; i < (dstW >> 1); i++) {
1048 int av_unused A1, A2;
1049 const void *r, *g, *b;
1051 for (j = 0; j < lumFilterSize; j++) {
1052 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1053 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1055 for (j = 0; j < chrFilterSize; j++) {
1056 U += chrUSrc[j][i] * chrFilter[j];
1057 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when bit 8 is set in any of the four values */
1063 if ((Y1 | Y2 | U | V) & 0x100) {
1064 Y1 = av_clip_uint8(Y1);
1065 Y2 = av_clip_uint8(Y2);
1066 U = av_clip_uint8(U);
1067 V = av_clip_uint8(V);
/* alpha is filtered with the luma coefficients */
1072 for (j = 0; j < lumFilterSize; j++) {
1073 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1074 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1078 if ((A1 | A2) & 0x100) {
1079 A1 = av_clip_uint8(A1);
1080 A2 = av_clip_uint8(A2);
1084 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* the green table is addressed by combining the U entry with the V entry */
1086 g = (c->table_gU[U] + c->table_gV[V]);
1089 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1090 r, g, b, y, target, hasAlpha);
/**
 * Blend two input lines per plane and hand the result to yuv2rgb_write(),
 * two horizontally adjacent pixels per iteration.
 *
 * @param c         context providing the table_rV/gU/gV/bU lookups
 * @param buf       the two luma lines to blend
 * @param ubuf      the two U lines to blend
 * @param vbuf      the two V lines to blend
 * @param abuf      the two alpha lines; only dereferenced when hasAlpha != 0
 * @param dest      output line, forwarded to yuv2rgb_write()
 * @param dstW      output width in pixels; dstW >> 1 pairs are written
 * @param yalpha    weight of buf[1] / abuf[1]; line 0 gets 4095 - yalpha
 * @param uvalpha   weight of ubuf[1] / vbuf[1]; line 0 gets 4095 - uvalpha
 * @param y         output line number, forwarded to yuv2rgb_write()
 * @param target    destination pixel format
 * @param hasAlpha  if 0, an alpha of 0 is passed to yuv2rgb_write()
 */
1094 static av_always_inline void
1095 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1096 const int16_t *ubuf[2], const int16_t *vbuf[2],
1097 const int16_t *abuf[2], uint8_t *dest, int dstW,
1098 int yalpha, int uvalpha, int y,
1099 enum PixelFormat target, int hasAlpha)
1101 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1102 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1103 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1104 *abuf0 = hasAlpha ? abuf[0] : NULL,
1105 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights for the first line of each pair */
1106 int yalpha1 = 4095 - yalpha;
1107 int uvalpha1 = 4095 - uvalpha;
1110 for (i = 0; i < (dstW >> 1); i++) {
/* weighted sum of the two lines, scaled back down by >> 19 */
1111 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1112 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1113 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1114 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
/* per-component lookup tables selected by the chroma values */
1116 const void *r = c->table_rV[V],
1117 *g = (c->table_gU[U] + c->table_gV[V]),
1118 *b = c->table_bU[U];
/* alpha is blended with the luma weights */
1121 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1122 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1125 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1126 r, g, b, y, target, hasAlpha);
/**
 * Convert a single luma line (plus chroma from ubuf/vbuf) and hand the result
 * to yuv2rgb_write(), two horizontally adjacent pixels per iteration.
 *
 * Two loops are present: the one guarded by uvalpha < 2048 takes chroma from
 * ubuf[1] / vbuf[1] alone, the other averages both chroma lines.
 * NOTE(review): the second loop is presumably the else branch of that test.
 *
 * @param c        context providing the table_rV/gU/gV/bU lookups
 * @param buf0     luma line; samples are reduced with >> 7
 * @param ubuf     two U lines
 * @param vbuf     two V lines
 * @param abuf0    alpha line; samples are reduced with >> 7
 * @param dest     output line, forwarded to yuv2rgb_write()
 * @param dstW     output width in pixels; dstW >> 1 pairs are written
 * @param uvalpha  selects how chroma is derived (threshold 2048)
 * @param y        output line number, forwarded to yuv2rgb_write()
 * @param target   destination pixel format
 */
1130 static av_always_inline void
1131 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1132 const int16_t *ubuf[2], const int16_t *vbuf[2],
1133 const int16_t *abuf0, uint8_t *dest, int dstW,
1134 int uvalpha, int y, enum PixelFormat target,
1137 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* single chroma line: only ubuf1 / vbuf1 are read in this loop */
1141 if (uvalpha < 2048) {
1142 for (i = 0; i < (dstW >> 1); i++) {
1143 int Y1 = buf0[i * 2] >> 7;
1144 int Y2 = buf0[i * 2 + 1] >> 7;
1145 int U = ubuf1[i] >> 7;
1146 int V = vbuf1[i] >> 7;
1148 const void *r = c->table_rV[V],
1149 *g = (c->table_gU[U] + c->table_gV[V]),
1150 *b = c->table_bU[U];
1153 A1 = abuf0[i * 2 ] >> 7;
1154 A2 = abuf0[i * 2 + 1] >> 7;
1157 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1158 r, g, b, y, target, hasAlpha);
/* two chroma lines: sum them and use >> 8 (one bit more than above) to average */
1161 for (i = 0; i < (dstW >> 1); i++) {
1162 int Y1 = buf0[i * 2] >> 7;
1163 int Y2 = buf0[i * 2 + 1] >> 7;
1164 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1165 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1167 const void *r = c->table_rV[V],
1168 *g = (c->table_gU[U] + c->table_gV[V]),
1169 *b = c->table_bU[U];
1172 A1 = abuf0[i * 2 ] >> 7;
1173 A2 = abuf0[i * 2 + 1] >> 7;
1176 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1177 r, g, b, y, target, hasAlpha);
/*
 * Define the static function <name><ext>_X_c, which forwards its arguments to
 * <name><base>_X_c_template with the fixed pixel format `fmt` and the
 * `hasAlpha` expression. `hasAlpha` is expanded inside the generated function,
 * so it may refer to the SwsContext parameter `c`.
 */
1182 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1183 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1184 const int16_t **lumSrc, int lumFilterSize, \
1185 const int16_t *chrFilter, const int16_t **chrUSrc, \
1186 const int16_t **chrVSrc, int chrFilterSize, \
1187 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1190 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1191 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1192 alpSrc, dest, dstW, y, fmt, hasAlpha); \
/*
 * Define the full set of writers for one output format: the _X_c function via
 * YUV2RGBWRAPPERX, plus <name><ext>_2_c and <name><ext>_1_c, which forward to
 * <name><base>_2_c_template and <name><base>_1_c_template with the fixed
 * pixel format `fmt` and the `hasAlpha` expression.
 */
1194 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1195 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1196 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1197 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1198 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1199 int yalpha, int uvalpha, int y) \
1201 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1202 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1205 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1206 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1207 const int16_t *abuf0, uint8_t *dest, int dstW, \
1208 int uvalpha, int y) \
1210 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1211 dstW, uvalpha, y, fmt, hasAlpha); \
/*
 * Instantiate the _X_c / _2_c / _1_c writers for the packed RGB outputs.
 * The plain "32" / "32_1" variants decide about alpha at run time
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf); the "a32" variants always pass
 * hasAlpha = 1 and the "x32" variants always pass hasAlpha = 0.
 * All remaining formats are instantiated without alpha.
 */
1215 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1216 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1218 #if CONFIG_SWSCALE_ALPHA
1219 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1220 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1222 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1223 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1225 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1226 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1227 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1228 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1229 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1230 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1231 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1232 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/**
 * Filter across multiple input lines and convert every output pixel to RGB
 * arithmetically, using the yuv2rgb_* coefficients stored in the context.
 *
 * Unlike the pairwise templates, the loop runs over all dstW pixels and reads
 * luma and chroma at the same index i, i.e. one chroma sample per pixel.
 *
 * @param c              context providing yuv2rgb_y_offset and the
 *                       yuv2rgb_*_coeff multipliers
 * @param lumFilter      one coefficient per luma (and alpha) input line
 * @param lumSrc         lumFilterSize luma input lines
 * @param lumFilterSize  number of entries in lumFilter / lumSrc
 * @param chrFilter      one coefficient per chroma input line
 * @param chrUSrc        chrFilterSize U input lines
 * @param chrVSrc        chrFilterSize V input lines
 * @param chrFilterSize  number of entries in chrFilter / chrUSrc / chrVSrc
 * @param alpSrc         alpha input lines, weighted with lumFilter
 * @param dest           output line
 * @param dstW           output width in pixels
 * @param y              output line number
 * @param target         destination pixel format; selects bytes per pixel
 * @param hasAlpha       if 0, the alpha byte is written as 255
 */
1234 static av_always_inline void
1235 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1236 const int16_t **lumSrc, int lumFilterSize,
1237 const int16_t *chrFilter, const int16_t **chrUSrc,
1238 const int16_t **chrVSrc, int chrFilterSize,
1239 const int16_t **alpSrc, uint8_t *dest,
1240 int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 3 bytes per pixel for the 24-bit formats, 4 for everything else */
1243 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1245 for (i = 0; i < dstW; i++) {
1253 for (j = 0; j < lumFilterSize; j++) {
1254 Y += lumSrc[j][i] * lumFilter[j];
1256 for (j = 0; j < chrFilterSize; j++) {
1257 U += chrUSrc[j][i] * chrFilter[j];
1258 V += chrVSrc[j][i] * chrFilter[j];
1265 for (j = 0; j < lumFilterSize; j++) {
1266 A += alpSrc[j][i] * lumFilter[j];
1270 A = av_clip_uint8(A);
/* remove the luma offset, then scale before mixing in chroma */
1272 Y -= c->yuv2rgb_y_offset;
1273 Y *= c->yuv2rgb_y_coeff;
1275 R = Y + V*c->yuv2rgb_v2r_coeff;
1276 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1277 B = Y + U*c->yuv2rgb_u2b_coeff;
/* either of the top two bits set: negative or beyond 30 bits, so clip */
1278 if ((R | G | B) & 0xC0000000) {
1279 R = av_clip_uintp2(R, 30);
1280 G = av_clip_uintp2(G, 30);
1281 B = av_clip_uintp2(B, 30);
/* alpha goes to dest[0] or dest[3]; opaque (255) when hasAlpha is 0 */
1286 dest[0] = hasAlpha ? A : 255;
1300 dest[3] = hasAlpha ? A : 255;
1303 dest[0] = hasAlpha ? A : 255;
1318 dest[3] = hasAlpha ? A : 255;
/*
 * Instantiate the _X_c writers built on yuv2rgb_full_X_c_template.
 * The first group decides about alpha at run time
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf); the group after
 * "#if CONFIG_SWSCALE_ALPHA" reuses the same function names with
 * hasAlpha = 1, so the two groups must sit in mutually exclusive preprocessor
 * branches. NOTE(review): the selecting directive is not visible here.
 * The *x32 variants and the 24-bit formats pass hasAlpha = 0.
 */
1326 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1327 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1328 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1329 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1331 #if CONFIG_SWSCALE_ALPHA
1332 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1333 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1334 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1335 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1337 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1338 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1339 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1340 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1342 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1343 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/**
 * Fill part of a plane with a constant byte: for each of `height` iterations,
 * `width` bytes at the current row pointer are set to `val`.
 *
 * @param plane   start of the plane
 * @param stride  distance in bytes between rows; used to locate row y
 * @param width   number of bytes written per row
 * @param height  number of rows written
 */
1345 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1346 int width, int height,
/* first row to fill */
1350 uint8_t *ptr = plane + stride*y;
1351 for (i=0; i<height; i++) {
1352 memset(ptr, val, width);
/*
 * Helpers for the 48-bit RGB readers below; all three expect a variable named
 * `origin` in scope.
 * input_pixel(pos) reads one 16-bit component as big or little endian,
 * depending on isBE(origin).
 * r and b map onto the local variables r_b / b_r so the same template body
 * serves both component orders: for the BGR48 formats r is b_r and b is r_b,
 * otherwise r is r_b and b is b_r.
 */
1357 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1359 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1360 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/**
 * Convert packed 48-bit RGB/BGR (three 16-bit components per pixel) to 16-bit
 * luma.
 *
 * @param dst     output luma, one uint16_t per pixel
 * @param src     input pixels, three uint16_t components each
 * @param width   number of pixels
 * @param origin  source pixel format; selects endianness and component order
 */
1362 static av_always_inline void
1363 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1364 enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 unsigned int r_b = input_pixel(&src[i*3+0]);
1369 unsigned int g = input_pixel(&src[i*3+1]);
1370 unsigned int b_r = input_pixel(&src[i*3+2]);
/* 0x2001 << (SHIFT - 1) folds a 0x1000 offset and the rounding half into one term */
1372 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * Convert packed 48-bit RGB/BGR to 16-bit chroma, one U/V pair per pixel.
 *
 * @param dstU    output U, one uint16_t per pixel
 * @param dstV    output V, one uint16_t per pixel
 * @param src1    input pixels, three uint16_t components each
 * @param src2    not read by the visible code
 * @param width   number of pixels
 * @param origin  source pixel format; selects endianness and component order
 */
1376 static av_always_inline void
1377 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1378 const uint16_t *src1, const uint16_t *src2,
1379 int width, enum PixelFormat origin)
1383 for (i = 0; i < width; i++) {
1384 int r_b = input_pixel(&src1[i*3+0]);
1385 int g = input_pixel(&src1[i*3+1]);
1386 int b_r = input_pixel(&src1[i*3+2]);
/* 0x10001 << (SHIFT - 1) folds a 0x8000 offset and the rounding half into one term */
1388 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1389 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * Convert packed 48-bit RGB/BGR to 16-bit chroma at half horizontal
 * resolution: each output sample is computed from the rounded average of two
 * adjacent input pixels.
 *
 * @param dstU    output U, one uint16_t per pixel pair
 * @param dstV    output V, one uint16_t per pixel pair
 * @param src1    input pixels; 6 uint16_t components are read per output
 * @param src2    not read by the visible code
 * @param width   number of output samples
 * @param origin  source pixel format; selects endianness and component order
 */
1393 static av_always_inline void
1394 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1395 const uint16_t *src1, const uint16_t *src2,
1396 int width, enum PixelFormat origin)
1400 for (i = 0; i < width; i++) {
/* (a + b + 1) >> 1: average of the same component in pixels 2i and 2i + 1 */
1401 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1402 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1403 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1405 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1406 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Define <pattern>48<BE_LE>ToY_c, ...ToUV_c and ...ToUV_half_c with byte
 * pointer signatures. Each casts its uint8_t pointers to uint16_t pointers and
 * calls the matching rgb48To*_c_template with the fixed format `origin`.
 */
1414 #define rgb48funcs(pattern, BE_LE, origin) \
1415 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1416 int width, uint32_t *unused) \
1418 const uint16_t *src = (const uint16_t *) _src; \
1419 uint16_t *dst = (uint16_t *) _dst; \
1420 rgb48ToY_c_template(dst, src, width, origin); \
1423 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1424 const uint8_t *_src1, const uint8_t *_src2, \
1425 int width, uint32_t *unused) \
1427 const uint16_t *src1 = (const uint16_t *) _src1, \
1428 *src2 = (const uint16_t *) _src2; \
1429 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1430 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1433 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1434 const uint8_t *_src1, const uint8_t *_src2, \
1435 int width, uint32_t *unused) \
1437 const uint16_t *src1 = (const uint16_t *) _src1, \
1438 *src2 = (const uint16_t *) _src2; \
1439 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1440 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* 48-bit readers for both component orders (rgb, bgr) and both endiannesses */
1443 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1444 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1445 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1446 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/*
 * Pixel fetch for the 16/32-bit packed RGB readers; expects `src` and
 * `origin` in scope. For the four 32-bit formats listed, pixel i is read as a
 * native-endian 32-bit word at byte offset 4 * i; otherwise it is read as a
 * 16-bit word at byte offset 2 * i, big or little endian per isBE(origin).
 */
1448 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1449 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1450 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/**
 * Convert packed 16- or 32-bit RGB to 8-bit luma.
 *
 * @param dst            output luma, one byte per pixel
 * @param src            packed input pixels
 * @param width          number of pixels
 * @param origin         source pixel format, used by input_pixel()
 * @param shr,shg,shb    right shift applied to each component after masking
 * @param shp            right shift applied to the whole pixel before masking
 * @param maskr,maskg,maskb  bit masks selecting each component
 * @param rsh,gsh,bsh    left shift applied to the R/G/B coefficients
 * @param S              final right shift of the weighted sum
 */
1452 static av_always_inline void
1453 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1454 int width, enum PixelFormat origin,
1455 int shr, int shg, int shb, int shp,
1456 int maskr, int maskg, int maskb,
1457 int rsh, int gsh, int bsh, int S)
/* pre-scale the coefficients once instead of shifting every component */
1459 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
/* 33 << (S - 1) folds an offset of 16 and the rounding half into one term */
1460 const unsigned rnd = 33u << (S - 1);
1463 for (i = 0; i < width; i++) {
1464 int px = input_pixel(i) >> shp;
1465 int b = (px & maskb) >> shb;
1466 int g = (px & maskg) >> shg;
1467 int r = (px & maskr) >> shr;
1469 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/**
 * Convert packed 16- or 32-bit RGB to 8-bit chroma, one U/V pair per pixel.
 *
 * @param dstU           output U, one byte per pixel
 * @param dstV           output V, one byte per pixel
 * @param src            packed input pixels
 * @param width          number of pixels
 * @param origin         source pixel format, used by input_pixel()
 * @param shr,shg,shb    right shift applied to each component after masking
 * @param shp            right shift applied to the whole pixel before masking
 * @param maskr,maskg,maskb  bit masks selecting each component
 * @param rsh,gsh,bsh    left shift applied to the R/G/B coefficients
 * @param S              final right shift of the weighted sum
 */
1473 static av_always_inline void
1474 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1475 const uint8_t *src, int width,
1476 enum PixelFormat origin,
1477 int shr, int shg, int shb, int shp,
1478 int maskr, int maskg, int maskb,
1479 int rsh, int gsh, int bsh, int S)
1481 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1482 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
/* 257 << (S - 1) folds an offset of 128 and the rounding half into one term */
1483 const unsigned rnd = 257u << (S - 1);
1486 for (i = 0; i < width; i++) {
1487 int px = input_pixel(i) >> shp;
1488 int b = (px & maskb) >> shb;
1489 int g = (px & maskg) >> shg;
1490 int r = (px & maskr) >> shr;
1492 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1493 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/**
 * Convert packed 16- or 32-bit RGB to 8-bit chroma at half horizontal
 * resolution: each output sample is computed from the sum of two adjacent
 * input pixels, and the final shift is one bit larger (S + 1) to compensate.
 *
 * The two pixels are added as whole words. To keep the components apart, the
 * bits outside maskr | maskb are summed separately (g) and subtracted from
 * the word sum, leaving the combined R and B sums in rb.
 *
 * @param dstU           output U, one byte per pixel pair
 * @param dstV           output V, one byte per pixel pair
 * @param src            packed input pixels; pixels 2i and 2i + 1 are read
 * @param width          number of output samples
 * @param origin         source pixel format, used by input_pixel()
 * @param shr,shg,shb    right shift applied to each component after masking
 * @param shp            right shift applied to the whole pixel before masking
 * @param maskr,maskg,maskb  bit masks selecting each component
 * @param rsh,gsh,bsh    left shift applied to the R/G/B coefficients
 * @param S              base right shift; the code shifts by S + 1
 */
1497 static av_always_inline void
1498 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1499 const uint8_t *src, int width,
1500 enum PixelFormat origin,
1501 int shr, int shg, int shb, int shp,
1502 int maskr, int maskg, int maskb,
1503 int rsh, int gsh, int bsh, int S)
1505 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1506 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1507 maskgx = ~(maskr | maskb);
/* 257 << S with the (S + 1) shift below: offset of 128 plus the rounding half */
1508 const unsigned rnd = 257u << S;
/* widen each mask by one bit so it can hold the sum of two components */
1511 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1512 for (i = 0; i < width; i++) {
1513 int px0 = input_pixel(2 * i + 0) >> shp;
1514 int px1 = input_pixel(2 * i + 1) >> shp;
1515 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1516 int rb = px0 + px1 - g;
1518 b = (rb & maskb) >> shb;
/* NOTE(review): the statement taken when this test is true is not visible
 * here; the masked extraction of g below is presumably its alternative */
1519 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1520 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1523 g = (g & maskg) >> shg;
1525 r = (rb & maskr) >> shr;
1527 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1528 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Define <name>ToY_c, <name>ToUV_c and <name>ToUV_half_c for one packed
 * 16/32-bit RGB format. Each forwards to the matching rgb16_32To*_c_template
 * with the format `fmt` and the given shift/mask constants; the second source
 * pointer (`dummy`) of the UV functions is not passed on.
 */
1534 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1535 maskg, maskb, rsh, gsh, bsh, S) \
1536 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1537 int width, uint32_t *unused) \
1539 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1540 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1543 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1544 const uint8_t *src, const uint8_t *dummy, \
1545 int width, uint32_t *unused) \
1547 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1548 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1551 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1552 const uint8_t *src, const uint8_t *dummy, \
1553 int width, uint32_t *unused) \
1555 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1556 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/*
 * Readers for the packed 32-, 16- and 15-bit RGB formats.
 * Argument order: fmt, name, shr, shg, shb, shp, maskr, maskg, maskb,
 * rsh, gsh, bsh, S.
 * The "_1" 32-bit variants differ from their base format only by shp = 8.
 * The BE and LE 16/15-bit variants use identical constants and differ only in
 * the format passed on, which input_pixel() uses to pick the byte order.
 * The 15-bit formats use RGB2YUV_SHIFT+7, all others RGB2YUV_SHIFT+8.
 */
1559 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1560 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1561 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1562 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1563 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1564 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1565 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1566 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1567 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1568 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1569 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1570 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/**
 * Per-pixel reader producing `width` output bytes in dst from src.
 * NOTE(review): the per-pixel store is not visible here; going by the name it
 * presumably copies the alpha byte of each ABGR pixel - confirm.
 */
1572 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1575 for (i=0; i<width; i++) {
/**
 * Per-pixel reader producing `width` output bytes in dst from src.
 * NOTE(review): the per-pixel store is not visible here; going by the name it
 * presumably copies the alpha byte of each RGBA pixel - confirm.
 */
1580 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1583 for (i=0; i<width; i++) {
/**
 * Palette-based luma reader: each output byte is the low 8 bits of a palette
 * entry.
 *
 * @param dst    output, one byte per pixel
 * @param src    input pixels (presumably palette indices - the line defining
 *               d is not visible here)
 * @param width  number of pixels
 * @param pal    palette of 32-bit entries
 */
1588 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1591 for (i=0; i<width; i++) {
/* the value to store sits in the least significant byte of the entry */
1594 dst[i]= pal[d] & 0xFF;
/**
 * Palette-based chroma reader: looks up the palette entry of every input
 * pixel. NOTE(review): the stores into dstU / dstV are not visible here.
 *
 * @param dstU   output U
 * @param dstV   output V
 * @param src1   input palette indices, one byte per pixel
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of pixels
 * @param pal    palette of 32-bit entries
 */
1598 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1599 const uint8_t *src1, const uint8_t *src2,
1600 int width, uint32_t *pal)
/* packed input has a single plane, so both source pointers must match */
1603 assert(src1 == src2);
1604 for (i=0; i<width; i++) {
1605 int p= pal[src1[i]];
/**
 * Expand 1-bit-per-pixel input to 8-bit luma: every bit becomes 0 or 255.
 * The visible loop handles width / 8 whole input bytes.
 * NOTE(review): the line computing d is not visible here; for this
 * "monowhite" variant it is presumably the inverted source byte - confirm.
 */
1612 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1613 int width, uint32_t *unused)
1616 for (i=0; i<width/8; i++) {
/* bit 7 of d is the leftmost pixel of the group of eight */
1619 dst[8*i+j]= ((d>>(7-j))&1)*255;
/**
 * Expand 1-bit-per-pixel input to 8-bit luma: every bit becomes 0 or 255.
 * The visible loop handles width / 8 whole input bytes.
 * NOTE(review): the line computing d is not visible here; presumably it is
 * the source byte as-is - confirm.
 */
1623 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1624 int width, uint32_t *unused)
1627 for (i=0; i<width/8; i++) {
/* bit 7 of d is the leftmost pixel of the group of eight */
1630 dst[8*i+j]= ((d>>(7-j))&1)*255;
1634 //FIXME yuy2* can read up to 7 samples too many
/**
 * Luma reader for packed YUY2 input, iterating over `width` pixels.
 * NOTE(review): the per-pixel store is not visible here; presumably it picks
 * every second byte of src - confirm.
 */
1636 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1640 for (i=0; i<width; i++)
/**
 * Chroma reader for packed YUY2 input: from every 4-byte group, byte 1 is
 * stored as U and byte 3 as V.
 *
 * @param dstU   output U, one byte per 4-byte input group
 * @param dstV   output V, one byte per 4-byte input group
 * @param src1   packed input
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of U/V samples to produce
 */
1644 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1645 const uint8_t *src2, int width, uint32_t *unused)
1648 for (i=0; i<width; i++) {
1649 dstU[i]= src1[4*i + 1];
1650 dstV[i]= src1[4*i + 3];
/* packed input has a single plane, so both source pointers must match */
1652 assert(src1 == src2);
/**
 * Copy `width` 16-bit samples from _src to _dst with their bytes swapped.
 * Both byte pointers are reinterpreted as uint16_t pointers.
 */
1655 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1658 const uint16_t *src = (const uint16_t *) _src;
1659 uint16_t *dst = (uint16_t *) _dst;
1660 for (i=0; i<width; i++) {
1661 dst[i] = av_bswap16(src[i]);
/**
 * Copy `width` 16-bit samples from each of two source planes to dstU / dstV
 * with their bytes swapped. All byte pointers are reinterpreted as uint16_t
 * pointers; _src1 feeds _dstU and _src2 feeds _dstV.
 */
1665 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1666 const uint8_t *_src2, int width, uint32_t *unused)
1669 const uint16_t *src1 = (const uint16_t *) _src1,
1670 *src2 = (const uint16_t *) _src2;
1671 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1672 for (i=0; i<width; i++) {
1673 dstU[i] = av_bswap16(src1[i]);
1674 dstV[i] = av_bswap16(src2[i]);
1678 /* This is almost identical to the previous, and exists only because
1679 * yuy2To(Y/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */
/**
 * Luma reader for packed UYVY input, iterating over `width` pixels.
 * NOTE(review): the per-pixel store is not visible here; presumably it picks
 * the odd bytes of src - confirm.
 */
1680 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1684 for (i=0; i<width; i++)
/**
 * Chroma reader for packed UYVY input: from every 4-byte group, byte 0 is
 * stored as U and byte 2 as V.
 *
 * @param dstU   output U, one byte per 4-byte input group
 * @param dstV   output V, one byte per 4-byte input group
 * @param src1   packed input
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of U/V samples to produce
 */
1688 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1689 const uint8_t *src2, int width, uint32_t *unused)
1692 for (i=0; i<width; i++) {
1693 dstU[i]= src1[4*i + 0];
1694 dstV[i]= src1[4*i + 2];
/* packed input has a single plane, so both source pointers must match */
1696 assert(src1 == src2);
/**
 * De-interleave a plane of byte pairs: even bytes go to dst1, odd bytes to
 * dst2.
 *
 * @param dst1   receives src[2*i]
 * @param dst2   receives src[2*i + 1]
 * @param src    interleaved input, 2 * width bytes are read
 * @param width  number of pairs
 */
1699 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1700 const uint8_t *src, int width)
1703 for (i = 0; i < width; i++) {
1704 dst1[i] = src[2*i+0];
1705 dst2[i] = src[2*i+1];
/**
 * Chroma reader for NV12: the first byte of each pair goes to dstU, the
 * second to dstV. Only src1 is read.
 */
1709 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1710 const uint8_t *src1, const uint8_t *src2,
1711 int width, uint32_t *unused)
1713 nvXXtoUV_c(dstU, dstV, src1, width);
/**
 * Chroma reader for NV21: the first byte of each pair goes to dstV, the
 * second to dstU (destinations swapped relative to nv12ToUV_c). Only src1 is
 * read.
 */
1716 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1717 const uint8_t *src1, const uint8_t *src2,
1718 int width, uint32_t *unused)
1720 nvXXtoUV_c(dstV, dstU, src1, width);
/* Read one 16-bit value at pos, big or little endian per isBE(origin);
 * expects a variable named `origin` in scope. */
1723 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/**
 * Convert packed 24-bit BGR to 8-bit luma, one output byte per pixel.
 * NOTE(review): the lines loading b, g and r are not visible here.
 */
1725 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1726 int width, uint32_t *unused)
1729 for (i=0; i<width; i++) {
/* 33 << (SHIFT - 1) folds an offset of 16 and the rounding half into one term */
1734 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/**
 * Convert packed 24-bit BGR to 8-bit chroma, one U/V pair per pixel.
 * Bytes 0, 1, 2 of each pixel are taken as B, G, R.
 *
 * @param dstU   output U, one byte per pixel
 * @param dstV   output V, one byte per pixel
 * @param src1   packed input, 3 bytes per pixel
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of pixels
 */
1738 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1739 const uint8_t *src2, int width, uint32_t *unused)
1742 for (i=0; i<width; i++) {
1743 int b= src1[3*i + 0];
1744 int g= src1[3*i + 1];
1745 int r= src1[3*i + 2];
/* 257 << (SHIFT - 1) folds an offset of 128 and the rounding half into one term */
1747 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1748 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1750 assert(src1 == src2);
/**
 * Convert packed 24-bit BGR to 8-bit chroma at half horizontal resolution.
 * The components of two adjacent pixels are summed (not averaged); the extra
 * factor of two is removed by shifting one bit further (RGB2YUV_SHIFT + 1).
 *
 * @param dstU   output U, one byte per pixel pair
 * @param dstV   output V, one byte per pixel pair
 * @param src1   packed input, 6 bytes are read per output sample
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of output samples
 */
1753 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1754 const uint8_t *src2, int width, uint32_t *unused)
1757 for (i=0; i<width; i++) {
1758 int b= src1[6*i + 0] + src1[6*i + 3];
1759 int g= src1[6*i + 1] + src1[6*i + 4];
1760 int r= src1[6*i + 2] + src1[6*i + 5];
1762 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1763 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1765 assert(src1 == src2);
/**
 * Convert packed 24-bit RGB to 8-bit luma, one output byte per pixel.
 * NOTE(review): the lines loading r, g and b are not visible here.
 */
1768 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1772 for (i=0; i<width; i++) {
/* 33 << (SHIFT - 1) folds an offset of 16 and the rounding half into one term */
1777 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/**
 * Convert packed 24-bit RGB to 8-bit chroma, one U/V pair per pixel.
 * Bytes 0, 1, 2 of each pixel are taken as R, G, B. Only src1 is read in the
 * visible code.
 *
 * @param dstU   output U, one byte per pixel
 * @param dstV   output V, one byte per pixel
 * @param src1   packed input, 3 bytes per pixel
 * @param width  number of pixels
 */
1781 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1782 const uint8_t *src2, int width, uint32_t *unused)
1786 for (i=0; i<width; i++) {
1787 int r= src1[3*i + 0];
1788 int g= src1[3*i + 1];
1789 int b= src1[3*i + 2];
/* 257 << (SHIFT - 1) folds an offset of 128 and the rounding half into one term */
1791 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1792 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/**
 * Convert packed 24-bit RGB to 8-bit chroma at half horizontal resolution.
 * The components of two adjacent pixels are summed (not averaged); the extra
 * factor of two is removed by shifting one bit further (RGB2YUV_SHIFT + 1).
 * Only src1 is read in the visible code.
 *
 * @param dstU   output U, one byte per pixel pair
 * @param dstV   output V, one byte per pixel pair
 * @param src1   packed input, 6 bytes are read per output sample
 * @param width  number of output samples
 */
1796 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1797 const uint8_t *src2, int width, uint32_t *unused)
1801 for (i=0; i<width; i++) {
1802 int r= src1[6*i + 0] + src1[6*i + 3];
1803 int g= src1[6*i + 1] + src1[6*i + 4];
1804 int b= src1[6*i + 2] + src1[6*i + 5];
1806 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1807 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/**
 * Convert planar RGB (separate component planes in src[]) to 8-bit luma.
 * NOTE(review): the lines loading g, b and r from the planes are not visible
 * here.
 */
1811 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1814 for (i = 0; i < width; i++) {
/* 33 << (SHIFT - 1) folds an offset of 16 and the rounding half into one term */
1819 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/**
 * Convert planar little-endian 16-bit RGB to luma stored as uint16_t.
 * Plane 0 is read as G, plane 1 as B and plane 2 as R.
 *
 * @param _dst   output, reinterpreted as uint16_t, one sample per pixel
 * @param _src   component planes, reinterpreted as uint16_t planes
 * @param width  number of pixels
 */
1823 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1826 const uint16_t **src = (const uint16_t **) _src;
1827 uint16_t *dst = (uint16_t *) _dst;
1828 for (i = 0; i < width; i++) {
1829 int g = AV_RL16(src[0] + i);
1830 int b = AV_RL16(src[1] + i);
1831 int r = AV_RL16(src[2] + i);
1833 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/**
 * Convert planar big-endian 16-bit RGB to luma stored as uint16_t.
 * Plane 0 is read as G, plane 1 as B and plane 2 as R.
 *
 * @param _dst   output, reinterpreted as uint16_t, one sample per pixel
 * @param _src   component planes, reinterpreted as uint16_t planes
 * @param width  number of pixels
 */
1837 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1840 const uint16_t **src = (const uint16_t **) _src;
1841 uint16_t *dst = (uint16_t *) _dst;
1842 for (i = 0; i < width; i++) {
1843 int g = AV_RB16(src[0] + i);
1844 int b = AV_RB16(src[1] + i);
1845 int r = AV_RB16(src[2] + i);
1847 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/**
 * Convert planar RGB (separate component planes in src[]) to 8-bit chroma.
 * NOTE(review): the lines loading g, b and r from the planes are not visible
 * here.
 * NOTE(review): the rounding term (257 << RGB2YUV_SHIFT) and the shift
 * (RGB2YUV_SHIFT + 1) are the ones bgr24ToUV_half_c applies to the sum of two
 * pixels - confirm this is intended for single-pixel input.
 */
1851 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1854 for (i = 0; i < width; i++) {
1859 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1860 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/**
 * Convert planar little-endian 16-bit RGB to chroma stored as uint16_t.
 * Plane 0 is read as G, plane 1 as B and plane 2 as R.
 * NOTE(review): the rounding term and shift match the 8-bit
 * bgr24ToUV_half_c constants - confirm they are intended for 16-bit
 * single-pixel input.
 *
 * @param _dstU  output U, reinterpreted as uint16_t
 * @param _dstV  output V, reinterpreted as uint16_t
 * @param _src   component planes, reinterpreted as uint16_t planes
 * @param width  number of pixels
 */
1864 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1867 const uint16_t **src = (const uint16_t **) _src;
1868 uint16_t *dstU = (uint16_t *) _dstU;
1869 uint16_t *dstV = (uint16_t *) _dstV;
1870 for (i = 0; i < width; i++) {
1871 int g = AV_RL16(src[0] + i);
1872 int b = AV_RL16(src[1] + i);
1873 int r = AV_RL16(src[2] + i);
1875 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1876 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/**
 * Convert planar big-endian 16-bit RGB to chroma stored as uint16_t.
 * Plane 0 is read as G, plane 1 as B and plane 2 as R.
 * NOTE(review): the rounding term and shift match the 8-bit
 * bgr24ToUV_half_c constants - confirm they are intended for 16-bit
 * single-pixel input.
 *
 * @param _dstU  output U, reinterpreted as uint16_t
 * @param _dstV  output V, reinterpreted as uint16_t
 * @param _src   component planes, reinterpreted as uint16_t planes
 * @param width  number of pixels
 */
1880 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1883 const uint16_t **src = (const uint16_t **) _src;
1884 uint16_t *dstU = (uint16_t *) _dstU;
1885 uint16_t *dstV = (uint16_t *) _dstV;
1886 for (i = 0; i < width; i++) {
1887 int g = AV_RB16(src[0] + i);
1888 int b = AV_RB16(src[1] + i);
1889 int r = AV_RB16(src[2] + i);
1891 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1892 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/**
 * Horizontal FIR scaler for 16-bit input samples; results are written as
 * int32_t and clamped to at most (1 << 19) - 1.
 *
 * @param c           context; the source format's component depth is read
 * @param _dst        output, reinterpreted as int32_t, dstW samples
 * @param dstW        number of output samples
 * @param _src        input, reinterpreted as uint16_t
 * @param filter      coefficients, filterSize per output sample
 * @param filterPos   index of the first input sample for each output sample
 * @param filterSize  number of taps per output sample
 */
1896 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1897 const int16_t *filter,
1898 const int16_t *filterPos, int filterSize)
1901 int32_t *dst = (int32_t *) _dst;
1902 const uint16_t *src = (const uint16_t *) _src;
/* NOTE(review): the line deriving the shift `sh` from `bits` is not visible here */
1903 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1906 for (i = 0; i < dstW; i++) {
1908 int srcPos = filterPos[i];
1911 for (j = 0; j < filterSize; j++) {
1912 val += src[srcPos + j] * filter[filterSize * i + j];
1914 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1915 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/**
 * Horizontal FIR scaler for 16-bit input samples; results are written as
 * int16_t and clamped to at most (1 << 15) - 1.
 *
 * @param c           context; the source format's depth_minus1 is the shift
 * @param dst         output, dstW samples
 * @param dstW        number of output samples
 * @param _src        input, reinterpreted as uint16_t
 * @param filter      coefficients, filterSize per output sample
 * @param filterPos   index of the first input sample for each output sample
 * @param filterSize  number of taps per output sample
 */
1919 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1920 const int16_t *filter,
1921 const int16_t *filterPos, int filterSize)
1924 const uint16_t *src = (const uint16_t *) _src;
/* right shift applied to every accumulated sum */
1925 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1927 for (i = 0; i < dstW; i++) {
1929 int srcPos = filterPos[i];
1932 for (j = 0; j < filterSize; j++) {
1933 val += src[srcPos + j] * filter[filterSize * i + j];
1935 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1936 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1940 // bilinear / bicubic scaling
/**
 * Horizontal FIR scaler for 8-bit input samples; each sum is shifted right by
 * 7 and clamped to at most (1 << 15) - 1 before being stored as int16_t.
 *
 * @param dst         output, dstW samples
 * @param dstW        number of output samples
 * @param src         input bytes
 * @param filter      coefficients, filterSize per output sample
 * @param filterPos   index of the first input sample for each output sample
 */
1941 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1942 const int16_t *filter, const int16_t *filterPos,
1946 for (i=0; i<dstW; i++) {
1948 int srcPos= filterPos[i];
1950 for (j=0; j<filterSize; j++) {
1951 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1953 //filter += hFilterSize;
1954 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/**
 * Horizontal FIR scaler for 8-bit input samples; each sum is shifted right by
 * 3 and clamped to at most (1 << 19) - 1 before being stored as int32_t.
 *
 * @param _dst        output, reinterpreted as int32_t, dstW samples
 * @param dstW        number of output samples
 * @param src         input bytes
 * @param filter      coefficients, filterSize per output sample
 * @param filterPos   index of the first input sample for each output sample
 */
1959 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1960 const int16_t *filter, const int16_t *filterPos,
1964 int32_t *dst = (int32_t *) _dst;
1965 for (i=0; i<dstW; i++) {
1967 int srcPos= filterPos[i];
1969 for (j=0; j<filterSize; j++) {
1970 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1972 //filter += hFilterSize;
1973 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1978 //FIXME all pal and rgb srcFormats could do this conversion as well
1979 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Rescale `width` chroma samples in place with a fixed-point linear map:
 * the input is capped at 30775, multiplied by 4663 / 4096 (about 1.138) and
 * reduced by a constant offset. Both planes get the same transform.
 */
1980 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1983 for (i = 0; i < width; i++) {
1984 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1985 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/**
 * Rescale `width` chroma samples in place with a fixed-point linear map:
 * the input is multiplied by 1799 / 2048 (about 0.878) and raised by a
 * constant offset. Both planes get the same transform.
 */
1988 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1991 for (i = 0; i < width; i++) {
1992 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1993 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/**
 * Rescale `width` luma samples in place with a fixed-point linear map:
 * the input is capped at 30189, multiplied by 19077 / 16384 (about 1.164) and
 * reduced by a constant offset.
 */
1996 static void lumRangeToJpeg_c(int16_t *dst, int width)
1999 for (i = 0; i < width; i++)
2000 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/**
 * Rescale `width` luma samples in place with a fixed-point linear map:
 * the input is multiplied by 14071 / 16384 (about 0.859) and raised by a
 * constant offset.
 */
2002 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2005 for (i = 0; i < width; i++)
2006 dst[i] = (dst[i]*14071 + 33561947)>>14;
/**
 * Variant of chrRangeToJpeg_c for buffers holding int32_t samples: the
 * pointers are reinterpreted as int32_t and the cap and offset constants are
 * scaled up by << 4; multiplier and final shift are unchanged.
 */
2009 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2012 int32_t *dstU = (int32_t *) _dstU;
2013 int32_t *dstV = (int32_t *) _dstV;
2014 for (i = 0; i < width; i++) {
2015 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2016 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/**
 * Variant of chrRangeFromJpeg_c for buffers holding int32_t samples: the
 * pointers are reinterpreted as int32_t and the offset constant is scaled up
 * by << 4; multiplier and final shift are unchanged.
 */
2019 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2022 int32_t *dstU = (int32_t *) _dstU;
2023 int32_t *dstV = (int32_t *) _dstV;
2024 for (i = 0; i < width; i++) {
2025 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2026 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/**
 * Variant of lumRangeToJpeg_c for buffers holding int32_t samples. The cap is
 * scaled by << 4; the multiplier and shift are 4769 and 12 here (the same
 * ratio as 19077 / 16384 to within rounding), with the offset scaled by << 2
 * to match the smaller shift.
 */
2029 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2032 int32_t *dst = (int32_t *) _dst;
2033 for (i = 0; i < width; i++)
2034 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/**
 * Variant of lumRangeFromJpeg_c for buffers holding int32_t samples: the
 * pointer is reinterpreted as int32_t and the offset constant is scaled up by
 * << 4; multiplier and final shift are unchanged.
 */
2036 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2039 int32_t *dst = (int32_t *) _dst;
2040 for (i = 0; i < width; i++)
2041 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/**
 * Fast bilinear horizontal luma scaler. The source position is tracked in
 * 16.16 fixed point; each output is the left neighbour scaled by 128 plus the
 * difference to the right neighbour weighted by a 7-bit fraction.
 * NOTE(review): the line advancing xpos is not visible here; presumably it
 * adds xInc per output sample.
 *
 * @param dst       output, dstWidth samples
 * @param dstWidth  number of output samples
 * @param src       input bytes; src[xx + 1] is read as well as src[xx]
 */
2044 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2045 const uint8_t *src, int srcW, int xInc)
2048 unsigned int xpos=0;
2049 for (i=0;i<dstWidth;i++) {
/* integer part of the position: index of the left neighbour */
2050 register unsigned int xx=xpos>>16;
/* top 7 bits of the fractional part: blend weight 0..127 */
2051 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2052 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2057 // *** horizontal scale Y line to temp buffer
/**
 * Horizontally scale one luma or alpha line into dst.
 *
 * The input is first converted into formatConvBuffer when a converter applies
 * (toYV12, or c->readLumPlanar for non-alpha input), then scaled either with
 * c->hyScale or, when c->hyscale_fast is set, with that fast path, and
 * finally passed through convertRange.
 * NOTE(review): the guards around the toYV12 and convertRange calls are not
 * visible here; both pointers can be NULL, so they are presumably checked.
 *
 * @param dst               output line
 * @param dstWidth          number of output samples
 * @param src_in            source planes; plane 3 is used for alpha, else 0
 * @param srcW              source width
 * @param xInc              position increment for the fast scaler
 * @param hLumFilter        filter coefficients for c->hyScale
 * @param hLumFilterPos     filter positions for c->hyScale
 * @param hLumFilterSize    taps per output sample for c->hyScale
 * @param formatConvBuffer  scratch buffer for the converted input line
 * @param pal               palette passed to the input converter
 * @param isAlpha           non-zero to process the alpha plane
 */
2058 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2059 const uint8_t *src_in[4], int srcW, int xInc,
2060 const int16_t *hLumFilter,
2061 const int16_t *hLumFilterPos, int hLumFilterSize,
2062 uint8_t *formatConvBuffer,
2063 uint32_t *pal, int isAlpha)
2065 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
/* no range conversion is applied to alpha */
2066 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2067 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2070 toYV12(formatConvBuffer, src, srcW, pal);
2071 src= formatConvBuffer;
2072 } else if (c->readLumPlanar && !isAlpha) {
2073 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2074 src = formatConvBuffer;
2077 if (!c->hyscale_fast) {
2078 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2079 } else { // fast bilinear upscale / crap downscale
2080 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2084 convertRange(dst, dstWidth);
/**
 * Fast bilinear horizontal chroma scaler for two planes at once. The source
 * position is tracked in 16.16 fixed point; each output blends the left and
 * right neighbours with weights (xalpha ^ 127) and xalpha, where xalpha is a
 * 7-bit fraction, so the weights sum to 127.
 * NOTE(review): the line advancing xpos is not visible here; presumably it
 * adds xInc per output sample.
 *
 * @param dst1      output for src1, dstWidth samples
 * @param dst2      output for src2, dstWidth samples
 * @param dstWidth  number of output samples
 * @param src1      first input plane; src1[xx + 1] is read as well
 * @param src2      second input plane; src2[xx + 1] is read as well
 */
2087 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2088 int dstWidth, const uint8_t *src1,
2089 const uint8_t *src2, int srcW, int xInc)
2092 unsigned int xpos=0;
2093 for (i=0;i<dstWidth;i++) {
/* integer part of the position: index of the left neighbour */
2094 register unsigned int xx=xpos>>16;
/* top 7 bits of the fractional part: blend weight 0..127 */
2095 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2096 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2097 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Horizontally scale one pair of chroma lines into dst1 / dst2.
 *
 * When an input converter applies (c->chrToYV12 or c->readChrPlanar), the
 * source is first converted into formatConvBuffer: the first plane at its
 * start, the second at a 16-byte-aligned offset behind it. Scaling then uses
 * c->hcScale once per plane or, when c->hcscale_fast is set, that fast path
 * for both planes together. c->chrConvertRange is applied last if set.
 * NOTE(review): the guard around the c->chrToYV12 call and the assignments of
 * src2 are not visible here.
 *
 * @param dst1              output for the first chroma plane
 * @param dst2              output for the second chroma plane
 * @param dstWidth          number of output samples per plane
 * @param src_in            source planes; planes 1 and 2 are used directly
 * @param srcW              source width
 * @param xInc              position increment for the fast scaler
 * @param hChrFilter        filter coefficients for c->hcScale
 * @param hChrFilterPos     filter positions for c->hcScale
 * @param hChrFilterSize    taps per output sample for c->hcScale
 * @param formatConvBuffer  scratch buffer for the converted input lines
 * @param pal               palette passed to c->chrToYV12
 */
2102 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2103 const uint8_t *src_in[4],
2104 int srcW, int xInc, const int16_t *hChrFilter,
2105 const int16_t *hChrFilterPos, int hChrFilterSize,
2106 uint8_t *formatConvBuffer, uint32_t *pal)
2108 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
/* second plane starts after srcW samples of srcBpc bits (rounded up to whole
 * bytes), aligned to 16 bytes */
2110 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2111 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2112 src1= formatConvBuffer;
2114 } else if (c->readChrPlanar) {
2115 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2116 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2117 src1= formatConvBuffer;
2121 if (!c->hcscale_fast) {
2122 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2123 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2124 } else { // fast bilinear upscale / crap downscale
2125 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2128 if (c->chrConvertRange)
2129 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Choose the C output functions that match c->dstFormat and store them
 * through the caller-supplied pointers.
 *
 * c            scaler context; dstFormat and flags are read from it
 * yuv2plane1   receives the single-line planar writer
 * yuv2planeX   receives the multi-tap planar writer
 * yuv2nv12cX   written only when dstFormat is NV12 or NV21
 * yuv2packed1, yuv2packed2, yuv2packedX
 *              receive the packed-format writers; in the
 *              SWS_FULL_CHR_H_INT part only yuv2packedX is assigned in the
 *              lines visible here
 *
 * NOTE(review): this listing omits a number of short source lines (braces,
 * "break;", "#else", "#if ..." and several "case" labels), so the case label
 * or preprocessor condition that guards some assignments is not visible.
 * Confirm against the complete file before relying on the pairing.
 */
2132 static av_always_inline void
2133 find_c_packed_planar_out_funcs(SwsContext *c,
2134 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2135 yuv2interleavedX_fn *yuv2nv12cX,
2136 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2137 yuv2packedX_fn *yuv2packedX)
2139 enum PixelFormat dstFormat = c->dstFormat;
/* Planar writers: selected by destination bit depth, then by endianness. */
2141 if (is16BPS(dstFormat)) {
2142 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2143 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2144 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 selects the 9-bit writers; the 10-bit writers follow. */
2145 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2146 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2147 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2149 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2150 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
/* 8-bit writers; the interleaved chroma writer is set only for NV12/NV21. */
2153 *yuv2plane1 = yuv2plane1_8_c;
2154 *yuv2planeX = yuv2planeX_8_c;
2155 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2156 *yuv2nv12cX = yuv2nv12cX_c;
/*
 * Packed writers when SWS_FULL_CHR_H_INT is set: the *_full_X_c variants.
 * Each group below offers a plain, an alpha ("a") and a no-alpha ("x")
 * variant; the conditions choosing between them are on lines not shown.
 */
2159 if(c->flags & SWS_FULL_CHR_H_INT) {
2160 switch (dstFormat) {
2163 *yuv2packedX = yuv2rgba32_full_X_c;
2165 #if CONFIG_SWSCALE_ALPHA
2167 *yuv2packedX = yuv2rgba32_full_X_c;
2169 #endif /* CONFIG_SWSCALE_ALPHA */
2171 *yuv2packedX = yuv2rgbx32_full_X_c;
2173 #endif /* !CONFIG_SMALL */
2177 *yuv2packedX = yuv2argb32_full_X_c;
2179 #if CONFIG_SWSCALE_ALPHA
2181 *yuv2packedX = yuv2argb32_full_X_c;
2183 #endif /* CONFIG_SWSCALE_ALPHA */
2185 *yuv2packedX = yuv2xrgb32_full_X_c;
2187 #endif /* !CONFIG_SMALL */
2191 *yuv2packedX = yuv2bgra32_full_X_c;
2193 #if CONFIG_SWSCALE_ALPHA
2195 *yuv2packedX = yuv2bgra32_full_X_c;
2197 #endif /* CONFIG_SWSCALE_ALPHA */
2199 *yuv2packedX = yuv2bgrx32_full_X_c;
2201 #endif /* !CONFIG_SMALL */
2205 *yuv2packedX = yuv2abgr32_full_X_c;
2207 #if CONFIG_SWSCALE_ALPHA
2209 *yuv2packedX = yuv2abgr32_full_X_c;
2211 #endif /* CONFIG_SWSCALE_ALPHA */
2213 *yuv2packedX = yuv2xbgr32_full_X_c;
2215 #endif /* !CONFIG_SMALL */
2218 *yuv2packedX = yuv2rgb24_full_X_c;
2221 *yuv2packedX = yuv2bgr24_full_X_c;
/*
 * Second switch on dstFormat: every group assigns all three packed writers
 * (1-line, 2-line blend, multi-tap).
 */
2225 switch (dstFormat) {
2226 case PIX_FMT_GRAY16BE:
2227 *yuv2packed1 = yuv2gray16BE_1_c;
2228 *yuv2packed2 = yuv2gray16BE_2_c;
2229 *yuv2packedX = yuv2gray16BE_X_c;
2231 case PIX_FMT_GRAY16LE:
2232 *yuv2packed1 = yuv2gray16LE_1_c;
2233 *yuv2packed2 = yuv2gray16LE_2_c;
2234 *yuv2packedX = yuv2gray16LE_X_c;
2236 case PIX_FMT_MONOWHITE:
2237 *yuv2packed1 = yuv2monowhite_1_c;
2238 *yuv2packed2 = yuv2monowhite_2_c;
2239 *yuv2packedX = yuv2monowhite_X_c;
2241 case PIX_FMT_MONOBLACK:
2242 *yuv2packed1 = yuv2monoblack_1_c;
2243 *yuv2packed2 = yuv2monoblack_2_c;
2244 *yuv2packedX = yuv2monoblack_X_c;
2246 case PIX_FMT_YUYV422:
2247 *yuv2packed1 = yuv2yuyv422_1_c;
2248 *yuv2packed2 = yuv2yuyv422_2_c;
2249 *yuv2packedX = yuv2yuyv422_X_c;
2251 case PIX_FMT_UYVY422:
2252 *yuv2packed1 = yuv2uyvy422_1_c;
2253 *yuv2packed2 = yuv2uyvy422_2_c;
2254 *yuv2packedX = yuv2uyvy422_X_c;
2256 case PIX_FMT_RGB48LE:
2257 *yuv2packed1 = yuv2rgb48le_1_c;
2258 *yuv2packed2 = yuv2rgb48le_2_c;
2259 *yuv2packedX = yuv2rgb48le_X_c;
2261 case PIX_FMT_RGB48BE:
2262 *yuv2packed1 = yuv2rgb48be_1_c;
2263 *yuv2packed2 = yuv2rgb48be_2_c;
2264 *yuv2packedX = yuv2rgb48be_X_c;
2266 case PIX_FMT_BGR48LE:
2267 *yuv2packed1 = yuv2bgr48le_1_c;
2268 *yuv2packed2 = yuv2bgr48le_2_c;
2269 *yuv2packedX = yuv2bgr48le_X_c;
2271 case PIX_FMT_BGR48BE:
2272 *yuv2packed1 = yuv2bgr48be_1_c;
2273 *yuv2packed2 = yuv2bgr48be_2_c;
2274 *yuv2packedX = yuv2bgr48be_X_c;
/*
 * 32-bit RGB writers: generic (rgb32), with alpha (rgba32) and without alpha
 * (rgbx32). NOTE(review): the case labels and the condition selecting the
 * alpha variant are on lines not shown in this listing.
 */
2279 *yuv2packed1 = yuv2rgb32_1_c;
2280 *yuv2packed2 = yuv2rgb32_2_c;
2281 *yuv2packedX = yuv2rgb32_X_c;
2283 #if CONFIG_SWSCALE_ALPHA
2285 *yuv2packed1 = yuv2rgba32_1_c;
2286 *yuv2packed2 = yuv2rgba32_2_c;
2287 *yuv2packedX = yuv2rgba32_X_c;
2289 #endif /* CONFIG_SWSCALE_ALPHA */
2291 *yuv2packed1 = yuv2rgbx32_1_c;
2292 *yuv2packed2 = yuv2rgbx32_2_c;
2293 *yuv2packedX = yuv2rgbx32_X_c;
2295 #endif /* !CONFIG_SMALL */
/* Same three-way choice for the "_1" 32-bit layouts. */
2297 case PIX_FMT_RGB32_1:
2298 case PIX_FMT_BGR32_1:
2300 *yuv2packed1 = yuv2rgb32_1_1_c;
2301 *yuv2packed2 = yuv2rgb32_1_2_c;
2302 *yuv2packedX = yuv2rgb32_1_X_c;
2304 #if CONFIG_SWSCALE_ALPHA
2306 *yuv2packed1 = yuv2rgba32_1_1_c;
2307 *yuv2packed2 = yuv2rgba32_1_2_c;
2308 *yuv2packedX = yuv2rgba32_1_X_c;
2310 #endif /* CONFIG_SWSCALE_ALPHA */
2312 *yuv2packed1 = yuv2rgbx32_1_1_c;
2313 *yuv2packed2 = yuv2rgbx32_1_2_c;
2314 *yuv2packedX = yuv2rgbx32_1_X_c;
2316 #endif /* !CONFIG_SMALL */
/* 24-bit writers (case labels not shown in this listing). */
2319 *yuv2packed1 = yuv2rgb24_1_c;
2320 *yuv2packed2 = yuv2rgb24_2_c;
2321 *yuv2packedX = yuv2rgb24_X_c;
2324 *yuv2packed1 = yuv2bgr24_1_c;
2325 *yuv2packed2 = yuv2bgr24_2_c;
2326 *yuv2packedX = yuv2bgr24_X_c;
/* All RGB/BGR and LE/BE variants of a given 16/15/12-bit depth share one writer set. */
2328 case PIX_FMT_RGB565LE:
2329 case PIX_FMT_RGB565BE:
2330 case PIX_FMT_BGR565LE:
2331 case PIX_FMT_BGR565BE:
2332 *yuv2packed1 = yuv2rgb16_1_c;
2333 *yuv2packed2 = yuv2rgb16_2_c;
2334 *yuv2packedX = yuv2rgb16_X_c;
2336 case PIX_FMT_RGB555LE:
2337 case PIX_FMT_RGB555BE:
2338 case PIX_FMT_BGR555LE:
2339 case PIX_FMT_BGR555BE:
2340 *yuv2packed1 = yuv2rgb15_1_c;
2341 *yuv2packed2 = yuv2rgb15_2_c;
2342 *yuv2packedX = yuv2rgb15_X_c;
2344 case PIX_FMT_RGB444LE:
2345 case PIX_FMT_RGB444BE:
2346 case PIX_FMT_BGR444LE:
2347 case PIX_FMT_BGR444BE:
2348 *yuv2packed1 = yuv2rgb12_1_c;
2349 *yuv2packed2 = yuv2rgb12_2_c;
2350 *yuv2packedX = yuv2rgb12_X_c;
/* 8-bit and 4-bit writers (case labels not shown in this listing). */
2354 *yuv2packed1 = yuv2rgb8_1_c;
2355 *yuv2packed2 = yuv2rgb8_2_c;
2356 *yuv2packedX = yuv2rgb8_X_c;
2360 *yuv2packed1 = yuv2rgb4_1_c;
2361 *yuv2packed2 = yuv2rgb4_2_c;
2362 *yuv2packedX = yuv2rgb4_X_c;
2364 case PIX_FMT_RGB4_BYTE:
2365 case PIX_FMT_BGR4_BYTE:
2366 *yuv2packed1 = yuv2rgb4b_1_c;
2367 *yuv2packed2 = yuv2rgb4b_2_c;
2368 *yuv2packedX = yuv2rgb4b_X_c;
/*
 * Buffer tracing helpers. DEBUG_SWSCALE_BUFFERS is a compile-time switch;
 * while it is 0 the av_log() call inside DEBUG_BUFFERS sits behind a
 * constant-false condition. DEBUG_BUFFERS logs at AV_LOG_DEBUG and uses a
 * variable named `c` from the call site as the log context.
 * NOTE(review): the macro expands to a bare `if` statement, so using it as
 * the unbraced body of an if/else would capture the following `else`.
 */
2374 #define DEBUG_SWSCALE_BUFFERS 0
2375 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Scale one horizontal slice of the source picture into the destination.
 *
 * c          scaler context (filters, ring buffers, output function pointers)
 * src        source plane pointers for this slice
 * srcStride  source strides; modified in place below (entry 3 for packed
 *            input, entries 1 and 2 shifted by c->vChrDrop)
 * srcSliceY  first source line of the slice
 * srcSliceH  number of source lines in the slice
 * dst        destination plane pointers
 * dstStride  destination strides
 *
 * Returns dstY - lastDstY, presumably the number of destination lines
 * produced by this call.
 *
 * Source lines are scaled horizontally into ring buffers (lumPixBuf,
 * chrUPixBuf, chrVPixBuf, alpPixBuf); as soon as enough lines for an output
 * line are buffered, the vertical output function writes that line. Buffer
 * indices and "last line in buffer" counters are stored back into the
 * context at the end.
 *
 * NOTE(review): this listing omits short source lines (braces, "else",
 * "#if"/"#endif", and the declarations of dstY, lastDstY and enough_lines),
 * so some branch boundaries are not visible here.
 */
2377 static int swScale(SwsContext *c, const uint8_t* src[],
2378 int srcStride[], int srcSliceY,
2379 int srcSliceH, uint8_t* dst[], int dstStride[])
2381 /* load a few things into local vars to make the code more readable? and faster */
2382 const int srcW= c->srcW;
2383 const int dstW= c->dstW;
2384 const int dstH= c->dstH;
2385 const int chrDstW= c->chrDstW;
2386 const int chrSrcW= c->chrSrcW;
2387 const int lumXInc= c->lumXInc;
2388 const int chrXInc= c->chrXInc;
2389 const enum PixelFormat dstFormat= c->dstFormat;
2390 const int flags= c->flags;
2391 int16_t *vLumFilterPos= c->vLumFilterPos;
2392 int16_t *vChrFilterPos= c->vChrFilterPos;
2393 int16_t *hLumFilterPos= c->hLumFilterPos;
2394 int16_t *hChrFilterPos= c->hChrFilterPos;
2395 int16_t *vLumFilter= c->vLumFilter;
2396 int16_t *vChrFilter= c->vChrFilter;
2397 int16_t *hLumFilter= c->hLumFilter;
2398 int16_t *hChrFilter= c->hChrFilter;
2399 int32_t *lumMmxFilter= c->lumMmxFilter;
2400 int32_t *chrMmxFilter= c->chrMmxFilter;
2401 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2402 const int vLumFilterSize= c->vLumFilterSize;
2403 const int vChrFilterSize= c->vChrFilterSize;
2404 const int hLumFilterSize= c->hLumFilterSize;
2405 const int hChrFilterSize= c->hChrFilterSize;
2406 int16_t **lumPixBuf= c->lumPixBuf;
2407 int16_t **chrUPixBuf= c->chrUPixBuf;
2408 int16_t **chrVPixBuf= c->chrVPixBuf;
2409 int16_t **alpPixBuf= c->alpPixBuf;
2410 const int vLumBufSize= c->vLumBufSize;
2411 const int vChrBufSize= c->vChrBufSize;
2412 uint8_t *formatConvBuffer= c->formatConvBuffer;
/*
 * Slice position and height in chroma lines. The -((-x) >> s) form rounds
 * the height up (assuming an arithmetic right shift of negative values).
 */
2413 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2414 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2416 uint32_t *pal=c->pal_yuv;
/* Local copies of the output functions; they may be replaced near the end of the picture. */
2417 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2418 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2419 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2420 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2421 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2422 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Per-line dither tables are used only for 9/10-bit or 16-bit sources. */
2423 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2425 /* vars which will change and which we need to store back in the context */
2427 int lumBufIndex= c->lumBufIndex;
2428 int chrBufIndex= c->chrBufIndex;
2429 int lastInLumBuf= c->lastInLumBuf;
2430 int lastInChrBuf= c->lastInChrBuf;
/*
 * Packed input: plane 3 gets the stride of plane 0 (the remaining
 * assignments of this statement are on lines not shown in this listing).
 */
2432 if (isPacked(c->srcFormat)) {
2440 srcStride[3]= srcStride[0];
/* Scale the chroma strides by 2^vChrDrop. */
2442 srcStride[1]<<= c->vChrDrop;
2443 srcStride[2]<<= c->vChrDrop;
2445 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2446 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2447 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2448 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2449 srcSliceY, srcSliceH, dstY, dstH);
2450 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2451 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/*
 * Warn about destination strides that are not multiples of 8, but only when
 * SWS_PRINT_INFO is set and the function-static flag says no warning was
 * printed before.
 */
2453 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2454 static int warnedAlready=0; //FIXME move this into the context perhaps
2455 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2456 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2457 " ->cannot do aligned memory accesses anymore\n");
2462 /* Note the user might start scaling the picture in the middle so this
2463 will not get executed. This is not really intended but works
2464 currently, so people might do it. */
/* NOTE(review): the body of this branch is on lines not shown in this listing. */
2465 if (srcSliceY ==0) {
/* Without dithering both dither pointers refer to the constant table ff_sws_pb_64. */
2473 if (!should_dither) {
2474 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one iteration per destination line. */
2478 for (;dstY < dstH; dstY++) {
2479 const int chrDstY= dstY>>c->chrDstVSubSample;
2480 uint8_t *dest[4] = {
2481 dst[0] + dstStride[0] * dstY,
2482 dst[1] + dstStride[1] * chrDstY,
2483 dst[2] + dstStride[2] * chrDstY,
2484 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2487 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* Same lookup for the last luma line sharing this chroma line, clamped to dstH-1. */
2488 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2489 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2490 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2491 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2492 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2495 //handle holes (FAST_BILINEAR & weird filters)
2496 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2497 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2498 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2499 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2501 DEBUG_BUFFERS("dstY: %d\n", dstY);
2502 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2503 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2504 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2505 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2507 // Do we have enough lines in this slice to output the dstY line
2508 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/*
 * Not enough input yet: lower the targets to the last line of this slice so
 * the loops below buffer whatever the slice provides.
 */
2510 if (!enough_lines) {
2511 lastLumSrcY = srcSliceY + srcSliceH - 1;
2512 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2513 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2514 lastLumSrcY, lastChrSrcY);
2517 //Do horizontal scaling
/* Luma (and alpha, when alpPixBuf is set) lines up to lastLumSrcY. */
2518 while(lastInLumBuf < lastLumSrcY) {
2519 const uint8_t *src1[4] = {
2520 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2521 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2522 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2523 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2526 assert(lumBufIndex < 2*vLumBufSize);
2527 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2528 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2529 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2530 hLumFilter, hLumFilterPos, hLumFilterSize,
2533 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2534 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2535 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2539 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2540 lumBufIndex, lastInLumBuf);
/* Chroma lines up to lastChrSrcY; row offsets are relative to chrSrcSliceY. */
2542 while(lastInChrBuf < lastChrSrcY) {
2543 const uint8_t *src1[4] = {
2544 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2545 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2546 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2547 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2550 assert(chrBufIndex < 2*vChrBufSize);
2551 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2552 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2553 //FIXME replace parameters through context struct (some at least)
/* Chroma is scaled only when the context flag needs_hcscale is set. */
2555 if (c->needs_hcscale)
2556 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2557 chrDstW, src1, chrSrcW, chrXInc,
2558 hChrFilter, hChrFilterPos, hChrFilterSize,
2559 formatConvBuffer, pal);
2561 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2562 chrBufIndex, lastInChrBuf);
2564 //wrap buf index around to stay inside the ring buffer
2565 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2566 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2568 break; //we can't output a dstY line so let's try with the next slice
2571 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Dithering: pick the table row from the low three bits of the line index. */
2573 if (should_dither) {
2574 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2575 c->lumDither8 = dither_8x8_128[dstY & 7];
/* For the last two destination lines the local function pointers are re-selected from the C implementations. */
2577 if (dstY >= dstH-2) {
2578 // hmm looks like we can't use MMX here without overwriting this array's tail
2579 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2580 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/*
 * Pointers to the first buffered line needed by the vertical filter:
 * buffer base + current index + (first needed line - last buffered line)
 * + buffer size.
 */
2584 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2585 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2586 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2587 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2588 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2589 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* Luma plane: single-tap writer when the vertical filter has one tap, multi-tap writer otherwise. */
2591 if (vLumFilterSize == 1) {
2592 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2594 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2595 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Chroma planes: written only on lines where (dstY & chrSkipMask) is 0 and the destination is not gray. */
2598 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2600 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2601 } else if (vChrFilterSize == 1) {
2602 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2603 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2605 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2606 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2607 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2608 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
/* Alpha plane: uses the luma filter and the luma dither table. */
2612 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2613 if (vLumFilterSize == 1) {
2614 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2616 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2617 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/*
 * Packed output (presumably the else branch of the planar test above; the
 * "else" line is not shown in this listing). The writer is chosen by the
 * vertical filter sizes.
 */
2621 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2622 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2623 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2624 int chrAlpha = vChrFilter[2 * dstY + 1];
2625 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2626 alpPixBuf ? *alpSrcPtr : NULL,
2627 dest[0], dstW, chrAlpha, dstY);
2628 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2629 int lumAlpha = vLumFilter[2 * dstY + 1];
2630 int chrAlpha = vChrFilter[2 * dstY + 1];
/* Multiplying by 0x10001 copies the 16-bit coefficient into both halves of the 32-bit entry. */
2632 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2634 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2635 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2636 alpPixBuf ? alpSrcPtr : NULL,
2637 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2638 } else { //general RGB
2639 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2640 lumSrcPtr, vLumFilterSize,
2641 vChrFilter + dstY * vChrFilterSize,
2642 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2643 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA420P destination without an alpha buffer: fill the alpha plane with 255 for the lines written by this call. */
2649 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2650 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Issue a store fence when the CPU reports MMX2. */
2653 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2654 __asm__ volatile("sfence":::"memory");
2658 /* store changed local vars back in the context */
2660 c->lumBufIndex= lumBufIndex;
2661 c->chrBufIndex= chrBufIndex;
2662 c->lastInLumBuf= lastInLumBuf;
2663 c->lastInChrBuf= lastInChrBuf;
2665 return dstY - lastDstY;
/*
 * Install the C implementations into the scaler context:
 *  - output functions, via find_c_packed_planar_out_funcs();
 *  - input readers (chrToYV12, lumToYV12, alpToYV12, readChrPlanar,
 *    readLumPlanar) chosen from c->srcFormat;
 *  - horizontal scalers (hyScale, hcScale, and the *_fast variants when
 *    SWS_FAST_BILINEAR is set) chosen from srcBpc and dstBpc;
 *  - luma/chroma range converters when source and destination range differ
 *    and the destination is not an RGB format;
 *  - the needs_hcscale flag.
 *
 * NOTE(review): this listing omits short source lines (some "switch" heads,
 * several "case" labels, "#if"/"#else"/"#endif", braces), so a few
 * conditions guarding the assignments below are not visible here.
 */
2668 static av_cold void sws_init_swScale_c(SwsContext *c)
2670 enum PixelFormat srcFormat = c->srcFormat;
2672 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2673 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input reader: reset to NULL, then set per source format. */
2676 c->chrToYV12 = NULL;
2678 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2679 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2680 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2681 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2685 case PIX_FMT_BGR4_BYTE:
2686 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar RGB sources use readChrPlanar instead of chrToYV12. */
2687 case PIX_FMT_GBRP9LE:
2688 case PIX_FMT_GBRP10LE:
2689 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2690 case PIX_FMT_GBRP9BE:
2691 case PIX_FMT_GBRP10BE:
2692 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2693 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/*
 * High-bit-depth YUV: both the LE and the BE list map to bswap16UV_c.
 * NOTE(review): presumably only one list is compiled, selected by a
 * host-endianness "#if" on lines not shown here; confirm.
 */
2695 case PIX_FMT_YUV444P9LE:
2696 case PIX_FMT_YUV422P9LE:
2697 case PIX_FMT_YUV420P9LE:
2698 case PIX_FMT_YUV422P10LE:
2699 case PIX_FMT_YUV444P10LE:
2700 case PIX_FMT_YUV420P10LE:
2701 case PIX_FMT_YUV420P16LE:
2702 case PIX_FMT_YUV422P16LE:
2703 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2705 case PIX_FMT_YUV444P9BE:
2706 case PIX_FMT_YUV422P9BE:
2707 case PIX_FMT_YUV420P9BE:
2708 case PIX_FMT_YUV444P10BE:
2709 case PIX_FMT_YUV422P10BE:
2710 case PIX_FMT_YUV420P10BE:
2711 case PIX_FMT_YUV420P16BE:
2712 case PIX_FMT_YUV422P16BE:
2713 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB sources: the "_half_c" readers are used when chrSrcHSubSample is non-zero. */
2716 if (c->chrSrcHSubSample) {
2718 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2719 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2720 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2721 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2722 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2723 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2724 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2725 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2726 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2727 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2728 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2729 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2730 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2731 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2732 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2733 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2734 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2735 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Same formats with the readers that lack the "_half" suffix. */
2739 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2740 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2741 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2742 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2743 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2744 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2745 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2746 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2747 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2748 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2749 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2750 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2751 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2752 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2753 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2754 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2755 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2756 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha input readers: reset to NULL, then set per source format. */
2760 c->lumToYV12 = NULL;
2761 c->alpToYV12 = NULL;
2762 switch (srcFormat) {
2763 case PIX_FMT_GBRP9LE:
2764 case PIX_FMT_GBRP10LE:
2765 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2766 case PIX_FMT_GBRP9BE:
2767 case PIX_FMT_GBRP10BE:
2768 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2769 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
/* As for chroma, the LE and BE lists both map to the byte-swapping reader. */
2771 case PIX_FMT_YUV444P9LE:
2772 case PIX_FMT_YUV422P9LE:
2773 case PIX_FMT_YUV420P9LE:
2774 case PIX_FMT_YUV444P10LE:
2775 case PIX_FMT_YUV422P10LE:
2776 case PIX_FMT_YUV420P10LE:
2777 case PIX_FMT_YUV420P16LE:
2778 case PIX_FMT_YUV422P16LE:
2779 case PIX_FMT_YUV444P16LE:
2780 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2782 case PIX_FMT_YUV444P9BE:
2783 case PIX_FMT_YUV422P9BE:
2784 case PIX_FMT_YUV420P9BE:
2785 case PIX_FMT_YUV444P10BE:
2786 case PIX_FMT_YUV422P10BE:
2787 case PIX_FMT_YUV420P10BE:
2788 case PIX_FMT_YUV420P16BE:
2789 case PIX_FMT_YUV422P16BE:
2790 case PIX_FMT_YUV444P16BE:
2791 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A shares the YUYV luma reader. */
2793 case PIX_FMT_YUYV422 :
2794 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2795 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2796 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2797 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2798 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2799 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2800 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2801 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2802 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2803 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2804 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2805 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2809 case PIX_FMT_BGR4_BYTE:
2810 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2811 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2812 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2813 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2814 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2815 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2816 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2817 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2818 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2819 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2820 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/*
 * Alpha reader per source format. NOTE(review): the condition enclosing this
 * switch and two of its case labels are on lines not shown in this listing.
 */
2823 switch (srcFormat) {
2825 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2827 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2828 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/*
 * Horizontal scalers: 8-bit sources use the 8To15 or 8To19 scaler depending
 * on whether dstBpc is at most 10; the other sources use the 16To15 or
 * 16To19 scaler. The fast bilinear scalers are assigned only for 8-bit
 * sources with dstBpc <= 10.
 */
2832 if (c->srcBpc == 8) {
2833 if (c->dstBpc <= 10) {
2834 c->hyScale = c->hcScale = hScale8To15_c;
2835 if (c->flags & SWS_FAST_BILINEAR) {
2836 c->hyscale_fast = hyscale_fast_c;
2837 c->hcscale_fast = hcscale_fast_c;
2840 c->hyScale = c->hcScale = hScale8To19_c;
2843 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/*
 * Range conversion is installed only when source and destination range
 * differ and the destination is not RGB. The "16" variants follow the
 * dstBpc <= 10 variants. NOTE(review): the conditions choosing between
 * FromJpeg and ToJpeg are on lines not shown in this listing.
 */
2846 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2847 if (c->dstBpc <= 10) {
2849 c->lumConvertRange = lumRangeFromJpeg_c;
2850 c->chrConvertRange = chrRangeFromJpeg_c;
2852 c->lumConvertRange = lumRangeToJpeg_c;
2853 c->chrConvertRange = chrRangeToJpeg_c;
2857 c->lumConvertRange = lumRangeFromJpeg16_c;
2858 c->chrConvertRange = chrRangeFromJpeg16_c;
2860 c->lumConvertRange = lumRangeToJpeg16_c;
2861 c->chrConvertRange = chrRangeToJpeg16_c;
/* Chroma scaling is needed unless source or destination is gray, or the source is monochrome. */
2866 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2867 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2868 c->needs_hcscale = 1;
2871 SwsFunc ff_getSwsFunc(SwsContext *c)
2873 sws_init_swScale_c(c);
2876 ff_sws_init_swScale_mmx(c);
2878 ff_sws_init_swScale_altivec(c);