2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* BT.601 RGB->YUV conversion coefficients in Q15 fixed point
 * (scaled by 1 << RGB2YUV_SHIFT, rounded to nearest).
 * Luma terms (BY/GY/RY) are pre-scaled by 219/255 for limited-range luma;
 * chroma terms (BU/BV/GU/GV/RU/RV) by 224/255 for limited-range chroma. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither matrices used when reducing 8-bit components to lower
 * bit depths in the packed-RGB output paths (see yuv2rgb_write() below).
 * NOTE(review): several closing "};" lines and the preprocessor
 * conditionals selecting between the alternative dither_8x8_220 tables
 * appear to have been lost in this copy — verify against upstream. */

/* 2x2 dither, amplitude 0..3 — used for the green channel of RGB565. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 dither, amplitude 0..6 — used for the 5-bit channels of RGB565/RGB555. */
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 dither, amplitude 0..15 — used for RGB444/BGR444. */
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 dither, amplitude 0..31 — red/green dither for RGB8/BGR8. */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 dither, amplitude 0..72 — blue dither for RGB8/BGR8,
 * green dither for RGB4/BGR4. */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 dither, amplitude 0..217 — red/blue dither for RGB4/BGR4 and the
 * d128 row table used by the monochrome writers (yuv2mono_*). */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* NOTE(review): the three tables below redefine dither_8x8_220; upstream
 * guards them with #if/#elif so only one variant is compiled. */
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 dither, amplitude 0..126. */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Constant vector of 64s, exported for SIMD code (ff_ prefix = shared symbol). */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* Store a 16-bit sample: shift down by the caller's local 'shift', clip with
 * av_clip_(u)int16, add 'bias', and write big- or little-endian.
 * NOTE(review): the "if (big_endian) { ... } else { ... }" framing lines of
 * this macro appear to be missing from this copy — verify against upstream.
 * (No comments are inserted between the continuation lines: a comment line
 * would be spliced into the macro by the trailing backslashes.) */
198 #define output_pixel(pos, val, bias, signedness) \
200 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
202 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/**
 * Output one 16-bit plane from 19-bit intermediate samples without vertical
 * scaling: add half an output LSB for rounding, then output_pixel() shifts
 * down to 'output_bits' and stores in the requested endianness.
 * NOTE(review): the opening brace and the "int i;" declaration lines appear
 * to be missing from this copy.
 */
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
210 int shift = 19 - output_bits;
212 for (i = 0; i < dstW; i++) {
213 int val = src[i] + (1 << (shift - 1));
214 output_pixel(&dest[i], val, 0, uint);
/**
 * Vertically scale 32-bit intermediate samples with a 15-bit-coefficient
 * filter and output one 16-bit plane. The accumulator is biased and the
 * result re-biased by 0x8000 in output_pixel() so that filters with negative
 * coefficients cannot overflow the signed clip (see inline comment below).
 */
218 static av_always_inline void
219 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
220 const int32_t **src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 + 16 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = 1 << (30-output_bits);
230 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
231 * filters (or anything with negative coeffs, the range can be slightly
232 * wider in both directions. To account for this overflow, we subtract
233 * a constant so it always fits in the signed range (assuming a
234 * reasonable filterSize), and re-add that at the end. */
236 for (j = 0; j < filterSize; j++)
237 val += src[j][i] * filter[j];
239 output_pixel(&dest[i], val, 0x8000, int);
/* Store a 9/10-bit sample: shift by the caller's local 'shift', clip to
 * 'output_bits' unsigned bits, write big- or little-endian.
 * NOTE(review): as above, the big_endian if/else framing lines appear to be
 * missing from this copy. */
245 #define output_pixel(pos, val) \
247 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
249 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/**
 * Output one 9/10-bit plane from 15-bit intermediate samples without
 * vertical scaling: round by half an output LSB, then shift/clip/store
 * via output_pixel().
 */
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
257 int shift = 15 - output_bits;
259 for (i = 0; i < dstW; i++) {
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
/**
 * Vertically scale 16-bit intermediate samples with a 12-bit-coefficient
 * filter (11 + 16 - output_bits total shift) and output one 9/10-bit plane.
 * The initial value of 'val' provides round-to-nearest.
 */
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
271 int shift = 11 + 16 - output_bits;
273 for (i = 0; i < dstW; i++) {
274 int val = 1 << (26-output_bits);
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
280 output_pixel(&dest[i], val);
/* Instantiate concrete yuv2plane1_<bits><BE/LE>_c / yuv2planeX_<bits><BE/LE>_c
 * wrappers around the 10/16-bit templates above: the wrapper casts the generic
 * int16_t buffers to the template's element type (typeX_t) and passes the
 * endianness and bit depth as constants so the template specializes.
 * (Comments are kept outside the macro body because of the backslash
 * continuations.) */
286 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
287 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
288 uint8_t *dest, int dstW, \
289 const uint8_t *dither, int offset)\
291 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
292 (uint16_t *) dest, dstW, is_be, bits); \
294 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
295 const int16_t **src, uint8_t *dest, int dstW, \
296 const uint8_t *dither, int offset)\
298 yuv2planeX_## template_size ## _c_template(filter, \
299 filterSize, (const typeX_t **) src, \
300 (uint16_t *) dest, dstW, is_be, bits); \
302 yuv2NBPS( 9, BE, 1, 10, int16_t)
303 yuv2NBPS( 9, LE, 0, 10, int16_t)
304 yuv2NBPS(10, BE, 1, 10, int16_t)
305 yuv2NBPS(10, LE, 0, 10, int16_t)
306 yuv2NBPS(16, BE, 1, 16, int32_t)
307 yuv2NBPS(16, LE, 0, 16, int32_t)
/**
 * Vertically scale 16-bit intermediate samples to one 8-bit plane.
 * The 8-entry dither row is pre-shifted into the 19-bit accumulator domain
 * (<< 12) so a single >> 19 at the end both scales and dithers.
 */
309 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
310 const int16_t **src, uint8_t *dest, int dstW,
311 const uint8_t *dither, int offset)
314 for (i=0; i<dstW; i++) {
315 int val = dither[(i + offset) & 7] << 12;
317 for (j=0; j<filterSize; j++)
318 val += src[j][i] * filter[j];
320 dest[i]= av_clip_uint8(val>>19);
/**
 * Output one 8-bit plane from 15-bit intermediate samples without vertical
 * scaling, adding the dither value before the >> 7 down-shift.
 */
324 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
325 const uint8_t *dither, int offset)
328 for (i=0; i<dstW; i++) {
329 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
330 dest[i]= av_clip_uint8(val);
/**
 * Vertically scale chroma and write it interleaved for semi-planar formats:
 * U,V order for NV12, and V,U (swapped on purpose) in the second loop,
 * presumably the NV21 path — the 'else' line is not visible in this copy.
 */
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
/* NV12: interleave as U,V pairs. */
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
/* Other semi-planar layout: same filtering, but V is stored first. */
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
/* Store a gray16 sample big- or little-endian depending on 'target'.
 * NOTE(review): the macro body (the AV_WB16/AV_WL16 store lines and the
 * else branch) is missing from this copy — verify against upstream. */
370 #define output_pixel(pos, val) \
371 if (target == PIX_FMT_GRAY16BE) { \
/**
 * Vertical-filter path for 16-bit grayscale output: filter two luma samples
 * per iteration from 32-bit intermediates, clip to 16 bits only when the
 * cheap overflow test fires, and store via output_pixel(). Chroma/alpha
 * inputs are accepted for signature compatibility but unused here.
 */
377 static av_always_inline void
378 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
379 const int32_t **lumSrc, int lumFilterSize,
380 const int16_t *chrFilter, const int32_t **chrUSrc,
381 const int32_t **chrVSrc, int chrFilterSize,
382 const int32_t **alpSrc, uint16_t *dest, int dstW,
383 int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
392 for (j = 0; j < lumFilterSize; j++) {
393 Y1 += lumSrc[j][i * 2] * lumFilter[j];
394 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* Clip only when either value overflowed 16 bits. */
398 if ((Y1 | Y2) & 0x10000) {
399 Y1 = av_clip_uint16(Y1);
400 Y2 = av_clip_uint16(Y2);
402 output_pixel(&dest[i * 2 + 0], Y1);
403 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Two-line bilinear path for 16-bit grayscale output: blend the two luma
 * lines with the 12-bit weight 'yalpha' (yalpha1 = 4095 - yalpha) and shift
 * back down by 15.
 */
407 static av_always_inline void
408 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
409 const int32_t *ubuf[2], const int32_t *vbuf[2],
410 const int32_t *abuf[2], uint16_t *dest, int dstW,
411 int yalpha, int uvalpha, int y,
412 enum PixelFormat target)
414 int yalpha1 = 4095 - yalpha;
416 const int32_t *buf0 = buf[0], *buf1 = buf[1];
418 for (i = 0; i < (dstW >> 1); i++) {
419 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
420 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
422 output_pixel(&dest[i * 2 + 0], Y1);
423 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Single-line (unscaled vertical) path for 16-bit grayscale output:
 * the << 1 rescales the intermediate samples to the 16-bit output domain.
 */
427 static av_always_inline void
428 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
429 const int32_t *ubuf[2], const int32_t *vbuf[2],
430 const int32_t *abuf0, uint16_t *dest, int dstW,
431 int uvalpha, int y, enum PixelFormat target)
435 for (i = 0; i < (dstW >> 1); i++) {
436 int Y1 = buf0[i * 2 ] << 1;
437 int Y2 = buf0[i * 2 + 1] << 1;
439 output_pixel(&dest[i * 2 + 0], Y1);
440 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X_c vertical-filter, _2_c
 * two-line bilinear, _1_c single-line) for a 16-bit-per-component packed
 * output format: each wrapper reinterprets the generic int16_t buffer
 * pointers as the int32_t buffers the *_c_template functions take, casts
 * dest to uint16_t, and forwards with the pixel format pinned to 'fmt'.
 * (Comments are kept outside the macro because of backslash continuations;
 * instantiations for gray16 LE/BE follow directly below.) */
446 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
447 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
448 const int16_t **_lumSrc, int lumFilterSize, \
449 const int16_t *chrFilter, const int16_t **_chrUSrc, \
450 const int16_t **_chrVSrc, int chrFilterSize, \
451 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
454 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
455 **chrUSrc = (const int32_t **) _chrUSrc, \
456 **chrVSrc = (const int32_t **) _chrVSrc, \
457 **alpSrc = (const int32_t **) _alpSrc; \
458 uint16_t *dest = (uint16_t *) _dest; \
459 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
460 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
461 alpSrc, dest, dstW, y, fmt); \
464 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
465 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
466 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
467 int yalpha, int uvalpha, int y) \
469 const int32_t **buf = (const int32_t **) _buf, \
470 **ubuf = (const int32_t **) _ubuf, \
471 **vbuf = (const int32_t **) _vbuf, \
472 **abuf = (const int32_t **) _abuf; \
473 uint16_t *dest = (uint16_t *) _dest; \
474 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
475 dest, dstW, yalpha, uvalpha, y, fmt); \
478 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
479 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
480 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
481 int uvalpha, int y) \
483 const int32_t *buf0 = (const int32_t *) _buf0, \
484 **ubuf = (const int32_t **) _ubuf, \
485 **vbuf = (const int32_t **) _vbuf, \
486 *abuf0 = (const int32_t *) _abuf0; \
487 uint16_t *dest = (uint16_t *) _dest; \
488 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
489 dstW, uvalpha, y, fmt); \
492 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
493 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* Store one byte of 8 packed monochrome pixels; the MONOBLACK branch is
 * visible here, the store statements and the inverse (MONOWHITE) branch
 * appear to be missing from this copy — verify against upstream. */
495 #define output_pixel(pos, acc) \
496 if (target == PIX_FMT_MONOBLACK) { \
/**
 * Vertical-filter path for 1 bpp monochrome output: filter luma, clip,
 * add the dither_8x8_220 row value, look the result up in the gray table
 * (table_gU[128] + table_gV[128], i.e. neutral chroma) and shift each
 * resulting bit into the 8-pixel accumulator 'acc'.
 */
502 static av_always_inline void
503 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
504 const int16_t **lumSrc, int lumFilterSize,
505 const int16_t *chrFilter, const int16_t **chrUSrc,
506 const int16_t **chrVSrc, int chrFilterSize,
507 const int16_t **alpSrc, uint8_t *dest, int dstW,
508 int y, enum PixelFormat target)
510 const uint8_t * const d128=dither_8x8_220[y&7];
511 uint8_t *g = c->table_gU[128] + c->table_gV[128];
515 for (i = 0; i < dstW - 1; i += 2) {
520 for (j = 0; j < lumFilterSize; j++) {
521 Y1 += lumSrc[j][i] * lumFilter[j];
522 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* Clip only when either value overflowed 8 bits. */
526 if ((Y1 | Y2) & 0x100) {
527 Y1 = av_clip_uint8(Y1);
528 Y2 = av_clip_uint8(Y2);
/* "acc += acc + bit" shifts acc left one and appends the new bit. */
530 acc += acc + g[Y1 + d128[(i + 0) & 7]];
531 acc += acc + g[Y2 + d128[(i + 1) & 7]];
533 output_pixel(*dest++, acc);
/**
 * Two-line bilinear path for 1 bpp monochrome output: blend 8 luma samples
 * per iteration with the 12-bit weight 'yalpha', dither with the
 * dither_8x8_220 row, and pack the 8 resulting bits into one output byte.
 */
538 static av_always_inline void
539 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
540 const int16_t *ubuf[2], const int16_t *vbuf[2],
541 const int16_t *abuf[2], uint8_t *dest, int dstW,
542 int yalpha, int uvalpha, int y,
543 enum PixelFormat target)
545 const int16_t *buf0 = buf[0], *buf1 = buf[1];
546 const uint8_t * const d128 = dither_8x8_220[y & 7];
547 uint8_t *g = c->table_gU[128] + c->table_gV[128];
548 int yalpha1 = 4095 - yalpha;
551 for (i = 0; i < dstW - 7; i += 8) {
552 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
553 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
554 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
555 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
556 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
557 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
558 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
559 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
560 output_pixel(*dest++, acc);
/**
 * Single-line path for 1 bpp monochrome output: >> 7 rescales each 15-bit
 * intermediate luma sample to 8 bits; 8 dithered table lookups are packed
 * into one output byte.
 */
564 static av_always_inline void
565 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
566 const int16_t *ubuf[2], const int16_t *vbuf[2],
567 const int16_t *abuf0, uint8_t *dest, int dstW,
568 int uvalpha, int y, enum PixelFormat target)
570 const uint8_t * const d128 = dither_8x8_220[y & 7];
571 uint8_t *g = c->table_gU[128] + c->table_gV[128];
574 for (i = 0; i < dstW - 7; i += 8) {
575 int acc = g[(buf0[i ] >> 7) + d128[0]];
576 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
577 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
578 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
579 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
580 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
581 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
582 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
583 output_pixel(*dest++, acc);
/* Generate the three public entry points (_X_c, _2_c, _1_c) for an 8-bit
 * packed output format by forwarding to the *_c_template functions with
 * the pixel format pinned to 'fmt'. Instantiations for the monochrome
 * writers follow below. (Comments kept outside the macro because of the
 * backslash continuations.) */
589 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
590 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
591 const int16_t **lumSrc, int lumFilterSize, \
592 const int16_t *chrFilter, const int16_t **chrUSrc, \
593 const int16_t **chrVSrc, int chrFilterSize, \
594 const int16_t **alpSrc, uint8_t *dest, int dstW, \
597 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
598 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
599 alpSrc, dest, dstW, y, fmt); \
602 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
603 const int16_t *ubuf[2], const int16_t *vbuf[2], \
604 const int16_t *abuf[2], uint8_t *dest, int dstW, \
605 int yalpha, int uvalpha, int y) \
607 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
608 dest, dstW, yalpha, uvalpha, y, fmt); \
611 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
612 const int16_t *ubuf[2], const int16_t *vbuf[2], \
613 const int16_t *abuf0, uint8_t *dest, int dstW, \
614 int uvalpha, int y) \
616 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
617 abuf0, dest, dstW, uvalpha, \
621 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
622 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Write one Y1/U/Y2/V quad: YUYV order for PIX_FMT_YUYV422, otherwise the
 * UYVY byte order (Y at odd offsets). NOTE(review): the U/V store lines and
 * the else line appear to be missing from this copy — only the Y stores are
 * visible. Comments kept outside the macro (backslash continuations). */
624 #define output_pixels(pos, Y1, U, Y2, V) \
625 if (target == PIX_FMT_YUYV422) { \
626 dest[pos + 0] = Y1; \
628 dest[pos + 2] = Y2; \
632 dest[pos + 1] = Y1; \
634 dest[pos + 3] = Y2; \
/**
 * Vertical-filter path for packed 4:2:2 output (YUYV/UYVY): filter two luma
 * and one chroma pair per iteration, clip to 8 bits only when the combined
 * overflow test fires, then pack via output_pixels().
 */
637 static av_always_inline void
638 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
639 const int16_t **lumSrc, int lumFilterSize,
640 const int16_t *chrFilter, const int16_t **chrUSrc,
641 const int16_t **chrVSrc, int chrFilterSize,
642 const int16_t **alpSrc, uint8_t *dest, int dstW,
643 int y, enum PixelFormat target)
647 for (i = 0; i < (dstW >> 1); i++) {
654 for (j = 0; j < lumFilterSize; j++) {
655 Y1 += lumSrc[j][i * 2] * lumFilter[j];
656 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
658 for (j = 0; j < chrFilterSize; j++) {
659 U += chrUSrc[j][i] * chrFilter[j];
660 V += chrVSrc[j][i] * chrFilter[j];
/* Clip only when any of the four values overflowed 8 bits. */
666 if ((Y1 | Y2 | U | V) & 0x100) {
667 Y1 = av_clip_uint8(Y1);
668 Y2 = av_clip_uint8(Y2);
669 U = av_clip_uint8(U);
670 V = av_clip_uint8(V);
672 output_pixels(4*i, Y1, U, Y2, V);
/**
 * Two-line bilinear path for packed 4:2:2 output: blend luma and chroma
 * lines with the 12-bit weights 'yalpha'/'uvalpha' and shift back by 19.
 */
676 static av_always_inline void
677 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
678 const int16_t *ubuf[2], const int16_t *vbuf[2],
679 const int16_t *abuf[2], uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
683 const int16_t *buf0 = buf[0], *buf1 = buf[1],
684 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
685 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
686 int yalpha1 = 4095 - yalpha;
687 int uvalpha1 = 4095 - uvalpha;
690 for (i = 0; i < (dstW >> 1); i++) {
691 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
692 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
693 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
694 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
696 output_pixels(i * 4, Y1, U, Y2, V);
/**
 * Single-line path for packed 4:2:2 output. uvalpha < 2048 means the chroma
 * line needs no blending (one source line dominates); otherwise the two
 * chroma lines are averaged with (a + b) >> 8 on the 15-bit intermediates.
 */
700 static av_always_inline void
701 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
702 const int16_t *ubuf[2], const int16_t *vbuf[2],
703 const int16_t *abuf0, uint8_t *dest, int dstW,
704 int uvalpha, int y, enum PixelFormat target)
706 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
707 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
710 if (uvalpha < 2048) {
711 for (i = 0; i < (dstW >> 1); i++) {
712 int Y1 = buf0[i * 2] >> 7;
713 int Y2 = buf0[i * 2 + 1] >> 7;
714 int U = ubuf1[i] >> 7;
715 int V = vbuf1[i] >> 7;
717 output_pixels(i * 4, Y1, U, Y2, V);
/* Chroma blending branch: average the two chroma lines. */
720 for (i = 0; i < (dstW >> 1); i++) {
721 int Y1 = buf0[i * 2] >> 7;
722 int Y2 = buf0[i * 2 + 1] >> 7;
723 int U = (ubuf0[i] + ubuf1[i]) >> 8;
724 int V = (vbuf0[i] + vbuf1[i]) >> 8;
726 output_pixels(i * 4, Y1, U, Y2, V);
733 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
734 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* R_B/B_R select component order so one template serves both RGB48 and
 * BGR48. output_pixel stores a 16-bit component in the byte order selected
 * by isBE(target); its store lines and else branch appear to be missing
 * from this copy — verify against upstream. */
736 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
737 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
738 #define output_pixel(pos, val) \
739 if (isBE(target)) { \
/**
 * Vertical-filter path for 48-bit RGB/BGR output: filter two luma and one
 * chroma pair from 32-bit intermediates, convert to R/G/B with the
 * context's yuv2rgb_* fixed-point coefficients, clip to 30 bits and shift
 * down by 14 to 16 bits per component.
 */
745 static av_always_inline void
746 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
747 const int32_t **lumSrc, int lumFilterSize,
748 const int16_t *chrFilter, const int32_t **chrUSrc,
749 const int32_t **chrVSrc, int chrFilterSize,
750 const int32_t **alpSrc, uint16_t *dest, int dstW,
751 int y, enum PixelFormat target)
755 for (i = 0; i < (dstW >> 1); i++) {
/* Chroma accumulators are pre-biased by -128 in the 23-bit domain. */
759 int U = -128 << 23; // 19
763 for (j = 0; j < lumFilterSize; j++) {
764 Y1 += lumSrc[j][i * 2] * lumFilter[j];
765 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
767 for (j = 0; j < chrFilterSize; j++) {
768 U += chrUSrc[j][i] * chrFilter[j];
769 V += chrVSrc[j][i] * chrFilter[j];
772 // 8bit: 12+15=27; 16-bit: 12+19=31
778 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
779 Y1 -= c->yuv2rgb_y_offset;
780 Y2 -= c->yuv2rgb_y_offset;
781 Y1 *= c->yuv2rgb_y_coeff;
782 Y2 *= c->yuv2rgb_y_coeff;
785 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
787 R = V * c->yuv2rgb_v2r_coeff;
788 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
789 B = U * c->yuv2rgb_u2b_coeff;
791 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
792 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
793 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
794 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
795 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
796 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
797 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Two-line bilinear path for 48-bit RGB/BGR output: blend the two lines
 * with 12-bit weights, apply the -128 chroma bias in the 23-bit domain
 * before the >> 14, then run the same fixed-point YUV->RGB conversion as
 * the _X_ path.
 */
802 static av_always_inline void
803 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
804 const int32_t *ubuf[2], const int32_t *vbuf[2],
805 const int32_t *abuf[2], uint16_t *dest, int dstW,
806 int yalpha, int uvalpha, int y,
807 enum PixelFormat target)
809 const int32_t *buf0 = buf[0], *buf1 = buf[1],
810 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
811 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 int yalpha1 = 4095 - yalpha;
813 int uvalpha1 = 4095 - uvalpha;
816 for (i = 0; i < (dstW >> 1); i++) {
817 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
818 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
819 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
820 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
823 Y1 -= c->yuv2rgb_y_offset;
824 Y2 -= c->yuv2rgb_y_offset;
825 Y1 *= c->yuv2rgb_y_coeff;
826 Y2 *= c->yuv2rgb_y_coeff;
830 R = V * c->yuv2rgb_v2r_coeff;
831 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
832 B = U * c->yuv2rgb_u2b_coeff;
834 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
835 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
836 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
837 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
838 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
839 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Single-line path for 48-bit RGB/BGR output. uvalpha < 2048: use one
 * chroma line (>> 2 rescale, -128 bias applied in the 11-bit domain);
 * otherwise average the two chroma lines (>> 3). Conversion and store
 * match the other rgb48 paths. Instantiations for the four 48-bit
 * formats follow.
 */
844 static av_always_inline void
845 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
846 const int32_t *ubuf[2], const int32_t *vbuf[2],
847 const int32_t *abuf0, uint16_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
850 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = (buf0[i * 2] ) >> 2;
857 int Y2 = (buf0[i * 2 + 1]) >> 2;
858 int U = (ubuf0[i] + (-128 << 11)) >> 2;
859 int V = (vbuf0[i] + (-128 << 11)) >> 2;
862 Y1 -= c->yuv2rgb_y_offset;
863 Y2 -= c->yuv2rgb_y_offset;
864 Y1 *= c->yuv2rgb_y_coeff;
865 Y2 *= c->yuv2rgb_y_coeff;
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
873 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
874 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
875 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
876 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
877 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
878 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Chroma blending branch: average the two chroma lines. */
882 for (i = 0; i < (dstW >> 1); i++) {
883 int Y1 = (buf0[i * 2] ) >> 2;
884 int Y2 = (buf0[i * 2 + 1]) >> 2;
885 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
886 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
889 Y1 -= c->yuv2rgb_y_offset;
890 Y2 -= c->yuv2rgb_y_offset;
891 Y1 *= c->yuv2rgb_y_coeff;
892 Y2 *= c->yuv2rgb_y_coeff;
896 R = V * c->yuv2rgb_v2r_coeff;
897 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
898 B = U * c->yuv2rgb_u2b_coeff;
900 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
901 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
902 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
903 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
904 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
905 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
915 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
916 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
917 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
918 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/**
 * Shared writer for all table-driven packed-RGB outputs. _r/_g/_b are
 * per-component lookup tables (built elsewhere) whose entries are already
 * positioned in the output word, so a pixel is formed by summing one entry
 * per component. Writes the pixel pair (Y1,Y2) for index i; chooses store
 * width and dithering by 'target'.
 */
920 static av_always_inline void
921 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
922 int U, int V, int A1, int A2,
923 const void *_r, const void *_g, const void *_b, int y,
924 enum PixelFormat target, int hasAlpha)
/* 32-bit RGBA-family formats: one uint32_t per pixel, alpha merged in at
 * bit 0 or 24 depending on the _1 variants. */
926 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
927 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
928 uint32_t *dest = (uint32_t *) _dest;
929 const uint32_t *r = (const uint32_t *) _r;
930 const uint32_t *g = (const uint32_t *) _g;
931 const uint32_t *b = (const uint32_t *) _b;
934 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
936 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
937 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
940 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
942 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
943 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
945 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
946 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24-bit formats: three bytes per pixel, component order via r_b/b_r. */
949 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
950 uint8_t *dest = (uint8_t *) _dest;
951 const uint8_t *r = (const uint8_t *) _r;
952 const uint8_t *g = (const uint8_t *) _g;
953 const uint8_t *b = (const uint8_t *) _b;
955 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
956 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
957 dest[i * 6 + 0] = r_b[Y1];
958 dest[i * 6 + 1] = g[Y1];
959 dest[i * 6 + 2] = b_r[Y1];
960 dest[i * 6 + 3] = r_b[Y2];
961 dest[i * 6 + 4] = g[Y2];
962 dest[i * 6 + 5] = b_r[Y2];
/* 16-bit formats (565/555/444): per-component dither offsets index
 * deeper into the lookup tables; the blue row is phase-inverted
 * ((y & 1) ^ 1) relative to red to decorrelate the noise. */
965 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
966 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
967 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
968 uint16_t *dest = (uint16_t *) _dest;
969 const uint16_t *r = (const uint16_t *) _r;
970 const uint16_t *g = (const uint16_t *) _g;
971 const uint16_t *b = (const uint16_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
974 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
975 dr1 = dither_2x2_8[ y & 1 ][0];
976 dg1 = dither_2x2_4[ y & 1 ][0];
977 db1 = dither_2x2_8[(y & 1) ^ 1][0];
978 dr2 = dither_2x2_8[ y & 1 ][1];
979 dg2 = dither_2x2_4[ y & 1 ][1];
980 db2 = dither_2x2_8[(y & 1) ^ 1][1];
981 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
982 dr1 = dither_2x2_8[ y & 1 ][0];
983 dg1 = dither_2x2_8[ y & 1 ][1];
984 db1 = dither_2x2_8[(y & 1) ^ 1][0];
985 dr2 = dither_2x2_8[ y & 1 ][1];
986 dg2 = dither_2x2_8[ y & 1 ][0];
987 db2 = dither_2x2_8[(y & 1) ^ 1][1];
989 dr1 = dither_4x4_16[ y & 3 ][0];
990 dg1 = dither_4x4_16[ y & 3 ][1];
991 db1 = dither_4x4_16[(y & 3) ^ 3][0];
992 dr2 = dither_4x4_16[ y & 3 ][1];
993 dg2 = dither_4x4_16[ y & 3 ][0];
994 db2 = dither_4x4_16[(y & 3) ^ 3][1];
997 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
998 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
999 } else /* 8/4-bit */ {
1000 uint8_t *dest = (uint8_t *) _dest;
1001 const uint8_t *r = (const uint8_t *) _r;
1002 const uint8_t *g = (const uint8_t *) _g;
1003 const uint8_t *b = (const uint8_t *) _b;
1004 int dr1, dg1, db1, dr2, dg2, db2;
1006 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1007 const uint8_t * const d64 = dither_8x8_73[y & 7];
1008 const uint8_t * const d32 = dither_8x8_32[y & 7];
1009 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1010 db1 = d64[(i * 2 + 0) & 7];
1011 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1012 db2 = d64[(i * 2 + 1) & 7];
1014 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1015 const uint8_t * const d128 = dither_8x8_220[y & 7];
1016 dr1 = db1 = d128[(i * 2 + 0) & 7];
1017 dg1 = d64[(i * 2 + 0) & 7];
1018 dr2 = db2 = d128[(i * 2 + 1) & 7];
1019 dg2 = d64[(i * 2 + 1) & 7];
/* 4-bit formats pack both pixels of the pair into one byte. */
1022 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1023 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1024 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1026 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1027 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/**
 * Vertical-filter path for table-driven packed-RGB output: filter luma,
 * chroma and (optionally) alpha, clip to 8 bits, look up the per-component
 * tables from the context, and hand the pixel pair to yuv2rgb_write().
 */
1032 static av_always_inline void
1033 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1034 const int16_t **lumSrc, int lumFilterSize,
1035 const int16_t *chrFilter, const int16_t **chrUSrc,
1036 const int16_t **chrVSrc, int chrFilterSize,
1037 const int16_t **alpSrc, uint8_t *dest, int dstW,
1038 int y, enum PixelFormat target, int hasAlpha)
1042 for (i = 0; i < (dstW >> 1); i++) {
1048 int av_unused A1, A2;
1049 const void *r, *g, *b;
1051 for (j = 0; j < lumFilterSize; j++) {
1052 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1053 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1055 for (j = 0; j < chrFilterSize; j++) {
1056 U += chrUSrc[j][i] * chrFilter[j];
1057 V += chrVSrc[j][i] * chrFilter[j];
/* Clip only when any of the four values overflowed 8 bits. */
1063 if ((Y1 | Y2 | U | V) & 0x100) {
1064 Y1 = av_clip_uint8(Y1);
1065 Y2 = av_clip_uint8(Y2);
1066 U = av_clip_uint8(U);
1067 V = av_clip_uint8(V);
/* Alpha path: filter and clip like luma (guard lines not visible here). */
1072 for (j = 0; j < lumFilterSize; j++) {
1073 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1074 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1078 if ((A1 | A2) & 0x100) {
1079 A1 = av_clip_uint8(A1);
1080 A2 = av_clip_uint8(A2);
1084 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1086 g = (c->table_gU[U] + c->table_gV[V]);
1089 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1090 r, g, b, y, target, hasAlpha);
/*
 * Bilinear (2-line) vertical blend + YUV->RGB conversion.
 * yalpha/uvalpha in [0..4095] weight the second line; the complementary
 * weights (4095 - alpha) apply to the first line, so weights sum to 4095
 * and the >> 19 restores 8-bit range from the 15-bit * 12-bit products.
 */
1094 static av_always_inline void
1095 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1096 const int16_t *ubuf[2], const int16_t *vbuf[2],
1097 const int16_t *abuf[2], uint8_t *dest, int dstW,
1098 int yalpha, int uvalpha, int y,
1099 enum PixelFormat target, int hasAlpha)
1101 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1102 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1103 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1104 *abuf0 = hasAlpha ? abuf[0] : NULL,
1105 *abuf1 = hasAlpha ? abuf[1] : NULL;
1106 int yalpha1 = 4095 - yalpha;
1107 int uvalpha1 = 4095 - uvalpha;
/* two output pixels per iteration (chroma shared) */
1110 for (i = 0; i < (dstW >> 1); i++) {
1111 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1112 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1113 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1114 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1116 const void *r = c->table_rV[V],
1117 *g = (c->table_gU[U] + c->table_gV[V]),
1118 *b = c->table_bU[U];
1121 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1122 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1125 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1126 r, g, b, y, target, hasAlpha);
/*
 * Unscaled (single input line) YUV->RGB conversion.
 * When uvalpha < 2048 one chroma line is used as-is (>> 7 to 8 bits);
 * otherwise the two chroma lines are averaged (>> 8).
 */
1130 static av_always_inline void
1131 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1132 const int16_t *ubuf[2], const int16_t *vbuf[2],
1133 const int16_t *abuf0, uint8_t *dest, int dstW,
1134 int uvalpha, int y, enum PixelFormat target,
1137 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1141 if (uvalpha < 2048) {
1142 for (i = 0; i < (dstW >> 1); i++) {
1143 int Y1 = buf0[i * 2] >> 7;
1144 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): the unblended path reads ubuf1/vbuf1 (the second chroma
 * line) rather than ubuf0/vbuf0 — verify against upstream before relying
 * on this. */
1145 int U = ubuf1[i] >> 7;
1146 int V = vbuf1[i] >> 7;
1148 const void *r = c->table_rV[V],
1149 *g = (c->table_gU[U] + c->table_gV[V]),
1150 *b = c->table_bU[U];
1153 A1 = abuf0[i * 2 ] >> 7;
1154 A2 = abuf0[i * 2 + 1] >> 7;
1157 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1158 r, g, b, y, target, hasAlpha);
/* averaged-chroma path: (line0 + line1) >> 8 keeps 8-bit range */
1161 for (i = 0; i < (dstW >> 1); i++) {
1162 int Y1 = buf0[i * 2] >> 7;
1163 int Y2 = buf0[i * 2 + 1] >> 7;
1164 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1165 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1167 const void *r = c->table_rV[V],
1168 *g = (c->table_gU[U] + c->table_gV[V]),
1169 *b = c->table_bU[U];
1172 A1 = abuf0[i * 2 ] >> 7;
1173 A2 = abuf0[i * 2 + 1] >> 7;
1176 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1177 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX stamps out the N-tap (_X_c) entry
 * point for a given pixel format + alpha condition; YUV2RGBWRAPPER adds
 * the bilinear (_2_c) and unscaled (_1_c) entry points on top of it.
 * Each wrapper just forwards to the matching *_c_template with the
 * compile-time 'fmt' and 'hasAlpha' arguments, letting the always_inline
 * templates specialize per format.
 */
1182 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1183 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1184 const int16_t **lumSrc, int lumFilterSize, \
1185 const int16_t *chrFilter, const int16_t **chrUSrc, \
1186 const int16_t **chrVSrc, int chrFilterSize, \
1187 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1190 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1191 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1192 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1194 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1195 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1196 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1197 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1198 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1199 int yalpha, int uvalpha, int y) \
1201 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1202 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1205 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1206 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1207 const int16_t *abuf0, uint8_t *dest, int dstW, \
1208 int uvalpha, int y) \
1210 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1211 dstW, uvalpha, y, fmt, hasAlpha); \
1215 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1216 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1218 #if CONFIG_SWSCALE_ALPHA
1219 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1220 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1222 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1223 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1225 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1226 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1227 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1228 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1229 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1230 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1231 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1232 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-resolution N-tap output: one U/V pair per output pixel
 * (no 4:2:2 pairing), computed arithmetically from the context's
 * yuv2rgb_* coefficients instead of lookup tables. 'step' is the packed
 * pixel size in bytes (3 for 24-bit RGB/BGR, otherwise 4).
 */
1234 static av_always_inline void
1235 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1236 const int16_t **lumSrc, int lumFilterSize,
1237 const int16_t *chrFilter, const int16_t **chrUSrc,
1238 const int16_t **chrVSrc, int chrFilterSize,
1239 const int16_t **alpSrc, uint8_t *dest,
1240 int dstW, int y, enum PixelFormat target, int hasAlpha)
1243 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1245 for (i = 0; i < dstW; i++) {
1253 for (j = 0; j < lumFilterSize; j++) {
1254 Y += lumSrc[j][i] * lumFilter[j];
1256 for (j = 0; j < chrFilterSize; j++) {
1257 U += chrUSrc[j][i] * chrFilter[j];
1258 V += chrVSrc[j][i] * chrFilter[j];
1265 for (j = 0; j < lumFilterSize; j++) {
1266 A += alpSrc[j][i] * lumFilter[j];
1270 A = av_clip_uint8(A);
/* matrix multiply: offset/scale luma, then add chroma contributions */
1272 Y -= c->yuv2rgb_y_offset;
1273 Y *= c->yuv2rgb_y_coeff;
1275 R = Y + V*c->yuv2rgb_v2r_coeff;
1276 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1277 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip to 30 bits only when a component went out of range */
1278 if ((R | G | B) & 0xC0000000) {
1279 R = av_clip_uintp2(R, 30);
1280 G = av_clip_uintp2(G, 30);
1281 B = av_clip_uintp2(B, 30);
/* alpha byte: pass filtered alpha through, or fully opaque */
1286 dest[0] = hasAlpha ? A : 255;
1300 dest[3] = hasAlpha ? A : 255;
1303 dest[0] = hasAlpha ? A : 255;
1318 dest[3] = hasAlpha ? A : 255;
1326 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1327 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1328 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1329 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1331 #if CONFIG_SWSCALE_ALPHA
1332 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1333 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1334 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1335 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1337 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1338 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1339 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1340 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1342 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1343 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/* Fill a rectangular region of a plane with a constant byte value.
 *
 * plane  - base pointer of the destination plane
 * stride - distance in bytes between the starts of consecutive lines
 * width  - number of bytes to set per line
 * height - number of lines to fill
 * y      - index of the first line to fill
 * val    - byte value written to every covered position
 */
static inline void fillPlane(uint8_t *plane, int stride,
                             int width, int height,
                             int y, uint8_t val)
{
    uint8_t *row = plane + stride * y;
    int line;

    for (line = 0; line < height; line++) {
        memset(row, val, width);
        row += stride;
    }
}
/*
 * 48-bit RGB/BGR input converters (16 bits per component).
 * input_pixel() reads one 16-bit component honoring the format's
 * endianness; the r/b macros swap the first/last component for the BGR48
 * variants so the same template serves both channel orders.
 */
1357 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1359 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1360 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* one 16-bit luma sample per pixel, rounded via the 0x2001 bias */
1362 static av_always_inline void
1363 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1364 enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 unsigned int r_b = input_pixel(&src[i*3+0]);
1369 unsigned int g = input_pixel(&src[i*3+1]);
1370 unsigned int b_r = input_pixel(&src[i*3+2]);
1372 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* full-resolution chroma: one U/V pair per input pixel */
1376 static av_always_inline void
1377 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1378 const uint16_t *src1, const uint16_t *src2,
1379 int width, enum PixelFormat origin)
1383 for (i = 0; i < width; i++) {
1384 int r_b = input_pixel(&src1[i*3+0]);
1385 int g = input_pixel(&src1[i*3+1]);
1386 int b_r = input_pixel(&src1[i*3+2]);
1388 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1389 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* half-resolution chroma: rounds the average of two adjacent pixels */
1393 static av_always_inline void
1394 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1395 const uint16_t *src1, const uint16_t *src2,
1396 int width, enum PixelFormat origin)
1400 for (i = 0; i < width; i++) {
1401 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1402 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1403 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1405 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1406 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generates the concrete 48-bit entry points (ToY / ToUV / ToUV_half)
 * for a pattern (rgb/bgr) and endianness (BE/LE). The wrappers only cast
 * the byte pointers to uint16_t pointers and forward to the templates
 * above with the matching PIX_FMT_* origin.
 */
1414 #define rgb48funcs(pattern, BE_LE, origin) \
1415 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1416 int width, uint32_t *unused) \
1418 const uint16_t *src = (const uint16_t *) _src; \
1419 uint16_t *dst = (uint16_t *) _dst; \
1420 rgb48ToY_c_template(dst, src, width, origin); \
1423 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1424 const uint8_t *_src1, const uint8_t *_src2, \
1425 int width, uint32_t *unused) \
1427 const uint16_t *src1 = (const uint16_t *) _src1, \
1428 *src2 = (const uint16_t *) _src2; \
1429 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1430 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1433 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1434 const uint8_t *_src1, const uint8_t *_src2, \
1435 int width, uint32_t *unused) \
1437 const uint16_t *src1 = (const uint16_t *) _src1, \
1438 *src2 = (const uint16_t *) _src2; \
1439 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1440 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1443 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1444 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1445 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1446 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/*
 * Generic 16/32-bit packed RGB input templates. Components are isolated
 * with (mask, shift) pairs; 32-bit formats are loaded as aligned 32-bit
 * words, 15/16-bit formats as endian-correct 16-bit reads. rsh/gsh/bsh
 * pre-scale the RGB2YUV coefficients and S is the final shift, so the
 * same code handles both component depths.
 */
1448 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1449 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1450 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1452 static av_always_inline void
1453 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1454 int width, enum PixelFormat origin,
1455 int shr, int shg, int shb, int shp,
1456 int maskr, int maskg, int maskb,
1457 int rsh, int gsh, int bsh, int S)
1459 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1460 rnd = 33 << (S - 1);
1463 for (i = 0; i < width; i++) {
1464 int px = input_pixel(i) >> shp;
1465 int b = (px & maskb) >> shb;
1466 int g = (px & maskg) >> shg;
1467 int r = (px & maskr) >> shr;
1469 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/* full-resolution chroma variant of the template above */
1473 static av_always_inline void
1474 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1475 const uint8_t *src, int width,
1476 enum PixelFormat origin,
1477 int shr, int shg, int shb, int shp,
1478 int maskr, int maskg, int maskb,
1479 int rsh, int gsh, int bsh, int S)
1481 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1482 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1483 rnd = 257 << (S - 1);
1486 for (i = 0; i < width; i++) {
1487 int px = input_pixel(i) >> shp;
1488 int b = (px & maskb) >> shb;
1489 int g = (px & maskg) >> shg;
1490 int r = (px & maskr) >> shr;
1492 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1493 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* half-resolution chroma: sums two adjacent pixels before the multiply;
 * masks are widened by one bit so the pairwise sums do not overflow
 * into the neighboring component field */
1497 static av_always_inline void
1498 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1499 const uint8_t *src, int width,
1500 enum PixelFormat origin,
1501 int shr, int shg, int shb, int shp,
1502 int maskr, int maskg, int maskb,
1503 int rsh, int gsh, int bsh, int S)
1505 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1506 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1507 rnd = 257 << S, maskgx = ~(maskr | maskb);
1510 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1511 for (i = 0; i < width; i++) {
1512 int px0 = input_pixel(2 * i + 0) >> shp;
1513 int px1 = input_pixel(2 * i + 1) >> shp;
1514 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1515 int rb = px0 + px1 - g;
1517 b = (rb & maskb) >> shb;
1518 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1519 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1522 g = (g & maskg) >> shg;
1524 r = (rb & maskr) >> shr;
1526 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1527 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * Generates the ToY / ToUV / ToUV_half entry points for each packed
 * 15/16/32-bit RGB variant by binding the per-format mask/shift set to
 * the rgb16_32 templates. The table of instantiations below encodes the
 * component layout of every supported format.
 */
1533 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1534 maskg, maskb, rsh, gsh, bsh, S) \
1535 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1536 int width, uint32_t *unused) \
1538 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1539 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1542 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1543 const uint8_t *src, const uint8_t *dummy, \
1544 int width, uint32_t *unused) \
1546 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1547 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1550 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1551 const uint8_t *src, const uint8_t *dummy, \
1552 int width, uint32_t *unused) \
1554 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1555 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1558 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1559 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1560 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1561 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1562 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1563 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1564 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1565 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1566 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1567 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1568 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1569 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/* Extract the alpha plane from packed ABGR: alpha is the first byte of
 * each 4-byte pixel. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[4 * n + 0];
}
/* Extract the alpha plane from packed RGBA: alpha is the last byte of
 * each 4-byte pixel. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[4 * n + 3];
}
/* PAL8 -> luma: look the index up in the 32-bit palette and keep the
 * low byte, which holds the Y value. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = pal[src[n]] & 0xFF;
}
/* PAL8 -> chroma: byte 1 of each palette entry is U, byte 2 is V.
 * Both source pointers must alias the same line (asserted). */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int n;

    assert(src1 == src2);
    for (n = 0; n < width; n++) {
        uint32_t entry = pal[src1[n]];

        dstU[n] = entry >> 8;   /* second byte of the entry */
        dstV[n] = entry >> 16;  /* third byte of the entry */
    }
}
/* Expand 1 bpp mono-white (a 0 bit is white) to 8-bit luma, MSB first:
 * each input bit becomes one output byte, 255 or 0. Only complete groups
 * of 8 pixels are converted (width / 8 input bytes). */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;

    for (i = 0; i < width / 8; i++) {
        unsigned bits = 0xFF & ~src[i]; /* white is stored as 0, so invert */

        for (j = 0; j < 8; j++) {
            dst[8 * i + j] = (bits & 0x80) ? 255 : 0;
            bits <<= 1;
        }
    }
}
/* Expand 1 bpp mono-black (a 1 bit is white) to 8-bit luma, MSB first.
 * Only complete groups of 8 pixels are converted (width / 8 bytes). */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;

    for (i = 0; i < width / 8; i++) {
        unsigned bits = src[i];

        for (j = 0; j < 8; j++) {
            dst[8 * i + j] = (bits & 0x80) ? 255 : 0;
            bits <<= 1;
        }
    }
}
1633 //FIXME yuy2* can read up to 7 samples too much
/* YUYV422 -> luma: the Y samples sit at even byte offsets. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *in = src;
    int n;

    for (n = 0; n < width; n++, in += 2)
        dst[n] = *in;
}
/* YUYV422 -> chroma: the packed layout is Y0 U Y1 V, so U sits at byte 1
 * and V at byte 3 of each 4-byte group. Both inputs must alias the same
 * line (asserted) since YUYV carries chroma interleaved with luma. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *group = src1 + 4 * n;

        dstU[n] = group[1];
        dstV[n] = group[3];
    }
    assert(src1 == src2);
}
/* Byte-swap one line of 16-bit luma samples (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++) {
        uint16_t v = src[n];

        dst[n] = (uint16_t)((v >> 8) | (v << 8)); /* same result as av_bswap16() */
    }
}
/* Byte-swap one line each of 16-bit U and V samples. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *src1 = (const uint16_t *) _src1;
    const uint16_t *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        uint16_t u = src1[n];
        uint16_t v = src2[n];

        dstU[n] = (uint16_t)((u >> 8) | (u << 8)); /* same result as av_bswap16() */
        dstV[n] = (uint16_t)((v >> 8) | (v << 8));
    }
}
1677 /* This is almost identical to the previous, and exists only because
1678 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY422 -> luma: the Y samples sit at odd byte offsets. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    const uint8_t *in = src + 1;
    int n;

    for (n = 0; n < width; n++, in += 2)
        dst[n] = *in;
}
/* UYVY422 -> chroma: the packed layout is U Y0 V Y1, so U sits at byte 0
 * and V at byte 2 of each 4-byte group. Both inputs must alias the same
 * line (asserted). */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *group = src1 + 4 * n;

        dstU[n] = group[0];
        dstV[n] = group[2];
    }
    assert(src1 == src2);
}
/* De-interleave one line of packed chroma (C0 C1 C0 C1 ...) into two
 * separate planes: even bytes go to dst1, odd bytes to dst2. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dst1[n] = src[2 * n + 0];
        dst2[n] = src[2 * n + 1];
    }
}
/* NV12 chroma plane is interleaved U,V pairs; split into planar U and V.
 * (Deinterleave inlined: even bytes are U, odd bytes are V.) */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[2 * n + 0];
        dstV[n] = src1[2 * n + 1];
    }
}
/* NV21 chroma plane is interleaved V,U pairs; split into planar U and V.
 * (Deinterleave inlined: even bytes are V, odd bytes are U.) */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstV[n] = src1[2 * n + 0];
        dstU[n] = src1[2 * n + 1];
    }
}
1722 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed 24-bit BGR -> luma using the RGB2YUV fixed-point coefficients;
 * the 33<<(SHIFT-1) term is the rounding bias plus the 16<<SHIFT black
 * offset folded together. */
1724 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1725 int width, uint32_t *unused)
1728 for (i=0; i<width; i++) {
1733 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* full-resolution BGR24 chroma; src1 and src2 must alias (asserted) */
1737 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1738 const uint8_t *src2, int width, uint32_t *unused)
1741 for (i=0; i<width; i++) {
1742 int b= src1[3*i + 0];
1743 int g= src1[3*i + 1];
1744 int r= src1[3*i + 2];
1746 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1747 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1749 assert(src1 == src2);
/* half-resolution BGR24 chroma: sums two adjacent pixels, hence the
 * doubled bias and the extra +1 in the final shift */
1752 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1753 const uint8_t *src2, int width, uint32_t *unused)
1756 for (i=0; i<width; i++) {
1757 int b= src1[6*i + 0] + src1[6*i + 3];
1758 int g= src1[6*i + 1] + src1[6*i + 4];
1759 int r= src1[6*i + 2] + src1[6*i + 5];
1761 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1762 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1764 assert(src1 == src2);
/* RGB24 variants: identical math, opposite component order in memory */
1767 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1771 for (i=0; i<width; i++) {
1776 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1780 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1781 const uint8_t *src2, int width, uint32_t *unused)
1785 for (i=0; i<width; i++) {
1786 int r= src1[3*i + 0];
1787 int g= src1[3*i + 1];
1788 int b= src1[3*i + 2];
1790 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1791 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1795 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1796 const uint8_t *src2, int width, uint32_t *unused)
1800 for (i=0; i<width; i++) {
1801 int r= src1[6*i + 0] + src1[6*i + 3];
1802 int g= src1[6*i + 1] + src1[6*i + 4];
1803 int b= src1[6*i + 2] + src1[6*i + 5];
1805 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1806 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Planar RGB input converters. Plane order is G, B, R: src[0] is read
 * into g, src[1] into b, src[2] into r. 8-bit and 16-bit LE/BE variants
 * share the same fixed-point RGB2YUV math. */
1810 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1813 for (i = 0; i < width; i++) {
1818 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* 16-bit little-endian planar RGB -> 16-bit luma */
1822 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1825 const uint16_t **src = (const uint16_t **) _src;
1826 uint16_t *dst = (uint16_t *) _dst;
1827 for (i = 0; i < width; i++) {
1828 int g = AV_RL16(src[0] + i);
1829 int b = AV_RL16(src[1] + i);
1830 int r = AV_RL16(src[2] + i);
1832 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* 16-bit big-endian planar RGB -> 16-bit luma */
1836 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1839 const uint16_t **src = (const uint16_t **) _src;
1840 uint16_t *dst = (uint16_t *) _dst;
1841 for (i = 0; i < width; i++) {
1842 int g = AV_RB16(src[0] + i);
1843 int b = AV_RB16(src[1] + i);
1844 int r = AV_RB16(src[2] + i);
1846 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* chroma variants: same plane order, U/V coefficients and bias */
1850 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1853 for (i = 0; i < width; i++) {
1858 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1859 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1863 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1866 const uint16_t **src = (const uint16_t **) _src;
1867 uint16_t *dstU = (uint16_t *) _dstU;
1868 uint16_t *dstV = (uint16_t *) _dstV;
1869 for (i = 0; i < width; i++) {
1870 int g = AV_RL16(src[0] + i);
1871 int b = AV_RL16(src[1] + i);
1872 int r = AV_RL16(src[2] + i);
1874 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1875 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1879 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1882 const uint16_t **src = (const uint16_t **) _src;
1883 uint16_t *dstU = (uint16_t *) _dstU;
1884 uint16_t *dstV = (uint16_t *) _dstV;
1885 for (i = 0; i < width; i++) {
1886 int g = AV_RB16(src[0] + i);
1887 int b = AV_RB16(src[1] + i);
1888 int r = AV_RB16(src[2] + i);
1890 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1891 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Horizontal scaling with 14-bit filter coefficients, 16-bit input,
 * 19-bit output (stored in int32). For each destination sample the
 * filterSize taps starting at filterPos[i] are accumulated.
 * NOTE(review): 'sh' is derived from 'bits' (the per-format depth from
 * av_pix_fmt_descriptors); its declaration is not visible here — confirm
 * sh = bits - 4 against upstream. */
1895 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1896 const int16_t *filter,
1897 const int16_t *filterPos, int filterSize)
1900 int32_t *dst = (int32_t *) _dst;
1901 const uint16_t *src = (const uint16_t *) _src;
1902 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1905 for (i = 0; i < dstW; i++) {
1907 int srcPos = filterPos[i];
1910 for (j = 0; j < filterSize; j++) {
1911 val += src[srcPos + j] * filter[filterSize * i + j];
1913 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1914 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* same accumulation, 15-bit output clamped to (1<<15)-1 */
1918 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1919 const int16_t *filter,
1920 const int16_t *filterPos, int filterSize)
1923 const uint16_t *src = (const uint16_t *) _src;
1924 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1926 for (i = 0; i < dstW; i++) {
1928 int srcPos = filterPos[i];
1931 for (j = 0; j < filterSize; j++) {
1932 val += src[srcPos + j] * filter[filterSize * i + j];
1934 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1935 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1939 // bilinear / bicubic scaling
/* 8-bit input -> 15-bit output; clamp because cubic filters can overshoot */
1940 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1941 const int16_t *filter, const int16_t *filterPos,
1945 for (i=0; i<dstW; i++) {
1947 int srcPos= filterPos[i];
1949 for (j=0; j<filterSize; j++) {
1950 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1952 //filter += hFilterSize;
1953 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 8-bit input -> 19-bit output (int32 destination) */
1958 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1959 const int16_t *filter, const int16_t *filterPos,
1963 int32_t *dst = (int32_t *) _dst;
1964 for (i=0; i<dstW; i++) {
1966 int srcPos= filterPos[i];
1968 for (j=0; j<filterSize; j++) {
1969 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1971 //filter += hFilterSize;
1972 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1977 //FIXME all pal and rgb srcFormats could do this conversion as well
1978 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) chroma on
 * 15-bit samples. The input is clamped to 30775 so the scaled result
 * stays within the 15-bit output range (constant offset -264). */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;

        dstU[i] = (u * 4663 - 9289992) >> 12;
        dstV[i] = (v * 4663 - 9289992) >> 12;
    }
}
/* Compress full-range (JPEG) chroma to limited-range (MPEG) chroma on
 * 15-bit samples (constant offset 1469). */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i = 0;

    while (i < width) {
        int u = dstU[i];
        int v = dstV[i];

        dstU[i] = (u * 1799 + 4081085) >> 11;
        dstV[i] = (v * 1799 + 4081085) >> 11;
        i++;
    }
}
/* Expand limited-range luma to full range on 15-bit samples; the input
 * is clamped to 30189 so the result stays within 15 bits. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int y = dst[i] < 30189 ? dst[i] : 30189;

        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full-range luma to limited range on 15-bit samples. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i = 0;

    while (i < width) {
        int y = dst[i];

        dst[i++] = (y * 14071 + 33561947) >> 14;
    }
}
/* 19-bit variant of chrRangeToJpeg_c: samples are int32 (stored behind
 * int16_t pointers), constants scaled by 16 to match. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int32_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);

        dstU[i] = (u * 4663 - (9289992 << 4)) >> 12;
        dstV[i] = (v * 4663 - (9289992 << 4)) >> 12;
    }
}
/* 19-bit variant of chrRangeFromJpeg_c (int32 samples, offset x16). */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i = 0;

    while (i < width) {
        dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11;
        dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11;
        i++;
    }
}
/* 19-bit variant of lumRangeToJpeg_c: int32 samples, clamp at 30189<<4. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        int32_t y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);

        dst[i] = (y * 4769 - (39057361 << 2)) >> 12;
    }
}
/* 19-bit variant of lumRangeFromJpeg_c (int32 samples, offset x16). */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i = 0;

    while (i < width) {
        dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
        i++;
    }
}
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point position xpos
 * advances by xInc per output sample; xalpha is the 7-bit interpolation
 * weight between the two neighboring source pixels. */
2043 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2044 const uint8_t *src, int srcW, int xInc)
2047 unsigned int xpos=0;
2048 for (i=0;i<dstWidth;i++) {
2049 register unsigned int xx=xpos>>16;
2050 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2051 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2056 // *** horizontal scale Y line to temp buffer
/* Dispatcher for one luma (or alpha when isAlpha) line: optionally
 * converts the input to YV12 into formatConvBuffer, then runs either
 * the filter-based hyScale or the fast bilinear path, and finally the
 * luma range conversion (skipped for alpha). */
2057 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2058 const uint8_t *src_in[4], int srcW, int xInc,
2059 const int16_t *hLumFilter,
2060 const int16_t *hLumFilterPos, int hLumFilterSize,
2061 uint8_t *formatConvBuffer,
2062 uint32_t *pal, int isAlpha)
2064 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2065 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2066 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2069 toYV12(formatConvBuffer, src, srcW, pal);
2070 src= formatConvBuffer;
2071 } else if (c->readLumPlanar && !isAlpha) {
2072 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2073 src = formatConvBuffer;
2076 if (!c->hyscale_fast) {
2077 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2078 } else { // fast bilinear upscale / crap downscale
2079 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2083 convertRange(dst, dstWidth);
/* Fast bilinear chroma scaler: same scheme as hyscale_fast_c but for
 * the two chroma planes at once; (xalpha ^ 127) is the complementary
 * weight of the left-hand sample. */
2086 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2087 int dstWidth, const uint8_t *src1,
2088 const uint8_t *src2, int srcW, int xInc)
2091 unsigned int xpos=0;
2092 for (i=0;i<dstWidth;i++) {
2093 register unsigned int xx=xpos>>16;
2094 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2095 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2096 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Chroma counterpart of hyscale(): optional input conversion (the
 * second plane lands 16-byte aligned after the first in
 * formatConvBuffer), then filtered or fast-bilinear scaling, then the
 * optional chroma range conversion. */
2101 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2102 const uint8_t *src_in[4],
2103 int srcW, int xInc, const int16_t *hChrFilter,
2104 const int16_t *hChrFilterPos, int hChrFilterSize,
2105 uint8_t *formatConvBuffer, uint32_t *pal)
2107 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2109 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2110 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2111 src1= formatConvBuffer;
2113 } else if (c->readChrPlanar) {
2114 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2115 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2116 src1= formatConvBuffer;
2120 if (!c->hcscale_fast) {
2121 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2122 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2123 } else { // fast bilinear upscale / crap downscale
2124 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2127 if (c->chrConvertRange)
2128 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the unoptimized C output functions for the current destination
 * format and store them through the given out-pointers:
 *  - yuv2plane1 / yuv2planeX: planar stores (1-tap resp. multi-tap
 *    vertical filter), chosen by destination bit depth and endianness;
 *  - yuv2nv12cX: interleaved-chroma store (set only for NV12/NV21);
 *  - yuv2packed1 / yuv2packed2 / yuv2packedX: packed-pixel writers
 *    (1-tap, 2-tap bilinear, general X-tap), chosen per packed dstFormat.
 */
2131 static av_always_inline void
2132 find_c_packed_planar_out_funcs(SwsContext *c,
2133 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2134 yuv2interleavedX_fn *yuv2nv12cX,
2135 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2136 yuv2packedX_fn *yuv2packedX)
2138 enum PixelFormat dstFormat = c->dstFormat;
/* Planar stores: pick by output depth (16 / 9 / 10 / 8 bit) and endianness. */
2140 if (is16BPS(dstFormat)) {
2141 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2142 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2143 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means 9 bits per component; otherwise 10 bits. */
2144 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2145 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2146 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2148 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2149 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2152 *yuv2plane1 = yuv2plane1_8_c;
2153 *yuv2planeX = yuv2planeX_8_c;
/* NV12/NV21 interleave both chroma components in a single plane. */
2154 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2155 *yuv2nv12cX = yuv2nv12cX_c;
/* Packed output with full horizontal chroma interpolation: only the
 * general X-tap writer exists for these RGB/BGR variants. */
2158 if(c->flags & SWS_FULL_CHR_H_INT) {
2159 switch (dstFormat) {
2162 *yuv2packedX = yuv2rgba32_full_X_c;
2164 #if CONFIG_SWSCALE_ALPHA
2166 *yuv2packedX = yuv2rgba32_full_X_c;
2168 #endif /* CONFIG_SWSCALE_ALPHA */
2170 *yuv2packedX = yuv2rgbx32_full_X_c;
2172 #endif /* !CONFIG_SMALL */
2176 *yuv2packedX = yuv2argb32_full_X_c;
2178 #if CONFIG_SWSCALE_ALPHA
2180 *yuv2packedX = yuv2argb32_full_X_c;
2182 #endif /* CONFIG_SWSCALE_ALPHA */
2184 *yuv2packedX = yuv2xrgb32_full_X_c;
2186 #endif /* !CONFIG_SMALL */
2190 *yuv2packedX = yuv2bgra32_full_X_c;
2192 #if CONFIG_SWSCALE_ALPHA
2194 *yuv2packedX = yuv2bgra32_full_X_c;
2196 #endif /* CONFIG_SWSCALE_ALPHA */
2198 *yuv2packedX = yuv2bgrx32_full_X_c;
2200 #endif /* !CONFIG_SMALL */
2204 *yuv2packedX = yuv2abgr32_full_X_c;
2206 #if CONFIG_SWSCALE_ALPHA
2208 *yuv2packedX = yuv2abgr32_full_X_c;
2210 #endif /* CONFIG_SWSCALE_ALPHA */
2212 *yuv2packedX = yuv2xbgr32_full_X_c;
2214 #endif /* !CONFIG_SMALL */
2217 *yuv2packedX = yuv2rgb24_full_X_c;
2220 *yuv2packedX = yuv2bgr24_full_X_c;
/* Normal (horizontally subsampled chroma) packed path: each format gets
 * all three writer variants (1-tap, 2-tap, general X-tap). */
2224 switch (dstFormat) {
2225 case PIX_FMT_GRAY16BE:
2226 *yuv2packed1 = yuv2gray16BE_1_c;
2227 *yuv2packed2 = yuv2gray16BE_2_c;
2228 *yuv2packedX = yuv2gray16BE_X_c;
2230 case PIX_FMT_GRAY16LE:
2231 *yuv2packed1 = yuv2gray16LE_1_c;
2232 *yuv2packed2 = yuv2gray16LE_2_c;
2233 *yuv2packedX = yuv2gray16LE_X_c;
2235 case PIX_FMT_MONOWHITE:
2236 *yuv2packed1 = yuv2monowhite_1_c;
2237 *yuv2packed2 = yuv2monowhite_2_c;
2238 *yuv2packedX = yuv2monowhite_X_c;
2240 case PIX_FMT_MONOBLACK:
2241 *yuv2packed1 = yuv2monoblack_1_c;
2242 *yuv2packed2 = yuv2monoblack_2_c;
2243 *yuv2packedX = yuv2monoblack_X_c;
2245 case PIX_FMT_YUYV422:
2246 *yuv2packed1 = yuv2yuyv422_1_c;
2247 *yuv2packed2 = yuv2yuyv422_2_c;
2248 *yuv2packedX = yuv2yuyv422_X_c;
2250 case PIX_FMT_UYVY422:
2251 *yuv2packed1 = yuv2uyvy422_1_c;
2252 *yuv2packed2 = yuv2uyvy422_2_c;
2253 *yuv2packedX = yuv2uyvy422_X_c;
2255 case PIX_FMT_RGB48LE:
2256 *yuv2packed1 = yuv2rgb48le_1_c;
2257 *yuv2packed2 = yuv2rgb48le_2_c;
2258 *yuv2packedX = yuv2rgb48le_X_c;
2260 case PIX_FMT_RGB48BE:
2261 *yuv2packed1 = yuv2rgb48be_1_c;
2262 *yuv2packed2 = yuv2rgb48be_2_c;
2263 *yuv2packedX = yuv2rgb48be_X_c;
2265 case PIX_FMT_BGR48LE:
2266 *yuv2packed1 = yuv2bgr48le_1_c;
2267 *yuv2packed2 = yuv2bgr48le_2_c;
2268 *yuv2packedX = yuv2bgr48le_X_c;
2270 case PIX_FMT_BGR48BE:
2271 *yuv2packed1 = yuv2bgr48be_1_c;
2272 *yuv2packed2 = yuv2bgr48be_2_c;
2273 *yuv2packedX = yuv2bgr48be_X_c;
2278 *yuv2packed1 = yuv2rgb32_1_c;
2279 *yuv2packed2 = yuv2rgb32_2_c;
2280 *yuv2packedX = yuv2rgb32_X_c;
2282 #if CONFIG_SWSCALE_ALPHA
2284 *yuv2packed1 = yuv2rgba32_1_c;
2285 *yuv2packed2 = yuv2rgba32_2_c;
2286 *yuv2packedX = yuv2rgba32_X_c;
2288 #endif /* CONFIG_SWSCALE_ALPHA */
2290 *yuv2packed1 = yuv2rgbx32_1_c;
2291 *yuv2packed2 = yuv2rgbx32_2_c;
2292 *yuv2packedX = yuv2rgbx32_X_c;
2294 #endif /* !CONFIG_SMALL */
2296 case PIX_FMT_RGB32_1:
2297 case PIX_FMT_BGR32_1:
2299 *yuv2packed1 = yuv2rgb32_1_1_c;
2300 *yuv2packed2 = yuv2rgb32_1_2_c;
2301 *yuv2packedX = yuv2rgb32_1_X_c;
2303 #if CONFIG_SWSCALE_ALPHA
2305 *yuv2packed1 = yuv2rgba32_1_1_c;
2306 *yuv2packed2 = yuv2rgba32_1_2_c;
2307 *yuv2packedX = yuv2rgba32_1_X_c;
2309 #endif /* CONFIG_SWSCALE_ALPHA */
2311 *yuv2packed1 = yuv2rgbx32_1_1_c;
2312 *yuv2packed2 = yuv2rgbx32_1_2_c;
2313 *yuv2packedX = yuv2rgbx32_1_X_c;
2315 #endif /* !CONFIG_SMALL */
2318 *yuv2packed1 = yuv2rgb24_1_c;
2319 *yuv2packed2 = yuv2rgb24_2_c;
2320 *yuv2packedX = yuv2rgb24_X_c;
2323 *yuv2packed1 = yuv2bgr24_1_c;
2324 *yuv2packed2 = yuv2bgr24_2_c;
2325 *yuv2packedX = yuv2bgr24_X_c;
/* 16/15/12-bit packed RGB share one writer per depth; the RGB/BGR and
 * endianness differences are handled inside the writer via the context. */
2327 case PIX_FMT_RGB565LE:
2328 case PIX_FMT_RGB565BE:
2329 case PIX_FMT_BGR565LE:
2330 case PIX_FMT_BGR565BE:
2331 *yuv2packed1 = yuv2rgb16_1_c;
2332 *yuv2packed2 = yuv2rgb16_2_c;
2333 *yuv2packedX = yuv2rgb16_X_c;
2335 case PIX_FMT_RGB555LE:
2336 case PIX_FMT_RGB555BE:
2337 case PIX_FMT_BGR555LE:
2338 case PIX_FMT_BGR555BE:
2339 *yuv2packed1 = yuv2rgb15_1_c;
2340 *yuv2packed2 = yuv2rgb15_2_c;
2341 *yuv2packedX = yuv2rgb15_X_c;
2343 case PIX_FMT_RGB444LE:
2344 case PIX_FMT_RGB444BE:
2345 case PIX_FMT_BGR444LE:
2346 case PIX_FMT_BGR444BE:
2347 *yuv2packed1 = yuv2rgb12_1_c;
2348 *yuv2packed2 = yuv2rgb12_2_c;
2349 *yuv2packedX = yuv2rgb12_X_c;
2353 *yuv2packed1 = yuv2rgb8_1_c;
2354 *yuv2packed2 = yuv2rgb8_2_c;
2355 *yuv2packedX = yuv2rgb8_X_c;
2359 *yuv2packed1 = yuv2rgb4_1_c;
2360 *yuv2packed2 = yuv2rgb4_2_c;
2361 *yuv2packedX = yuv2rgb4_X_c;
2363 case PIX_FMT_RGB4_BYTE:
2364 case PIX_FMT_BGR4_BYTE:
2365 *yuv2packed1 = yuv2rgb4b_1_c;
2366 *yuv2packed2 = yuv2rgb4b_2_c;
2367 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for ring-buffer debug logging inside swScale():
 * set DEBUG_SWSCALE_BUFFERS to 1 to enable the DEBUG_BUFFERS() traces. */
2373 #define DEBUG_SWSCALE_BUFFERS 0
2374 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Core C scaling loop.
 *
 * For each destination line, horizontally scale exactly the source lines
 * that the vertical filter needs into the luma/chroma/alpha ring buffers
 * (lumPixBuf, chrUPixBuf, chrVPixBuf, alpPixBuf), then vertically filter
 * the buffered lines into dst via the planar writers (yuv2plane1 /
 * yuv2planeX / yuv2nv12cX) or the packed writers (yuv2packed1 /
 * yuv2packed2 / yuv2packedX).
 *
 * Input may arrive in horizontal slices across multiple calls; the
 * ring-buffer indices and the last buffered source line numbers are
 * loaded from the context on entry and stored back on exit.
 *
 * @return number of destination lines produced (dstY - lastDstY)
 */
2376 static int swScale(SwsContext *c, const uint8_t* src[],
2377 int srcStride[], int srcSliceY,
2378 int srcSliceH, uint8_t* dst[], int dstStride[])
2380 /* load a few things into local vars to make the code more readable? and faster */
2381 const int srcW= c->srcW;
2382 const int dstW= c->dstW;
2383 const int dstH= c->dstH;
2384 const int chrDstW= c->chrDstW;
2385 const int chrSrcW= c->chrSrcW;
2386 const int lumXInc= c->lumXInc;
2387 const int chrXInc= c->chrXInc;
2388 const enum PixelFormat dstFormat= c->dstFormat;
2389 const int flags= c->flags;
2390 int16_t *vLumFilterPos= c->vLumFilterPos;
2391 int16_t *vChrFilterPos= c->vChrFilterPos;
2392 int16_t *hLumFilterPos= c->hLumFilterPos;
2393 int16_t *hChrFilterPos= c->hChrFilterPos;
2394 int16_t *vLumFilter= c->vLumFilter;
2395 int16_t *vChrFilter= c->vChrFilter;
2396 int16_t *hLumFilter= c->hLumFilter;
2397 int16_t *hChrFilter= c->hChrFilter;
2398 int32_t *lumMmxFilter= c->lumMmxFilter;
2399 int32_t *chrMmxFilter= c->chrMmxFilter;
2400 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2401 const int vLumFilterSize= c->vLumFilterSize;
2402 const int vChrFilterSize= c->vChrFilterSize;
2403 const int hLumFilterSize= c->hLumFilterSize;
2404 const int hChrFilterSize= c->hChrFilterSize;
2405 int16_t **lumPixBuf= c->lumPixBuf;
2406 int16_t **chrUPixBuf= c->chrUPixBuf;
2407 int16_t **chrVPixBuf= c->chrVPixBuf;
2408 int16_t **alpPixBuf= c->alpPixBuf;
2409 const int vLumBufSize= c->vLumBufSize;
2410 const int vChrBufSize= c->vChrBufSize;
2411 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice geometry: vertical subsampling applied to the luma slice,
 * rounding the height up (the -((-x)>>s) idiom is a ceiling shift). */
2412 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2413 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2415 uint32_t *pal=c->pal_yuv;
2416 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2417 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2418 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2419 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2420 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2421 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2422 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2424 /* vars which will change and which we need to store back in the context */
2426 int lumBufIndex= c->lumBufIndex;
2427 int chrBufIndex= c->chrBufIndex;
2428 int lastInLumBuf= c->lastInLumBuf;
2429 int lastInChrBuf= c->lastInChrBuf;
/* Packed input formats carry all their data in plane 0. */
2431 if (isPacked(c->srcFormat)) {
2439 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma input lines by widening the chroma stride. */
2441 srcStride[1]<<= c->vChrDrop;
2442 srcStride[2]<<= c->vChrDrop;
2444 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2445 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2446 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2447 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2448 srcSliceY, srcSliceH, dstY, dstH);
2449 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2450 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2452 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2453 static int warnedAlready=0; //FIXME move this into the context perhaps
2454 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2455 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2456 " ->cannot do aligned memory accesses anymore\n");
2461 /* Note the user might start scaling the picture in the middle so this
2462 will not get executed. This is not really intended but works
2463 currently, so people might do it. */
2464 if (srcSliceY ==0) {
/* High-bit-depth sources get real dithering; 8-bit uses a flat 64 bias. */
2472 if (!should_dither) {
2473 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main per-destination-line loop. */
2477 for (;dstY < dstH; dstY++) {
2478 const int chrDstY= dstY>>c->chrDstVSubSample;
2479 uint8_t *dest[4] = {
2480 dst[0] + dstStride[0] * dstY,
2481 dst[1] + dstStride[1] * chrDstY,
2482 dst[2] + dstStride[2] * chrDstY,
2483 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2486 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2487 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2488 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2489 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2490 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2491 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2494 //handle holes (FAST_BILINEAR & weird filters)
2495 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2496 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2497 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2498 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2500 DEBUG_BUFFERS("dstY: %d\n", dstY);
2501 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2502 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2503 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2504 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2506 // Do we have enough lines in this slice to output the dstY line
2507 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: buffer whatever this slice provides and bail
 * out of the loop below after horizontal scaling. */
2509 if (!enough_lines) {
2510 lastLumSrcY = srcSliceY + srcSliceH - 1;
2511 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2512 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2513 lastLumSrcY, lastChrSrcY);
2516 //Do horizontal scaling
2517 while(lastInLumBuf < lastLumSrcY) {
2518 const uint8_t *src1[4] = {
2519 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2520 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2521 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2522 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2525 assert(lumBufIndex < 2*vLumBufSize);
2526 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2527 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2528 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2529 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha reuses the luma horizontal scaler and shares lumBufIndex. */
2532 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2533 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2534 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2538 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2539 lumBufIndex, lastInLumBuf);
2541 while(lastInChrBuf < lastChrSrcY) {
2542 const uint8_t *src1[4] = {
2543 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2544 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2545 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2546 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2549 assert(chrBufIndex < 2*vChrBufSize);
2550 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2551 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2552 //FIXME replace parameters through context struct (some at least)
/* needs_hcscale is 0 for gray/mono destinations where chroma is unused. */
2554 if (c->needs_hcscale)
2555 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2556 chrDstW, src1, chrSrcW, chrXInc,
2557 hChrFilter, hChrFilterPos, hChrFilterSize,
2558 formatConvBuffer, pal);
2560 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2561 chrBufIndex, lastInChrBuf);
2563 //wrap buf index around to stay inside the ring buffer
2564 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2565 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2567 break; //we can't output a dstY line so let's try with the next slice
2570 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2572 if (should_dither) {
2573 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2574 c->lumDither8 = dither_8x8_128[dstY & 7];
2576 if (dstY >= dstH-2) {
2577 // hmm looks like we can't use MMX here without overwriting this array's tail
2578 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2579 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Pointers into the ring buffers for the lines the vertical filter reads;
 * the "+ vLumBufSize" offset relies on the second (mirrored) half of the
 * pointer arrays so no extra wrap-around arithmetic is needed here. */
2583 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2584 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2585 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2586 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2587 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2588 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* One-tap vertical filter degenerates to a plain store. */
2590 if (vLumFilterSize == 1) {
2591 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2593 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2594 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Write chroma only on non-skipped lines, and never for gray output. */
2597 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2599 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2600 } else if (vChrFilterSize == 1) {
2601 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2602 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2604 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2605 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2606 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2607 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2611 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2612 if (vLumFilterSize == 1) {
2613 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2615 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2616 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2620 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2621 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2622 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2623 int chrAlpha = vChrFilter[2 * dstY + 1];
2624 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2625 alpPixBuf ? *alpSrcPtr : NULL,
2626 dest[0], dstW, chrAlpha, dstY);
2627 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2628 int lumAlpha = vLumFilter[2 * dstY + 1];
2629 int chrAlpha = vChrFilter[2 * dstY + 1];
/* 0x10001 duplicates the 16-bit coefficient into both halves of the
 * 32-bit MMX filter word. */
2631 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2633 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2634 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2635 alpPixBuf ? alpSrcPtr : NULL,
2636 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2637 } else { //general RGB
2638 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2639 lumSrcPtr, vLumFilterSize,
2640 vChrFilter + dstY * vChrFilterSize,
2641 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2642 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA output without a buffered alpha plane: fill alpha with opaque. */
2648 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2649 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* sfence: store fence issued after the (possibly non-temporal) stores of
 * the MMX2 code paths — NOTE(review): grounded only in the CPU-flag check. */
2652 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2653 __asm__ volatile("sfence":::"memory");
2657 /* store changed local vars back in the context */
2659 c->lumBufIndex= lumBufIndex;
2660 c->chrBufIndex= chrBufIndex;
2661 c->lastInLumBuf= lastInLumBuf;
2662 c->lastInChrBuf= lastInChrBuf;
2664 return dstY - lastDstY;
/**
 * Initialize the C (unoptimized) function pointers in the context:
 * output writers (via find_c_packed_planar_out_funcs), input unpackers
 * (lumToYV12 / chrToYV12 / alpToYV12, or the planar-RGB readers),
 * horizontal scalers chosen by source/destination bit depth, and the
 * full/limited range converters.
 */
2667 static av_cold void sws_init_swScale_c(SwsContext *c)
2669 enum PixelFormat srcFormat = c->srcFormat;
2671 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2672 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input unpackers: convert srcFormat chroma to the internal
 * planar representation. NULL means no conversion is needed. */
2675 c->chrToYV12 = NULL;
2677 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2678 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2679 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2680 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2684 case PIX_FMT_BGR4_BYTE:
2685 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar GBR inputs use dedicated readers instead of chrToYV12. */
2686 case PIX_FMT_GBRP9LE:
2687 case PIX_FMT_GBRP10LE:
2688 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2689 case PIX_FMT_GBRP9BE:
2690 case PIX_FMT_GBRP10BE:
2691 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2692 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* High-bit-depth planar YUV whose endianness differs from native:
 * just byte-swap the chroma samples. */
2694 case PIX_FMT_YUV444P9LE:
2695 case PIX_FMT_YUV422P9LE:
2696 case PIX_FMT_YUV420P9LE:
2697 case PIX_FMT_YUV422P10LE:
2698 case PIX_FMT_YUV444P10LE:
2699 case PIX_FMT_YUV420P10LE:
2700 case PIX_FMT_YUV420P16LE:
2701 case PIX_FMT_YUV422P16LE:
2702 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2704 case PIX_FMT_YUV444P9BE:
2705 case PIX_FMT_YUV422P9BE:
2706 case PIX_FMT_YUV420P9BE:
2707 case PIX_FMT_YUV444P10BE:
2708 case PIX_FMT_YUV422P10BE:
2709 case PIX_FMT_YUV420P10BE:
2710 case PIX_FMT_YUV420P16BE:
2711 case PIX_FMT_YUV422P16BE:
2712 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB -> chroma: the *_half_c variants average two horizontal source
 * pixels per output sample (used when chroma is horizontally subsampled). */
2715 if (c->chrSrcHSubSample) {
2717 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2718 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2719 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2720 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2721 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2722 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2723 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2724 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2725 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2726 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2727 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2728 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2729 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2730 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2731 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2732 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2733 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2734 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Same mapping without horizontal averaging (chroma at full width). */
2738 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2739 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2740 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2741 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2742 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2743 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2744 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2745 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2746 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2747 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2748 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2749 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2750 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2751 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2752 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2753 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2754 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2755 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma (and alpha) input unpackers. */
2759 c->lumToYV12 = NULL;
2760 c->alpToYV12 = NULL;
2761 switch (srcFormat) {
2762 case PIX_FMT_GBRP9LE:
2763 case PIX_FMT_GBRP10LE:
2764 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2765 case PIX_FMT_GBRP9BE:
2766 case PIX_FMT_GBRP10BE:
2767 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2768 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2770 case PIX_FMT_YUV444P9LE:
2771 case PIX_FMT_YUV422P9LE:
2772 case PIX_FMT_YUV420P9LE:
2773 case PIX_FMT_YUV444P10LE:
2774 case PIX_FMT_YUV422P10LE:
2775 case PIX_FMT_YUV420P10LE:
2776 case PIX_FMT_YUV420P16LE:
2777 case PIX_FMT_YUV422P16LE:
2778 case PIX_FMT_YUV444P16LE:
2779 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2781 case PIX_FMT_YUV444P9BE:
2782 case PIX_FMT_YUV422P9BE:
2783 case PIX_FMT_YUV420P9BE:
2784 case PIX_FMT_YUV444P10BE:
2785 case PIX_FMT_YUV422P10BE:
2786 case PIX_FMT_YUV420P10BE:
2787 case PIX_FMT_YUV420P16BE:
2788 case PIX_FMT_YUV422P16BE:
2789 case PIX_FMT_YUV444P16BE:
2790 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A shares YUYV's every-other-byte luma layout, so it reuses the
 * YUY2 luma extractor — NOTE(review): inferred from the shared handler. */
2792 case PIX_FMT_YUYV422 :
2793 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2794 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2795 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2796 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2797 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2798 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2799 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2800 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2801 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2802 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2803 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2804 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2808 case PIX_FMT_BGR4_BYTE:
2809 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2810 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2811 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2812 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2813 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2814 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2815 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2816 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2817 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2818 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2819 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extractors for formats that carry an alpha channel. */
2822 switch (srcFormat) {
2824 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2826 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A's alpha is the second interleaved byte; the UYVY luma reader
 * picks exactly those bytes — NOTE(review): inferred from the reuse. */
2827 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scalers: selected by source bit depth (srcBpc) and the
 * intermediate precision required by the destination depth (dstBpc):
 * 15-bit intermediates for <=10-bit output, 19-bit otherwise. */
2831 if (c->srcBpc == 8) {
2832 if (c->dstBpc <= 10) {
2833 c->hyScale = c->hcScale = hScale8To15_c;
2834 if (c->flags & SWS_FAST_BILINEAR) {
2835 c->hyscale_fast = hyscale_fast_c;
2836 c->hcscale_fast = hcscale_fast_c;
2839 c->hyScale = c->hcScale = hScale8To19_c;
2842 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range conversion (full <-> limited) for YUV outputs only; RGB paths
 * handle range inside the yuv2rgb tables. */
2845 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2846 if (c->dstBpc <= 10) {
2848 c->lumConvertRange = lumRangeFromJpeg_c;
2849 c->chrConvertRange = chrRangeFromJpeg_c;
2851 c->lumConvertRange = lumRangeToJpeg_c;
2852 c->chrConvertRange = chrRangeToJpeg_c;
2856 c->lumConvertRange = lumRangeFromJpeg16_c;
2857 c->chrConvertRange = chrRangeFromJpeg16_c;
2859 c->lumConvertRange = lumRangeToJpeg16_c;
2860 c->chrConvertRange = chrRangeToJpeg16_c;
/* Chroma horizontal scaling is skipped entirely for gray/mono formats. */
2865 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2866 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2867 c->needs_hcscale = 1;
2870 SwsFunc ff_getSwsFunc(SwsContext *c)
2872 sws_init_swScale_c(c);
2875 ff_sws_init_swScale_mmx(c);
2877 ff_sws_init_swScale_altivec(c);