2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in Q15 fixed point (RGB2YUV_SHIFT = 15).
 * The 0.299/0.587/0.114 (and derived chroma) factors are the BT.601 matrix;
 * luma terms are scaled by 219/255 (limited-range Y) and chroma terms by
 * 224/255 (limited-range Cb/Cr), rounded to nearest. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither matrices used when reducing bit depth for packed RGB/BGR
 * output; each row is replicated to 8 bytes so it can be indexed per pixel
 * with (x & 7). The numeric suffix is the dither amplitude.
 * NOTE(review): the closing "};" of each table is missing from this dump —
 * restore before compiling. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrices (non-static: also referenced from asm/other
 * units). dither_8x8_32 and dither_8x8_73 are used for 8/4-bit RGB output,
 * dither_8x8_220 for 1-bit (mono) output — see the yuv2mono_* and
 * yuv2rgb_write functions below.
 * NOTE(review): closing "};" lines are missing from this dump. */
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative dither_8x8_220 tables that pre-compensate for display gammas
 * of 1.5, 2.0 and 2.5 respectively.
 * NOTE(review): all three redefine the same identifier as the table above,
 * which cannot compile as-is; in the upstream file these variants are fenced
 * off by #if/#elif/#else preprocessor guards that appear to have been lost
 * in this dump (along with the closing "};" lines) — restore the guards so
 * exactly one definition is active. */
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 dither matrix with amplitude 128, and ff_sws_pb_64, a packed-byte
 * vector of 64s (ff_ prefix: exported — presumably consumed by SIMD code in
 * other translation units; verify against the asm sources).
 * NOTE(review): the closing "};" of dither_8x8_128 is missing from this dump. */
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* Store one 16-bit sample: shift down by the caller's local `shift`, clip
 * (signed or unsigned per `signedness`), add `bias`, and write big- or
 * little-endian. NOTE(review): the "if (big_endian) { ... } else { ... }"
 * framing lines of this macro are missing from this dump. */
198 #define output_pixel(pos, val, bias, signedness) \
200 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
202 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/* Unfiltered 1:1 output of one line of 19-bit intermediate samples (int32)
 * to a 16-bit planar destination of `output_bits` depth: round
 * (add 1 << (shift - 1)) and shift down by 19 - output_bits, unsigned clip,
 * endianness selected by big_endian.
 * NOTE(review): braces and the `int i;` declaration are missing from this dump. */
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
210 int shift = 19 - output_bits;
212 for (i = 0; i < dstW; i++) {
213 int val = src[i] + (1 << (shift - 1));
214 output_pixel(&dest[i], val, 0, uint);
/* Vertically filtered output of int32 intermediate lines to a 16-bit plane:
 * accumulate filterSize taps into val, then shift by 15 + 16 - output_bits.
 * The 0x8000 bias passed to output_pixel re-adds the constant subtracted to
 * keep the accumulator in signed range (see the comment below); hence the
 * signed (int) clip variant here versus uint in the unfiltered version.
 * NOTE(review): braces and the `int i, j;` declarations are missing from
 * this dump. */
218 static av_always_inline void
219 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
220 const int32_t **src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 + 16 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = 1 << (30-output_bits);
230 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
231 * filters (or anything with negative coeffs, the range can be slightly
232 * wider in both directions. To account for this overflow, we subtract
233 * a constant so it always fits in the signed range (assuming a
234 * reasonable filterSize), and re-add that at the end. */
236 for (j = 0; j < filterSize; j++)
237 val += src[j][i] * filter[j];
239 output_pixel(&dest[i], val, 0x8000, int);
/* 9/10-bit variant of output_pixel: clip to output_bits with
 * av_clip_uintp2 after shifting, write BE or LE.
 * NOTE(review): the preceding #undef output_pixel and the
 * "if (big_endian) { ... } else { ... }" lines are missing from this dump. */
245 #define output_pixel(pos, val) \
247 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
249 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Unfiltered 1:1 output of 15-bit int16 intermediates to a 9/10-bit plane:
 * round and shift by 15 - output_bits, then clip/store via output_pixel.
 * NOTE(review): braces and `int i;` are missing from this dump. */
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
257 int shift = 15 - output_bits;
259 for (i = 0; i < dstW; i++) {
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
/* Vertically filtered output of int16 intermediates to a 9/10-bit plane:
 * seed the accumulator with the rounding constant 1 << (26 - output_bits),
 * sum filterSize taps, shift by 11 + 16 - output_bits and store.
 * NOTE(review): braces and `int i, j;` are missing from this dump. */
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
271 int shift = 11 + 16 - output_bits;
273 for (i = 0; i < dstW; i++) {
274 int val = 1 << (26-output_bits);
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
280 output_pixel(&dest[i], val);
/* Stamp out concrete yuv2plane1/yuv2planeX entry points for a given bit
 * depth and endianness by delegating to the _10/_16 templates above with
 * the pointers cast to the template's sample type (int16_t for <=10 bits,
 * int32_t for 16 bits). The av_always_inline templates then constant-fold
 * is_be/bits per instantiation.
 * NOTE(review): the braces of both generated functions are missing from
 * this dump. */
286 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
287 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
288 uint8_t *dest, int dstW, \
289 const uint8_t *dither, int offset)\
291 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
292 (uint16_t *) dest, dstW, is_be, bits); \
294 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
295 const int16_t **src, uint8_t *dest, int dstW, \
296 const uint8_t *dither, int offset)\
298 yuv2planeX_## template_size ## _c_template(filter, \
299 filterSize, (const typeX_t **) src, \
300 (uint16_t *) dest, dstW, is_be, bits); \
302 yuv2NBPS( 9, BE, 1, 10, int16_t)
303 yuv2NBPS( 9, LE, 0, 10, int16_t)
304 yuv2NBPS(10, BE, 1, 10, int16_t)
305 yuv2NBPS(10, LE, 0, 10, int16_t)
306 yuv2NBPS(16, BE, 1, 16, int32_t)
307 yuv2NBPS(16, LE, 0, 16, int32_t)
/* Vertically filtered output to an 8-bit plane: seed with the per-pixel
 * dither value (<<12 to match the 12-bit filter coefficient scale), sum
 * filterSize taps of 15-bit intermediates, then >>19 and clip to uint8.
 * NOTE(review): braces and `int i, j;` are missing from this dump. */
309 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
310 const int16_t **src, uint8_t *dest, int dstW,
311 const uint8_t *dither, int offset)
314 for (i=0; i<dstW; i++) {
315 int val = dither[(i + offset) & 7] << 12;
317 for (j=0; j<filterSize; j++)
318 val += src[j][i] * filter[j];
320 dest[i]= av_clip_uint8(val>>19);
/* Unfiltered 1:1 output of 15-bit intermediates to an 8-bit plane with
 * additive dither, >>7 and clip.
 * NOTE(review): braces and `int i;` are missing from this dump. */
324 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
325 const uint8_t *dither, int offset)
328 for (i=0; i<dstW; i++) {
329 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
330 dest[i]= av_clip_uint8(val);
/* Vertically filtered chroma output for semi-planar formats: interleave U/V
 * into one plane, U first for NV12 and V first in the else branch (NV21).
 * Dither offsets for U and V differ (i vs i+3) to decorrelate the patterns.
 * NOTE(review): braces, `int i, j;` and the `} else` line between the two
 * loops are missing from this dump. */
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
/* Store a 16-bit gray sample, big-endian for GRAY16BE.
 * NOTE(review): the macro body (AV_WB16/AV_WL16 branches) is missing from
 * this dump — only the #define line and the BE test survive. */
370 #define output_pixel(pos, val) \
371 if (target == PIX_FMT_GRAY16BE) { \
/* Vertically filtered 16-bit grayscale output, two pixels per iteration.
 * Chroma/alpha sources are accepted for signature compatibility but unused.
 * The (Y1 | Y2) & 0x10000 test cheaply detects when either value exceeded
 * 16 bits before paying for the clip.
 * NOTE(review): braces, loop-variable declarations, the Y1/Y2 initializers
 * and the shift lines between accumulation and the overflow test are
 * missing from this dump. */
377 static av_always_inline void
378 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
379 const int32_t **lumSrc, int lumFilterSize,
380 const int16_t *chrFilter, const int32_t **chrUSrc,
381 const int32_t **chrVSrc, int chrFilterSize,
382 const int32_t **alpSrc, uint16_t *dest, int dstW,
383 int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
392 for (j = 0; j < lumFilterSize; j++) {
393 Y1 += lumSrc[j][i * 2] * lumFilter[j];
394 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
398 if ((Y1 | Y2) & 0x10000) {
399 Y1 = av_clip_uint16(Y1);
400 Y2 = av_clip_uint16(Y2);
402 output_pixel(&dest[i * 2 + 0], Y1);
403 output_pixel(&dest[i * 2 + 1], Y2);
/* 16-bit grayscale output blending two intermediate lines with weight
 * yalpha (Q12: yalpha1 = 4095 - yalpha), >>15 to the output range.
 * NOTE(review): braces and `int i;` are missing from this dump. */
407 static av_always_inline void
408 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
409 const int32_t *ubuf[2], const int32_t *vbuf[2],
410 const int32_t *abuf[2], uint16_t *dest, int dstW,
411 int yalpha, int uvalpha, int y,
412 enum PixelFormat target)
414 int yalpha1 = 4095 - yalpha;
416 const int32_t *buf0 = buf[0], *buf1 = buf[1];
418 for (i = 0; i < (dstW >> 1); i++) {
419 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
420 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
422 output_pixel(&dest[i * 2 + 0], Y1);
423 output_pixel(&dest[i * 2 + 1], Y2);
/* 16-bit grayscale output from a single intermediate line (no vertical
 * interpolation): just scale up by <<1 and store.
 * NOTE(review): braces and `int i;` are missing from this dump. */
427 static av_always_inline void
428 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
429 const int32_t *ubuf[2], const int32_t *vbuf[2],
430 const int32_t *abuf0, uint16_t *dest, int dstW,
431 int uvalpha, int y, enum PixelFormat target)
435 for (i = 0; i < (dstW >> 1); i++) {
436 int Y1 = buf0[i * 2 ] << 1;
437 int Y2 = buf0[i * 2 + 1] << 1;
439 output_pixel(&dest[i * 2 + 0], Y1);
440 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the _X/_2/_1 output-function triplet for a >8-bit packed format:
 * each wrapper reinterprets the generic int16_t* intermediate pointers as
 * int32_t* (the high-depth pipeline stores int32 samples behind the common
 * int16_t* signature) and calls the corresponding _c_template with `fmt`
 * baked in so the always-inline template specializes per format.
 * NOTE(review): the `int y` parameter line of the _X wrapper and the
 * function braces are missing from this dump. */
446 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
447 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
448 const int16_t **_lumSrc, int lumFilterSize, \
449 const int16_t *chrFilter, const int16_t **_chrUSrc, \
450 const int16_t **_chrVSrc, int chrFilterSize, \
451 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
454 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
455 **chrUSrc = (const int32_t **) _chrUSrc, \
456 **chrVSrc = (const int32_t **) _chrVSrc, \
457 **alpSrc = (const int32_t **) _alpSrc; \
458 uint16_t *dest = (uint16_t *) _dest; \
459 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
460 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
461 alpSrc, dest, dstW, y, fmt); \
464 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
465 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
466 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
467 int yalpha, int uvalpha, int y) \
469 const int32_t **buf = (const int32_t **) _buf, \
470 **ubuf = (const int32_t **) _ubuf, \
471 **vbuf = (const int32_t **) _vbuf, \
472 **abuf = (const int32_t **) _abuf; \
473 uint16_t *dest = (uint16_t *) _dest; \
474 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
475 dest, dstW, yalpha, uvalpha, y, fmt); \
478 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
479 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
480 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
481 int uvalpha, int y) \
483 const int32_t *buf0 = (const int32_t *) _buf0, \
484 **ubuf = (const int32_t **) _ubuf, \
485 **vbuf = (const int32_t **) _vbuf, \
486 *abuf0 = (const int32_t *) _abuf0; \
487 uint16_t *dest = (uint16_t *) _dest; \
488 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
489 dstW, uvalpha, y, fmt); \
492 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
493 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* 1-bit (monochrome) output. The output_pixel macro emits one packed byte
 * of 8 pixels; the MONOBLACK branch is visible here, the MONOWHITE inverse
 * is among the lines missing from this dump. The _X variant filters two
 * luma pixels per iteration, applies the high-amplitude dither_8x8_220
 * pattern, and threshold-packs via the gray-level table
 * g = table_gU[128] + table_gV[128] (chroma fixed at neutral 128).
 * NOTE(review): macro body, function braces, accumulator declarations and
 * the shift lines are missing from this dump. */
495 #define output_pixel(pos, acc) \
496 if (target == PIX_FMT_MONOBLACK) { \
502 static av_always_inline void
503 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
504 const int16_t **lumSrc, int lumFilterSize,
505 const int16_t *chrFilter, const int16_t **chrUSrc,
506 const int16_t **chrVSrc, int chrFilterSize,
507 const int16_t **alpSrc, uint8_t *dest, int dstW,
508 int y, enum PixelFormat target)
510 const uint8_t * const d128=dither_8x8_220[y&7];
511 uint8_t *g = c->table_gU[128] + c->table_gV[128];
515 for (i = 0; i < dstW - 1; i += 2) {
520 for (j = 0; j < lumFilterSize; j++) {
521 Y1 += lumSrc[j][i] * lumFilter[j];
522 Y2 += lumSrc[j][i+1] * lumFilter[j];
526 if ((Y1 | Y2) & 0x100) {
527 Y1 = av_clip_uint8(Y1);
528 Y2 = av_clip_uint8(Y2);
530 acc += acc + g[Y1 + d128[(i + 0) & 7]];
531 acc += acc + g[Y2 + d128[(i + 1) & 7]];
533 output_pixel(*dest++, acc);
/* Monochrome output blending two luma lines; processes 8 pixels at a time,
 * shifting each thresholded bit into acc (acc += acc + bit) and emitting
 * one packed byte per iteration.
 * NOTE(review): function braces and `int i;` are missing from this dump. */
538 static av_always_inline void
539 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
540 const int16_t *ubuf[2], const int16_t *vbuf[2],
541 const int16_t *abuf[2], uint8_t *dest, int dstW,
542 int yalpha, int uvalpha, int y,
543 enum PixelFormat target)
545 const int16_t *buf0 = buf[0], *buf1 = buf[1];
546 const uint8_t * const d128 = dither_8x8_220[y & 7];
547 uint8_t *g = c->table_gU[128] + c->table_gV[128];
548 int yalpha1 = 4095 - yalpha;
551 for (i = 0; i < dstW - 7; i += 8) {
552 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
553 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
554 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
555 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
556 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
557 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
558 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
559 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
560 output_pixel(*dest++, acc);
/* Monochrome output from a single luma line (>>7 from 15-bit intermediates),
 * 8 pixels packed into one byte per iteration, same bit-shift accumulation
 * as the _2 variant.
 * NOTE(review): function braces and `int i;` are missing from this dump. */
564 static av_always_inline void
565 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
566 const int16_t *ubuf[2], const int16_t *vbuf[2],
567 const int16_t *abuf0, uint8_t *dest, int dstW,
568 int uvalpha, int y, enum PixelFormat target)
570 const uint8_t * const d128 = dither_8x8_220[y & 7];
571 uint8_t *g = c->table_gU[128] + c->table_gV[128];
574 for (i = 0; i < dstW - 7; i += 8) {
575 int acc = g[(buf0[i ] >> 7) + d128[0]];
576 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
577 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
578 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
579 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
580 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
581 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
582 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
583 output_pixel(*dest++, acc);
/* Like YUV2PACKED16WRAPPER but for <=8-bit packed formats: no pointer
 * reinterpretation needed, just forward to the _c_template with `fmt`
 * baked in for per-format specialization of the always-inline templates.
 * NOTE(review): the `int y` parameter line of the _X wrapper, the function
 * braces, and the trailing `fmt);` of the _1 wrapper are missing from this
 * dump. */
589 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
590 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
591 const int16_t **lumSrc, int lumFilterSize, \
592 const int16_t *chrFilter, const int16_t **chrUSrc, \
593 const int16_t **chrVSrc, int chrFilterSize, \
594 const int16_t **alpSrc, uint8_t *dest, int dstW, \
597 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
598 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
599 alpSrc, dest, dstW, y, fmt); \
602 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
603 const int16_t *ubuf[2], const int16_t *vbuf[2], \
604 const int16_t *abuf[2], uint8_t *dest, int dstW, \
605 int yalpha, int uvalpha, int y) \
607 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
608 dest, dstW, yalpha, uvalpha, y, fmt); \
611 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
612 const int16_t *ubuf[2], const int16_t *vbuf[2], \
613 const int16_t *abuf0, uint8_t *dest, int dstW, \
614 int uvalpha, int y) \
616 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
617 abuf0, dest, dstW, uvalpha, \
621 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
622 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Write one YUYV/UYVY macropixel (two luma + shared U/V). YUYV puts luma at
 * even offsets, UYVY (the else branch) at odd offsets; the U/V store lines
 * (orig. 627/629 and 631/633) are missing from this dump. */
624 #define output_pixels(pos, Y1, U, Y2, V) \
625 if (target == PIX_FMT_YUYV422) { \
626 dest[pos + 0] = Y1; \
628 dest[pos + 2] = Y2; \
632 dest[pos + 1] = Y1; \
634 dest[pos + 3] = Y2; \
/* Vertically filtered packed 4:2:2 output (YUYV/UYVY): filter two luma and
 * one U/V pair per macropixel, cheap overflow test via OR of all four
 * values against 0x100 before clipping, then store via output_pixels.
 * NOTE(review): braces, loop-variable/accumulator declarations and the
 * >>shift lines between accumulation and the overflow test are missing
 * from this dump. */
637 static av_always_inline void
638 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
639 const int16_t **lumSrc, int lumFilterSize,
640 const int16_t *chrFilter, const int16_t **chrUSrc,
641 const int16_t **chrVSrc, int chrFilterSize,
642 const int16_t **alpSrc, uint8_t *dest, int dstW,
643 int y, enum PixelFormat target)
647 for (i = 0; i < (dstW >> 1); i++) {
654 for (j = 0; j < lumFilterSize; j++) {
655 Y1 += lumSrc[j][i * 2] * lumFilter[j];
656 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
658 for (j = 0; j < chrFilterSize; j++) {
659 U += chrUSrc[j][i] * chrFilter[j];
660 V += chrVSrc[j][i] * chrFilter[j];
666 if ((Y1 | Y2 | U | V) & 0x100) {
667 Y1 = av_clip_uint8(Y1);
668 Y2 = av_clip_uint8(Y2);
669 U = av_clip_uint8(U);
670 V = av_clip_uint8(V);
672 output_pixels(4*i, Y1, U, Y2, V);
/* Packed 4:2:2 output blending two intermediate lines with Q12 weights
 * (yalpha for luma, uvalpha for chroma), >>19 to 8-bit range.
 * NOTE(review): function braces and `int i;` are missing from this dump. */
676 static av_always_inline void
677 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
678 const int16_t *ubuf[2], const int16_t *vbuf[2],
679 const int16_t *abuf[2], uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
683 const int16_t *buf0 = buf[0], *buf1 = buf[1],
684 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
685 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
686 int yalpha1 = 4095 - yalpha;
687 int uvalpha1 = 4095 - uvalpha;
690 for (i = 0; i < (dstW >> 1); i++) {
691 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
692 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
693 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
694 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
696 output_pixels(i * 4, Y1, U, Y2, V);
/* Packed 4:2:2 output from a single luma line. uvalpha < 2048 means the
 * chroma lines need no blending (take one line, >>7); otherwise average the
 * two chroma lines (>>8 of the sum). NOTE(review): the first branch reads
 * ubuf1/vbuf1 while the averaging branch reads ubuf0+ubuf1 — presumably
 * matching upstream's line selection; confirm against the original source.
 * The `} else {` between the branches, braces and `int i;` are missing
 * from this dump. */
700 static av_always_inline void
701 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
702 const int16_t *ubuf[2], const int16_t *vbuf[2],
703 const int16_t *abuf0, uint8_t *dest, int dstW,
704 int uvalpha, int y, enum PixelFormat target)
706 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
707 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
710 if (uvalpha < 2048) {
711 for (i = 0; i < (dstW >> 1); i++) {
712 int Y1 = buf0[i * 2] >> 7;
713 int Y2 = buf0[i * 2 + 1] >> 7;
714 int U = ubuf1[i] >> 7;
715 int V = vbuf1[i] >> 7;
717 output_pixels(i * 4, Y1, U, Y2, V);
720 for (i = 0; i < (dstW >> 1); i++) {
721 int Y1 = buf0[i * 2] >> 7;
722 int Y2 = buf0[i * 2 + 1] >> 7;
723 int U = (ubuf0[i] + ubuf1[i]) >> 8;
724 int V = (vbuf0[i] + vbuf1[i]) >> 8;
726 output_pixels(i * 4, Y1, U, Y2, V);
733 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
734 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* 48-bit RGB/BGR output. R_B/B_R swap the red and blue table lookups for
 * the BGR variants; output_pixel writes each 16-bit component BE or LE per
 * isBE(target) (store lines of the macro are missing from this dump).
 * The _X variant vertically filters int32 intermediates, converts through
 * the per-context yuv2rgb coefficients (offset, then multiply), clips to
 * 30 bits unsigned and shifts down 14 to the final 16-bit components —
 * the bit-width bookkeeping is traced in the surviving inline comments.
 * NOTE(review): braces, accumulator declarations (Y1/Y2 seeds visible, V
 * and the shift lines missing) and loop-variable declarations are missing
 * from this dump. */
736 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
737 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
738 #define output_pixel(pos, val) \
739 if (isBE(target)) { \
745 static av_always_inline void
746 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
747 const int32_t **lumSrc, int lumFilterSize,
748 const int16_t *chrFilter, const int32_t **chrUSrc,
749 const int32_t **chrVSrc, int chrFilterSize,
750 const int32_t **alpSrc, uint16_t *dest, int dstW,
751 int y, enum PixelFormat target)
755 for (i = 0; i < (dstW >> 1); i++) {
757 int Y1 = -0x40000000;
758 int Y2 = -0x40000000;
759 int U = -128 << 23; // 19
763 for (j = 0; j < lumFilterSize; j++) {
764 Y1 += lumSrc[j][i * 2] * lumFilter[j];
765 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
767 for (j = 0; j < chrFilterSize; j++) {
768 U += chrUSrc[j][i] * chrFilter[j];
769 V += chrVSrc[j][i] * chrFilter[j];
772 // 8bit: 12+15=27; 16-bit: 12+19=31
780 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
781 Y1 -= c->yuv2rgb_y_offset;
782 Y2 -= c->yuv2rgb_y_offset;
783 Y1 *= c->yuv2rgb_y_coeff;
784 Y2 *= c->yuv2rgb_y_coeff;
787 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
789 R = V * c->yuv2rgb_v2r_coeff;
790 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
791 B = U * c->yuv2rgb_u2b_coeff;
793 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
794 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
795 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
796 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
797 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
798 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
799 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit RGB/BGR output blending two int32 intermediate lines (Q12
 * weights, >>14), then the same offset/coefficient conversion, 30-bit clip
 * and >>14 store as the _X variant. The -128 << 23 term recenters chroma.
 * NOTE(review): braces, `int i;`, the R/G/B declarations and the
 * dest-advance line after each macropixel are missing from this dump. */
804 static av_always_inline void
805 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
806 const int32_t *ubuf[2], const int32_t *vbuf[2],
807 const int32_t *abuf[2], uint16_t *dest, int dstW,
808 int yalpha, int uvalpha, int y,
809 enum PixelFormat target)
811 const int32_t *buf0 = buf[0], *buf1 = buf[1],
812 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
813 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
814 int yalpha1 = 4095 - yalpha;
815 int uvalpha1 = 4095 - uvalpha;
818 for (i = 0; i < (dstW >> 1); i++) {
819 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
820 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
821 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
822 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
825 Y1 -= c->yuv2rgb_y_offset;
826 Y2 -= c->yuv2rgb_y_offset;
827 Y1 *= c->yuv2rgb_y_coeff;
828 Y2 *= c->yuv2rgb_y_coeff;
832 R = V * c->yuv2rgb_v2r_coeff;
833 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
834 B = U * c->yuv2rgb_u2b_coeff;
836 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
837 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
838 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
839 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
840 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
841 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit RGB/BGR output from a single int32 intermediate line. uvalpha <
 * 2048: take one chroma line (>>2 with -128 << 11 recentering); otherwise
 * average both chroma lines (>>3 of the sum). Conversion/clip/store then
 * matches the other rgb48 variants.
 * NOTE(review): braces, `int i;`, R/G/B declarations, the `} else {`
 * between the branches and the dest-advance lines are missing from this
 * dump. */
846 static av_always_inline void
847 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
848 const int32_t *ubuf[2], const int32_t *vbuf[2],
849 const int32_t *abuf0, uint16_t *dest, int dstW,
850 int uvalpha, int y, enum PixelFormat target)
852 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
853 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
856 if (uvalpha < 2048) {
857 for (i = 0; i < (dstW >> 1); i++) {
858 int Y1 = (buf0[i * 2] ) >> 2;
859 int Y2 = (buf0[i * 2 + 1]) >> 2;
860 int U = (ubuf0[i] + (-128 << 11)) >> 2;
861 int V = (vbuf0[i] + (-128 << 11)) >> 2;
864 Y1 -= c->yuv2rgb_y_offset;
865 Y2 -= c->yuv2rgb_y_offset;
866 Y1 *= c->yuv2rgb_y_coeff;
867 Y2 *= c->yuv2rgb_y_coeff;
871 R = V * c->yuv2rgb_v2r_coeff;
872 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
873 B = U * c->yuv2rgb_u2b_coeff;
875 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
876 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
877 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
878 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
879 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
880 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
884 for (i = 0; i < (dstW >> 1); i++) {
885 int Y1 = (buf0[i * 2] ) >> 2;
886 int Y2 = (buf0[i * 2 + 1]) >> 2;
887 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
888 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
891 Y1 -= c->yuv2rgb_y_offset;
892 Y2 -= c->yuv2rgb_y_offset;
893 Y1 *= c->yuv2rgb_y_coeff;
894 Y2 *= c->yuv2rgb_y_coeff;
898 R = V * c->yuv2rgb_v2r_coeff;
899 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
900 B = U * c->yuv2rgb_u2b_coeff;
902 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
903 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
904 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
905 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
906 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
907 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
917 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
918 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
919 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
920 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/* Store two horizontally adjacent pixels through the per-context lookup
 * tables (table_rV/table_gU+gV/table_bU), specialized at compile time on
 * `target`:
 *  - 32-bit RGBA/BGRA variants: table lookups are summed into one uint32;
 *    alpha (when hasAlpha) is shifted into the byte selected by sh (0 for
 *    the *_1 layouts, 24 otherwise);
 *  - 24-bit RGB/BGR: three byte stores per pixel, r_b/b_r swapping red and
 *    blue for BGR;
 *  - 16/15/12-bit: per-component 2x2 or 4x4 dither offsets are added to the
 *    table indices, one uint16 store per pixel;
 *  - 8/4-bit: 8x8 dither; RGB4/BGR4 packs two pixels into one byte
 *    (second pixel << 4).
 * NOTE(review): many closing braces, `#if CONFIG_SMALL / #else / #endif`
 * style guard lines and some else-branch framing lines appear to be
 * missing from this dump — the surviving lines are byte-identical below. */
922 static av_always_inline void
923 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
924 unsigned U, unsigned V, unsigned A1, unsigned A2,
925 const void *_r, const void *_g, const void *_b, int y,
926 enum PixelFormat target, int hasAlpha)
928 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
929 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
930 uint32_t *dest = (uint32_t *) _dest;
931 const uint32_t *r = (const uint32_t *) _r;
932 const uint32_t *g = (const uint32_t *) _g;
933 const uint32_t *b = (const uint32_t *) _b;
936 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
938 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
939 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
942 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
944 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
945 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
947 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
948 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
951 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
952 uint8_t *dest = (uint8_t *) _dest;
953 const uint8_t *r = (const uint8_t *) _r;
954 const uint8_t *g = (const uint8_t *) _g;
955 const uint8_t *b = (const uint8_t *) _b;
957 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
958 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
959 dest[i * 6 + 0] = r_b[Y1];
960 dest[i * 6 + 1] = g[Y1];
961 dest[i * 6 + 2] = b_r[Y1];
962 dest[i * 6 + 3] = r_b[Y2];
963 dest[i * 6 + 4] = g[Y2];
964 dest[i * 6 + 5] = b_r[Y2];
967 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
968 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
969 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
970 uint16_t *dest = (uint16_t *) _dest;
971 const uint16_t *r = (const uint16_t *) _r;
972 const uint16_t *g = (const uint16_t *) _g;
973 const uint16_t *b = (const uint16_t *) _b;
974 int dr1, dg1, db1, dr2, dg2, db2;
976 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
977 dr1 = dither_2x2_8[ y & 1 ][0];
978 dg1 = dither_2x2_4[ y & 1 ][0];
979 db1 = dither_2x2_8[(y & 1) ^ 1][0];
980 dr2 = dither_2x2_8[ y & 1 ][1];
981 dg2 = dither_2x2_4[ y & 1 ][1];
982 db2 = dither_2x2_8[(y & 1) ^ 1][1];
983 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
984 dr1 = dither_2x2_8[ y & 1 ][0];
985 dg1 = dither_2x2_8[ y & 1 ][1];
986 db1 = dither_2x2_8[(y & 1) ^ 1][0];
987 dr2 = dither_2x2_8[ y & 1 ][1];
988 dg2 = dither_2x2_8[ y & 1 ][0];
989 db2 = dither_2x2_8[(y & 1) ^ 1][1];
991 dr1 = dither_4x4_16[ y & 3 ][0];
992 dg1 = dither_4x4_16[ y & 3 ][1];
993 db1 = dither_4x4_16[(y & 3) ^ 3][0];
994 dr2 = dither_4x4_16[ y & 3 ][1];
995 dg2 = dither_4x4_16[ y & 3 ][0];
996 db2 = dither_4x4_16[(y & 3) ^ 3][1];
999 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1000 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1001 } else /* 8/4-bit */ {
1002 uint8_t *dest = (uint8_t *) _dest;
1003 const uint8_t *r = (const uint8_t *) _r;
1004 const uint8_t *g = (const uint8_t *) _g;
1005 const uint8_t *b = (const uint8_t *) _b;
1006 int dr1, dg1, db1, dr2, dg2, db2;
1008 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1009 const uint8_t * const d64 = dither_8x8_73[y & 7];
1010 const uint8_t * const d32 = dither_8x8_32[y & 7];
1011 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1012 db1 = d64[(i * 2 + 0) & 7];
1013 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1014 db2 = d64[(i * 2 + 1) & 7];
1016 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1017 const uint8_t * const d128 = dither_8x8_220[y & 7];
1018 dr1 = db1 = d128[(i * 2 + 0) & 7];
1019 dg1 = d64[(i * 2 + 0) & 7];
1020 dr2 = db2 = d128[(i * 2 + 1) & 7];
1021 dg2 = d64[(i * 2 + 1) & 7];
1024 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1025 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1026 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1028 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1029 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Vertically filtered table-driven RGB output: filter Y1/Y2/U/V (and alpha
 * when hasAlpha), clip via the cheap OR-against-0x100 test, resolve the
 * r/g/b lookup tables from U/V, and hand off to yuv2rgb_write.
 * NOTE(review): braces, accumulator declarations/seeds, the >>shift lines,
 * the hasAlpha framing around the alpha loop, and the r/b table lookups
 * around L685 (table_rV[V] / table_bU[U]) are missing from this dump. */
1034 static av_always_inline void
1035 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1036 const int16_t **lumSrc, int lumFilterSize,
1037 const int16_t *chrFilter, const int16_t **chrUSrc,
1038 const int16_t **chrVSrc, int chrFilterSize,
1039 const int16_t **alpSrc, uint8_t *dest, int dstW,
1040 int y, enum PixelFormat target, int hasAlpha)
1044 for (i = 0; i < (dstW >> 1); i++) {
1050 int av_unused A1, A2;
1051 const void *r, *g, *b;
1053 for (j = 0; j < lumFilterSize; j++) {
1054 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1055 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1057 for (j = 0; j < chrFilterSize; j++) {
1058 U += chrUSrc[j][i] * chrFilter[j];
1059 V += chrVSrc[j][i] * chrFilter[j];
1065 if ((Y1 | Y2 | U | V) & 0x100) {
1066 Y1 = av_clip_uint8(Y1);
1067 Y2 = av_clip_uint8(Y2);
1068 U = av_clip_uint8(U);
1069 V = av_clip_uint8(V);
1074 for (j = 0; j < lumFilterSize; j++) {
1075 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1076 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1080 if ((A1 | A2) & 0x100) {
1081 A1 = av_clip_uint8(A1);
1082 A2 = av_clip_uint8(A2);
1086 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1088 g = (c->table_gU[U] + c->table_gV[V]);
1091 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1092 r, g, b, y, target, hasAlpha);
1096 static av_always_inline void
1097 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1098 const int16_t *ubuf[2], const int16_t *vbuf[2],
1099 const int16_t *abuf[2], uint8_t *dest, int dstW,
1100 int yalpha, int uvalpha, int y,
1101 enum PixelFormat target, int hasAlpha)
1103 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1104 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1105 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1106 *abuf0 = hasAlpha ? abuf[0] : NULL,
1107 *abuf1 = hasAlpha ? abuf[1] : NULL;
1108 int yalpha1 = 4095 - yalpha;
1109 int uvalpha1 = 4095 - uvalpha;
1112 for (i = 0; i < (dstW >> 1); i++) {
1113 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1114 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1115 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1116 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1118 const void *r = c->table_rV[V],
1119 *g = (c->table_gU[U] + c->table_gV[V]),
1120 *b = c->table_bU[U];
1123 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1124 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1127 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1128 r, g, b, y, target, hasAlpha);
1132 static av_always_inline void
1133 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1134 const int16_t *ubuf[2], const int16_t *vbuf[2],
1135 const int16_t *abuf0, uint8_t *dest, int dstW,
1136 int uvalpha, int y, enum PixelFormat target,
1139 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1140 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1143 if (uvalpha < 2048) {
1144 for (i = 0; i < (dstW >> 1); i++) {
1145 int Y1 = buf0[i * 2] >> 7;
1146 int Y2 = buf0[i * 2 + 1] >> 7;
1147 int U = ubuf1[i] >> 7;
1148 int V = vbuf1[i] >> 7;
1150 const void *r = c->table_rV[V],
1151 *g = (c->table_gU[U] + c->table_gV[V]),
1152 *b = c->table_bU[U];
1155 A1 = abuf0[i * 2 ] >> 7;
1156 A2 = abuf0[i * 2 + 1] >> 7;
1159 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1160 r, g, b, y, target, hasAlpha);
1163 for (i = 0; i < (dstW >> 1); i++) {
1164 int Y1 = buf0[i * 2] >> 7;
1165 int Y2 = buf0[i * 2 + 1] >> 7;
1166 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1167 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1169 const void *r = c->table_rV[V],
1170 *g = (c->table_gU[U] + c->table_gV[V]),
1171 *b = c->table_bU[U];
1174 A1 = abuf0[i * 2 ] >> 7;
1175 A2 = abuf0[i * 2 + 1] >> 7;
1178 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1179 r, g, b, y, target, hasAlpha);
/* Instantiate the public X / 2 / 1 output functions for one packed RGB
 * target format by forwarding to the av_always_inline templates above;
 * fmt and hasAlpha become compile-time constants in each instance. */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
}

#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
}
1217 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1218 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1220 #if CONFIG_SWSCALE_ALPHA
1221 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1222 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1224 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1225 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1227 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1228 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1229 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1230 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1231 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1232 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1233 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1234 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
1236 static av_always_inline void
1237 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1238 const int16_t **lumSrc, int lumFilterSize,
1239 const int16_t *chrFilter, const int16_t **chrUSrc,
1240 const int16_t **chrVSrc, int chrFilterSize,
1241 const int16_t **alpSrc, uint8_t *dest,
1242 int dstW, int y, enum PixelFormat target, int hasAlpha)
1245 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1247 for (i = 0; i < dstW; i++) {
1255 for (j = 0; j < lumFilterSize; j++) {
1256 Y += lumSrc[j][i] * lumFilter[j];
1258 for (j = 0; j < chrFilterSize; j++) {
1259 U += chrUSrc[j][i] * chrFilter[j];
1260 V += chrVSrc[j][i] * chrFilter[j];
1267 for (j = 0; j < lumFilterSize; j++) {
1268 A += alpSrc[j][i] * lumFilter[j];
1272 A = av_clip_uint8(A);
1274 Y -= c->yuv2rgb_y_offset;
1275 Y *= c->yuv2rgb_y_coeff;
1277 R = Y + V*c->yuv2rgb_v2r_coeff;
1278 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1279 B = Y + U*c->yuv2rgb_u2b_coeff;
1280 if ((R | G | B) & 0xC0000000) {
1281 R = av_clip_uintp2(R, 30);
1282 G = av_clip_uintp2(G, 30);
1283 B = av_clip_uintp2(B, 30);
1288 dest[0] = hasAlpha ? A : 255;
1302 dest[3] = hasAlpha ? A : 255;
1305 dest[0] = hasAlpha ? A : 255;
1320 dest[3] = hasAlpha ? A : 255;
1328 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1329 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1330 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1331 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1333 #if CONFIG_SWSCALE_ALPHA
1334 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1335 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1336 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1337 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1339 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1340 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1341 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1342 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1344 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1345 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
1347 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1348 int width, int height,
1352 uint8_t *ptr = plane + stride*y;
1353 for (i=0; i<height; i++) {
1354 memset(ptr, val, width);
1359 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1361 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1362 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1364 static av_always_inline void
1365 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1366 enum PixelFormat origin)
1369 for (i = 0; i < width; i++) {
1370 unsigned int r_b = input_pixel(&src[i*3+0]);
1371 unsigned int g = input_pixel(&src[i*3+1]);
1372 unsigned int b_r = input_pixel(&src[i*3+2]);
1374 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1378 static av_always_inline void
1379 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1380 const uint16_t *src1, const uint16_t *src2,
1381 int width, enum PixelFormat origin)
1385 for (i = 0; i < width; i++) {
1386 int r_b = input_pixel(&src1[i*3+0]);
1387 int g = input_pixel(&src1[i*3+1]);
1388 int b_r = input_pixel(&src1[i*3+2]);
1390 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1391 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1395 static av_always_inline void
1396 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1397 const uint16_t *src1, const uint16_t *src2,
1398 int width, enum PixelFormat origin)
1402 for (i = 0; i < width; i++) {
1403 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1404 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1405 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1407 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1408 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1416 #define rgb48funcs(pattern, BE_LE, origin) \
1417 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1418 int width, uint32_t *unused) \
1420 const uint16_t *src = (const uint16_t *) _src; \
1421 uint16_t *dst = (uint16_t *) _dst; \
1422 rgb48ToY_c_template(dst, src, width, origin); \
1425 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1426 const uint8_t *_src1, const uint8_t *_src2, \
1427 int width, uint32_t *unused) \
1429 const uint16_t *src1 = (const uint16_t *) _src1, \
1430 *src2 = (const uint16_t *) _src2; \
1431 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1432 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1435 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1436 const uint8_t *_src1, const uint8_t *_src2, \
1437 int width, uint32_t *unused) \
1439 const uint16_t *src1 = (const uint16_t *) _src1, \
1440 *src2 = (const uint16_t *) _src2; \
1441 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1442 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1445 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1446 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1447 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1448 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1450 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1451 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1452 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1454 static av_always_inline void
1455 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1456 int width, enum PixelFormat origin,
1457 int shr, int shg, int shb, int shp,
1458 int maskr, int maskg, int maskb,
1459 int rsh, int gsh, int bsh, int S)
1461 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1462 const unsigned rnd = 33u << (S - 1);
1465 for (i = 0; i < width; i++) {
1466 int px = input_pixel(i) >> shp;
1467 int b = (px & maskb) >> shb;
1468 int g = (px & maskg) >> shg;
1469 int r = (px & maskr) >> shr;
1471 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1475 static av_always_inline void
1476 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1477 const uint8_t *src, int width,
1478 enum PixelFormat origin,
1479 int shr, int shg, int shb, int shp,
1480 int maskr, int maskg, int maskb,
1481 int rsh, int gsh, int bsh, int S)
1483 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1484 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1485 const unsigned rnd = 257u << (S - 1);
1488 for (i = 0; i < width; i++) {
1489 int px = input_pixel(i) >> shp;
1490 int b = (px & maskb) >> shb;
1491 int g = (px & maskg) >> shg;
1492 int r = (px & maskr) >> shr;
1494 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1495 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
1499 static av_always_inline void
1500 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1501 const uint8_t *src, int width,
1502 enum PixelFormat origin,
1503 int shr, int shg, int shb, int shp,
1504 int maskr, int maskg, int maskb,
1505 int rsh, int gsh, int bsh, int S)
1507 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1508 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1509 maskgx = ~(maskr | maskb);
1510 const unsigned rnd = 257u << S;
1513 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1514 for (i = 0; i < width; i++) {
1515 int px0 = input_pixel(2 * i + 0) >> shp;
1516 int px1 = input_pixel(2 * i + 1) >> shp;
1517 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1518 int rb = px0 + px1 - g;
1520 b = (rb & maskb) >> shb;
1521 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1522 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1525 g = (g & maskg) >> shg;
1527 r = (rb & maskr) >> shr;
1529 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1530 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
1536 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1537 maskg, maskb, rsh, gsh, bsh, S) \
1538 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1539 int width, uint32_t *unused) \
1541 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1542 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1545 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1546 const uint8_t *src, const uint8_t *dummy, \
1547 int width, uint32_t *unused) \
1549 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1550 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1553 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1554 const uint8_t *src, const uint8_t *dummy, \
1555 int width, uint32_t *unused) \
1557 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1558 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1561 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1562 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1563 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1564 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1565 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1566 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1567 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1568 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1569 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1570 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1571 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1572 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/** Extract the alpha plane from ABGR/ARGB-style input (alpha first). */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i];
    }
}
/** Extract the alpha plane from RGBA/BGRA-style input (alpha last). */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i+3];
    }
}
/** PAL8 to luma: the palette stores Y in the low byte of each entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
/** PAL8 to chroma: palette entries hold U in bits 8-15, V in bits 16-23. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}
/**
 * Expand 1 bpp mono (0 = white) to 8-bit luma: each source bit becomes
 * 0 or 255, inverted because white is 0 in the source.
 * Fix: also expand the trailing bits when width is not a multiple of 8
 * (previously those output pixels were left unwritten).
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (width&7) {
        int d= ~src[i];
        for (j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
/**
 * Expand 1 bpp mono (1 = white) to 8-bit luma: each source bit becomes
 * 0 or 255.
 * Fix: also expand the trailing bits when width is not a multiple of 8
 * (previously those output pixels were left unwritten).
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (width&7) {
        int d= src[i];
        for (j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
//FIXME yuy2* can read up to 7 samples too much

/** YUYV422 to luma: Y samples sit at even byte offsets. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/** YUYV422 to chroma: U at offset 1, V at offset 3 of each 4-byte pair. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/** Byte-swap one 16-bit plane line (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i=0; i<width; i++) {
        dst[i] = av_bswap16(src[i]);
    }
}
/** Byte-swap one 16-bit U line and one 16-bit V line. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    for (i=0; i<width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
    }
}
/* This is almost identical to the previous, end exists only because
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/** UYVY422 to luma: Y samples sit at odd byte offsets. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i + 1];
}
/** UYVY422 to chroma: U at offset 0, V at offset 2 of each 4-byte pair. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
1701 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1702 const uint8_t *src, int width)
1705 for (i = 0; i < width; i++) {
1706 dst1[i] = src[2*i+0];
1707 dst2[i] = src[2*i+1];
/** NV12: interleaved chroma is U,V -> write U first. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/** NV21: interleaved chroma is V,U -> swap the destination planes. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1725 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1727 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1728 int width, uint32_t *unused)
1731 for (i=0; i<width; i++) {
1736 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1740 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1741 const uint8_t *src2, int width, uint32_t *unused)
1744 for (i=0; i<width; i++) {
1745 int b= src1[3*i + 0];
1746 int g= src1[3*i + 1];
1747 int r= src1[3*i + 2];
1749 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1750 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1752 assert(src1 == src2);
1755 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1756 const uint8_t *src2, int width, uint32_t *unused)
1759 for (i=0; i<width; i++) {
1760 int b= src1[6*i + 0] + src1[6*i + 3];
1761 int g= src1[6*i + 1] + src1[6*i + 4];
1762 int r= src1[6*i + 2] + src1[6*i + 5];
1764 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1765 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1767 assert(src1 == src2);
1770 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1774 for (i=0; i<width; i++) {
1779 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1783 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1784 const uint8_t *src2, int width, uint32_t *unused)
1788 for (i=0; i<width; i++) {
1789 int r= src1[3*i + 0];
1790 int g= src1[3*i + 1];
1791 int b= src1[3*i + 2];
1793 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1794 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1798 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1799 const uint8_t *src2, int width, uint32_t *unused)
1803 for (i=0; i<width; i++) {
1804 int r= src1[6*i + 0] + src1[6*i + 3];
1805 int g= src1[6*i + 1] + src1[6*i + 4];
1806 int b= src1[6*i + 2] + src1[6*i + 5];
1808 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1809 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1813 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1816 for (i = 0; i < width; i++) {
1821 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1825 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1828 const uint16_t **src = (const uint16_t **) _src;
1829 uint16_t *dst = (uint16_t *) _dst;
1830 for (i = 0; i < width; i++) {
1831 int g = AV_RL16(src[0] + i);
1832 int b = AV_RL16(src[1] + i);
1833 int r = AV_RL16(src[2] + i);
1835 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1839 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1842 const uint16_t **src = (const uint16_t **) _src;
1843 uint16_t *dst = (uint16_t *) _dst;
1844 for (i = 0; i < width; i++) {
1845 int g = AV_RB16(src[0] + i);
1846 int b = AV_RB16(src[1] + i);
1847 int r = AV_RB16(src[2] + i);
1849 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1853 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1856 for (i = 0; i < width; i++) {
1861 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1862 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1866 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1869 const uint16_t **src = (const uint16_t **) _src;
1870 uint16_t *dstU = (uint16_t *) _dstU;
1871 uint16_t *dstV = (uint16_t *) _dstV;
1872 for (i = 0; i < width; i++) {
1873 int g = AV_RL16(src[0] + i);
1874 int b = AV_RL16(src[1] + i);
1875 int r = AV_RL16(src[2] + i);
1877 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1878 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1882 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1885 const uint16_t **src = (const uint16_t **) _src;
1886 uint16_t *dstU = (uint16_t *) _dstU;
1887 uint16_t *dstV = (uint16_t *) _dstV;
1888 for (i = 0; i < width; i++) {
1889 int g = AV_RB16(src[0] + i);
1890 int b = AV_RB16(src[1] + i);
1891 int r = AV_RB16(src[2] + i);
1893 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1894 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1898 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1899 const int16_t *filter,
1900 const int16_t *filterPos, int filterSize)
1903 int32_t *dst = (int32_t *) _dst;
1904 const uint16_t *src = (const uint16_t *) _src;
1905 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1908 for (i = 0; i < dstW; i++) {
1910 int srcPos = filterPos[i];
1913 for (j = 0; j < filterSize; j++) {
1914 val += src[srcPos + j] * filter[filterSize * i + j];
1916 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1917 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1921 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1922 const int16_t *filter,
1923 const int16_t *filterPos, int filterSize)
1926 const uint16_t *src = (const uint16_t *) _src;
1927 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1929 for (i = 0; i < dstW; i++) {
1931 int srcPos = filterPos[i];
1934 for (j = 0; j < filterSize; j++) {
1935 val += src[srcPos + j] * filter[filterSize * i + j];
1937 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1938 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1942 // bilinear / bicubic scaling
1943 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1944 const int16_t *filter, const int16_t *filterPos,
1948 for (i=0; i<dstW; i++) {
1950 int srcPos= filterPos[i];
1952 for (j=0; j<filterSize; j++) {
1953 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1955 //filter += hFilterSize;
1956 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1961 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1962 const int16_t *filter, const int16_t *filterPos,
1966 int32_t *dst = (int32_t *) _dst;
1967 for (i=0; i<dstW; i++) {
1969 int srcPos= filterPos[i];
1971 for (j=0; j<filterSize; j++) {
1972 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1974 //filter += hFilterSize;
1975 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform

/** Expand limited-range chroma (15-bit intermediate) to JPEG full range. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/** Compress JPEG full-range chroma (15-bit intermediate) to limited range. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/** Expand limited-range luma (15-bit intermediate) to JPEG full range. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/** Compress JPEG full-range luma (15-bit intermediate) to limited range. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/** 19-bit variant of chrRangeToJpeg_c (int32_t storage behind the
 *  int16_t pointers). */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
    }
}
/** 19-bit variant of chrRangeFromJpeg_c. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/** 19-bit variant of lumRangeToJpeg_c. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
}
/** 19-bit variant of lumRangeFromJpeg_c. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
}
2046 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2047 const uint8_t *src, int srcW, int xInc)
2050 unsigned int xpos=0;
2051 for (i=0;i<dstWidth;i++) {
2052 register unsigned int xx=xpos>>16;
2053 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2054 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2059 // *** horizontal scale Y line to temp buffer
2060 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2061 const uint8_t *src_in[4], int srcW, int xInc,
2062 const int16_t *hLumFilter,
2063 const int16_t *hLumFilterPos, int hLumFilterSize,
2064 uint8_t *formatConvBuffer,
2065 uint32_t *pal, int isAlpha)
2067 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2068 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2069 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2072 toYV12(formatConvBuffer, src, srcW, pal);
2073 src= formatConvBuffer;
2074 } else if (c->readLumPlanar && !isAlpha) {
2075 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2076 src = formatConvBuffer;
2079 if (!c->hyscale_fast) {
2080 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2081 } else { // fast bilinear upscale / crap downscale
2082 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2086 convertRange(dst, dstWidth);
2089 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2090 int dstWidth, const uint8_t *src1,
2091 const uint8_t *src2, int srcW, int xInc)
2094 unsigned int xpos=0;
2095 for (i=0;i<dstWidth;i++) {
2096 register unsigned int xx=xpos>>16;
2097 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2098 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2099 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2104 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2105 const uint8_t *src_in[4],
2106 int srcW, int xInc, const int16_t *hChrFilter,
2107 const int16_t *hChrFilterPos, int hChrFilterSize,
2108 uint8_t *formatConvBuffer, uint32_t *pal)
2110 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2112 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2113 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2114 src1= formatConvBuffer;
2116 } else if (c->readChrPlanar) {
2117 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2118 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2119 src1= formatConvBuffer;
2123 if (!c->hcscale_fast) {
2124 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2125 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2126 } else { // fast bilinear upscale / crap downscale
2127 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2130 if (c->chrConvertRange)
2131 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Select the plain-C output routines that match c->dstFormat and store them
 * through the caller-provided function-pointer out-parameters:
 *   - yuv2plane1 / yuv2planeX: planar output writers, chosen by destination
 *     bit depth (16-bit, 9/10-bit via comp[0].depth_minus1, else 8-bit) and
 *     by endianness of the destination format.
 *   - yuv2nv12cX: interleaved-chroma writer, installed only for NV12/NV21.
 *   - yuv2packed1 / yuv2packed2 / yuv2packedX: packed output writers
 *     (1-tap / 2-tap / N-tap vertical filters); when SWS_FULL_CHR_H_INT is
 *     set, only the "full chroma interpolation" X-tap variants exist.
 * NOTE(review): this excerpt omits several case labels, break statements and
 * braces; the switch structure shown here is incomplete — verify against the
 * full file before editing.
 */
2134 static av_always_inline void
2135 find_c_packed_planar_out_funcs(SwsContext *c,
2136 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2137 yuv2interleavedX_fn *yuv2nv12cX,
2138 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2139 yuv2packedX_fn *yuv2packedX)
2141 enum PixelFormat dstFormat = c->dstFormat;
/* --- planar writers: pick by component depth, then by endianness --- */
2143 if (is16BPS(dstFormat)) {
2144 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2145 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2146 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means 9-bit components; otherwise the 10-bit variant */
2147 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2148 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2149 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2151 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2152 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2155 *yuv2plane1 = yuv2plane1_8_c;
2156 *yuv2planeX = yuv2planeX_8_c;
/* NV12/NV21 interleave U and V into one chroma plane */
2157 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2158 *yuv2nv12cX = yuv2nv12cX_c;
/* --- packed writers --- */
/* full horizontal chroma interpolation requested: X-tap variants only */
2161 if(c->flags & SWS_FULL_CHR_H_INT) {
2162 switch (dstFormat) {
2165 *yuv2packedX = yuv2rgba32_full_X_c;
2167 #if CONFIG_SWSCALE_ALPHA
2169 *yuv2packedX = yuv2rgba32_full_X_c;
2171 #endif /* CONFIG_SWSCALE_ALPHA */
2173 *yuv2packedX = yuv2rgbx32_full_X_c;
2175 #endif /* !CONFIG_SMALL */
2179 *yuv2packedX = yuv2argb32_full_X_c;
2181 #if CONFIG_SWSCALE_ALPHA
2183 *yuv2packedX = yuv2argb32_full_X_c;
2185 #endif /* CONFIG_SWSCALE_ALPHA */
2187 *yuv2packedX = yuv2xrgb32_full_X_c;
2189 #endif /* !CONFIG_SMALL */
2193 *yuv2packedX = yuv2bgra32_full_X_c;
2195 #if CONFIG_SWSCALE_ALPHA
2197 *yuv2packedX = yuv2bgra32_full_X_c;
2199 #endif /* CONFIG_SWSCALE_ALPHA */
2201 *yuv2packedX = yuv2bgrx32_full_X_c;
2203 #endif /* !CONFIG_SMALL */
2207 *yuv2packedX = yuv2abgr32_full_X_c;
2209 #if CONFIG_SWSCALE_ALPHA
2211 *yuv2packedX = yuv2abgr32_full_X_c;
2213 #endif /* CONFIG_SWSCALE_ALPHA */
2215 *yuv2packedX = yuv2xbgr32_full_X_c;
2217 #endif /* !CONFIG_SMALL */
2220 *yuv2packedX = yuv2rgb24_full_X_c;
2223 *yuv2packedX = yuv2bgr24_full_X_c;
/* no full-chroma path: install 1-tap, 2-tap and X-tap writers per format */
2227 switch (dstFormat) {
2228 case PIX_FMT_GRAY16BE:
2229 *yuv2packed1 = yuv2gray16BE_1_c;
2230 *yuv2packed2 = yuv2gray16BE_2_c;
2231 *yuv2packedX = yuv2gray16BE_X_c;
2233 case PIX_FMT_GRAY16LE:
2234 *yuv2packed1 = yuv2gray16LE_1_c;
2235 *yuv2packed2 = yuv2gray16LE_2_c;
2236 *yuv2packedX = yuv2gray16LE_X_c;
2238 case PIX_FMT_MONOWHITE:
2239 *yuv2packed1 = yuv2monowhite_1_c;
2240 *yuv2packed2 = yuv2monowhite_2_c;
2241 *yuv2packedX = yuv2monowhite_X_c;
2243 case PIX_FMT_MONOBLACK:
2244 *yuv2packed1 = yuv2monoblack_1_c;
2245 *yuv2packed2 = yuv2monoblack_2_c;
2246 *yuv2packedX = yuv2monoblack_X_c;
2248 case PIX_FMT_YUYV422:
2249 *yuv2packed1 = yuv2yuyv422_1_c;
2250 *yuv2packed2 = yuv2yuyv422_2_c;
2251 *yuv2packedX = yuv2yuyv422_X_c;
2253 case PIX_FMT_UYVY422:
2254 *yuv2packed1 = yuv2uyvy422_1_c;
2255 *yuv2packed2 = yuv2uyvy422_2_c;
2256 *yuv2packedX = yuv2uyvy422_X_c;
2258 case PIX_FMT_RGB48LE:
2259 *yuv2packed1 = yuv2rgb48le_1_c;
2260 *yuv2packed2 = yuv2rgb48le_2_c;
2261 *yuv2packedX = yuv2rgb48le_X_c;
2263 case PIX_FMT_RGB48BE:
2264 *yuv2packed1 = yuv2rgb48be_1_c;
2265 *yuv2packed2 = yuv2rgb48be_2_c;
2266 *yuv2packedX = yuv2rgb48be_X_c;
2268 case PIX_FMT_BGR48LE:
2269 *yuv2packed1 = yuv2bgr48le_1_c;
2270 *yuv2packed2 = yuv2bgr48le_2_c;
2271 *yuv2packedX = yuv2bgr48le_X_c;
2273 case PIX_FMT_BGR48BE:
2274 *yuv2packed1 = yuv2bgr48be_1_c;
2275 *yuv2packed2 = yuv2bgr48be_2_c;
2276 *yuv2packedX = yuv2bgr48be_X_c;
2281 *yuv2packed1 = yuv2rgb32_1_c;
2282 *yuv2packed2 = yuv2rgb32_2_c;
2283 *yuv2packedX = yuv2rgb32_X_c;
2285 #if CONFIG_SWSCALE_ALPHA
2287 *yuv2packed1 = yuv2rgba32_1_c;
2288 *yuv2packed2 = yuv2rgba32_2_c;
2289 *yuv2packedX = yuv2rgba32_X_c;
2291 #endif /* CONFIG_SWSCALE_ALPHA */
2293 *yuv2packed1 = yuv2rgbx32_1_c;
2294 *yuv2packed2 = yuv2rgbx32_2_c;
2295 *yuv2packedX = yuv2rgbx32_X_c;
2297 #endif /* !CONFIG_SMALL */
2299 case PIX_FMT_RGB32_1:
2300 case PIX_FMT_BGR32_1:
2302 *yuv2packed1 = yuv2rgb32_1_1_c;
2303 *yuv2packed2 = yuv2rgb32_1_2_c;
2304 *yuv2packedX = yuv2rgb32_1_X_c;
2306 #if CONFIG_SWSCALE_ALPHA
2308 *yuv2packed1 = yuv2rgba32_1_1_c;
2309 *yuv2packed2 = yuv2rgba32_1_2_c;
2310 *yuv2packedX = yuv2rgba32_1_X_c;
2312 #endif /* CONFIG_SWSCALE_ALPHA */
2314 *yuv2packed1 = yuv2rgbx32_1_1_c;
2315 *yuv2packed2 = yuv2rgbx32_1_2_c;
2316 *yuv2packedX = yuv2rgbx32_1_X_c;
2318 #endif /* !CONFIG_SMALL */
2321 *yuv2packed1 = yuv2rgb24_1_c;
2322 *yuv2packed2 = yuv2rgb24_2_c;
2323 *yuv2packedX = yuv2rgb24_X_c;
2326 *yuv2packed1 = yuv2bgr24_1_c;
2327 *yuv2packed2 = yuv2bgr24_2_c;
2328 *yuv2packedX = yuv2bgr24_X_c;
/* 565/555/444: one writer handles both RGB and BGR orders, LE and BE */
2330 case PIX_FMT_RGB565LE:
2331 case PIX_FMT_RGB565BE:
2332 case PIX_FMT_BGR565LE:
2333 case PIX_FMT_BGR565BE:
2334 *yuv2packed1 = yuv2rgb16_1_c;
2335 *yuv2packed2 = yuv2rgb16_2_c;
2336 *yuv2packedX = yuv2rgb16_X_c;
2338 case PIX_FMT_RGB555LE:
2339 case PIX_FMT_RGB555BE:
2340 case PIX_FMT_BGR555LE:
2341 case PIX_FMT_BGR555BE:
2342 *yuv2packed1 = yuv2rgb15_1_c;
2343 *yuv2packed2 = yuv2rgb15_2_c;
2344 *yuv2packedX = yuv2rgb15_X_c;
2346 case PIX_FMT_RGB444LE:
2347 case PIX_FMT_RGB444BE:
2348 case PIX_FMT_BGR444LE:
2349 case PIX_FMT_BGR444BE:
2350 *yuv2packed1 = yuv2rgb12_1_c;
2351 *yuv2packed2 = yuv2rgb12_2_c;
2352 *yuv2packedX = yuv2rgb12_X_c;
2356 *yuv2packed1 = yuv2rgb8_1_c;
2357 *yuv2packed2 = yuv2rgb8_2_c;
2358 *yuv2packedX = yuv2rgb8_X_c;
2362 *yuv2packed1 = yuv2rgb4_1_c;
2363 *yuv2packed2 = yuv2rgb4_2_c;
2364 *yuv2packedX = yuv2rgb4_X_c;
2366 case PIX_FMT_RGB4_BYTE:
2367 case PIX_FMT_BGR4_BYTE:
2368 *yuv2packed1 = yuv2rgb4b_1_c;
2369 *yuv2packed2 = yuv2rgb4b_2_c;
2370 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for ring-buffer tracing in swScale(); set to 1 to
 * enable the DEBUG_BUFFERS() logging below. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Log through the SwsContext `c` that must be in scope at the expansion
 * site.  Wrapped in do { } while (0) so the macro behaves as one statement:
 * the original bare `if` form breaks un-braced if/else around the macro
 * (dangling-else hazard) and swallows a following `else`. */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
/*
 * swScale(): the generic C scaling loop.
 *
 * Consumes the input slice [srcSliceY, srcSliceY + srcSliceH) of src[],
 * horizontally scales the source lines it needs into the luma/chroma ring
 * buffers (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf), then vertically
 * filters and writes destination lines through the yuv2plane*/yuv2packed*
 * function pointers.  Ring-buffer state (lumBufIndex, lastInLumBuf, ...) is
 * carried across calls via the context, so the caller may feed the picture
 * in successive slices.  Returns the number of destination lines produced
 * in this call (dstY - lastDstY).
 *
 * NOTE(review): this excerpt omits some declarations (e.g. dstY, lastDstY,
 * enough_lines appear without visible definitions) and several braces; the
 * comments below describe only what the visible lines establish.
 */
2379 static int swScale(SwsContext *c, const uint8_t* src[],
2380 int srcStride[], int srcSliceY,
2381 int srcSliceH, uint8_t* dst[], int dstStride[])
2383 /* load a few things into local vars to make the code more readable? and faster */
2384 const int srcW= c->srcW;
2385 const int dstW= c->dstW;
2386 const int dstH= c->dstH;
2387 const int chrDstW= c->chrDstW;
2388 const int chrSrcW= c->chrSrcW;
2389 const int lumXInc= c->lumXInc;
2390 const int chrXInc= c->chrXInc;
2391 const enum PixelFormat dstFormat= c->dstFormat;
2392 const int flags= c->flags;
2393 int16_t *vLumFilterPos= c->vLumFilterPos;
2394 int16_t *vChrFilterPos= c->vChrFilterPos;
2395 int16_t *hLumFilterPos= c->hLumFilterPos;
2396 int16_t *hChrFilterPos= c->hChrFilterPos;
2397 int16_t *vLumFilter= c->vLumFilter;
2398 int16_t *vChrFilter= c->vChrFilter;
2399 int16_t *hLumFilter= c->hLumFilter;
2400 int16_t *hChrFilter= c->hChrFilter;
2401 int32_t *lumMmxFilter= c->lumMmxFilter;
2402 int32_t *chrMmxFilter= c->chrMmxFilter;
2403 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2404 const int vLumFilterSize= c->vLumFilterSize;
2405 const int vChrFilterSize= c->vChrFilterSize;
2406 const int hLumFilterSize= c->hLumFilterSize;
2407 const int hChrFilterSize= c->hChrFilterSize;
/* ring buffers of horizontally scaled lines awaiting vertical filtering */
2408 int16_t **lumPixBuf= c->lumPixBuf;
2409 int16_t **chrUPixBuf= c->chrUPixBuf;
2410 int16_t **chrVPixBuf= c->chrVPixBuf;
2411 int16_t **alpPixBuf= c->alpPixBuf;
2412 const int vLumBufSize= c->vLumBufSize;
2413 const int vChrBufSize= c->vChrBufSize;
2414 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* slice coordinates in chroma resolution (rounding toward more coverage) */
2415 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2416 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2418 uint32_t *pal=c->pal_yuv;
2419 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2420 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2421 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2422 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2423 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2424 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* dither only when the source has more precision than 8 bits */
2425 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2427 /* vars which will change and which we need to store back in the context */
2429 int lumBufIndex= c->lumBufIndex;
2430 int chrBufIndex= c->chrBufIndex;
2431 int lastInLumBuf= c->lastInLumBuf;
2432 int lastInChrBuf= c->lastInChrBuf;
/* packed input: planes alias the same buffer (details outside this excerpt) */
2434 if (isPacked(c->srcFormat)) {
2442 srcStride[3]= srcStride[0];
/* vChrDrop discards input chroma lines by widening the chroma strides */
2444 srcStride[1]<<= c->vChrDrop;
2445 srcStride[2]<<= c->vChrDrop;
2447 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2448 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2449 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2450 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2451 srcSliceY, srcSliceH, dstY, dstH);
2452 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2453 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn once if the destination strides defeat aligned stores */
2455 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2456 static int warnedAlready=0; //FIXME move this into the context perhaps
2457 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2458 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2459 " ->cannot do aligned memory accesses anymore\n");
2464 /* Note the user might start scaling the picture in the middle so this
2465 will not get executed. This is not really intended but works
2466 currently, so people might do it. */
2467 if (srcSliceY ==0) {
2475 if (!should_dither) {
2476 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* --- main loop: one destination line per iteration --- */
2480 for (;dstY < dstH; dstY++) {
2481 const int chrDstY= dstY>>c->chrDstVSubSample;
2482 uint8_t *dest[4] = {
2483 dst[0] + dstStride[0] * dstY,
2484 dst[1] + dstStride[1] * chrDstY,
2485 dst[2] + dstStride[2] * chrDstY,
2486 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2489 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2490 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2491 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2492 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2493 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2494 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2497 //handle holes (FAST_BILINEAR & weird filters)
2498 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2499 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2500 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2501 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2503 DEBUG_BUFFERS("dstY: %d\n", dstY);
2504 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2505 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2506 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2507 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2509 // Do we have enough lines in this slice to output the dstY line
2510 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2512 if (!enough_lines) {
/* clamp to what this slice can provide; buffer it and wait for more input */
2513 lastLumSrcY = srcSliceY + srcSliceH - 1;
2514 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2515 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2516 lastLumSrcY, lastChrSrcY);
2519 //Do horizontal scaling
2520 while(lastInLumBuf < lastLumSrcY) {
2521 const uint8_t *src1[4] = {
2522 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2523 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2524 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2525 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2528 assert(lumBufIndex < 2*vLumBufSize);
2529 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2530 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2531 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2532 hLumFilter, hLumFilterPos, hLumFilterSize,
/* alpha plane is scaled with the luma filter when present */
2535 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2536 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2537 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2541 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2542 lumBufIndex, lastInLumBuf);
2544 while(lastInChrBuf < lastChrSrcY) {
2545 const uint8_t *src1[4] = {
2546 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2547 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2548 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2549 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2552 assert(chrBufIndex < 2*vChrBufSize);
2553 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2554 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2555 //FIXME replace parameters through context struct (some at least)
2557 if (c->needs_hcscale)
2558 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2559 chrDstW, src1, chrSrcW, chrXInc,
2560 hChrFilter, hChrFilterPos, hChrFilterSize,
2561 formatConvBuffer, pal);
2563 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2564 chrBufIndex, lastInChrBuf);
2566 //wrap buf index around to stay inside the ring buffer
2567 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2568 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2570 break; //we can't output a dstY line so let's try with the next slice
2573 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2575 if (should_dither) {
2576 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2577 c->lumDither8 = dither_8x8_128[dstY & 7];
2579 if (dstY >= dstH-2) {
2580 // hmm looks like we can't use MMX here without overwriting this array's tail
2581 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2582 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* pointers into the ring buffers for the lines this dstY needs */
2586 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2587 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2588 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2589 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2590 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2591 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* vLumFilterSize == 1: plain copy/shift, no vertical filtering needed */
2593 if (vLumFilterSize == 1) {
2594 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2596 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2597 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* chroma is written only on non-skipped lines and for non-gray output */
2600 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2602 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2603 } else if (vChrFilterSize == 1) {
2604 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2605 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2607 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2608 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2609 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2610 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2614 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2615 if (vLumFilterSize == 1) {
2616 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2618 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2619 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2623 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2624 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2625 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2626 int chrAlpha = vChrFilter[2 * dstY + 1];
2627 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2628 alpPixBuf ? *alpSrcPtr : NULL,
2629 dest[0], dstW, chrAlpha, dstY);
2630 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2631 int lumAlpha = vLumFilter[2 * dstY + 1];
2632 int chrAlpha = vChrFilter[2 * dstY + 1];
2634 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2636 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2637 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2638 alpPixBuf ? alpSrcPtr : NULL,
2639 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2640 } else { //general RGB
2641 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2642 lumSrcPtr, vLumFilterSize,
2643 vChrFilter + dstY * vChrFilterSize,
2644 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2645 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA output without a source alpha plane: fill alpha with opaque */
2651 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2652 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* make non-temporal MMX2 stores visible before returning */
2655 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2656 __asm__ volatile("sfence":::"memory");
2660 /* store changed local vars back in the context */
2662 c->lumBufIndex= lumBufIndex;
2663 c->chrBufIndex= chrBufIndex;
2664 c->lastInLumBuf= lastInLumBuf;
2665 c->lastInChrBuf= lastInChrBuf;
2667 return dstY - lastDstY;
/*
 * sws_init_swScale_c(): install the plain-C function pointers into the
 * SwsContext:
 *   - output selectors (via find_c_packed_planar_out_funcs),
 *   - per-source-format input unpackers: chrToYV12 (chroma), lumToYV12
 *     (luma), alpToYV12 (alpha), and the planar-RGB readers,
 *   - horizontal scalers chosen by src/dst bit depth, with optional
 *     fast-bilinear variants,
 *   - luma/chroma range-conversion routines when src and dst ranges differ.
 * NOTE(review): several case labels and break/else lines fall outside this
 * excerpt; the dispatch tables below are shown incompletely.
 */
2670 static av_cold void sws_init_swScale_c(SwsContext *c)
2672 enum PixelFormat srcFormat = c->srcFormat;
2674 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2675 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* --- chroma input unpackers --- */
2678 c->chrToYV12 = NULL;
2680 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2681 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2682 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2683 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2687 case PIX_FMT_BGR4_BYTE:
2688 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* planar GBR formats read chroma straight from the G/B/R planes */
2689 case PIX_FMT_GBRP9LE:
2690 case PIX_FMT_GBRP10LE:
2691 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2692 case PIX_FMT_GBRP9BE:
2693 case PIX_FMT_GBRP10BE:
2694 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2695 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* >8-bit planar YUV input in foreign endianness gets byteswapped */
2697 case PIX_FMT_YUV444P9LE:
2698 case PIX_FMT_YUV422P9LE:
2699 case PIX_FMT_YUV420P9LE:
2700 case PIX_FMT_YUV422P10LE:
2701 case PIX_FMT_YUV444P10LE:
2702 case PIX_FMT_YUV420P10LE:
2703 case PIX_FMT_YUV420P16LE:
2704 case PIX_FMT_YUV422P16LE:
2705 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2707 case PIX_FMT_YUV444P9BE:
2708 case PIX_FMT_YUV422P9BE:
2709 case PIX_FMT_YUV420P9BE:
2710 case PIX_FMT_YUV444P10BE:
2711 case PIX_FMT_YUV422P10BE:
2712 case PIX_FMT_YUV420P10BE:
2713 case PIX_FMT_YUV420P16BE:
2714 case PIX_FMT_YUV422P16BE:
2715 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* horizontally subsampled chroma: use the "_half" RGB->UV readers, which
 * average two source pixels per output chroma sample */
2718 if (c->chrSrcHSubSample) {
2720 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2721 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2722 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2723 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2724 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2725 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2726 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2727 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2728 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2729 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2730 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2731 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2732 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2733 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2734 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2735 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2736 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2737 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* full-resolution chroma: one-to-one RGB->UV readers */
2741 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2742 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2743 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2744 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2745 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2746 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2747 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2748 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2749 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2750 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2751 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2752 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2753 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2754 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2755 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2756 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2757 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2758 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* --- luma (and alpha) input unpackers --- */
2762 c->lumToYV12 = NULL;
2763 c->alpToYV12 = NULL;
2764 switch (srcFormat) {
2765 case PIX_FMT_GBRP9LE:
2766 case PIX_FMT_GBRP10LE:
2767 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2768 case PIX_FMT_GBRP9BE:
2769 case PIX_FMT_GBRP10BE:
2770 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2771 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2773 case PIX_FMT_YUV444P9LE:
2774 case PIX_FMT_YUV422P9LE:
2775 case PIX_FMT_YUV420P9LE:
2776 case PIX_FMT_YUV444P10LE:
2777 case PIX_FMT_YUV422P10LE:
2778 case PIX_FMT_YUV420P10LE:
2779 case PIX_FMT_YUV420P16LE:
2780 case PIX_FMT_YUV422P16LE:
2781 case PIX_FMT_YUV444P16LE:
2782 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2784 case PIX_FMT_YUV444P9BE:
2785 case PIX_FMT_YUV422P9BE:
2786 case PIX_FMT_YUV420P9BE:
2787 case PIX_FMT_YUV444P10BE:
2788 case PIX_FMT_YUV422P10BE:
2789 case PIX_FMT_YUV420P10BE:
2790 case PIX_FMT_YUV420P16BE:
2791 case PIX_FMT_YUV422P16BE:
2792 case PIX_FMT_YUV444P16BE:
2793 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2795 case PIX_FMT_YUYV422 :
2796 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2797 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2798 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2799 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2800 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2801 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2802 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2803 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2804 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2805 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2806 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2807 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2811 case PIX_FMT_BGR4_BYTE:
2812 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2813 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2814 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2815 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2816 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2817 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2818 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2819 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2820 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2821 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2822 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2825 switch (srcFormat) {
2827 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2829 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2830 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* --- horizontal scalers: choose by source/destination bit depth --- */
2834 if (c->srcBpc == 8) {
2835 if (c->dstBpc <= 10) {
2836 c->hyScale = c->hcScale = hScale8To15_c;
2837 if (c->flags & SWS_FAST_BILINEAR) {
2838 c->hyscale_fast = hyscale_fast_c;
2839 c->hcscale_fast = hcscale_fast_c;
2842 c->hyScale = c->hcScale = hScale8To19_c;
2845 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* --- range conversion (JPEG/full <-> MPEG/limited), YUV outputs only --- */
2848 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2849 if (c->dstBpc <= 10) {
2851 c->lumConvertRange = lumRangeFromJpeg_c;
2852 c->chrConvertRange = chrRangeFromJpeg_c;
2854 c->lumConvertRange = lumRangeToJpeg_c;
2855 c->chrConvertRange = chrRangeToJpeg_c;
2859 c->lumConvertRange = lumRangeFromJpeg16_c;
2860 c->chrConvertRange = chrRangeFromJpeg16_c;
2862 c->lumConvertRange = lumRangeToJpeg16_c;
2863 c->chrConvertRange = chrRangeToJpeg16_c;
/* gray/mono sources carry no chroma, so horizontal chroma scaling is moot */
2868 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2869 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2870 c->needs_hcscale = 1;
2873 SwsFunc ff_getSwsFunc(SwsContext *c)
2875 sws_init_swScale_c(c);
2878 ff_sws_init_swScale_mmx(c);
2880 ff_sws_init_swScale_altivec(c);