2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/*
 * Fixed-point RGB -> YUV conversion coefficients.
 *
 * Each coefficient is scaled by (1 << RGB2YUV_SHIFT) and rounded to the
 * nearest integer. The luma (xY) terms are additionally scaled by 219/255
 * and the chroma (xU, xV) terms by 224/255.
 */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
198 #define output_pixel(pos, val, bias, signedness) \
200 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
202 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
210 int shift = 19 - output_bits;
212 for (i = 0; i < dstW; i++) {
213 int val = src[i] + (1 << (shift - 1));
214 output_pixel(&dest[i], val, 0, uint);
218 static av_always_inline void
219 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
220 const int32_t **src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 + 16 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = 1 << (30-output_bits);
230 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
231 * filters (or anything with negative coeffs, the range can be slightly
232 * wider in both directions. To account for this overflow, we subtract
233 * a constant so it always fits in the signed range (assuming a
234 * reasonable filterSize), and re-add that at the end. */
236 for (j = 0; j < filterSize; j++)
237 val += src[j][i] * filter[j];
239 output_pixel(&dest[i], val, 0x8000, int);
245 #define output_pixel(pos, val) \
247 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
249 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
257 int shift = 15 - output_bits;
259 for (i = 0; i < dstW; i++) {
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
271 int shift = 11 + 16 - output_bits;
273 for (i = 0; i < dstW; i++) {
274 int val = 1 << (26-output_bits);
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
280 output_pixel(&dest[i], val);
/*
 * Generate the yuv2plane1_<bits><BE|LE>_c and yuv2planeX_<bits><BE|LE>_c
 * entry points for one bit depth / endianness pair.
 *
 * Each generated function forwards to the matching *_c_template with the
 * source pointer reinterpreted as typeX_t and the destination as uint16_t.
 * The dither and offset parameters are accepted but not used.
 *
 * The macro expands to complete function definitions, so invocations are
 * not followed by a semicolon (a stray ';' at file scope is not valid
 * ISO C).
 */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
/**
 * Write one line of an 8-bit plane, applying a vertical filter and dither.
 *
 * @param filter     filterSize vertical filter coefficients
 * @param filterSize number of filter taps (and of input lines in src)
 * @param src        filterSize pointers to input lines
 * @param dest       output bytes
 * @param dstW       number of bytes to write
 * @param dither     8-entry dither row, indexed by (i + offset) & 7
 * @param offset     horizontal phase added to the dither index
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;

    for (i=0; i<dstW; i++) {
        /* The dither value seeds the accumulator, scaled up by 12 bits. */
        int val = dither[(i + offset) & 7] << 12;
        int j;

        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
/**
 * Write one line of an 8-bit plane without vertical filtering.
 *
 * Each input sample has the dither value added, is shifted right by 7 and
 * clipped to 0..255.
 *
 * @param src    input samples
 * @param dest   output bytes
 * @param dstW   number of bytes to write
 * @param dither 8-entry dither row, indexed by (i + offset) & 7
 * @param offset horizontal phase added to the dither index
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;

    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
/*
 * output_pixel(pos, val): per-sample store helper for the 16-bit gray
 * writers. The visible part of the macro tests target against
 * PIX_FMT_GRAY16BE.
 * NOTE(review): the stores performed in each branch are not shown with
 * these lines; presumably big-endian for GRAY16BE and little-endian
 * otherwise -- confirm.
 *
 * yuv2gray16_X_c_template: vertically filtered 16-bit gray output.
 * For each of the dstW >> 1 pixel pairs, Y1 and Y2 accumulate
 * lumSrc[j][x] * lumFilter[j] over lumFilterSize taps. If bit 16 is set in
 * either value, both are clipped with av_clip_uint16. The pair is then
 * written to dest[2*i] and dest[2*i + 1] through output_pixel.
 * The chroma, alpha and y arguments are not referenced by the lines shown.
 * NOTE(review): the initial values of Y1/Y2 and any scaling applied between
 * the accumulation and the clip are not shown with these lines -- confirm
 * before relying on exact output values.
 */
370 #define output_pixel(pos, val) \
371 if (target == PIX_FMT_GRAY16BE) { \
377 static av_always_inline void
378 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
379 const int32_t **lumSrc, int lumFilterSize,
380 const int16_t *chrFilter, const int32_t **chrUSrc,
381 const int32_t **chrVSrc, int chrFilterSize,
382 const int32_t **alpSrc, uint16_t *dest, int dstW,
383 int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
392 for (j = 0; j < lumFilterSize; j++) {
393 Y1 += lumSrc[j][i * 2] * lumFilter[j];
394 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
398 if ((Y1 | Y2) & 0x10000) {
399 Y1 = av_clip_uint16(Y1);
400 Y2 = av_clip_uint16(Y2);
402 output_pixel(&dest[i * 2 + 0], Y1);
403 output_pixel(&dest[i * 2 + 1], Y2);
407 static av_always_inline void
408 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
409 const int32_t *ubuf[2], const int32_t *vbuf[2],
410 const int32_t *abuf[2], uint16_t *dest, int dstW,
411 int yalpha, int uvalpha, int y,
412 enum PixelFormat target)
414 int yalpha1 = 4095 - yalpha;
416 const int32_t *buf0 = buf[0], *buf1 = buf[1];
418 for (i = 0; i < (dstW >> 1); i++) {
419 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
420 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
422 output_pixel(&dest[i * 2 + 0], Y1);
423 output_pixel(&dest[i * 2 + 1], Y2);
427 static av_always_inline void
428 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
429 const int32_t *ubuf[2], const int32_t *vbuf[2],
430 const int32_t *abuf0, uint16_t *dest, int dstW,
431 int uvalpha, int y, enum PixelFormat target)
435 for (i = 0; i < (dstW >> 1); i++) {
436 int Y1 = buf0[i * 2 ] << 1;
437 int Y2 = buf0[i * 2 + 1] << 1;
439 output_pixel(&dest[i * 2 + 0], Y1);
440 output_pixel(&dest[i * 2 + 1], Y2);
446 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
447 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
448 const int16_t **_lumSrc, int lumFilterSize, \
449 const int16_t *chrFilter, const int16_t **_chrUSrc, \
450 const int16_t **_chrVSrc, int chrFilterSize, \
451 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
454 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
455 **chrUSrc = (const int32_t **) _chrUSrc, \
456 **chrVSrc = (const int32_t **) _chrVSrc, \
457 **alpSrc = (const int32_t **) _alpSrc; \
458 uint16_t *dest = (uint16_t *) _dest; \
459 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
460 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
461 alpSrc, dest, dstW, y, fmt); \
464 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
465 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
466 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
467 int yalpha, int uvalpha, int y) \
469 const int32_t **buf = (const int32_t **) _buf, \
470 **ubuf = (const int32_t **) _ubuf, \
471 **vbuf = (const int32_t **) _vbuf, \
472 **abuf = (const int32_t **) _abuf; \
473 uint16_t *dest = (uint16_t *) _dest; \
474 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
475 dest, dstW, yalpha, uvalpha, y, fmt); \
478 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
479 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
480 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
481 int uvalpha, int y) \
483 const int32_t *buf0 = (const int32_t *) _buf0, \
484 **ubuf = (const int32_t **) _ubuf, \
485 **vbuf = (const int32_t **) _vbuf, \
486 *abuf0 = (const int32_t *) _abuf0; \
487 uint16_t *dest = (uint16_t *) _dest; \
488 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
489 dstW, uvalpha, y, fmt); \
492 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
493 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/*
 * output_pixel(pos, acc): per-byte store helper for the 1 bpp writers.
 * The visible part of the macro tests target against PIX_FMT_MONOBLACK.
 * NOTE(review): the stores performed in each branch are not shown with
 * these lines -- confirm.
 *
 * yuv2mono_X_c_template: vertically filtered 1 bpp output.
 * Pixels are processed two at a time. Y1 and Y2 accumulate
 * lumSrc[j][x] * lumFilter[j] over lumFilterSize taps; if bit 8 is set in
 * either value both are clipped with av_clip_uint8. Each pixel then shifts
 * acc left by one and adds g[Y + d128[x & 7]], where d128 is the row of
 * dither_8x8_220 selected by y & 7 and g is
 * c->table_gU[128] + c->table_gV[128]. acc is written with
 * output_pixel(*dest++, acc).
 * NOTE(review): the initial values of acc/Y1/Y2, any scaling after the
 * accumulation, and the condition under which acc is flushed are not shown
 * with these lines -- confirm.
 */
495 #define output_pixel(pos, acc) \
496 if (target == PIX_FMT_MONOBLACK) { \
502 static av_always_inline void
503 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
504 const int16_t **lumSrc, int lumFilterSize,
505 const int16_t *chrFilter, const int16_t **chrUSrc,
506 const int16_t **chrVSrc, int chrFilterSize,
507 const int16_t **alpSrc, uint8_t *dest, int dstW,
508 int y, enum PixelFormat target)
510 const uint8_t * const d128=dither_8x8_220[y&7];
511 uint8_t *g = c->table_gU[128] + c->table_gV[128];
515 for (i = 0; i < dstW - 1; i += 2) {
520 for (j = 0; j < lumFilterSize; j++) {
521 Y1 += lumSrc[j][i] * lumFilter[j];
522 Y2 += lumSrc[j][i+1] * lumFilter[j];
526 if ((Y1 | Y2) & 0x100) {
527 Y1 = av_clip_uint8(Y1);
528 Y2 = av_clip_uint8(Y2);
530 acc += acc + g[Y1 + d128[(i + 0) & 7]];
531 acc += acc + g[Y2 + d128[(i + 1) & 7]];
533 output_pixel(*dest++, acc);
538 static av_always_inline void
539 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
540 const int16_t *ubuf[2], const int16_t *vbuf[2],
541 const int16_t *abuf[2], uint8_t *dest, int dstW,
542 int yalpha, int uvalpha, int y,
543 enum PixelFormat target)
545 const int16_t *buf0 = buf[0], *buf1 = buf[1];
546 const uint8_t * const d128 = dither_8x8_220[y & 7];
547 uint8_t *g = c->table_gU[128] + c->table_gV[128];
548 int yalpha1 = 4095 - yalpha;
551 for (i = 0; i < dstW - 7; i += 8) {
552 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
553 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
554 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
555 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
556 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
557 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
558 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
559 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
560 output_pixel(*dest++, acc);
564 static av_always_inline void
565 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
566 const int16_t *ubuf[2], const int16_t *vbuf[2],
567 const int16_t *abuf0, uint8_t *dest, int dstW,
568 int uvalpha, int y, enum PixelFormat target)
570 const uint8_t * const d128 = dither_8x8_220[y & 7];
571 uint8_t *g = c->table_gU[128] + c->table_gV[128];
574 for (i = 0; i < dstW - 7; i += 8) {
575 int acc = g[(buf0[i ] >> 7) + d128[0]];
576 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
577 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
578 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
579 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
580 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
581 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
582 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
583 output_pixel(*dest++, acc);
589 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
590 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
591 const int16_t **lumSrc, int lumFilterSize, \
592 const int16_t *chrFilter, const int16_t **chrUSrc, \
593 const int16_t **chrVSrc, int chrFilterSize, \
594 const int16_t **alpSrc, uint8_t *dest, int dstW, \
597 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
598 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
599 alpSrc, dest, dstW, y, fmt); \
602 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
603 const int16_t *ubuf[2], const int16_t *vbuf[2], \
604 const int16_t *abuf[2], uint8_t *dest, int dstW, \
605 int yalpha, int uvalpha, int y) \
607 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
608 dest, dstW, yalpha, uvalpha, y, fmt); \
611 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
612 const int16_t *ubuf[2], const int16_t *vbuf[2], \
613 const int16_t *abuf0, uint8_t *dest, int dstW, \
614 int uvalpha, int y) \
616 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
617 abuf0, dest, dstW, uvalpha, \
621 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
622 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/*
 * Store one packed 4:2:2 pixel pair (four bytes) at dest[pos].
 *
 * PIX_FMT_YUYV422 is laid out as Y1 U Y2 V; any other target (UYVY422 in
 * this file) is laid out as U Y1 V Y2. `dest` and `target` are taken from
 * the enclosing function.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else { \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)
/*
 * yuv2422_X_c_template: vertically filtered packed 4:2:2 output.
 * For each of the dstW >> 1 pixel pairs, Y1 and Y2 accumulate
 * lumSrc[j][x] * lumFilter[j] over lumFilterSize taps, and U and V
 * accumulate chrUSrc[j][i] * chrFilter[j] / chrVSrc[j][i] * chrFilter[j]
 * over chrFilterSize taps. If bit 8 is set in any of the four values, all
 * four are clipped with av_clip_uint8. The pair is written at byte offset
 * 4 * i through output_pixels.
 * alpSrc and y are not referenced by the lines shown.
 * NOTE(review): the initial values of Y1/Y2/U/V and any scaling applied
 * between the accumulation and the clip are not shown with these lines --
 * confirm before relying on exact output values.
 */
637 static av_always_inline void
638 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
639 const int16_t **lumSrc, int lumFilterSize,
640 const int16_t *chrFilter, const int16_t **chrUSrc,
641 const int16_t **chrVSrc, int chrFilterSize,
642 const int16_t **alpSrc, uint8_t *dest, int dstW,
643 int y, enum PixelFormat target)
647 for (i = 0; i < (dstW >> 1); i++) {
654 for (j = 0; j < lumFilterSize; j++) {
655 Y1 += lumSrc[j][i * 2] * lumFilter[j];
656 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
658 for (j = 0; j < chrFilterSize; j++) {
659 U += chrUSrc[j][i] * chrFilter[j];
660 V += chrVSrc[j][i] * chrFilter[j];
666 if ((Y1 | Y2 | U | V) & 0x100) {
667 Y1 = av_clip_uint8(Y1);
668 Y2 = av_clip_uint8(Y2);
669 U = av_clip_uint8(U);
670 V = av_clip_uint8(V);
672 output_pixels(4*i, Y1, U, Y2, V);
676 static av_always_inline void
677 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
678 const int16_t *ubuf[2], const int16_t *vbuf[2],
679 const int16_t *abuf[2], uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
683 const int16_t *buf0 = buf[0], *buf1 = buf[1],
684 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
685 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
686 int yalpha1 = 4095 - yalpha;
687 int uvalpha1 = 4095 - uvalpha;
690 for (i = 0; i < (dstW >> 1); i++) {
691 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
692 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
693 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
694 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
696 output_pixels(i * 4, Y1, U, Y2, V);
700 static av_always_inline void
701 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
702 const int16_t *ubuf[2], const int16_t *vbuf[2],
703 const int16_t *abuf0, uint8_t *dest, int dstW,
704 int uvalpha, int y, enum PixelFormat target)
706 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
707 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
710 if (uvalpha < 2048) {
711 for (i = 0; i < (dstW >> 1); i++) {
712 int Y1 = buf0[i * 2] >> 7;
713 int Y2 = buf0[i * 2 + 1] >> 7;
714 int U = ubuf1[i] >> 7;
715 int V = vbuf1[i] >> 7;
717 output_pixels(i * 4, Y1, U, Y2, V);
720 for (i = 0; i < (dstW >> 1); i++) {
721 int Y1 = buf0[i * 2] >> 7;
722 int Y2 = buf0[i * 2 + 1] >> 7;
723 int U = (ubuf0[i] + ubuf1[i]) >> 8;
724 int V = (vbuf0[i] + vbuf1[i]) >> 8;
726 output_pixels(i * 4, Y1, U, Y2, V);
733 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
734 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/*
 * R_B / B_R: pick R or B depending on whether target is one of the RGB48
 * formats, so the same code can emit either component order.
 *
 * output_pixel(pos, val): per-sample store helper. The visible part of the
 * macro tests isBE(target).
 * NOTE(review): the stores performed in each branch are not shown with
 * these lines -- confirm.
 *
 * yuv2rgb48_X_c_template: vertically filtered 48-bit RGB/BGR output.
 * For each of the dstW >> 1 pixel pairs, luma and chroma are accumulated
 * with lumFilter / chrFilter. Luma then has c->yuv2rgb_y_offset subtracted
 * and is multiplied by c->yuv2rgb_y_coeff. The chroma contributions are
 * R = V * v2r, G = V * v2g + U * u2g, B = U * u2b. Each of the six output
 * samples is the luma plus one chroma term, clipped to 30 unsigned bits and
 * shifted right by 14.
 * alpSrc and y are not referenced by the lines shown.
 * NOTE(review): the initial values of Y1/Y2/V, the scaling between the
 * accumulation and the offset subtraction, any rounding term, and the
 * advance of dest between pairs are not shown with these lines -- confirm.
 * NOTE(review): "-128 << 23" left-shifts a negative value, which is
 * undefined behaviour in ISO C; "-(128 << 23)" expresses the same constant
 * safely.
 */
736 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
737 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
738 #define output_pixel(pos, val) \
739 if (isBE(target)) { \
745 static av_always_inline void
746 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
747 const int32_t **lumSrc, int lumFilterSize,
748 const int16_t *chrFilter, const int32_t **chrUSrc,
749 const int32_t **chrVSrc, int chrFilterSize,
750 const int32_t **alpSrc, uint16_t *dest, int dstW,
751 int y, enum PixelFormat target)
755 for (i = 0; i < (dstW >> 1); i++) {
759 int U = -128 << 23; // 19
763 for (j = 0; j < lumFilterSize; j++) {
764 Y1 += lumSrc[j][i * 2] * lumFilter[j];
765 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
767 for (j = 0; j < chrFilterSize; j++) {
768 U += chrUSrc[j][i] * chrFilter[j];
769 V += chrVSrc[j][i] * chrFilter[j];
772 // 8bit: 12+15=27; 16-bit: 12+19=31
778 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
779 Y1 -= c->yuv2rgb_y_offset;
780 Y2 -= c->yuv2rgb_y_offset;
781 Y1 *= c->yuv2rgb_y_coeff;
782 Y2 *= c->yuv2rgb_y_coeff;
785 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
787 R = V * c->yuv2rgb_v2r_coeff;
788 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
789 B = U * c->yuv2rgb_u2b_coeff;
791 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
792 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
793 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
794 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
795 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
796 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
797 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * yuv2rgb48_2_c_template: 48-bit RGB/BGR output blending two luma lines and
 * two chroma lines.
 * Luma is (buf[0][x] * (4095 - yalpha) + buf[1][x] * yalpha) >> 14; chroma
 * is blended the same way with uvalpha after adding (-128 << 23). Luma then
 * has c->yuv2rgb_y_offset subtracted and is multiplied by
 * c->yuv2rgb_y_coeff. The chroma contributions are R = V * v2r,
 * G = V * v2g + U * u2g, B = U * u2b. Each of the six output samples is the
 * luma plus one chroma term, clipped to 30 unsigned bits and shifted right
 * by 14; R_B / B_R select the component order from target.
 * abuf and y are not referenced by the lines shown.
 * NOTE(review): statements between the luma scaling and the chroma
 * products, and the advance of dest between pairs, are not shown with these
 * lines -- confirm.
 * NOTE(review): "-128 << 23" left-shifts a negative value, which is
 * undefined behaviour in ISO C; "-(128 << 23)" expresses the same constant
 * safely.
 */
802 static av_always_inline void
803 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
804 const int32_t *ubuf[2], const int32_t *vbuf[2],
805 const int32_t *abuf[2], uint16_t *dest, int dstW,
806 int yalpha, int uvalpha, int y,
807 enum PixelFormat target)
809 const int32_t *buf0 = buf[0], *buf1 = buf[1],
810 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
811 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 int yalpha1 = 4095 - yalpha;
813 int uvalpha1 = 4095 - uvalpha;
816 for (i = 0; i < (dstW >> 1); i++) {
817 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
818 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
819 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
820 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
823 Y1 -= c->yuv2rgb_y_offset;
824 Y2 -= c->yuv2rgb_y_offset;
825 Y1 *= c->yuv2rgb_y_coeff;
826 Y2 *= c->yuv2rgb_y_coeff;
830 R = V * c->yuv2rgb_v2r_coeff;
831 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
832 B = U * c->yuv2rgb_u2b_coeff;
834 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
835 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
836 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
837 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
838 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
839 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * yuv2rgb48_1_c_template: 48-bit RGB/BGR output from a single luma line.
 * Luma is buf0[x] >> 2. When uvalpha < 2048 chroma comes from the first
 * chroma line alone: (line0 + (-128 << 11)) >> 2. The second loop instead
 * sums both chroma lines: (line0 + line1 + (-128 << 11)) >> 3. In both
 * loops luma has c->yuv2rgb_y_offset subtracted and is multiplied by
 * c->yuv2rgb_y_coeff; the chroma contributions are R = V * v2r,
 * G = V * v2g + U * u2g, B = U * u2b; and each of the six output samples is
 * the luma plus one chroma term, clipped to 30 unsigned bits and shifted
 * right by 14. R_B / B_R select the component order from target.
 * abuf0 and y are not referenced by the lines shown.
 * NOTE(review): statements between the luma scaling and the chroma
 * products, the advance of dest between pairs, and the branch keyword
 * introducing the second loop are not shown with these lines -- confirm.
 * NOTE(review): the averaging loop adds (-128 << 11) once to the sum of two
 * chroma lines before ">> 3", whereas the single-line loop adds it once to
 * one line before ">> 2"; check whether the bias should be doubled there.
 * NOTE(review): "-128 << 11" left-shifts a negative value, which is
 * undefined behaviour in ISO C; "-(128 << 11)" expresses the same constant
 * safely.
 */
844 static av_always_inline void
845 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
846 const int32_t *ubuf[2], const int32_t *vbuf[2],
847 const int32_t *abuf0, uint16_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
850 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = (buf0[i * 2] ) >> 2;
857 int Y2 = (buf0[i * 2 + 1]) >> 2;
858 int U = (ubuf0[i] + (-128 << 11)) >> 2;
859 int V = (vbuf0[i] + (-128 << 11)) >> 2;
862 Y1 -= c->yuv2rgb_y_offset;
863 Y2 -= c->yuv2rgb_y_offset;
864 Y1 *= c->yuv2rgb_y_coeff;
865 Y2 *= c->yuv2rgb_y_coeff;
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
873 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
874 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
875 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
876 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
877 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
878 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
882 for (i = 0; i < (dstW >> 1); i++) {
883 int Y1 = (buf0[i * 2] ) >> 2;
884 int Y2 = (buf0[i * 2 + 1]) >> 2;
885 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
886 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
889 Y1 -= c->yuv2rgb_y_offset;
890 Y2 -= c->yuv2rgb_y_offset;
891 Y1 *= c->yuv2rgb_y_coeff;
892 Y2 *= c->yuv2rgb_y_coeff;
896 R = V * c->yuv2rgb_v2r_coeff;
897 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
898 B = U * c->yuv2rgb_u2b_coeff;
900 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
901 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
902 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
903 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
904 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
905 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
915 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
916 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
917 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
918 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/*
 * yuv2rgb_write: store one pair of RGB pixels using per-component lookup
 * tables.
 *
 * _r, _g and _b point to tables indexed by luma (Y1 for the first pixel,
 * Y2 for the second); the pixel value is the sum of the three table
 * entries. The element type of the tables and of the destination depends
 * on target:
 *  - ARGB/RGBA/ABGR/BGRA: 32-bit entries. Alpha (A1/A2) is added shifted
 *    left by sh, which is 0 for RGB32_1/BGR32_1 and 24 otherwise. Two
 *    variants of this store are present below.
 *    NOTE(review): the conditionals selecting between the two variants,
 *    and between the alpha / no-alpha stores of the second one, are not
 *    shown with these lines -- confirm.
 *  - RGB24/BGR24: 8-bit entries, three bytes per pixel; r_b / b_r swap the
 *    first and third byte for BGR24.
 *  - RGB565/BGR565/RGB555/BGR555/RGB444/BGR444: 16-bit entries. A
 *    per-component dither offset is added to the table index, taken from
 *    dither_2x2_8 / dither_2x2_4 (565), dither_2x2_8 (555) or
 *    dither_4x4_16 (444) according to the line number y.
 *  - otherwise (8/4-bit): 8-bit entries. Dither offsets come from
 *    dither_8x8_32 / dither_8x8_73 for RGB8/BGR8, and from
 *    dither_8x8_220 / dither_8x8_73 for the remaining formats. For
 *    RGB4/BGR4 both pixels are packed into dest[i], the second pixel in the
 *    upper four bits; otherwise each pixel takes one byte.
 *
 * i is the index of the pixel pair within the line. U and V are accepted
 * but not referenced by the lines shown.
 */
920 static av_always_inline void
921 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
922 int U, int V, int A1, int A2,
923 const void *_r, const void *_g, const void *_b, int y,
924 enum PixelFormat target, int hasAlpha)
926 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
927 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
928 uint32_t *dest = (uint32_t *) _dest;
929 const uint32_t *r = (const uint32_t *) _r;
930 const uint32_t *g = (const uint32_t *) _g;
931 const uint32_t *b = (const uint32_t *) _b;
934 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
936 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
937 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
940 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
942 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
943 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
945 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
946 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
949 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
950 uint8_t *dest = (uint8_t *) _dest;
951 const uint8_t *r = (const uint8_t *) _r;
952 const uint8_t *g = (const uint8_t *) _g;
953 const uint8_t *b = (const uint8_t *) _b;
955 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
956 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
957 dest[i * 6 + 0] = r_b[Y1];
958 dest[i * 6 + 1] = g[Y1];
959 dest[i * 6 + 2] = b_r[Y1];
960 dest[i * 6 + 3] = r_b[Y2];
961 dest[i * 6 + 4] = g[Y2];
962 dest[i * 6 + 5] = b_r[Y2];
965 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
966 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
967 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
968 uint16_t *dest = (uint16_t *) _dest;
969 const uint16_t *r = (const uint16_t *) _r;
970 const uint16_t *g = (const uint16_t *) _g;
971 const uint16_t *b = (const uint16_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
974 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
975 dr1 = dither_2x2_8[ y & 1 ][0];
976 dg1 = dither_2x2_4[ y & 1 ][0];
977 db1 = dither_2x2_8[(y & 1) ^ 1][0];
978 dr2 = dither_2x2_8[ y & 1 ][1];
979 dg2 = dither_2x2_4[ y & 1 ][1];
980 db2 = dither_2x2_8[(y & 1) ^ 1][1];
981 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
982 dr1 = dither_2x2_8[ y & 1 ][0];
983 dg1 = dither_2x2_8[ y & 1 ][1];
984 db1 = dither_2x2_8[(y & 1) ^ 1][0];
985 dr2 = dither_2x2_8[ y & 1 ][1];
986 dg2 = dither_2x2_8[ y & 1 ][0];
987 db2 = dither_2x2_8[(y & 1) ^ 1][1];
989 dr1 = dither_4x4_16[ y & 3 ][0];
990 dg1 = dither_4x4_16[ y & 3 ][1];
991 db1 = dither_4x4_16[(y & 3) ^ 3][0];
992 dr2 = dither_4x4_16[ y & 3 ][1];
993 dg2 = dither_4x4_16[ y & 3 ][0];
994 db2 = dither_4x4_16[(y & 3) ^ 3][1];
997 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
998 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
999 } else /* 8/4-bit */ {
1000 uint8_t *dest = (uint8_t *) _dest;
1001 const uint8_t *r = (const uint8_t *) _r;
1002 const uint8_t *g = (const uint8_t *) _g;
1003 const uint8_t *b = (const uint8_t *) _b;
1004 int dr1, dg1, db1, dr2, dg2, db2;
1006 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1007 const uint8_t * const d64 = dither_8x8_73[y & 7];
1008 const uint8_t * const d32 = dither_8x8_32[y & 7];
1009 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1010 db1 = d64[(i * 2 + 0) & 7];
1011 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1012 db2 = d64[(i * 2 + 1) & 7];
1014 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1015 const uint8_t * const d128 = dither_8x8_220[y & 7];
1016 dr1 = db1 = d128[(i * 2 + 0) & 7];
1017 dg1 = d64[(i * 2 + 0) & 7];
1018 dr2 = db2 = d128[(i * 2 + 1) & 7];
1019 dg2 = d64[(i * 2 + 1) & 7];
1022 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1023 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1024 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1026 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1027 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * yuv2rgb_X_c_template: vertically filtered table-based RGB output.
 * For each of the dstW >> 1 pixel pairs, Y1/Y2 accumulate
 * lumSrc[j][x] * lumFilter[j] and U/V accumulate chrUSrc/chrVSrc[j][i] *
 * chrFilter[j]. If bit 8 is set in any of the four values, all four are
 * clipped with av_clip_uint8. A1/A2 accumulate alpSrc[j][x] * lumFilter[j]
 * and are clipped the same way when bit 8 is set. The green table is
 * c->table_gU[U] + c->table_gV[V]. The pair is stored through
 * yuv2rgb_write, with alpha forced to 0 when hasAlpha is false.
 * NOTE(review): the initial values of the accumulators, the scaling applied
 * before the clips, the condition guarding the alpha accumulation, and the
 * assignments of r and b are not shown with these lines -- confirm.
 */
1032 static av_always_inline void
1033 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1034 const int16_t **lumSrc, int lumFilterSize,
1035 const int16_t *chrFilter, const int16_t **chrUSrc,
1036 const int16_t **chrVSrc, int chrFilterSize,
1037 const int16_t **alpSrc, uint8_t *dest, int dstW,
1038 int y, enum PixelFormat target, int hasAlpha)
1042 for (i = 0; i < (dstW >> 1); i++) {
1048 int av_unused A1, A2;
1049 const void *r, *g, *b;
1051 for (j = 0; j < lumFilterSize; j++) {
1052 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1053 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1055 for (j = 0; j < chrFilterSize; j++) {
1056 U += chrUSrc[j][i] * chrFilter[j];
1057 V += chrVSrc[j][i] * chrFilter[j];
1063 if ((Y1 | Y2 | U | V) & 0x100) {
1064 Y1 = av_clip_uint8(Y1);
1065 Y2 = av_clip_uint8(Y2);
1066 U = av_clip_uint8(U);
1067 V = av_clip_uint8(V);
1072 for (j = 0; j < lumFilterSize; j++) {
1073 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1074 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1078 if ((A1 | A2) & 0x100) {
1079 A1 = av_clip_uint8(A1);
1080 A2 = av_clip_uint8(A2);
1084 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1086 g = (c->table_gU[U] + c->table_gV[V]);
1089 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1090 r, g, b, y, target, hasAlpha);
1094 static av_always_inline void
1095 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1096 const int16_t *ubuf[2], const int16_t *vbuf[2],
1097 const int16_t *abuf[2], uint8_t *dest, int dstW,
1098 int yalpha, int uvalpha, int y,
1099 enum PixelFormat target, int hasAlpha)
1101 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1102 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1103 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1104 *abuf0 = hasAlpha ? abuf[0] : NULL,
1105 *abuf1 = hasAlpha ? abuf[1] : NULL;
1106 int yalpha1 = 4095 - yalpha;
1107 int uvalpha1 = 4095 - uvalpha;
1110 for (i = 0; i < (dstW >> 1); i++) {
1111 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1112 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1113 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1114 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1116 const void *r = c->table_rV[V],
1117 *g = (c->table_gU[U] + c->table_gV[V]),
1118 *b = c->table_bU[U];
1121 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1122 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1125 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1126 r, g, b, y, target, hasAlpha);
1130 static av_always_inline void
1131 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1132 const int16_t *ubuf[2], const int16_t *vbuf[2],
1133 const int16_t *abuf0, uint8_t *dest, int dstW,
1134 int uvalpha, int y, enum PixelFormat target,
1137 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1141 if (uvalpha < 2048) {
1142 for (i = 0; i < (dstW >> 1); i++) {
1143 int Y1 = buf0[i * 2] >> 7;
1144 int Y2 = buf0[i * 2 + 1] >> 7;
1145 int U = ubuf1[i] >> 7;
1146 int V = vbuf1[i] >> 7;
1148 const void *r = c->table_rV[V],
1149 *g = (c->table_gU[U] + c->table_gV[V]),
1150 *b = c->table_bU[U];
1153 A1 = abuf0[i * 2 ] >> 7;
1154 A2 = abuf0[i * 2 + 1] >> 7;
1157 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1158 r, g, b, y, target, hasAlpha);
1161 for (i = 0; i < (dstW >> 1); i++) {
1162 int Y1 = buf0[i * 2] >> 7;
1163 int Y2 = buf0[i * 2 + 1] >> 7;
1164 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1165 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1167 const void *r = c->table_rV[V],
1168 *g = (c->table_gU[U] + c->table_gV[V]),
1169 *b = c->table_bU[U];
1172 A1 = abuf0[i * 2 ] >> 7;
1173 A2 = abuf0[i * 2 + 1] >> 7;
1176 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1177 r, g, b, y, target, hasAlpha);
1182 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1183 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1184 const int16_t **lumSrc, int lumFilterSize, \
1185 const int16_t *chrFilter, const int16_t **chrUSrc, \
1186 const int16_t **chrVSrc, int chrFilterSize, \
1187 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1190 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1191 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1192 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1194 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1195 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1196 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1197 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1198 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1199 int yalpha, int uvalpha, int y) \
1201 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1202 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1205 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1206 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1207 const int16_t *abuf0, uint8_t *dest, int dstW, \
1208 int uvalpha, int y) \
1210 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1211 dstW, uvalpha, y, fmt, hasAlpha); \
1215 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1216 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1218 #if CONFIG_SWSCALE_ALPHA
1219 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1220 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1222 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1223 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1225 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1226 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1227 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1228 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1229 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1230 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1231 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1232 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
1234 static av_always_inline void
1235 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1236 const int16_t **lumSrc, int lumFilterSize,
1237 const int16_t *chrFilter, const int16_t **chrUSrc,
1238 const int16_t **chrVSrc, int chrFilterSize,
1239 const int16_t **alpSrc, uint8_t *dest,
1240 int dstW, int y, enum PixelFormat target, int hasAlpha)
1243 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1245 for (i = 0; i < dstW; i++) {
1253 for (j = 0; j < lumFilterSize; j++) {
1254 Y += lumSrc[j][i] * lumFilter[j];
1256 for (j = 0; j < chrFilterSize; j++) {
1257 U += chrUSrc[j][i] * chrFilter[j];
1258 V += chrVSrc[j][i] * chrFilter[j];
1265 for (j = 0; j < lumFilterSize; j++) {
1266 A += alpSrc[j][i] * lumFilter[j];
1270 A = av_clip_uint8(A);
1272 Y -= c->yuv2rgb_y_offset;
1273 Y *= c->yuv2rgb_y_coeff;
1275 R = Y + V*c->yuv2rgb_v2r_coeff;
1276 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1277 B = Y + U*c->yuv2rgb_u2b_coeff;
1278 if ((R | G | B) & 0xC0000000) {
1279 R = av_clip_uintp2(R, 30);
1280 G = av_clip_uintp2(G, 30);
1281 B = av_clip_uintp2(B, 30);
1286 dest[0] = hasAlpha ? A : 255;
1300 dest[3] = hasAlpha ? A : 255;
1303 dest[0] = hasAlpha ? A : 255;
1318 dest[3] = hasAlpha ? A : 255;
1326 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1327 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1328 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1329 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1331 #if CONFIG_SWSCALE_ALPHA
1332 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1333 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1334 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1335 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1337 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1338 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1339 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1340 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1342 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1343 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1345 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1346 int width, int height,
1350 uint8_t *ptr = plane + stride*y;
1351 for (i=0; i<height; i++) {
1352 memset(ptr, val, width);
1357 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1359 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1360 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1362 static av_always_inline void
1363 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1364 enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 unsigned int r_b = input_pixel(&src[i*3+0]);
1369 unsigned int g = input_pixel(&src[i*3+1]);
1370 unsigned int b_r = input_pixel(&src[i*3+2]);
1372 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1376 static av_always_inline void
1377 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1378 const uint16_t *src1, const uint16_t *src2,
1379 int width, enum PixelFormat origin)
1383 for (i = 0; i < width; i++) {
1384 int r_b = input_pixel(&src1[i*3+0]);
1385 int g = input_pixel(&src1[i*3+1]);
1386 int b_r = input_pixel(&src1[i*3+2]);
1388 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1389 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1393 static av_always_inline void
1394 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1395 const uint16_t *src1, const uint16_t *src2,
1396 int width, enum PixelFormat origin)
1400 for (i = 0; i < width; i++) {
1401 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1402 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1403 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1405 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1406 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1414 #define rgb48funcs(pattern, BE_LE, origin) \
1415 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1416 int width, uint32_t *unused) \
1418 const uint16_t *src = (const uint16_t *) _src; \
1419 uint16_t *dst = (uint16_t *) _dst; \
1420 rgb48ToY_c_template(dst, src, width, origin); \
1423 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1424 const uint8_t *_src1, const uint8_t *_src2, \
1425 int width, uint32_t *unused) \
1427 const uint16_t *src1 = (const uint16_t *) _src1, \
1428 *src2 = (const uint16_t *) _src2; \
1429 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1430 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1433 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1434 const uint8_t *_src1, const uint8_t *_src2, \
1435 int width, uint32_t *unused) \
1437 const uint16_t *src1 = (const uint16_t *) _src1, \
1438 *src2 = (const uint16_t *) _src2; \
1439 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1440 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1443 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1444 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1445 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1446 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
1448 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1449 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1450 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1452 static av_always_inline void
1453 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1454 int width, enum PixelFormat origin,
1455 int shr, int shg, int shb, int shp,
1456 int maskr, int maskg, int maskb,
1457 int rsh, int gsh, int bsh, int S)
1459 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1460 rnd = 33 << (S - 1);
1463 for (i = 0; i < width; i++) {
1464 int px = input_pixel(i) >> shp;
1465 int b = (px & maskb) >> shb;
1466 int g = (px & maskg) >> shg;
1467 int r = (px & maskr) >> shr;
1469 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1473 static av_always_inline void
1474 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1475 const uint8_t *src, int width,
1476 enum PixelFormat origin,
1477 int shr, int shg, int shb, int shp,
1478 int maskr, int maskg, int maskb,
1479 int rsh, int gsh, int bsh, int S)
1481 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1482 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1483 rnd = 257 << (S - 1);
1486 for (i = 0; i < width; i++) {
1487 int px = input_pixel(i) >> shp;
1488 int b = (px & maskb) >> shb;
1489 int g = (px & maskg) >> shg;
1490 int r = (px & maskr) >> shr;
1492 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1493 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/*
 * Chroma reader for packed 16/32-bit RGB with horizontal chroma subsampling:
 * each output sample i is computed from the two input pixels 2*i and 2*i+1.
 *
 * dstU/dstV  receive one 8-bit U and one 8-bit V sample per pixel pair
 * src        packed input, fetched through the input_pixel() macro
 * width      number of output samples (so 2*width pixels are read)
 * origin     source pixel format; selects how input_pixel() loads a pixel
 * shr/shg/shb  right shifts applied to the masked r/g/b fields
 * shp        right shift applied to the whole pixel before masking
 * maskr/maskg/maskb  bit masks selecting the r/g/b fields
 * rsh/gsh/bsh  left shifts applied to the RGB->YUV coefficients
 * S          base right shift of the final sum (S + 1 is used here)
 */
1497 static av_always_inline void
1498 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1499 const uint8_t *src, int width,
1500 enum PixelFormat origin,
1501 int shr, int shg, int shb, int shp,
1502 int maskr, int maskg, int maskb,
1503 int rsh, int gsh, int bsh, int S)
/* Coefficients pre-shifted per channel; maskgx selects every bit that is
 * neither in the r mask nor in the b mask. The rounding term is 257 << S and
 * the final shift is S + 1 (one more than S), consistent with r/g/b below
 * being sums over two pixels. */
1505 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1506 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1507 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* Widen each field mask by one bit so that it also covers the carry bit
 * produced by adding two fields together. */
1510 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1511 for (i = 0; i < width; i++) {
1512 int px0 = input_pixel(2 * i + 0) >> shp;
1513 int px1 = input_pixel(2 * i + 1) >> shp;
/* g: sum over both pixels of the bits outside the r and b fields;
 * rb: what remains of the two-pixel sum, i.e. the r and b fields. */
1514 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1515 int rb = px0 + px1 - g;
1517 b = (rb & maskb) >> shb;
/* NOTE(review): the statements controlled by this condition (and any else
 * branch) are not visible in this listing; only the masked-and-shifted form
 * of g below can be confirmed from here. */
1518 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1519 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1522 g = (g & maskg) >> shg;
1524 r = (rb & maskr) >> shr;
/* Weighted sum of the two-pixel channel sums, rounded and scaled down. */
1526 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1527 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
1533 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1534 maskg, maskb, rsh, gsh, bsh, S) \
1535 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1536 int width, uint32_t *unused) \
1538 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1539 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1542 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1543 const uint8_t *src, const uint8_t *dummy, \
1544 int width, uint32_t *unused) \
1546 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1547 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1550 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1551 const uint8_t *src, const uint8_t *dummy, \
1552 int width, uint32_t *unused) \
1554 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1555 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1558 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1559 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1560 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1561 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1562 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1563 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1564 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1565 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1566 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1567 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1568 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1569 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1571 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1574 for (i=0; i<width; i++) {
1579 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1582 for (i=0; i<width; i++) {
1587 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1590 for (i=0; i<width; i++) {
1593 dst[i]= pal[d] & 0xFF;
1597 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1598 const uint8_t *src1, const uint8_t *src2,
1599 int width, uint32_t *pal)
1602 assert(src1 == src2);
1603 for (i=0; i<width; i++) {
1604 int p= pal[src1[i]];
1611 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1612 int width, uint32_t *unused)
1615 for (i=0; i<width/8; i++) {
1618 dst[8*i+j]= ((d>>(7-j))&1)*255;
1622 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1623 int width, uint32_t *unused)
1626 for (i=0; i<width/8; i++) {
1629 dst[8*i+j]= ((d>>(7-j))&1)*255;
1633 //FIXME yuy2* can read up to 7 samples too many
1635 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1639 for (i=0; i<width; i++)
/**
 * Extract chroma from packed 4-byte groups.
 *
 * For every output index the input is read in groups of four bytes; byte 1
 * of the group is stored to dstU and byte 3 to dstV.
 *
 * @param dstU   output, width bytes
 * @param dstV   output, width bytes
 * @param src1   packed input, 4 * width bytes are read
 * @param src2   must be the same pointer as src1 (checked with assert)
 * @param width  number of chroma samples to produce
 * @param unused not referenced
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *group = &src1[4 * x];

        dstU[x] = group[1];
        dstV[x] = group[3];
    }
    assert(src1 == src2);
}
/**
 * Byte-swap a line of 16-bit samples.
 *
 * Both buffers are accessed as arrays of uint16_t; each of the width input
 * words is passed through av_bswap16() and stored at the same index.
 *
 * @param _dst   output buffer, written as width uint16_t words
 * @param _src   input buffer, read as width uint16_t words
 * @param width  number of 16-bit words to process
 * @param unused not referenced
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *in  = (const uint16_t *) _src;
    uint16_t       *out = (uint16_t *) _dst;
    int x;

    for (x = 0; x < width; x++)
        out[x] = av_bswap16(in[x]);
}
/**
 * Byte-swap two lines of 16-bit samples.
 *
 * All four buffers are accessed as arrays of uint16_t. Word x of _src1 is
 * byte-swapped into word x of _dstU, and word x of _src2 into word x of
 * _dstV.
 *
 * @param _dstU  first output buffer (width uint16_t words)
 * @param _dstV  second output buffer (width uint16_t words)
 * @param _src1  input for _dstU
 * @param _src2  input for _dstV
 * @param width  number of 16-bit words per buffer
 * @param unused not referenced
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU  = (const uint16_t *) _src1;
    const uint16_t *inV  = (const uint16_t *) _src2;
    uint16_t       *outU = (uint16_t *) _dstU;
    uint16_t       *outV = (uint16_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        outU[x] = av_bswap16(inU[x]);
        outV[x] = av_bswap16(inV[x]);
    }
}
1677 /* This is almost identical to the previous, and exists only because
1678 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
1679 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1683 for (i=0; i<width; i++)
/**
 * Extract chroma from packed 4-byte groups.
 *
 * For every output index the input is read in groups of four bytes; byte 0
 * of the group is stored to dstU and byte 2 to dstV.
 *
 * @param dstU   output, width bytes
 * @param dstV   output, width bytes
 * @param src1   packed input, 4 * width bytes are read
 * @param src2   must be the same pointer as src1 (checked with assert)
 * @param width  number of chroma samples to produce
 * @param unused not referenced
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint8_t *group = &src1[4 * x];

        dstU[x] = group[0];
        dstV[x] = group[2];
    }
    assert(src1 == src2);
}
1698 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1699 const uint8_t *src, int width)
1702 for (i = 0; i < width; i++) {
1703 dst1[i] = src[2*i+0];
1704 dst2[i] = src[2*i+1];
/**
 * Chroma reader that de-interleaves src1 via nvXXtoUV_c(): even bytes are
 * written to dstU, odd bytes to dstV.
 *
 * @param dstU   output for the first byte of every pair
 * @param dstV   output for the second byte of every pair
 * @param src1   interleaved input
 * @param src2   not referenced
 * @param width  number of pairs to split
 * @param unused not referenced
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *const first  = dstU;
    uint8_t *const second = dstV;

    nvXXtoUV_c(first, second, src1, width);
}
/**
 * Chroma reader that de-interleaves src1 via nvXXtoUV_c() with the two
 * destinations swapped: even bytes are written to dstV, odd bytes to dstU.
 *
 * @param dstU   output for the second byte of every pair
 * @param dstV   output for the first byte of every pair
 * @param src1   interleaved input
 * @param src2   not referenced
 * @param width  number of pairs to split
 * @param unused not referenced
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *const first  = dstV;
    uint8_t *const second = dstU;

    nvXXtoUV_c(first, second, src1, width);
}
1722 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1724 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1725 int width, uint32_t *unused)
1728 for (i=0; i<width; i++) {
1733 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1737 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1738 const uint8_t *src2, int width, uint32_t *unused)
1741 for (i=0; i<width; i++) {
1742 int b= src1[3*i + 0];
1743 int g= src1[3*i + 1];
1744 int r= src1[3*i + 2];
1746 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1747 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1749 assert(src1 == src2);
1752 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1753 const uint8_t *src2, int width, uint32_t *unused)
1756 for (i=0; i<width; i++) {
1757 int b= src1[6*i + 0] + src1[6*i + 3];
1758 int g= src1[6*i + 1] + src1[6*i + 4];
1759 int r= src1[6*i + 2] + src1[6*i + 5];
1761 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1762 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1764 assert(src1 == src2);
1767 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1771 for (i=0; i<width; i++) {
1776 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1780 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1781 const uint8_t *src2, int width, uint32_t *unused)
1785 for (i=0; i<width; i++) {
1786 int r= src1[3*i + 0];
1787 int g= src1[3*i + 1];
1788 int b= src1[3*i + 2];
1790 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1791 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1795 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1796 const uint8_t *src2, int width, uint32_t *unused)
1800 for (i=0; i<width; i++) {
1801 int r= src1[6*i + 0] + src1[6*i + 3];
1802 int g= src1[6*i + 1] + src1[6*i + 4];
1803 int b= src1[6*i + 2] + src1[6*i + 5];
1805 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1806 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1810 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1811 const int16_t *filter,
1812 const int16_t *filterPos, int filterSize)
1815 int32_t *dst = (int32_t *) _dst;
1816 const uint16_t *src = (const uint16_t *) _src;
1817 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1820 for (i = 0; i < dstW; i++) {
1822 int srcPos = filterPos[i];
1825 for (j = 0; j < filterSize; j++) {
1826 val += src[srcPos + j] * filter[filterSize * i + j];
1828 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1829 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1833 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1834 const int16_t *filter,
1835 const int16_t *filterPos, int filterSize)
1838 const uint16_t *src = (const uint16_t *) _src;
1839 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1841 for (i = 0; i < dstW; i++) {
1843 int srcPos = filterPos[i];
1846 for (j = 0; j < filterSize; j++) {
1847 val += src[srcPos + j] * filter[filterSize * i + j];
1849 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1850 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1854 // bilinear / bicubic scaling
1855 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1856 const int16_t *filter, const int16_t *filterPos,
1860 for (i=0; i<dstW; i++) {
1862 int srcPos= filterPos[i];
1864 for (j=0; j<filterSize; j++) {
1865 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1867 //filter += hFilterSize;
1868 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1873 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1874 const int16_t *filter, const int16_t *filterPos,
1878 int32_t *dst = (int32_t *) _dst;
1879 for (i=0; i<dstW; i++) {
1881 int srcPos= filterPos[i];
1883 for (j=0; j<filterSize; j++) {
1884 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1886 //filter += hFilterSize;
1887 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1892 //FIXME all pal and rgb srcFormats could do this conversion as well
1893 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place range conversion of two chroma lines.
 *
 * Each sample is first saturated at 30775 and then mapped through
 * (x * 4663 - 9289992) >> 12. The U sample at an index is rewritten before
 * the V sample at that index is read.
 *
 * @param dstU  first line, width int16_t samples, modified in place
 * @param dstV  second line, width int16_t samples, modified in place
 * @param width number of samples per line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int u = FFMIN(dstU[x], 30775);
        dstU[x] = (u * 4663 - 9289992) >> 12; //-264

        const int v = FFMIN(dstV[x], 30775);
        dstV[x] = (v * 4663 - 9289992) >> 12; //-264
    }
}
/**
 * In-place range conversion of two chroma lines.
 *
 * Each sample is mapped through (x * 1799 + 4081085) >> 11. The U sample at
 * an index is rewritten before the V sample at that index is read.
 *
 * @param dstU  first line, width int16_t samples, modified in place
 * @param dstV  second line, width int16_t samples, modified in place
 * @param width number of samples per line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int u = dstU[x];
        dstU[x] = (u * 1799 + 4081085) >> 11; //1469

        const int v = dstV[x];
        dstV[x] = (v * 1799 + 4081085) >> 11; //1469
    }
}
/**
 * In-place range conversion of one luma line.
 *
 * Each sample is first saturated at 30189 and then mapped through
 * (x * 19077 - 39057361) >> 14.
 *
 * @param dst   line of width int16_t samples, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int y = FFMIN(dst[x], 30189);

        dst[x] = (y * 19077 - 39057361) >> 14;
    }
}
/**
 * In-place range conversion of one luma line.
 *
 * Each sample is mapped through (x * 14071 + 33561947) >> 14.
 *
 * @param dst   line of width int16_t samples, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int y = dst[x];

        dst[x] = (y * 14071 + 33561947) >> 14;
    }
}
/**
 * In-place range conversion of two chroma lines stored as 32-bit samples.
 *
 * The int16_t pointers are reinterpreted as int32_t arrays. Each sample is
 * saturated at 30775 << 4 and then mapped through
 * (x * 4663 - (9289992 << 4)) >> 12. The U sample at an index is rewritten
 * before the V sample at that index is read.
 *
 * The product is formed in 64 bits: at the saturation limit it is
 * 492400 * 4663 = 2296061200, which does not fit in a 32-bit int, so doing
 * it in plain int is signed overflow (undefined behaviour). The final
 * result fits in 32 bits again for samples in the saturated range.
 *
 * @param _dstU first line, width int32_t samples, modified in place
 * @param _dstV second line, width int32_t samples, modified in place
 * @param width number of samples per line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    const int32_t limit = 30775 << 4;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] > limit ? limit : dstU[i];
        dstU[i] = (int32_t) (((int64_t) u * 4663 - (9289992 << 4)) >> 12); //-264

        int32_t v = dstV[i] > limit ? limit : dstV[i];
        dstV[i] = (int32_t) (((int64_t) v * 4663 - (9289992 << 4)) >> 12); //-264
    }
}
/**
 * In-place range conversion of two chroma lines stored as 32-bit samples.
 *
 * The int16_t pointers are reinterpreted as int32_t arrays. Each sample is
 * mapped through (x * 1799 + (4081085 << 4)) >> 11. The U sample at an
 * index is rewritten before the V sample at that index is read.
 *
 * @param _dstU first line, width int32_t samples, modified in place
 * @param _dstV second line, width int32_t samples, modified in place
 * @param width number of samples per line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *lineU = (int32_t *) _dstU;
    int32_t *lineV = (int32_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        const int32_t u = lineU[x];
        lineU[x] = (u * 1799 + (4081085 << 4)) >> 11; //1469

        const int32_t v = lineV[x];
        lineV[x] = (v * 1799 + (4081085 << 4)) >> 11; //1469
    }
}
/**
 * In-place range conversion of one luma line stored as 32-bit samples.
 *
 * The int16_t pointer is reinterpreted as an int32_t array. Each sample is
 * saturated at 30189 << 4 and then mapped through
 * (x * 4769 - (39057361 << 2)) >> 12.
 *
 * The product is formed in 64 bits: at the saturation limit it is
 * 483024 * 4769 = 2303541456, which does not fit in a 32-bit int, so doing
 * it in plain int is signed overflow (undefined behaviour). The final
 * result fits in 32 bits again for samples in the saturated range.
 *
 * @param _dst  line of width int32_t samples, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    const int32_t limit = 30189 << 4;
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        int32_t y = dst[i] > limit ? limit : dst[i];

        dst[i] = (int32_t) (((int64_t) y * 4769 - (39057361 << 2)) >> 12);
    }
}
/**
 * In-place range conversion of one luma line stored as 32-bit samples.
 *
 * The int16_t pointer is reinterpreted as an int32_t array. Each sample is
 * mapped through (x * 14071 + (33561947 << 4)) >> 14.
 *
 * The sum is formed in 64 bits: with the 32-bit offset of 536991152, a
 * plain int computation overflows for every sample above roughly 114454
 * (e.g. 524287 * 14071 = 7377242377), which is signed overflow and yields
 * wrong output. Inputs that did not overflow before give the same result.
 *
 * @param _dst  line of width int32_t samples, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        const int64_t scaled = (int64_t) dst[i] * 14071 + (33561947 << 4);

        dst[i] = (int32_t) (scaled >> 14);
    }
}
1958 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1959 const uint8_t *src, int srcW, int xInc)
1962 unsigned int xpos=0;
1963 for (i=0;i<dstWidth;i++) {
1964 register unsigned int xx=xpos>>16;
1965 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1966 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1971 // *** horizontal scale Y line to temp buffer
/*
 * Horizontally scale one luma (or alpha, when isAlpha is set) line into dst.
 *
 * The input reader is c->alpToYV12 for alpha and c->lumToYV12 otherwise; its
 * output goes to formatConvBuffer, which then replaces src as the scaler
 * input. The scaler is c->hyScale unless c->hyscale_fast is set, in which
 * case c->hyscale_fast is called instead. A range conversion
 * (c->lumConvertRange) is selected for luma only; for alpha the pointer is
 * NULL.
 *
 * NOTE(review): the conditions guarding the toYV12() and convertRange()
 * calls are not visible in this listing; presumably both are only called
 * when the pointer is non-NULL -- confirm against the full file.
 */
1972 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1973 const uint8_t *src, int srcW, int xInc,
1974 const int16_t *hLumFilter,
1975 const int16_t *hLumFilterPos, int hLumFilterSize,
1976 uint8_t *formatConvBuffer,
1977 uint32_t *pal, int isAlpha)
/* Pick the reader and the range converter for the plane being processed. */
1979 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1980 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* Convert the input line into formatConvBuffer and scale from there. */
1983 toYV12(formatConvBuffer, src, srcW, pal);
1984 src= formatConvBuffer;
/* Filter-based scaler vs. the fast path that only needs srcW and xInc. */
1987 if (!c->hyscale_fast) {
1988 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1989 } else { // fast bilinear upscale / crap downscale
1990 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
/* Range conversion is applied in place on the scaled output line. */
1994 convertRange(dst, dstWidth);
1997 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1998 int dstWidth, const uint8_t *src1,
1999 const uint8_t *src2, int srcW, int xInc)
2002 unsigned int xpos=0;
2003 for (i=0;i<dstWidth;i++) {
2004 register unsigned int xx=xpos>>16;
2005 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2006 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2007 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/*
 * Horizontally scale one pair of chroma lines (src1 -> dst1, src2 -> dst2).
 *
 * c->chrToYV12 writes its two outputs to formatConvBuffer and to buf2, a
 * second region inside formatConvBuffer; src1 is then redirected to
 * formatConvBuffer. Scaling uses c->hcScale once per plane unless
 * c->hcscale_fast is set, in which case one c->hcscale_fast call handles
 * both planes. If c->chrConvertRange is set it is applied in place to both
 * output lines.
 *
 * NOTE(review): the condition guarding the chrToYV12() call and the
 * statement that redirects src2 are not visible in this listing;
 * presumably src2 is set to buf2 -- confirm against the full file.
 */
2012 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2013 const uint8_t *src1, const uint8_t *src2,
2014 int srcW, int xInc, const int16_t *hChrFilter,
2015 const int16_t *hChrFilterPos, int hChrFilterSize,
2016 uint8_t *formatConvBuffer, uint32_t *pal)
/* buf2 starts after one line of srcW samples: c->srcBpc rounded up to a
 * multiple of 8 bits gives the bytes per sample, and the line size in bytes
 * is rounded up to a multiple of 16. */
2019 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2020 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2021 src1= formatConvBuffer;
/* Filter-based scaler (one call per plane) vs. the combined fast path. */
2025 if (!c->hcscale_fast) {
2026 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2027 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2028 } else { // fast bilinear upscale / crap downscale
2029 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
/* Optional in-place range conversion of both scaled chroma lines. */
2032 if (c->chrConvertRange)
2033 c->chrConvertRange(dst1, dst2, dstWidth);
2036 static av_always_inline void
2037 find_c_packed_planar_out_funcs(SwsContext *c,
2038 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2039 yuv2interleavedX_fn *yuv2nv12cX,
2040 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2041 yuv2packedX_fn *yuv2packedX)
2043 enum PixelFormat dstFormat = c->dstFormat;
2045 if (is16BPS(dstFormat)) {
2046 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2047 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2048 } else if (is9_OR_10BPS(dstFormat)) {
2049 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2050 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2051 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2053 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2054 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2057 *yuv2plane1 = yuv2plane1_8_c;
2058 *yuv2planeX = yuv2planeX_8_c;
2059 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2060 *yuv2nv12cX = yuv2nv12cX_c;
2063 if(c->flags & SWS_FULL_CHR_H_INT) {
2064 switch (dstFormat) {
2067 *yuv2packedX = yuv2rgba32_full_X_c;
2069 #if CONFIG_SWSCALE_ALPHA
2071 *yuv2packedX = yuv2rgba32_full_X_c;
2073 #endif /* CONFIG_SWSCALE_ALPHA */
2075 *yuv2packedX = yuv2rgbx32_full_X_c;
2077 #endif /* !CONFIG_SMALL */
2081 *yuv2packedX = yuv2argb32_full_X_c;
2083 #if CONFIG_SWSCALE_ALPHA
2085 *yuv2packedX = yuv2argb32_full_X_c;
2087 #endif /* CONFIG_SWSCALE_ALPHA */
2089 *yuv2packedX = yuv2xrgb32_full_X_c;
2091 #endif /* !CONFIG_SMALL */
2095 *yuv2packedX = yuv2bgra32_full_X_c;
2097 #if CONFIG_SWSCALE_ALPHA
2099 *yuv2packedX = yuv2bgra32_full_X_c;
2101 #endif /* CONFIG_SWSCALE_ALPHA */
2103 *yuv2packedX = yuv2bgrx32_full_X_c;
2105 #endif /* !CONFIG_SMALL */
2109 *yuv2packedX = yuv2abgr32_full_X_c;
2111 #if CONFIG_SWSCALE_ALPHA
2113 *yuv2packedX = yuv2abgr32_full_X_c;
2115 #endif /* CONFIG_SWSCALE_ALPHA */
2117 *yuv2packedX = yuv2xbgr32_full_X_c;
2119 #endif /* !CONFIG_SMALL */
2122 *yuv2packedX = yuv2rgb24_full_X_c;
2125 *yuv2packedX = yuv2bgr24_full_X_c;
2129 switch (dstFormat) {
2130 case PIX_FMT_GRAY16BE:
2131 *yuv2packed1 = yuv2gray16BE_1_c;
2132 *yuv2packed2 = yuv2gray16BE_2_c;
2133 *yuv2packedX = yuv2gray16BE_X_c;
2135 case PIX_FMT_GRAY16LE:
2136 *yuv2packed1 = yuv2gray16LE_1_c;
2137 *yuv2packed2 = yuv2gray16LE_2_c;
2138 *yuv2packedX = yuv2gray16LE_X_c;
2140 case PIX_FMT_MONOWHITE:
2141 *yuv2packed1 = yuv2monowhite_1_c;
2142 *yuv2packed2 = yuv2monowhite_2_c;
2143 *yuv2packedX = yuv2monowhite_X_c;
2145 case PIX_FMT_MONOBLACK:
2146 *yuv2packed1 = yuv2monoblack_1_c;
2147 *yuv2packed2 = yuv2monoblack_2_c;
2148 *yuv2packedX = yuv2monoblack_X_c;
2150 case PIX_FMT_YUYV422:
2151 *yuv2packed1 = yuv2yuyv422_1_c;
2152 *yuv2packed2 = yuv2yuyv422_2_c;
2153 *yuv2packedX = yuv2yuyv422_X_c;
2155 case PIX_FMT_UYVY422:
2156 *yuv2packed1 = yuv2uyvy422_1_c;
2157 *yuv2packed2 = yuv2uyvy422_2_c;
2158 *yuv2packedX = yuv2uyvy422_X_c;
2160 case PIX_FMT_RGB48LE:
2161 *yuv2packed1 = yuv2rgb48le_1_c;
2162 *yuv2packed2 = yuv2rgb48le_2_c;
2163 *yuv2packedX = yuv2rgb48le_X_c;
2165 case PIX_FMT_RGB48BE:
2166 *yuv2packed1 = yuv2rgb48be_1_c;
2167 *yuv2packed2 = yuv2rgb48be_2_c;
2168 *yuv2packedX = yuv2rgb48be_X_c;
2170 case PIX_FMT_BGR48LE:
2171 *yuv2packed1 = yuv2bgr48le_1_c;
2172 *yuv2packed2 = yuv2bgr48le_2_c;
2173 *yuv2packedX = yuv2bgr48le_X_c;
2175 case PIX_FMT_BGR48BE:
2176 *yuv2packed1 = yuv2bgr48be_1_c;
2177 *yuv2packed2 = yuv2bgr48be_2_c;
2178 *yuv2packedX = yuv2bgr48be_X_c;
2183 *yuv2packed1 = yuv2rgb32_1_c;
2184 *yuv2packed2 = yuv2rgb32_2_c;
2185 *yuv2packedX = yuv2rgb32_X_c;
2187 #if CONFIG_SWSCALE_ALPHA
2189 *yuv2packed1 = yuv2rgba32_1_c;
2190 *yuv2packed2 = yuv2rgba32_2_c;
2191 *yuv2packedX = yuv2rgba32_X_c;
2193 #endif /* CONFIG_SWSCALE_ALPHA */
2195 *yuv2packed1 = yuv2rgbx32_1_c;
2196 *yuv2packed2 = yuv2rgbx32_2_c;
2197 *yuv2packedX = yuv2rgbx32_X_c;
2199 #endif /* !CONFIG_SMALL */
2201 case PIX_FMT_RGB32_1:
2202 case PIX_FMT_BGR32_1:
2204 *yuv2packed1 = yuv2rgb32_1_1_c;
2205 *yuv2packed2 = yuv2rgb32_1_2_c;
2206 *yuv2packedX = yuv2rgb32_1_X_c;
2208 #if CONFIG_SWSCALE_ALPHA
2210 *yuv2packed1 = yuv2rgba32_1_1_c;
2211 *yuv2packed2 = yuv2rgba32_1_2_c;
2212 *yuv2packedX = yuv2rgba32_1_X_c;
2214 #endif /* CONFIG_SWSCALE_ALPHA */
2216 *yuv2packed1 = yuv2rgbx32_1_1_c;
2217 *yuv2packed2 = yuv2rgbx32_1_2_c;
2218 *yuv2packedX = yuv2rgbx32_1_X_c;
2220 #endif /* !CONFIG_SMALL */
2223 *yuv2packed1 = yuv2rgb24_1_c;
2224 *yuv2packed2 = yuv2rgb24_2_c;
2225 *yuv2packedX = yuv2rgb24_X_c;
2228 *yuv2packed1 = yuv2bgr24_1_c;
2229 *yuv2packed2 = yuv2bgr24_2_c;
2230 *yuv2packedX = yuv2bgr24_X_c;
2232 case PIX_FMT_RGB565LE:
2233 case PIX_FMT_RGB565BE:
2234 case PIX_FMT_BGR565LE:
2235 case PIX_FMT_BGR565BE:
2236 *yuv2packed1 = yuv2rgb16_1_c;
2237 *yuv2packed2 = yuv2rgb16_2_c;
2238 *yuv2packedX = yuv2rgb16_X_c;
2240 case PIX_FMT_RGB555LE:
2241 case PIX_FMT_RGB555BE:
2242 case PIX_FMT_BGR555LE:
2243 case PIX_FMT_BGR555BE:
2244 *yuv2packed1 = yuv2rgb15_1_c;
2245 *yuv2packed2 = yuv2rgb15_2_c;
2246 *yuv2packedX = yuv2rgb15_X_c;
2248 case PIX_FMT_RGB444LE:
2249 case PIX_FMT_RGB444BE:
2250 case PIX_FMT_BGR444LE:
2251 case PIX_FMT_BGR444BE:
2252 *yuv2packed1 = yuv2rgb12_1_c;
2253 *yuv2packed2 = yuv2rgb12_2_c;
2254 *yuv2packedX = yuv2rgb12_X_c;
2258 *yuv2packed1 = yuv2rgb8_1_c;
2259 *yuv2packed2 = yuv2rgb8_2_c;
2260 *yuv2packedX = yuv2rgb8_X_c;
2264 *yuv2packed1 = yuv2rgb4_1_c;
2265 *yuv2packed2 = yuv2rgb4_2_c;
2266 *yuv2packedX = yuv2rgb4_X_c;
2268 case PIX_FMT_RGB4_BYTE:
2269 case PIX_FMT_BGR4_BYTE:
2270 *yuv2packed1 = yuv2rgb4b_1_c;
2271 *yuv2packed2 = yuv2rgb4b_2_c;
2272 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time switch for the buffer tracing macro below; 0 turns the tracing off. */
2278 #define DEBUG_SWSCALE_BUFFERS 0
/*
 * printf-style trace helper: forwards its arguments to av_log() at
 * AV_LOG_DEBUG level when DEBUG_SWSCALE_BUFFERS is non-zero.
 * It expects a variable named `c` to be in scope at the call site, since
 * that is what gets passed as the first av_log() argument.
 * NOTE(review): the expansion is a bare `if (...) statement`, so invoking it
 * as the unbraced body of an if that is followed by `else` would make that
 * else bind to the macro's if.
 */
2279 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Scale one horizontal slice of the source picture into the destination.
 *
 * For each output line from dstY up to dstH the loop
 *  1. works out which source luma/chroma lines the vertical filter needs,
 *  2. horizontally scales the not-yet-buffered source lines into the
 *     luma / chroma (and, if present, alpha) line buffers,
 *  3. calls the vertical scaler / output function matching the destination
 *     format (planar, or one of the three packed variants).
 * When the slice does not contain all source lines needed for an output
 * line, the lines the slice does provide are buffered and the loop is left
 * so that a later call can continue with the next slice.
 *
 * @param c          scaler context; lumBufIndex, chrBufIndex, lastInLumBuf
 *                   and lastInChrBuf are read at entry and written back
 *                   before returning
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides; entries 1, 2 and 3 are modified in place
 * @param srcSliceY  index of the first source line contained in the slice
 * @param srcSliceH  number of source lines contained in the slice
 * @param dst        destination plane pointers
 * @param dstStride  destination strides
 * @return dstY - lastDstY. NOTE(review): both variables are initialised on
 *         lines not visible here; presumably this is the number of output
 *         lines written by this call - confirm.
 */
2281 static int swScale(SwsContext *c, const uint8_t* src[],
2282 int srcStride[], int srcSliceY,
2283 int srcSliceH, uint8_t* dst[], int dstStride[])
2285 /* load a few things into local vars to make the code more readable? and faster */
2286 const int srcW= c->srcW;
2287 const int dstW= c->dstW;
2288 const int dstH= c->dstH;
2289 const int chrDstW= c->chrDstW;
2290 const int chrSrcW= c->chrSrcW;
2291 const int lumXInc= c->lumXInc;
2292 const int chrXInc= c->chrXInc;
2293 const enum PixelFormat dstFormat= c->dstFormat;
2294 const int flags= c->flags;
2295 int16_t *vLumFilterPos= c->vLumFilterPos;
2296 int16_t *vChrFilterPos= c->vChrFilterPos;
2297 int16_t *hLumFilterPos= c->hLumFilterPos;
2298 int16_t *hChrFilterPos= c->hChrFilterPos;
2299 int16_t *vLumFilter= c->vLumFilter;
2300 int16_t *vChrFilter= c->vChrFilter;
2301 int16_t *hLumFilter= c->hLumFilter;
2302 int16_t *hChrFilter= c->hChrFilter;
2303 int32_t *lumMmxFilter= c->lumMmxFilter;
2304 int32_t *chrMmxFilter= c->chrMmxFilter;
2305 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2306 const int vLumFilterSize= c->vLumFilterSize;
2307 const int vChrFilterSize= c->vChrFilterSize;
2308 const int hLumFilterSize= c->hLumFilterSize;
2309 const int hChrFilterSize= c->hChrFilterSize;
2310 int16_t **lumPixBuf= c->lumPixBuf;
2311 int16_t **chrUPixBuf= c->chrUPixBuf;
2312 int16_t **chrVPixBuf= c->chrVPixBuf;
2313 int16_t **alpPixBuf= c->alpPixBuf;
2314 const int vLumBufSize= c->vLumBufSize;
2315 const int vChrBufSize= c->vChrBufSize;
2316 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma view of the slice: start line and height scaled down by the
 * vertical chroma subsampling shift. The negate / shift / negate form makes
 * the height round up instead of down (relies on an arithmetic right shift
 * of the negative value). */
2317 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2318 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2320 uint32_t *pal=c->pal_yuv;
/* Local copies of the output function pointers; they may be replaced for
 * the last output lines further down without touching the context. */
2321 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2322 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2323 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2324 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2325 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2326 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Per-line dither tables are only used for 9/10-bit and 16-bit sources. */
2327 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2329 /* vars which will change and which we need to store back in the context */
2331 int lumBufIndex= c->lumBufIndex;
2332 int chrBufIndex= c->chrBufIndex;
2333 int lastInLumBuf= c->lastInLumBuf;
2334 int lastInChrBuf= c->lastInChrBuf;
/* Packed source formats: stride 3 is taken from stride 0. */
2336 if (isPacked(c->srcFormat)) {
2344 srcStride[3]= srcStride[0];
/* Multiply the chroma strides by 2^vChrDrop, so that advancing by one
 * chroma line steps over source chroma lines. */
2346 srcStride[1]<<= c->vChrDrop;
2347 srcStride[2]<<= c->vChrDrop;
2349 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2350 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2351 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2352 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2353 srcSliceY, srcSliceH, dstY, dstH);
2354 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2355 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Any destination stride that is not a multiple of 8 triggers a warning,
 * but only if SWS_PRINT_INFO is set and no warning was printed before.
 * warnedAlready is function-static, i.e. shared by every context that goes
 * through this function. */
2357 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2358 static int warnedAlready=0; //FIXME move this into the context perhaps
2359 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2360 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2361 " ->cannot do aligned memory accesses anymore\n");
2366 /* Note the user might start scaling the picture in the middle so this
2367 will not get executed. This is not really intended but works
2368 currently, so people might do it. */
2369 if (srcSliceY ==0) {
/* Without dithering both dither row pointers refer to the same fixed
 * table ff_sws_pb_64. */
2377 if (!should_dither) {
2378 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one iteration per destination line. */
2382 for (;dstY < dstH; dstY++) {
2383 const int chrDstY= dstY>>c->chrDstVSubSample;
2384 uint8_t *dest[4] = {
2385 dst[0] + dstStride[0] * dstY,
2386 dst[1] + dstStride[1] * chrDstY,
2387 dst[2] + dstStride[2] * chrDstY,
2388 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* firstLumSrcY2 is the first source line needed by the last luma output
 * line that shares this chroma output line (dstY with the subsampling bits
 * set, clamped to dstH-1). It is used for the enough_lines test below. */
2391 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2392 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2393 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2394 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2395 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2396 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2399 //handle holes (FAST_BILINEAR & weird filters)
2400 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2401 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
/* The first line the filter needs must not be older than what the line
 * buffers (vLumBufSize / vChrBufSize lines) can still hold. */
2402 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2403 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2405 DEBUG_BUFFERS("dstY: %d\n", dstY);
2406 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2407 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2408 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2409 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2411 // Do we have enough lines in this slice to output the dstY line
2412 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input: clamp the targets to the last line of the slice so
 * that everything the slice provides still gets buffered below. */
2414 if (!enough_lines) {
2415 lastLumSrcY = srcSliceY + srcSliceH - 1;
2416 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2417 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2418 lastLumSrcY, lastChrSrcY);
2421 //Do horizontal scaling
/* Luma (src[0]) and, when an alpha buffer exists, alpha (src[3]) lines are
 * scaled into the same buffer slot index lumBufIndex. */
2422 while(lastInLumBuf < lastLumSrcY) {
2423 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2424 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2426 assert(lumBufIndex < 2*vLumBufSize);
2427 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2428 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2429 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2430 hLumFilter, hLumFilterPos, hLumFilterSize,
2433 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2434 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2435 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2439 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2440 lumBufIndex, lastInLumBuf);
/* Same for the two chroma planes (src[1], src[2]); the horizontal chroma
 * scaler is skipped when the context says chroma is not needed. */
2442 while(lastInChrBuf < lastChrSrcY) {
2443 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2444 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2446 assert(chrBufIndex < 2*vChrBufSize);
2447 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2448 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2449 //FIXME replace parameters through context struct (some at least)
2451 if (c->needs_hcscale)
2452 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2453 chrDstW, src1, src2, chrSrcW, chrXInc,
2454 hChrFilter, hChrFilterPos, hChrFilterSize,
2455 formatConvBuffer, pal);
2457 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2458 chrBufIndex, lastInChrBuf);
2460 //wrap buf index around to stay inside the ring buffer
2461 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2462 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2464 break; //we can't output a dstY line so let's try with the next slice
2467 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* With dithering, pick the dither row from dither_8x8_128 by the low three
 * bits of the chroma / luma output line number. */
2469 if (should_dither) {
2470 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2471 c->lumDither8 = dither_8x8_128[dstY & 7];
/* For the last two output lines the local output function pointers are
 * re-selected with the plain C implementations. */
2473 if (dstY >= dstH-2) {
2474 // hmm looks like we can't use MMX here without overwriting this array's tail
2475 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2476 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Pointers to the buffer slot holding the first source line of the
 * vertical filter window: current slot index, moved back by the distance
 * between the last buffered line and the first needed one, plus one full
 * buffer size.
 * NOTE(review): the asserts further down bound these pointers by
 * 2*vLumBufSize / 2*vChrBufSize entries, so the pointer arrays presumably
 * hold every slot twice to let a window be read without wrap-around -
 * confirm against the allocation code. */
2480 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2481 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2482 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2483 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Planar output: each plane is written separately; yuv2plane1 is used when
 * the vertical filter has a single tap, yuv2planeX otherwise. */
2484 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2485 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2487 if (vLumFilterSize == 1) {
2488 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2490 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2491 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Chroma is written only on output lines whose subsampling bits are all
 * zero, and never for gray destinations. */
2494 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2496 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2497 } else if (vChrFilterSize == 1) {
2498 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2499 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2501 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2502 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2503 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2504 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
/* Alpha plane, filtered with the luma coefficients. */
2508 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2509 if (vLumFilterSize == 1) {
2510 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2512 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2513 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed output: the filter window must end inside the pointer arrays. */
2517 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2518 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2519 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2520 int chrAlpha = vChrFilter[2 * dstY + 1];
2521 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2522 alpPixBuf ? *alpSrcPtr : NULL,
2523 dest[0], dstW, chrAlpha, dstY);
2524 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2525 int lumAlpha = vLumFilter[2 * dstY + 1];
2526 int chrAlpha = vChrFilter[2 * dstY + 1];
/* x * 0x10001 equals x + (x << 16): for a non-negative 16-bit coefficient
 * this puts the same value into both 16-bit halves of the 32-bit entry. */
2528 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2530 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2531 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2532 alpPixBuf ? alpSrcPtr : NULL,
2533 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2534 } else { //general RGB
2535 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2536 lumSrcPtr, vLumFilterSize,
2537 vChrFilter + dstY * vChrFilterSize,
2538 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2539 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA420P destination without an alpha line buffer: the alpha plane rows
 * lastDstY .. dstY-1 are filled with the constant 255. */
2545 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2546 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Issue a store fence when the CPU reports MMX2. */
2549 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2550 __asm__ volatile("sfence":::"memory");
2554 /* store changed local vars back in the context */
2556 c->lumBufIndex= lumBufIndex;
2557 c->chrBufIndex= chrBufIndex;
2558 c->lastInLumBuf= lastInLumBuf;
2559 c->lastInChrBuf= lastInChrBuf;
2561 return dstY - lastDstY;
/**
 * Fill the scaler context with the plain C implementations.
 *
 * In order, this sets:
 *  - the vertical scaler / output functions, through
 *    find_c_packed_planar_out_funcs();
 *  - the input converters chrToYV12, lumToYV12 and alpToYV12, chosen from
 *    the source pixel format (they stay NULL when no case matches);
 *  - the horizontal scalers hyScale / hcScale, chosen from the source and
 *    destination bit depths, plus the fast bilinear variants;
 *  - the luma / chroma range conversion functions;
 *  - needs_hcscale.
 *
 * @param c scaler context to initialise; srcFormat, dstFormat, srcBpc,
 *          dstBpc, flags, srcRange, dstRange and chrSrcHSubSample are read
 */
2564 static av_cold void sws_init_swScale_c(SwsContext *c)
2566 enum PixelFormat srcFormat = c->srcFormat;
2568 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2569 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converter; NULL unless one of the cases below matches. */
2572 c->chrToYV12 = NULL;
2574 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2575 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2576 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2577 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2581 case PIX_FMT_BGR4_BYTE:
2582 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* The little-endian list and the big-endian list below both select the
 * byte-swapping reader bswap16UV_c.
 * NOTE(review): presumably only one of the two lists is compiled in,
 * depending on host endianness (the guarding preprocessor lines are not
 * visible here) - confirm. */
2584 case PIX_FMT_YUV444P9LE:
2585 case PIX_FMT_YUV422P9LE:
2586 case PIX_FMT_YUV420P9LE:
2587 case PIX_FMT_YUV422P10LE:
2588 case PIX_FMT_YUV444P10LE:
2589 case PIX_FMT_YUV420P10LE:
2590 case PIX_FMT_YUV420P16LE:
2591 case PIX_FMT_YUV422P16LE:
2592 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2594 case PIX_FMT_YUV444P9BE:
2595 case PIX_FMT_YUV422P9BE:
2596 case PIX_FMT_YUV420P9BE:
2597 case PIX_FMT_YUV444P10BE:
2598 case PIX_FMT_YUV422P10BE:
2599 case PIX_FMT_YUV420P10BE:
2600 case PIX_FMT_YUV420P16BE:
2601 case PIX_FMT_YUV422P16BE:
2602 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB-family sources: with horizontally subsampled chroma the "_half"
 * readers are used; the second list further down holds the corresponding
 * non-"_half" readers for the same formats. */
2605 if (c->chrSrcHSubSample) {
2607 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2608 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2609 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2610 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2611 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2612 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2613 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2614 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2615 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2616 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2617 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2618 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2619 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2620 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2621 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2622 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2623 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2624 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2628 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2629 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2630 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2631 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2632 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2633 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2634 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2635 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2636 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2637 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2638 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2639 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2640 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2641 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2642 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2643 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2644 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2645 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha input converters; NULL unless a case below matches. */
2649 c->lumToYV12 = NULL;
2650 c->alpToYV12 = NULL;
2651 switch (srcFormat) {
/* As for chroma, both endianness lists select the same byte-swapping
 * reader (bswap16Y_c); see the review note on the chroma lists above. */
2653 case PIX_FMT_YUV444P9LE:
2654 case PIX_FMT_YUV422P9LE:
2655 case PIX_FMT_YUV420P9LE:
2656 case PIX_FMT_YUV444P10LE:
2657 case PIX_FMT_YUV422P10LE:
2658 case PIX_FMT_YUV420P10LE:
2659 case PIX_FMT_YUV420P16LE:
2660 case PIX_FMT_YUV422P16LE:
2661 case PIX_FMT_YUV444P16LE:
2662 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2664 case PIX_FMT_YUV444P9BE:
2665 case PIX_FMT_YUV422P9BE:
2666 case PIX_FMT_YUV420P9BE:
2667 case PIX_FMT_YUV444P10BE:
2668 case PIX_FMT_YUV422P10BE:
2669 case PIX_FMT_YUV420P10BE:
2670 case PIX_FMT_YUV420P16BE:
2671 case PIX_FMT_YUV422P16BE:
2672 case PIX_FMT_YUV444P16BE:
2673 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
/* Y400A shares the YUYV422 luma reader here; its alpha channel is read
 * with uyvyToY_c in the alpha switch further down. */
2675 case PIX_FMT_YUYV422 :
2676 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2677 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2678 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2679 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2680 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2681 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2682 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2683 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2684 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2685 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2686 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2687 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2691 case PIX_FMT_BGR4_BYTE:
2692 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2693 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2694 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2695 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2696 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2697 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2698 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2699 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2700 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2701 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2702 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha input converter for the source formats that carry alpha. */
2705 switch (srcFormat) {
2707 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2709 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2710 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scalers by bit depth: 8-bit sources use hScale8To15_c when
 * the destination has at most 10 bits (the only combination that can also
 * get the fast bilinear variants) and hScale8To19_c otherwise; all other
 * source depths use hScale16To19_c or hScale16To15_c on the same
 * destination threshold. */
2714 if (c->srcBpc == 8) {
2715 if (c->dstBpc <= 10) {
2716 c->hyScale = c->hcScale = hScale8To15_c;
2717 if (c->flags & SWS_FAST_BILINEAR) {
2718 c->hyscale_fast = hyscale_fast_c;
2719 c->hcscale_fast = hcscale_fast_c;
2722 c->hyScale = c->hcScale = hScale8To19_c;
2725 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range conversion is installed only when source and destination ranges
 * differ and the destination is not an RGB format. The plain functions are
 * used for destinations of at most 10 bits, the "16" ones otherwise.
 * NOTE(review): the condition choosing between the FromJpeg and ToJpeg
 * pair is on lines not visible here. */
2728 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2729 if (c->dstBpc <= 10) {
2731 c->lumConvertRange = lumRangeFromJpeg_c;
2732 c->chrConvertRange = chrRangeFromJpeg_c;
2734 c->lumConvertRange = lumRangeToJpeg_c;
2735 c->chrConvertRange = chrRangeToJpeg_c;
2739 c->lumConvertRange = lumRangeFromJpeg16_c;
2740 c->chrConvertRange = chrRangeFromJpeg16_c;
2742 c->lumConvertRange = lumRangeToJpeg16_c;
2743 c->chrConvertRange = chrRangeToJpeg16_c;
/* Horizontal chroma scaling is flagged as needed unless the source or the
 * destination is gray, or the source is one of the two mono formats. */
2748 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2749 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2750 c->needs_hcscale = 1;
2753 SwsFunc ff_getSwsFunc(SwsContext *c)
2755 sws_init_swScale_c(c);
2758 ff_sws_init_swScale_mmx(c);
2760 ff_sws_init_swScale_altivec(c);