2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in Q15 fixed point (RGB2YUV_SHIFT
 * fractional bits).  The luma (Y) weights 0.299/0.587/0.114 match the
 * classic BT.601 matrix and are scaled by 219/255 for limited-range luma;
 * the chroma (U/V) weights are scaled by 224/255 for limited-range chroma.
 * The +0.5 rounds to nearest before the truncating cast to int. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) /* blue  -> Y */
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* blue  -> V */
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* blue  -> U */
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5)) /* green -> Y */
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* green -> V */
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* green -> U */
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5)) /* red   -> Y */
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* red   -> V */
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5)) /* red   -> U */
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Alternative gamma-corrected versions of dither_8x8_220, kept for
 * reference/experimentation.  They are guarded out because each one
 * redefines the same identifier as the active table above; defining all
 * of them unconditionally is a compile error.  Enable at most one by
 * swapping it in for the active table. */
#if 0
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#endif

#if 0
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#endif

#if 0
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
198 #define output_pixel(pos, val, bias, signedness) \
200 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
202 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
205 static av_always_inline void
206 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
207 int big_endian, int output_bits)
210 int shift = 19 - output_bits;
212 for (i = 0; i < dstW; i++) {
213 int val = src[i] + (1 << (shift - 1));
214 output_pixel(&dest[i], val, 0, uint);
218 static av_always_inline void
219 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
220 const int32_t **src, uint16_t *dest, int dstW,
221 int big_endian, int output_bits)
224 int shift = 15 + 16 - output_bits;
226 for (i = 0; i < dstW; i++) {
227 int val = 1 << (30-output_bits);
230 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
231 * filters (or anything with negative coeffs, the range can be slightly
232 * wider in both directions. To account for this overflow, we subtract
233 * a constant so it always fits in the signed range (assuming a
234 * reasonable filterSize), and re-add that at the end. */
236 for (j = 0; j < filterSize; j++)
237 val += src[j][i] * filter[j];
239 output_pixel(&dest[i], val, 0x8000, int);
245 #define output_pixel(pos, val) \
247 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
249 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
252 static av_always_inline void
253 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
254 int big_endian, int output_bits)
257 int shift = 15 - output_bits;
259 for (i = 0; i < dstW; i++) {
260 int val = src[i] + (1 << (shift - 1));
261 output_pixel(&dest[i], val);
265 static av_always_inline void
266 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
267 const int16_t **src, uint16_t *dest, int dstW,
268 int big_endian, int output_bits)
271 int shift = 11 + 16 - output_bits;
273 for (i = 0; i < dstW; i++) {
274 int val = 1 << (26-output_bits);
277 for (j = 0; j < filterSize; j++)
278 val += src[j][i] * filter[j];
280 output_pixel(&dest[i], val);
/* Instantiate the concrete yuv2plane1_<bits><endianness>_c and
 * yuv2planeX_<bits><endianness>_c entry points from the shared
 * templates above; typeX_t selects the intermediate width. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}

yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
/* Multi-tap vertical filter over 16-bit intermediates to an 8-bit plane,
 * with ordered dithering (dither values are pre-shifted by 12 so the
 * final >>19 lands them below the 8-bit output). */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
/* 1-tap vertical output: 16-bit intermediate (Q7) plus dither, down to
 * an 8-bit plane. */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
334 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
335 const int16_t **chrUSrc, const int16_t **chrVSrc,
336 uint8_t *dest, int chrDstW)
338 enum PixelFormat dstFormat = c->dstFormat;
339 const uint8_t *chrDither = c->chrDither8;
342 if (dstFormat == PIX_FMT_NV12)
343 for (i=0; i<chrDstW; i++) {
344 int u = chrDither[i & 7] << 12;
345 int v = chrDither[(i + 3) & 7] << 12;
347 for (j=0; j<chrFilterSize; j++) {
348 u += chrUSrc[j][i] * chrFilter[j];
349 v += chrVSrc[j][i] * chrFilter[j];
352 dest[2*i]= av_clip_uint8(u>>19);
353 dest[2*i+1]= av_clip_uint8(v>>19);
356 for (i=0; i<chrDstW; i++) {
357 int u = chrDither[i & 7] << 12;
358 int v = chrDither[(i + 3) & 7] << 12;
360 for (j=0; j<chrFilterSize; j++) {
361 u += chrUSrc[j][i] * chrFilter[j];
362 v += chrVSrc[j][i] * chrFilter[j];
365 dest[2*i]= av_clip_uint8(v>>19);
366 dest[2*i+1]= av_clip_uint8(u>>19);
370 #define output_pixel(pos, val) \
371 if (target == PIX_FMT_GRAY16BE) { \
377 static av_always_inline void
378 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
379 const int32_t **lumSrc, int lumFilterSize,
380 const int16_t *chrFilter, const int32_t **chrUSrc,
381 const int32_t **chrVSrc, int chrFilterSize,
382 const int32_t **alpSrc, uint16_t *dest, int dstW,
383 int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
389 int Y1 = (1 << 14) - 0x40000000;
390 int Y2 = (1 << 14) - 0x40000000;
392 for (j = 0; j < lumFilterSize; j++) {
393 Y1 += lumSrc[j][i * 2] * lumFilter[j];
394 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
398 Y1 = av_clip_int16(Y1);
399 Y2 = av_clip_int16(Y2);
400 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
401 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
405 static av_always_inline void
406 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
407 const int32_t *ubuf[2], const int32_t *vbuf[2],
408 const int32_t *abuf[2], uint16_t *dest, int dstW,
409 int yalpha, int uvalpha, int y,
410 enum PixelFormat target)
412 int yalpha1 = 4095 - yalpha;
414 const int32_t *buf0 = buf[0], *buf1 = buf[1];
416 for (i = 0; i < (dstW >> 1); i++) {
417 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
418 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
420 output_pixel(&dest[i * 2 + 0], Y1);
421 output_pixel(&dest[i * 2 + 1], Y2);
425 static av_always_inline void
426 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
427 const int32_t *ubuf[2], const int32_t *vbuf[2],
428 const int32_t *abuf0, uint16_t *dest, int dstW,
429 int uvalpha, int y, enum PixelFormat target)
433 for (i = 0; i < (dstW >> 1); i++) {
434 int Y1 = buf0[i * 2 ] << 1;
435 int Y2 = buf0[i * 2 + 1] << 1;
437 output_pixel(&dest[i * 2 + 0], Y1);
438 output_pixel(&dest[i * 2 + 1], Y2);
444 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
445 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
446 const int16_t **_lumSrc, int lumFilterSize, \
447 const int16_t *chrFilter, const int16_t **_chrUSrc, \
448 const int16_t **_chrVSrc, int chrFilterSize, \
449 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
452 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
453 **chrUSrc = (const int32_t **) _chrUSrc, \
454 **chrVSrc = (const int32_t **) _chrVSrc, \
455 **alpSrc = (const int32_t **) _alpSrc; \
456 uint16_t *dest = (uint16_t *) _dest; \
457 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
458 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
459 alpSrc, dest, dstW, y, fmt); \
462 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
463 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
464 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
465 int yalpha, int uvalpha, int y) \
467 const int32_t **buf = (const int32_t **) _buf, \
468 **ubuf = (const int32_t **) _ubuf, \
469 **vbuf = (const int32_t **) _vbuf, \
470 **abuf = (const int32_t **) _abuf; \
471 uint16_t *dest = (uint16_t *) _dest; \
472 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
473 dest, dstW, yalpha, uvalpha, y, fmt); \
476 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
477 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
478 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
479 int uvalpha, int y) \
481 const int32_t *buf0 = (const int32_t *) _buf0, \
482 **ubuf = (const int32_t **) _ubuf, \
483 **vbuf = (const int32_t **) _vbuf, \
484 *abuf0 = (const int32_t *) _abuf0; \
485 uint16_t *dest = (uint16_t *) _dest; \
486 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
487 dstW, uvalpha, y, fmt); \
490 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
491 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
493 #define output_pixel(pos, acc) \
494 if (target == PIX_FMT_MONOBLACK) { \
500 static av_always_inline void
501 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
502 const int16_t **lumSrc, int lumFilterSize,
503 const int16_t *chrFilter, const int16_t **chrUSrc,
504 const int16_t **chrVSrc, int chrFilterSize,
505 const int16_t **alpSrc, uint8_t *dest, int dstW,
506 int y, enum PixelFormat target)
508 const uint8_t * const d128=dither_8x8_220[y&7];
509 uint8_t *g = c->table_gU[128] + c->table_gV[128];
513 for (i = 0; i < dstW - 1; i += 2) {
518 for (j = 0; j < lumFilterSize; j++) {
519 Y1 += lumSrc[j][i] * lumFilter[j];
520 Y2 += lumSrc[j][i+1] * lumFilter[j];
524 if ((Y1 | Y2) & 0x100) {
525 Y1 = av_clip_uint8(Y1);
526 Y2 = av_clip_uint8(Y2);
528 acc += acc + g[Y1 + d128[(i + 0) & 7]];
529 acc += acc + g[Y2 + d128[(i + 1) & 7]];
531 output_pixel(*dest++, acc);
536 static av_always_inline void
537 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
538 const int16_t *ubuf[2], const int16_t *vbuf[2],
539 const int16_t *abuf[2], uint8_t *dest, int dstW,
540 int yalpha, int uvalpha, int y,
541 enum PixelFormat target)
543 const int16_t *buf0 = buf[0], *buf1 = buf[1];
544 const uint8_t * const d128 = dither_8x8_220[y & 7];
545 uint8_t *g = c->table_gU[128] + c->table_gV[128];
546 int yalpha1 = 4095 - yalpha;
549 for (i = 0; i < dstW - 7; i += 8) {
550 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
551 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
552 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
553 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
554 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
555 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
556 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
557 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
558 output_pixel(*dest++, acc);
562 static av_always_inline void
563 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
564 const int16_t *ubuf[2], const int16_t *vbuf[2],
565 const int16_t *abuf0, uint8_t *dest, int dstW,
566 int uvalpha, int y, enum PixelFormat target)
568 const uint8_t * const d128 = dither_8x8_220[y & 7];
569 uint8_t *g = c->table_gU[128] + c->table_gV[128];
572 for (i = 0; i < dstW - 7; i += 8) {
573 int acc = g[(buf0[i ] >> 7) + d128[0]];
574 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
575 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
576 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
577 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
578 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
579 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
580 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
581 output_pixel(*dest++, acc);
587 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
588 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
589 const int16_t **lumSrc, int lumFilterSize, \
590 const int16_t *chrFilter, const int16_t **chrUSrc, \
591 const int16_t **chrVSrc, int chrFilterSize, \
592 const int16_t **alpSrc, uint8_t *dest, int dstW, \
595 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
596 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
597 alpSrc, dest, dstW, y, fmt); \
600 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
601 const int16_t *ubuf[2], const int16_t *vbuf[2], \
602 const int16_t *abuf[2], uint8_t *dest, int dstW, \
603 int yalpha, int uvalpha, int y) \
605 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
606 dest, dstW, yalpha, uvalpha, y, fmt); \
609 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
610 const int16_t *ubuf[2], const int16_t *vbuf[2], \
611 const int16_t *abuf0, uint8_t *dest, int dstW, \
612 int uvalpha, int y) \
614 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
615 abuf0, dest, dstW, uvalpha, \
619 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
620 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
622 #define output_pixels(pos, Y1, U, Y2, V) \
623 if (target == PIX_FMT_YUYV422) { \
624 dest[pos + 0] = Y1; \
626 dest[pos + 2] = Y2; \
630 dest[pos + 1] = Y1; \
632 dest[pos + 3] = Y2; \
635 static av_always_inline void
636 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
637 const int16_t **lumSrc, int lumFilterSize,
638 const int16_t *chrFilter, const int16_t **chrUSrc,
639 const int16_t **chrVSrc, int chrFilterSize,
640 const int16_t **alpSrc, uint8_t *dest, int dstW,
641 int y, enum PixelFormat target)
645 for (i = 0; i < (dstW >> 1); i++) {
652 for (j = 0; j < lumFilterSize; j++) {
653 Y1 += lumSrc[j][i * 2] * lumFilter[j];
654 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
656 for (j = 0; j < chrFilterSize; j++) {
657 U += chrUSrc[j][i] * chrFilter[j];
658 V += chrVSrc[j][i] * chrFilter[j];
664 if ((Y1 | Y2 | U | V) & 0x100) {
665 Y1 = av_clip_uint8(Y1);
666 Y2 = av_clip_uint8(Y2);
667 U = av_clip_uint8(U);
668 V = av_clip_uint8(V);
670 output_pixels(4*i, Y1, U, Y2, V);
674 static av_always_inline void
675 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
676 const int16_t *ubuf[2], const int16_t *vbuf[2],
677 const int16_t *abuf[2], uint8_t *dest, int dstW,
678 int yalpha, int uvalpha, int y,
679 enum PixelFormat target)
681 const int16_t *buf0 = buf[0], *buf1 = buf[1],
682 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
683 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
684 int yalpha1 = 4095 - yalpha;
685 int uvalpha1 = 4095 - uvalpha;
688 for (i = 0; i < (dstW >> 1); i++) {
689 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
690 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
691 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
692 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
694 output_pixels(i * 4, Y1, U, Y2, V);
698 static av_always_inline void
699 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
700 const int16_t *ubuf[2], const int16_t *vbuf[2],
701 const int16_t *abuf0, uint8_t *dest, int dstW,
702 int uvalpha, int y, enum PixelFormat target)
704 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
705 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
708 if (uvalpha < 2048) {
709 for (i = 0; i < (dstW >> 1); i++) {
710 int Y1 = buf0[i * 2] >> 7;
711 int Y2 = buf0[i * 2 + 1] >> 7;
712 int U = ubuf1[i] >> 7;
713 int V = vbuf1[i] >> 7;
715 output_pixels(i * 4, Y1, U, Y2, V);
718 for (i = 0; i < (dstW >> 1); i++) {
719 int Y1 = buf0[i * 2] >> 7;
720 int Y2 = buf0[i * 2 + 1] >> 7;
721 int U = (ubuf0[i] + ubuf1[i]) >> 8;
722 int V = (vbuf0[i] + vbuf1[i]) >> 8;
724 output_pixels(i * 4, Y1, U, Y2, V);
731 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
732 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
734 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
735 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
736 #define output_pixel(pos, val) \
737 if (isBE(target)) { \
743 static av_always_inline void
744 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
745 const int32_t **lumSrc, int lumFilterSize,
746 const int16_t *chrFilter, const int32_t **chrUSrc,
747 const int32_t **chrVSrc, int chrFilterSize,
748 const int32_t **alpSrc, uint16_t *dest, int dstW,
749 int y, enum PixelFormat target)
753 for (i = 0; i < (dstW >> 1); i++) {
755 int Y1 = -0x40000000;
756 int Y2 = -0x40000000;
757 int U = -128 << 23; // 19
761 for (j = 0; j < lumFilterSize; j++) {
762 Y1 += lumSrc[j][i * 2] * lumFilter[j];
763 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
765 for (j = 0; j < chrFilterSize; j++) {
766 U += chrUSrc[j][i] * chrFilter[j];
767 V += chrVSrc[j][i] * chrFilter[j];
770 // 8bit: 12+15=27; 16-bit: 12+19=31
778 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
779 Y1 -= c->yuv2rgb_y_offset;
780 Y2 -= c->yuv2rgb_y_offset;
781 Y1 *= c->yuv2rgb_y_coeff;
782 Y2 *= c->yuv2rgb_y_coeff;
785 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
787 R = V * c->yuv2rgb_v2r_coeff;
788 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
789 B = U * c->yuv2rgb_u2b_coeff;
791 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
792 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
793 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
794 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
795 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
796 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
797 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
802 static av_always_inline void
803 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
804 const int32_t *ubuf[2], const int32_t *vbuf[2],
805 const int32_t *abuf[2], uint16_t *dest, int dstW,
806 int yalpha, int uvalpha, int y,
807 enum PixelFormat target)
809 const int32_t *buf0 = buf[0], *buf1 = buf[1],
810 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
811 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
812 int yalpha1 = 4095 - yalpha;
813 int uvalpha1 = 4095 - uvalpha;
816 for (i = 0; i < (dstW >> 1); i++) {
817 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
818 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
819 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
820 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
823 Y1 -= c->yuv2rgb_y_offset;
824 Y2 -= c->yuv2rgb_y_offset;
825 Y1 *= c->yuv2rgb_y_coeff;
826 Y2 *= c->yuv2rgb_y_coeff;
830 R = V * c->yuv2rgb_v2r_coeff;
831 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
832 B = U * c->yuv2rgb_u2b_coeff;
834 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
835 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
836 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
837 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
838 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
839 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
844 static av_always_inline void
845 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
846 const int32_t *ubuf[2], const int32_t *vbuf[2],
847 const int32_t *abuf0, uint16_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
850 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = (buf0[i * 2] ) >> 2;
857 int Y2 = (buf0[i * 2 + 1]) >> 2;
858 int U = (ubuf0[i] + (-128 << 11)) >> 2;
859 int V = (vbuf0[i] + (-128 << 11)) >> 2;
862 Y1 -= c->yuv2rgb_y_offset;
863 Y2 -= c->yuv2rgb_y_offset;
864 Y1 *= c->yuv2rgb_y_coeff;
865 Y2 *= c->yuv2rgb_y_coeff;
869 R = V * c->yuv2rgb_v2r_coeff;
870 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
871 B = U * c->yuv2rgb_u2b_coeff;
873 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
874 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
875 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
876 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
877 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
878 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
882 for (i = 0; i < (dstW >> 1); i++) {
883 int Y1 = (buf0[i * 2] ) >> 2;
884 int Y2 = (buf0[i * 2 + 1]) >> 2;
885 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
886 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
889 Y1 -= c->yuv2rgb_y_offset;
890 Y2 -= c->yuv2rgb_y_offset;
891 Y1 *= c->yuv2rgb_y_coeff;
892 Y2 *= c->yuv2rgb_y_coeff;
896 R = V * c->yuv2rgb_v2r_coeff;
897 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
898 B = U * c->yuv2rgb_u2b_coeff;
900 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
901 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
902 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
903 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
904 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
905 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
915 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
916 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
917 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
918 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
920 static av_always_inline void
921 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
922 unsigned U, unsigned V, unsigned A1, unsigned A2,
923 const void *_r, const void *_g, const void *_b, int y,
924 enum PixelFormat target, int hasAlpha)
926 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
927 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
928 uint32_t *dest = (uint32_t *) _dest;
929 const uint32_t *r = (const uint32_t *) _r;
930 const uint32_t *g = (const uint32_t *) _g;
931 const uint32_t *b = (const uint32_t *) _b;
934 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
936 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
937 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
940 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
942 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
943 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
945 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
946 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
949 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
950 uint8_t *dest = (uint8_t *) _dest;
951 const uint8_t *r = (const uint8_t *) _r;
952 const uint8_t *g = (const uint8_t *) _g;
953 const uint8_t *b = (const uint8_t *) _b;
955 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
956 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
957 dest[i * 6 + 0] = r_b[Y1];
958 dest[i * 6 + 1] = g[Y1];
959 dest[i * 6 + 2] = b_r[Y1];
960 dest[i * 6 + 3] = r_b[Y2];
961 dest[i * 6 + 4] = g[Y2];
962 dest[i * 6 + 5] = b_r[Y2];
965 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
966 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
967 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
968 uint16_t *dest = (uint16_t *) _dest;
969 const uint16_t *r = (const uint16_t *) _r;
970 const uint16_t *g = (const uint16_t *) _g;
971 const uint16_t *b = (const uint16_t *) _b;
972 int dr1, dg1, db1, dr2, dg2, db2;
974 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
975 dr1 = dither_2x2_8[ y & 1 ][0];
976 dg1 = dither_2x2_4[ y & 1 ][0];
977 db1 = dither_2x2_8[(y & 1) ^ 1][0];
978 dr2 = dither_2x2_8[ y & 1 ][1];
979 dg2 = dither_2x2_4[ y & 1 ][1];
980 db2 = dither_2x2_8[(y & 1) ^ 1][1];
981 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
982 dr1 = dither_2x2_8[ y & 1 ][0];
983 dg1 = dither_2x2_8[ y & 1 ][1];
984 db1 = dither_2x2_8[(y & 1) ^ 1][0];
985 dr2 = dither_2x2_8[ y & 1 ][1];
986 dg2 = dither_2x2_8[ y & 1 ][0];
987 db2 = dither_2x2_8[(y & 1) ^ 1][1];
989 dr1 = dither_4x4_16[ y & 3 ][0];
990 dg1 = dither_4x4_16[ y & 3 ][1];
991 db1 = dither_4x4_16[(y & 3) ^ 3][0];
992 dr2 = dither_4x4_16[ y & 3 ][1];
993 dg2 = dither_4x4_16[ y & 3 ][0];
994 db2 = dither_4x4_16[(y & 3) ^ 3][1];
997 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
998 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
999 } else /* 8/4-bit */ {
1000 uint8_t *dest = (uint8_t *) _dest;
1001 const uint8_t *r = (const uint8_t *) _r;
1002 const uint8_t *g = (const uint8_t *) _g;
1003 const uint8_t *b = (const uint8_t *) _b;
1004 int dr1, dg1, db1, dr2, dg2, db2;
1006 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1007 const uint8_t * const d64 = dither_8x8_73[y & 7];
1008 const uint8_t * const d32 = dither_8x8_32[y & 7];
1009 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1010 db1 = d64[(i * 2 + 0) & 7];
1011 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1012 db2 = d64[(i * 2 + 1) & 7];
1014 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1015 const uint8_t * const d128 = dither_8x8_220[y & 7];
1016 dr1 = db1 = d128[(i * 2 + 0) & 7];
1017 dg1 = d64[(i * 2 + 0) & 7];
1018 dr2 = db2 = d128[(i * 2 + 1) & 7];
1019 dg2 = d64[(i * 2 + 1) & 7];
1022 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1023 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1024 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1026 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1027 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Vertically scale + convert YUV planes to a packed RGB destination using the
 * full multi-tap vertical filter (the "X" variant: lumFilterSize/chrFilterSize
 * taps per output sample). Processes two luma samples per chroma sample
 * (4:2:2-style pairing, hence the dstW >> 1 loop and Y1/Y2 pair).
 * NOTE(review): interior lines (accumulator initialisation, closing braces)
 * are missing from this view — extraction artifact; code left byte-identical. */
1032 static av_always_inline void
1033 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1034 const int16_t **lumSrc, int lumFilterSize,
1035 const int16_t *chrFilter, const int16_t **chrUSrc,
1036 const int16_t **chrVSrc, int chrFilterSize,
1037 const int16_t **alpSrc, uint8_t *dest, int dstW,
1038 int y, enum PixelFormat target, int hasAlpha)
1042 for (i = 0; i < (dstW >> 1); i++) {
1048 int av_unused A1, A2;
1049 const void *r, *g, *b;
/* Accumulate the vertical filter taps for the two luma samples of this pair. */
1051 for (j = 0; j < lumFilterSize; j++) {
1052 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1053 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* One chroma sample is shared by both luma samples. */
1055 for (j = 0; j < chrFilterSize; j++) {
1056 U += chrUSrc[j][i] * chrFilter[j];
1057 V += chrVSrc[j][i] * chrFilter[j];
/* Cheap common-case test: only clip when some component overflowed 8 bits. */
1063 if ((Y1 | Y2 | U | V) & 0x100) {
1064 Y1 = av_clip_uint8(Y1);
1065 Y2 = av_clip_uint8(Y2);
1066 U = av_clip_uint8(U);
1067 V = av_clip_uint8(V);
/* Alpha path: same vertical filtering applied to the alpha plane. */
1072 for (j = 0; j < lumFilterSize; j++) {
1073 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1074 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1078 if ((A1 | A2) & 0x100) {
1079 A1 = av_clip_uint8(A1);
1080 A2 = av_clip_uint8(A2);
1084 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* The green table is split into U- and V-indexed halves that are summed. */
1086 g = (c->table_gU[U] + c->table_gV[V]);
1089 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1090 r, g, b, y, target, hasAlpha);
/* Two-line ("2") variant: blends exactly two source lines per plane with
 * 12-bit weights (yalpha/uvalpha in 0..4095) instead of running the general
 * multi-tap filter, then converts the result to packed RGB.
 * NOTE(review): some interior lines (loop-variable decl, A1/A2 decls,
 * closing braces) are elided in this view; code left byte-identical. */
1094 static av_always_inline void
1095 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1096 const int16_t *ubuf[2], const int16_t *vbuf[2],
1097 const int16_t *abuf[2], uint8_t *dest, int dstW,
1098 int yalpha, int uvalpha, int y,
1099 enum PixelFormat target, int hasAlpha)
1101 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1102 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1103 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1104 *abuf0 = hasAlpha ? abuf[0] : NULL,
1105 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* Complementary weights so buf0/buf1 contributions sum to 4095 (~1.0). */
1106 int yalpha1 = 4095 - yalpha;
1107 int uvalpha1 = 4095 - uvalpha;
1110 for (i = 0; i < (dstW >> 1); i++) {
/* 15-bit samples * 12-bit weight -> >> 19 yields 8-bit output. */
1111 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1112 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1113 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1114 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1116 const void *r = c->table_rV[V],
1117 *g = (c->table_gU[U] + c->table_gV[V]),
1118 *b = c->table_bU[U];
1121 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1122 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1125 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1126 r, g, b, y, target, hasAlpha);
/* One-line ("1") variant: no vertical filtering of luma at all, just a >> 7
 * renormalisation from the 15-bit intermediate format. Chroma is either taken
 * from one line (uvalpha < 2048) or averaged between the two buffered chroma
 * lines (the >> 8 branch averages two >> 7 values in one shift).
 * NOTE(review): brace/decl lines elided in this view; code left byte-identical. */
1130 static av_always_inline void
1131 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1132 const int16_t *ubuf[2], const int16_t *vbuf[2],
1133 const int16_t *abuf0, uint8_t *dest, int dstW,
1134 int uvalpha, int y, enum PixelFormat target,
1137 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* uvalpha below half weight: use the single nearest chroma line. */
1141 if (uvalpha < 2048) {
1142 for (i = 0; i < (dstW >> 1); i++) {
1143 int Y1 = buf0[i * 2] >> 7;
1144 int Y2 = buf0[i * 2 + 1] >> 7;
1145 int U = ubuf1[i] >> 7;
1146 int V = vbuf1[i] >> 7;
1148 const void *r = c->table_rV[V],
1149 *g = (c->table_gU[U] + c->table_gV[V]),
1150 *b = c->table_bU[U];
1153 A1 = abuf0[i * 2 ] >> 7;
1154 A2 = abuf0[i * 2 + 1] >> 7;
1157 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1158 r, g, b, y, target, hasAlpha);
/* Otherwise: average the two chroma lines (sum >> 8 == mean of two >>7 values). */
1161 for (i = 0; i < (dstW >> 1); i++) {
1162 int Y1 = buf0[i * 2] >> 7;
1163 int Y2 = buf0[i * 2 + 1] >> 7;
1164 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1165 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1167 const void *r = c->table_rV[V],
1168 *g = (c->table_gU[U] + c->table_gV[V]),
1169 *b = c->table_bU[U];
1172 A1 = abuf0[i * 2 ] >> 7;
1173 A2 = abuf0[i * 2 + 1] >> 7;
1176 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1177 r, g, b, y, target, hasAlpha);
/* Wrapper generators: stamp out concrete _X_c / _2_c / _1_c output functions
 * for each packed RGB destination format by instantiating the three templates
 * above with a compile-time `fmt` and `hasAlpha`, so the per-format branches
 * constant-fold away. YUV2RGBWRAPPERX emits only the _X_c variant;
 * YUV2RGBWRAPPER emits all three. The `hasAlpha` argument may be a runtime
 * expression (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) for the generic 32-bit
 * wrappers, or a literal 0/1 for the specialised a32/x32 ones.
 * NOTE(review): closing-brace lines of the macro bodies are elided in this
 * view; code left byte-identical. */
1182 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1183 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1184 const int16_t **lumSrc, int lumFilterSize, \
1185 const int16_t *chrFilter, const int16_t **chrUSrc, \
1186 const int16_t **chrVSrc, int chrFilterSize, \
1187 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1190 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1191 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1192 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1194 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1195 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1196 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1197 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1198 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1199 int yalpha, int uvalpha, int y) \
1201 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1202 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1205 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1206 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1207 const int16_t *abuf0, uint8_t *dest, int dstW, \
1208 int uvalpha, int y) \
1210 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1211 dstW, uvalpha, y, fmt, hasAlpha); \
1215 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1216 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1218 #if CONFIG_SWSCALE_ALPHA
1219 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1220 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1222 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1223 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1225 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1226 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1227 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1228 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1229 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1230 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1231 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1232 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/* Full-chroma-resolution output path: one chroma sample per output pixel
 * (no Y1/Y2 pairing), used with SWS_FULL_CHR_H_INT. Converts via the context's
 * integer yuv2rgb coefficients instead of the lookup tables, then clips R/G/B
 * into 30-bit unsigned range before the (elided) per-format store code.
 * `step` is the bytes-per-pixel advance: 3 for 24-bit formats, else 4.
 * NOTE(review): the per-format store switch between lines 1286 and 1318 is
 * largely elided in this view; code left byte-identical. */
1234 static av_always_inline void
1235 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1236 const int16_t **lumSrc, int lumFilterSize,
1237 const int16_t *chrFilter, const int16_t **chrUSrc,
1238 const int16_t **chrVSrc, int chrFilterSize,
1239 const int16_t **alpSrc, uint8_t *dest,
1240 int dstW, int y, enum PixelFormat target, int hasAlpha)
1243 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1245 for (i = 0; i < dstW; i++) {
1253 for (j = 0; j < lumFilterSize; j++) {
1254 Y += lumSrc[j][i] * lumFilter[j];
1256 for (j = 0; j < chrFilterSize; j++) {
1257 U += chrUSrc[j][i] * chrFilter[j];
1258 V += chrVSrc[j][i] * chrFilter[j];
1265 for (j = 0; j < lumFilterSize; j++) {
1266 A += alpSrc[j][i] * lumFilter[j];
1270 A = av_clip_uint8(A);
/* Apply the context's fixed-point YUV->RGB matrix. */
1272 Y -= c->yuv2rgb_y_offset;
1273 Y *= c->yuv2rgb_y_coeff;
1275 R = Y + V*c->yuv2rgb_v2r_coeff;
1276 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1277 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Clip only when some component left the 30-bit unsigned range. */
1278 if ((R | G | B) & 0xC0000000) {
1279 R = av_clip_uintp2(R, 30);
1280 G = av_clip_uintp2(G, 30);
1281 B = av_clip_uintp2(B, 30);
/* Alpha byte position depends on format: byte 0 (ARGB/ABGR) or byte 3 (RGBA/BGRA). */
1286 dest[0] = hasAlpha ? A : 255;
1300 dest[3] = hasAlpha ? A : 255;
1303 dest[0] = hasAlpha ? A : 255;
1318 dest[3] = hasAlpha ? A : 255;
/* Instantiations of the full-chroma template for each packed 32/24-bit format. */
1326 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1327 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1328 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1329 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1331 #if CONFIG_SWSCALE_ALPHA
1332 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1333 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1334 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1335 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1337 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1338 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1339 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1340 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1342 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1343 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/* Fill a width x height rectangle of a strided plane with a constant byte
 * value, starting at row `y` (parameter lines elided in this view).
 * NOTE(review): the `ptr += stride;` advance and closing braces are not
 * visible here — presumably present in the elided lines; left byte-identical. */
1345 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1346 int width, int height,
1350 uint8_t *ptr = plane + stride*y;
1351 for (i=0; i<height; i++) {
1352 memset(ptr, val, width);
/* Helper macros for the 48-bit RGB readers below: input_pixel does an
 * endian-correct 16-bit load; r/b swap the component roles for BGR48 input. */
1357 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1359 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1360 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* 48-bit (16 bits per component) packed RGB/BGR input converters.
 * The r/b macros defined above resolve the component order from `origin`,
 * so one template serves both RGB48 and BGR48, both endiannesses.
 * Rounding constants: 0x2001<<(SHIFT-1) biases Y to the limited (16..235-style)
 * range; 0x10001<<(SHIFT-1) centres U/V at half range.
 * NOTE(review): brace/decl lines elided throughout; code left byte-identical. */
1362 static av_always_inline void
1363 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1364 enum PixelFormat origin)
1367 for (i = 0; i < width; i++) {
1368 unsigned int r_b = input_pixel(&src[i*3+0]);
1369 unsigned int g = input_pixel(&src[i*3+1]);
1370 unsigned int b_r = input_pixel(&src[i*3+2]);
1372 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma at full horizontal resolution (one U/V per input pixel). */
1376 static av_always_inline void
1377 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1378 const uint16_t *src1, const uint16_t *src2,
1379 int width, enum PixelFormat origin)
1383 for (i = 0; i < width; i++) {
1384 int r_b = input_pixel(&src1[i*3+0]);
1385 int g = input_pixel(&src1[i*3+1]);
1386 int b_r = input_pixel(&src1[i*3+2]);
1388 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1389 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma at half horizontal resolution: average two adjacent pixels first. */
1393 static av_always_inline void
1394 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1395 const uint16_t *src1, const uint16_t *src2,
1396 int width, enum PixelFormat origin)
1400 for (i = 0; i < width; i++) {
1401 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1402 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1403 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1405 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1406 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the concrete uint8_t-typed entry points for each 48-bit format;
 * these just cast and forward to the templates above. */
1414 #define rgb48funcs(pattern, BE_LE, origin) \
1415 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1416 int width, uint32_t *unused) \
1418 const uint16_t *src = (const uint16_t *) _src; \
1419 uint16_t *dst = (uint16_t *) _dst; \
1420 rgb48ToY_c_template(dst, src, width, origin); \
1423 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1424 const uint8_t *_src1, const uint8_t *_src2, \
1425 int width, uint32_t *unused) \
1427 const uint16_t *src1 = (const uint16_t *) _src1, \
1428 *src2 = (const uint16_t *) _src2; \
1429 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1430 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1433 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1434 const uint8_t *_src1, const uint8_t *_src2, \
1435 int width, uint32_t *unused) \
1437 const uint16_t *src1 = (const uint16_t *) _src1, \
1438 *src2 = (const uint16_t *) _src2; \
1439 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1440 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1443 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1444 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1445 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1446 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Generic 16/32-bit packed RGB readers. input_pixel loads a native-aligned
 * 32-bit word for the 4-byte formats, else an endian-correct 16-bit load.
 * Each template extracts R/G/B with per-format shift/mask parameters (all
 * compile-time constants at the instantiation sites below) and converts to
 * Y or U/V at scale factor 2^S.
 * NOTE(review): brace/decl lines elided throughout; code left byte-identical. */
1448 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1449 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1450 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1452 static av_always_inline void
1453 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1454 int width, enum PixelFormat origin,
1455 int shr, int shg, int shb, int shp,
1456 int maskr, int maskg, int maskb,
1457 int rsh, int gsh, int bsh, int S)
/* Pre-shift the coefficients so the per-pixel work is three multiply-adds. */
1459 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1460 const unsigned rnd = 33u << (S - 1);
1463 for (i = 0; i < width; i++) {
1464 int px = input_pixel(i) >> shp;
1465 int b = (px & maskb) >> shb;
1466 int g = (px & maskg) >> shg;
1467 int r = (px & maskr) >> shr;
1469 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1473 static av_always_inline void
1474 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1475 const uint8_t *src, int width,
1476 enum PixelFormat origin,
1477 int shr, int shg, int shb, int shp,
1478 int maskr, int maskg, int maskb,
1479 int rsh, int gsh, int bsh, int S)
1481 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1482 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1483 const unsigned rnd = 257u << (S - 1);
1486 for (i = 0; i < width; i++) {
1487 int px = input_pixel(i) >> shp;
1488 int b = (px & maskb) >> shb;
1489 int g = (px & maskg) >> shg;
1490 int r = (px & maskr) >> shr;
1492 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1493 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/* Half-resolution chroma: sum two adjacent pixels before masking. Widened
 * masks (maskX << 1 merged in) accommodate the extra carry bit of the sum;
 * maskgx isolates green via the complement of red|blue. */
1497 static av_always_inline void
1498 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1499 const uint8_t *src, int width,
1500 enum PixelFormat origin,
1501 int shr, int shg, int shb, int shp,
1502 int maskr, int maskg, int maskb,
1503 int rsh, int gsh, int bsh, int S)
1505 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1506 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1507 maskgx = ~(maskr | maskb);
1508 const unsigned rnd = 257u << S;
1511 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1512 for (i = 0; i < width; i++) {
1513 int px0 = input_pixel(2 * i + 0) >> shp;
1514 int px1 = input_pixel(2 * i + 1) >> shp;
1515 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1516 int rb = px0 + px1 - g;
1518 b = (rb & maskb) >> shb;
1519 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1520 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1523 g = (g & maskg) >> shg;
1525 r = (rb & maskr) >> shr;
/* >> (S + 1) folds the /2 of the two-pixel sum into the scale shift. */
1527 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1528 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/* Stamp out concrete ToY/ToUV/ToUV_half readers per 16/32-bit RGB format.
 * All shift/mask arguments are literal constants, so the templates above
 * specialise fully at compile time for each instantiation below.
 * NOTE(review): macro-body closing braces elided in this view. */
1534 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1535 maskg, maskb, rsh, gsh, bsh, S) \
1536 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1537 int width, uint32_t *unused) \
1539 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1540 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1543 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1544 const uint8_t *src, const uint8_t *dummy, \
1545 int width, uint32_t *unused) \
1547 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1548 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1551 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1552 const uint8_t *src, const uint8_t *dummy, \
1553 int width, uint32_t *unused) \
1555 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1556 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1559 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1560 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1561 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1562 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1563 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1564 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1565 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1566 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1567 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1568 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1569 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1570 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/* Alpha-plane extractors and palette/monochrome readers.
 * NOTE(review): the per-pixel statements of abgrToA_c/rgbaToA_c and several
 * loop bodies are elided in this view; code left byte-identical. */
1572 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1575 for (i=0; i<width; i++) {
1580 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1583 for (i=0; i<width; i++) {
/* PAL8 luma: low byte of the 32-bit palette entry is Y. */
1588 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1591 for (i=0; i<width; i++) {
1594 dst[i]= pal[d] & 0xFF;
/* PAL8 chroma: both source pointers must reference the same line. */
1598 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1599 const uint8_t *src1, const uint8_t *src2,
1600 int width, uint32_t *pal)
1603 assert(src1 == src2);
1604 for (i=0; i<width; i++) {
1605 int p= pal[src1[i]];
/* 1 bit/pixel monochrome: expand each of the 8 bits in a byte to 0 or 255.
 * NOTE(review): only width/8 full bytes are handled in the visible code;
 * trailing-pixel handling, if any, is in elided lines. */
1612 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1613 int width, uint32_t *unused)
1616 for (i=0; i<width/8; i++) {
1619 dst[8*i+j]= ((d>>(7-j))&1)*255;
1623 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1624 int width, uint32_t *unused)
1627 for (i=0; i<width/8; i++) {
1630 dst[8*i+j]= ((d>>(7-j))&1)*255;
1634 //FIXME yuy2* can read up to 7 samples too much
/* YUY2 (Y0 U Y1 V) readers: luma at even offsets, U/V interleaved at odd. */
1636 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1640 for (i=0; i<width; i++)
1644 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1645 const uint8_t *src2, int width, uint32_t *unused)
1648 for (i=0; i<width; i++) {
1649 dstU[i]= src1[4*i + 1];
1650 dstV[i]= src1[4*i + 3];
/* Both chroma source pointers must refer to the same packed line. */
1652 assert(src1 == src2);
/* 16-bit byte-swappers used to normalise foreign-endian inputs. */
1655 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1658 const uint16_t *src = (const uint16_t *) _src;
1659 uint16_t *dst = (uint16_t *) _dst;
1660 for (i=0; i<width; i++) {
1661 dst[i] = av_bswap16(src[i]);
1665 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1666 const uint8_t *_src2, int width, uint32_t *unused)
1669 const uint16_t *src1 = (const uint16_t *) _src1,
1670 *src2 = (const uint16_t *) _src2;
1671 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1672 for (i=0; i<width; i++) {
1673 dstU[i] = av_bswap16(src1[i]);
1674 dstV[i] = av_bswap16(src2[i]);
1678 /* This is almost identical to the previous, end exists only because
1679 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY (U Y0 V Y1): chroma at even offsets, luma at odd offsets. */
1680 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1684 for (i=0; i<width; i++)
1688 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1689 const uint8_t *src2, int width, uint32_t *unused)
1692 for (i=0; i<width; i++) {
1693 dstU[i]= src1[4*i + 0];
1694 dstV[i]= src1[4*i + 2];
1696 assert(src1 == src2);
/* De-interleave semi-planar chroma (NV12/NV21) into two planes; the two
 * public wrappers differ only in destination order (U,V vs V,U). */
1699 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1700 const uint8_t *src, int width)
1703 for (i = 0; i < width; i++) {
1704 dst1[i] = src[2*i+0];
1705 dst2[i] = src[2*i+1];
1709 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1710 const uint8_t *src1, const uint8_t *src2,
1711 int width, uint32_t *unused)
1713 nvXXtoUV_c(dstU, dstV, src1, width);
1716 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1717 const uint8_t *src1, const uint8_t *src2,
1718 int width, uint32_t *unused)
1720 nvXXtoUV_c(dstV, dstU, src1, width);
/* 24-bit packed RGB/BGR readers. BGR24 stores bytes B,G,R; RGB24 stores
 * R,G,B — the two sets of functions differ only in component order.
 * The _half_ variants average two adjacent pixels for subsampled chroma
 * (the sum's /2 is folded into the +1 shift of the final >>).
 * NOTE(review): brace/decl lines and the rgb24ToY_c component loads are
 * elided in this view; code left byte-identical. */
1723 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1725 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1726 int width, uint32_t *unused)
1729 for (i=0; i<width; i++) {
1734 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1738 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1739 const uint8_t *src2, int width, uint32_t *unused)
1742 for (i=0; i<width; i++) {
1743 int b= src1[3*i + 0];
1744 int g= src1[3*i + 1];
1745 int r= src1[3*i + 2];
1747 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1748 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1750 assert(src1 == src2);
1753 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1754 const uint8_t *src2, int width, uint32_t *unused)
1757 for (i=0; i<width; i++) {
1758 int b= src1[6*i + 0] + src1[6*i + 3];
1759 int g= src1[6*i + 1] + src1[6*i + 4];
1760 int r= src1[6*i + 2] + src1[6*i + 5];
1762 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1763 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1765 assert(src1 == src2);
1768 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1772 for (i=0; i<width; i++) {
1777 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1781 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1782 const uint8_t *src2, int width, uint32_t *unused)
1786 for (i=0; i<width; i++) {
1787 int r= src1[3*i + 0];
1788 int g= src1[3*i + 1];
1789 int b= src1[3*i + 2];
1791 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1792 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1796 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1797 const uint8_t *src2, int width, uint32_t *unused)
1801 for (i=0; i<width; i++) {
1802 int r= src1[6*i + 0] + src1[6*i + 3];
1803 int g= src1[6*i + 1] + src1[6*i + 4];
1804 int b= src1[6*i + 2] + src1[6*i + 5];
1806 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1807 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/* Planar RGB (GBR plane order: src[0]=G, src[1]=B, src[2]=R per the 16-bit
 * variants' loads) to Y and U/V converters, in 8-bit and 16-bit LE/BE flavours.
 * NOTE(review): the 8-bit variants' component loads are elided in this view;
 * plane order there presumed to match the 16-bit ones — confirm against the
 * full file. Code left byte-identical. */
1811 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1814 for (i = 0; i < width; i++) {
1819 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1823 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1826 const uint16_t **src = (const uint16_t **) _src;
1827 uint16_t *dst = (uint16_t *) _dst;
1828 for (i = 0; i < width; i++) {
1829 int g = AV_RL16(src[0] + i);
1830 int b = AV_RL16(src[1] + i);
1831 int r = AV_RL16(src[2] + i);
1833 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1837 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1840 const uint16_t **src = (const uint16_t **) _src;
1841 uint16_t *dst = (uint16_t *) _dst;
1842 for (i = 0; i < width; i++) {
1843 int g = AV_RB16(src[0] + i);
1844 int b = AV_RB16(src[1] + i);
1845 int r = AV_RB16(src[2] + i);
1847 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1851 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1854 for (i = 0; i < width; i++) {
1859 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1860 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1864 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1867 const uint16_t **src = (const uint16_t **) _src;
1868 uint16_t *dstU = (uint16_t *) _dstU;
1869 uint16_t *dstV = (uint16_t *) _dstV;
1870 for (i = 0; i < width; i++) {
1871 int g = AV_RL16(src[0] + i);
1872 int b = AV_RL16(src[1] + i);
1873 int r = AV_RL16(src[2] + i);
1875 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1876 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1880 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1883 const uint16_t **src = (const uint16_t **) _src;
1884 uint16_t *dstU = (uint16_t *) _dstU;
1885 uint16_t *dstV = (uint16_t *) _dstV;
1886 for (i = 0; i < width; i++) {
1887 int g = AV_RB16(src[0] + i);
1888 int b = AV_RB16(src[1] + i);
1889 int r = AV_RB16(src[2] + i);
1891 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1892 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Generic horizontal scalers: FIR-filter the source line at positions given
 * by filterPos[], writing clamped fixed-point output. The To19 variants emit
 * 19-bit samples into int32 buffers (high-bit-depth pipeline), the To15
 * variants 15-bit samples into int16 buffers.
 * NOTE(review): brace/decl lines (including the `sh` computation in
 * hScale16To19_c, which reads depth_minus1 at line 1903) are partly elided;
 * code left byte-identical. */
1896 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1897 const int16_t *filter,
1898 const int16_t *filterPos, int filterSize)
1901 int32_t *dst = (int32_t *) _dst;
1902 const uint16_t *src = (const uint16_t *) _src;
1903 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1906 for (i = 0; i < dstW; i++) {
1908 int srcPos = filterPos[i];
1911 for (j = 0; j < filterSize; j++) {
1912 val += src[srcPos + j] * filter[filterSize * i + j];
1914 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1915 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1919 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1920 const int16_t *filter,
1921 const int16_t *filterPos, int filterSize)
1924 const uint16_t *src = (const uint16_t *) _src;
1925 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1927 for (i = 0; i < dstW; i++) {
1929 int srcPos = filterPos[i];
1932 for (j = 0; j < filterSize; j++) {
1933 val += src[srcPos + j] * filter[filterSize * i + j];
1935 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1936 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1940 // bilinear / bicubic scaling
/* 8-bit input variants; the FFMIN clamp guards bicubic overshoot. */
1941 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1942 const int16_t *filter, const int16_t *filterPos,
1946 for (i=0; i<dstW; i++) {
1948 int srcPos= filterPos[i];
1950 for (j=0; j<filterSize; j++) {
1951 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1953 //filter += hFilterSize;
1954 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1959 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1960 const int16_t *filter, const int16_t *filterPos,
1964 int32_t *dst = (int32_t *) _dst;
1965 for (i=0; i<dstW; i++) {
1967 int srcPos= filterPos[i];
1969 for (j=0; j<filterSize; j++) {
1970 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1972 //filter += hFilterSize;
1973 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1978 //FIXME all pal and rgb srcFormats could do this convertion as well
1979 //FIXME all scalers more complex than bilinear could do half of this transform
/* Limited-range (MPEG) <-> full-range (JPEG) level conversion on the 15-bit
 * intermediate samples. The FFMIN caps avoid overflow of the fixed-point
 * multiply. The *16_c variants operate on the 19-bit/int32 pipeline — same
 * constants scaled by <<4 (inputs) with shifts unchanged. */
1980 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1983 for (i = 0; i < width; i++) {
1984 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1985 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1988 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1991 for (i = 0; i < width; i++) {
1992 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1993 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1996 static void lumRangeToJpeg_c(int16_t *dst, int width)
1999 for (i = 0; i < width; i++)
2000 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
2002 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2005 for (i = 0; i < width; i++)
2006 dst[i] = (dst[i]*14071 + 33561947)>>14;
2009 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2012 int32_t *dstU = (int32_t *) _dstU;
2013 int32_t *dstV = (int32_t *) _dstV;
2014 for (i = 0; i < width; i++) {
2015 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2016 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2019 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2022 int32_t *dstU = (int32_t *) _dstU;
2023 int32_t *dstV = (int32_t *) _dstV;
2024 for (i = 0; i < width; i++) {
2025 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2026 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2029 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2032 int32_t *dst = (int32_t *) _dst;
2033 for (i = 0; i < width; i++)
2034 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2036 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2039 int32_t *dst = (int32_t *) _dst;
2040 for (i = 0; i < width; i++)
2041 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/* Fast bilinear horizontal luma scale: 16.16 fixed-point source position
 * (xpos/xInc); xalpha is the fractional weight reduced to 7 bits.
 * NOTE(review): the xpos increment and closing braces are elided in this
 * view; code left byte-identical. */
2044 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2045 const uint8_t *src, int srcW, int xInc)
2048 unsigned int xpos=0;
2049 for (i=0;i<dstWidth;i++) {
2050 register unsigned int xx=xpos>>16;
2051 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2052 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2057 // *** horizontal scale Y line to temp buffer
/* Orchestrates one luma (or alpha) line: optional input conversion to YV12
 * (packed formats or planar RGB) into formatConvBuffer, then either the
 * generic hyScale FIR or the fast bilinear path, then optional range
 * conversion (never for alpha — convertRange is NULL then). */
2058 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2059 const uint8_t *src_in[4], int srcW, int xInc,
2060 const int16_t *hLumFilter,
2061 const int16_t *hLumFilterPos, int hLumFilterSize,
2062 uint8_t *formatConvBuffer,
2063 uint32_t *pal, int isAlpha)
2065 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2066 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2067 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2070 toYV12(formatConvBuffer, src, srcW, pal);
2071 src= formatConvBuffer;
2072 } else if (c->readLumPlanar && !isAlpha) {
2073 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2074 src = formatConvBuffer;
2077 if (!c->hyscale_fast) {
2078 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2079 } else { // fast bilinear upscale / crap downscale
2080 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2084 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale: same 16.16 stepping as the luma
 * version, applied to both chroma planes; (xalpha^127) is the complementary
 * weight (~= 127 - xalpha). */
2087 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2088 int dstWidth, const uint8_t *src1,
2089 const uint8_t *src2, int srcW, int xInc)
2092 unsigned int xpos=0;
2093 for (i=0;i<dstWidth;i++) {
2094 register unsigned int xx=xpos>>16;
2095 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2096 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2097 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Chroma counterpart of hyscale: optional packed/planar input conversion
 * (buf2 is a second scratch area placed after the first, 16-byte aligned),
 * then generic FIR or fast bilinear, then optional chroma range conversion. */
2102 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2103 const uint8_t *src_in[4],
2104 int srcW, int xInc, const int16_t *hChrFilter,
2105 const int16_t *hChrFilterPos, int hChrFilterSize,
2106 uint8_t *formatConvBuffer, uint32_t *pal)
2108 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2110 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2111 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2112 src1= formatConvBuffer;
2114 } else if (c->readChrPlanar) {
2115 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2116 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2117 src1= formatConvBuffer;
2121 if (!c->hcscale_fast) {
2122 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2123 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2124 } else { // fast bilinear upscale / crap downscale
2125 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2128 if (c->chrConvertRange)
2129 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the C implementations of the output stage (vertical scaling +
 * pixel packing) that match c->dstFormat, returning them through the
 * out-parameters.
 *
 * @param c            scaler context; only dstFormat and flags are read here
 * @param yuv2plane1   planar store for a single (1-tap) source line
 * @param yuv2planeX   planar store with general vertical filtering
 * @param yuv2nv12cX   interleaved-chroma store (set only for NV12/NV21)
 * @param yuv2packed1  packed store, 1 vertical tap
 * @param yuv2packed2  packed store, 2-tap (bilinear) vertical filter
 * @param yuv2packedX  packed store, general vertical filter
 */
2132 static av_always_inline void
2133 find_c_packed_planar_out_funcs(SwsContext *c,
2134 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2135 yuv2interleavedX_fn *yuv2nv12cX,
2136 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2137 yuv2packedX_fn *yuv2packedX)
2139 enum PixelFormat dstFormat = c->dstFormat;
/* Planar writers: chosen by destination component depth and endianness. */
2141 if (is16BPS(dstFormat)) {
2142 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2143 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2144 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means a 9-bit component; otherwise it is 10-bit. */
2145 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2146 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2147 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2149 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2150 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
/* 8-bit planar default; NV12/NV21 additionally get the interleaved
 * chroma writer used instead of the two separate chroma planes. */
2153 *yuv2plane1 = yuv2plane1_8_c;
2154 *yuv2planeX = yuv2planeX_8_c;
2155 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2156 *yuv2nv12cX = yuv2nv12cX_c;
/* Packed writers. With SWS_FULL_CHR_H_INT (full horizontal chroma
 * interpolation) only the general X variant exists per format; the
 * CONFIG_SWSCALE_ALPHA / CONFIG_SMALL preprocessor blocks pick between
 * the alpha-aware and the opaque ("x") variants. */
2159 if(c->flags & SWS_FULL_CHR_H_INT) {
2160 switch (dstFormat) {
2163 *yuv2packedX = yuv2rgba32_full_X_c;
2165 #if CONFIG_SWSCALE_ALPHA
2167 *yuv2packedX = yuv2rgba32_full_X_c;
2169 #endif /* CONFIG_SWSCALE_ALPHA */
2171 *yuv2packedX = yuv2rgbx32_full_X_c;
2173 #endif /* !CONFIG_SMALL */
2177 *yuv2packedX = yuv2argb32_full_X_c;
2179 #if CONFIG_SWSCALE_ALPHA
2181 *yuv2packedX = yuv2argb32_full_X_c;
2183 #endif /* CONFIG_SWSCALE_ALPHA */
2185 *yuv2packedX = yuv2xrgb32_full_X_c;
2187 #endif /* !CONFIG_SMALL */
2191 *yuv2packedX = yuv2bgra32_full_X_c;
2193 #if CONFIG_SWSCALE_ALPHA
2195 *yuv2packedX = yuv2bgra32_full_X_c;
2197 #endif /* CONFIG_SWSCALE_ALPHA */
2199 *yuv2packedX = yuv2bgrx32_full_X_c;
2201 #endif /* !CONFIG_SMALL */
2205 *yuv2packedX = yuv2abgr32_full_X_c;
2207 #if CONFIG_SWSCALE_ALPHA
2209 *yuv2packedX = yuv2abgr32_full_X_c;
2211 #endif /* CONFIG_SWSCALE_ALPHA */
2213 *yuv2packedX = yuv2xbgr32_full_X_c;
2215 #endif /* !CONFIG_SMALL */
2218 *yuv2packedX = yuv2rgb24_full_X_c;
2221 *yuv2packedX = yuv2bgr24_full_X_c;
/* Without full chroma interpolation each RGB format gets the complete
 * 1-tap / 2-tap / general-filter trio of output functions. */
2225 switch (dstFormat) {
2226 case PIX_FMT_RGB48LE:
2227 *yuv2packed1 = yuv2rgb48le_1_c;
2228 *yuv2packed2 = yuv2rgb48le_2_c;
2229 *yuv2packedX = yuv2rgb48le_X_c;
2231 case PIX_FMT_RGB48BE:
2232 *yuv2packed1 = yuv2rgb48be_1_c;
2233 *yuv2packed2 = yuv2rgb48be_2_c;
2234 *yuv2packedX = yuv2rgb48be_X_c;
2236 case PIX_FMT_BGR48LE:
2237 *yuv2packed1 = yuv2bgr48le_1_c;
2238 *yuv2packed2 = yuv2bgr48le_2_c;
2239 *yuv2packedX = yuv2bgr48le_X_c;
2241 case PIX_FMT_BGR48BE:
2242 *yuv2packed1 = yuv2bgr48be_1_c;
2243 *yuv2packed2 = yuv2bgr48be_2_c;
2244 *yuv2packedX = yuv2bgr48be_X_c;
2249 *yuv2packed1 = yuv2rgb32_1_c;
2250 *yuv2packed2 = yuv2rgb32_2_c;
2251 *yuv2packedX = yuv2rgb32_X_c;
2253 #if CONFIG_SWSCALE_ALPHA
2255 *yuv2packed1 = yuv2rgba32_1_c;
2256 *yuv2packed2 = yuv2rgba32_2_c;
2257 *yuv2packedX = yuv2rgba32_X_c;
2259 #endif /* CONFIG_SWSCALE_ALPHA */
2261 *yuv2packed1 = yuv2rgbx32_1_c;
2262 *yuv2packed2 = yuv2rgbx32_2_c;
2263 *yuv2packedX = yuv2rgbx32_X_c;
2265 #endif /* !CONFIG_SMALL */
/* "_1" family: 32-bit formats with the alpha/padding byte first. */
2267 case PIX_FMT_RGB32_1:
2268 case PIX_FMT_BGR32_1:
2270 *yuv2packed1 = yuv2rgb32_1_1_c;
2271 *yuv2packed2 = yuv2rgb32_1_2_c;
2272 *yuv2packedX = yuv2rgb32_1_X_c;
2274 #if CONFIG_SWSCALE_ALPHA
2276 *yuv2packed1 = yuv2rgba32_1_1_c;
2277 *yuv2packed2 = yuv2rgba32_1_2_c;
2278 *yuv2packedX = yuv2rgba32_1_X_c;
2280 #endif /* CONFIG_SWSCALE_ALPHA */
2282 *yuv2packed1 = yuv2rgbx32_1_1_c;
2283 *yuv2packed2 = yuv2rgbx32_1_2_c;
2284 *yuv2packedX = yuv2rgbx32_1_X_c;
2286 #endif /* !CONFIG_SMALL */
2289 *yuv2packed1 = yuv2rgb24_1_c;
2290 *yuv2packed2 = yuv2rgb24_2_c;
2291 *yuv2packedX = yuv2rgb24_X_c;
2294 *yuv2packed1 = yuv2bgr24_1_c;
2295 *yuv2packed2 = yuv2bgr24_2_c;
2296 *yuv2packedX = yuv2bgr24_X_c;
/* The 16/15/12/8/4-bit writers handle both RGB and BGR component
 * orders (and both endiannesses) internally. */
2298 case PIX_FMT_RGB565LE:
2299 case PIX_FMT_RGB565BE:
2300 case PIX_FMT_BGR565LE:
2301 case PIX_FMT_BGR565BE:
2302 *yuv2packed1 = yuv2rgb16_1_c;
2303 *yuv2packed2 = yuv2rgb16_2_c;
2304 *yuv2packedX = yuv2rgb16_X_c;
2306 case PIX_FMT_RGB555LE:
2307 case PIX_FMT_RGB555BE:
2308 case PIX_FMT_BGR555LE:
2309 case PIX_FMT_BGR555BE:
2310 *yuv2packed1 = yuv2rgb15_1_c;
2311 *yuv2packed2 = yuv2rgb15_2_c;
2312 *yuv2packedX = yuv2rgb15_X_c;
2314 case PIX_FMT_RGB444LE:
2315 case PIX_FMT_RGB444BE:
2316 case PIX_FMT_BGR444LE:
2317 case PIX_FMT_BGR444BE:
2318 *yuv2packed1 = yuv2rgb12_1_c;
2319 *yuv2packed2 = yuv2rgb12_2_c;
2320 *yuv2packedX = yuv2rgb12_X_c;
2324 *yuv2packed1 = yuv2rgb8_1_c;
2325 *yuv2packed2 = yuv2rgb8_2_c;
2326 *yuv2packedX = yuv2rgb8_X_c;
2330 *yuv2packed1 = yuv2rgb4_1_c;
2331 *yuv2packed2 = yuv2rgb4_2_c;
2332 *yuv2packedX = yuv2rgb4_X_c;
2334 case PIX_FMT_RGB4_BYTE:
2335 case PIX_FMT_BGR4_BYTE:
2336 *yuv2packed1 = yuv2rgb4b_1_c;
2337 *yuv2packed2 = yuv2rgb4b_2_c;
2338 *yuv2packedX = yuv2rgb4b_X_c;
/* Non-RGB packed outputs: gray, monochrome and 4:2:2 interleaved YUV. */
2342 switch (dstFormat) {
2343 case PIX_FMT_GRAY16BE:
2344 *yuv2packed1 = yuv2gray16BE_1_c;
2345 *yuv2packed2 = yuv2gray16BE_2_c;
2346 *yuv2packedX = yuv2gray16BE_X_c;
2348 case PIX_FMT_GRAY16LE:
2349 *yuv2packed1 = yuv2gray16LE_1_c;
2350 *yuv2packed2 = yuv2gray16LE_2_c;
2351 *yuv2packedX = yuv2gray16LE_X_c;
2353 case PIX_FMT_MONOWHITE:
2354 *yuv2packed1 = yuv2monowhite_1_c;
2355 *yuv2packed2 = yuv2monowhite_2_c;
2356 *yuv2packedX = yuv2monowhite_X_c;
2358 case PIX_FMT_MONOBLACK:
2359 *yuv2packed1 = yuv2monoblack_1_c;
2360 *yuv2packed2 = yuv2monoblack_2_c;
2361 *yuv2packedX = yuv2monoblack_X_c;
2363 case PIX_FMT_YUYV422:
2364 *yuv2packed1 = yuv2yuyv422_1_c;
2365 *yuv2packed2 = yuv2yuyv422_2_c;
2366 *yuv2packedX = yuv2yuyv422_X_c;
2368 case PIX_FMT_UYVY422:
2369 *yuv2packed1 = yuv2uyvy422_1_c;
2370 *yuv2packed2 = yuv2uyvy422_2_c;
2371 *yuv2packedX = yuv2uyvy422_X_c;
/* Compile-time switch for the swScale() slice/ring-buffer debug traces. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Debug-trace macro used inside swScale() (relies on a SwsContext *c in
 * the caller's scope). Wrapped in do { } while (0) so the expansion is a
 * single statement: the original bare "if (...) av_log(...)" form could
 * silently capture a following "else" at the call site. */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
/**
 * Core C slice-scaling loop.
 *
 * Consumes srcSliceH input lines starting at line srcSliceY, horizontally
 * scales them into the luma/chroma ring buffers, then runs the vertical
 * scaler/output stage for every destination line whose complete set of
 * input lines is available.
 *
 * @return the number of destination lines written for this slice
 *         (dstY - lastDstY; 0 if the slice only filled the buffers)
 */
2379 static int swScale(SwsContext *c, const uint8_t* src[],
2380 int srcStride[], int srcSliceY,
2381 int srcSliceH, uint8_t* dst[], int dstStride[])
2383 /* load a few things into local vars to make the code more readable? and faster */
2384 const int srcW= c->srcW;
2385 const int dstW= c->dstW;
2386 const int dstH= c->dstH;
2387 const int chrDstW= c->chrDstW;
2388 const int chrSrcW= c->chrSrcW;
2389 const int lumXInc= c->lumXInc;
2390 const int chrXInc= c->chrXInc;
2391 const enum PixelFormat dstFormat= c->dstFormat;
2392 const int flags= c->flags;
2393 int16_t *vLumFilterPos= c->vLumFilterPos;
2394 int16_t *vChrFilterPos= c->vChrFilterPos;
2395 int16_t *hLumFilterPos= c->hLumFilterPos;
2396 int16_t *hChrFilterPos= c->hChrFilterPos;
2397 int16_t *vLumFilter= c->vLumFilter;
2398 int16_t *vChrFilter= c->vChrFilter;
2399 int16_t *hLumFilter= c->hLumFilter;
2400 int16_t *hChrFilter= c->hChrFilter;
2401 int32_t *lumMmxFilter= c->lumMmxFilter;
2402 int32_t *chrMmxFilter= c->chrMmxFilter;
2403 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2404 const int vLumFilterSize= c->vLumFilterSize;
2405 const int vChrFilterSize= c->vChrFilterSize;
2406 const int hLumFilterSize= c->hLumFilterSize;
2407 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally scaled lines awaiting vertical scaling. */
2408 int16_t **lumPixBuf= c->lumPixBuf;
2409 int16_t **chrUPixBuf= c->chrUPixBuf;
2410 int16_t **chrVPixBuf= c->chrVPixBuf;
2411 int16_t **alpPixBuf= c->alpPixBuf;
2412 const int vLumBufSize= c->vLumBufSize;
2413 const int vChrBufSize= c->vChrBufSize;
2414 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma geometry of the slice; -((-x) >> n) is ceil(x / 2^n). */
2415 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2416 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2418 uint32_t *pal=c->pal_yuv;
2419 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2420 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2421 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2422 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2423 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2424 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2425 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2427 /* vars which will change and which we need to store back in the context */
2429 int lumBufIndex= c->lumBufIndex;
2430 int chrBufIndex= c->chrBufIndex;
2431 int lastInLumBuf= c->lastInLumBuf;
2432 int lastInChrBuf= c->lastInChrBuf;
2434 if (isPacked(c->srcFormat)) {
2442 srcStride[3]= srcStride[0];
2444 srcStride[1]<<= c->vChrDrop;
2445 srcStride[2]<<= c->vChrDrop;
2447 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2448 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2449 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2450 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2451 srcSliceY, srcSliceH, dstY, dstH);
2452 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2453 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2455 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2456 static int warnedAlready=0; //FIXME move this into the context perhaps
2457 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2458 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2459 "         ->cannot do aligned memory accesses anymore\n");
2464 /* Note the user might start scaling the picture in the middle so this
2465 will not get executed. This is not really intended but works
2466 currently, so people might do it. */
2467 if (srcSliceY ==0) {
2475 if (!should_dither) {
2476 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main per-destination-line loop. */
2480 for (;dstY < dstH; dstY++) {
2481 const int chrDstY= dstY>>c->chrDstVSubSample;
2482 uint8_t *dest[4] = {
2483 dst[0] + dstStride[0] * dstY,
2484 dst[1] + dstStride[1] * chrDstY,
2485 dst[2] + dstStride[2] * chrDstY,
2486 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2489 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2490 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2491 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2493 // Last line needed as input
2494 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2495 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2496 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2499 //handle holes (FAST_BILINEAR & weird filters)
2500 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2501 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2502 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2503 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2505 DEBUG_BUFFERS("dstY: %d\n", dstY);
2506 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2507 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2508 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2509 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2511 // Do we have enough lines in this slice to output the dstY line
2512 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2514 if (!enough_lines) {
/* Not enough input yet: just buffer everything this slice provides. */
2515 lastLumSrcY = srcSliceY + srcSliceH - 1;
2516 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2517 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2518 lastLumSrcY, lastChrSrcY);
2521 //Do horizontal scaling
2522 while(lastInLumBuf < lastLumSrcY) {
2523 const uint8_t *src1[4] = {
2524 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2525 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2526 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2527 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2530 assert(lumBufIndex < 2*vLumBufSize);
2531 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2532 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2533 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2534 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha is scaled with the luma filter when an alpha plane exists. */
2537 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2538 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2539 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2543 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2544 lumBufIndex, lastInLumBuf);
2546 while(lastInChrBuf < lastChrSrcY) {
2547 const uint8_t *src1[4] = {
2548 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2549 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2550 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2551 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2554 assert(chrBufIndex < 2*vChrBufSize);
2555 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2556 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2557 //FIXME replace parameters through context struct (some at least)
/* Gray/mono destinations skip chroma scaling (needs_hcscale unset). */
2559 if (c->needs_hcscale)
2560 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2561 chrDstW, src1, chrSrcW, chrXInc,
2562 hChrFilter, hChrFilterPos, hChrFilterSize,
2563 formatConvBuffer, pal);
2565 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2566 chrBufIndex, lastInChrBuf);
2568 //wrap buf index around to stay inside the ring buffer
2569 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2570 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2572 break; //we can't output a dstY line so let's try with the next slice
2575 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2577 if (should_dither) {
2578 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2579 c->lumDither8 = dither_8x8_128[dstY & 7];
2581 if (dstY >= dstH-2) {
2582 // hmm looks like we can't use MMX here without overwriting this array's tail
2583 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2584 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Build pointers into the ring buffers for the vertical filter taps. */
2588 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2589 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2590 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2591 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Vertical filter reaching over the top/bottom picture edge:
 * replicate the first/last available line into a scratch pointer list. */
2593 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2594 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2595 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2596 for (i = 0; i < neg; i++)
2597 tmpY[i] = lumSrcPtr[neg];
2598 for ( ; i < end; i++)
2599 tmpY[i] = lumSrcPtr[i];
2600 for ( ; i < vLumFilterSize; i++)
2601 tmpY[i] = tmpY[i-1];
2605 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2606 for (i = 0; i < neg; i++)
2607 tmpA[i] = alpSrcPtr[neg];
2608 for ( ; i < end; i++)
2609 tmpA[i] = alpSrcPtr[i];
2610 for ( ; i < vLumFilterSize; i++)
2611 tmpA[i] = tmpA[i - 1];
2615 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2616 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2617 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2618 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2619 for (i = 0; i < neg; i++) {
2620 tmpU[i] = chrUSrcPtr[neg];
2621 tmpV[i] = chrVSrcPtr[neg];
2623 for ( ; i < end; i++) {
2624 tmpU[i] = chrUSrcPtr[i];
2625 tmpV[i] = chrVSrcPtr[i];
2627 for ( ; i < vChrFilterSize; i++) {
2628 tmpU[i] = tmpU[i - 1];
2629 tmpV[i] = tmpV[i - 1];
/* Output stage: planar YUV/gray destinations ... */
2635 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2636 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2638 if (vLumFilterSize == 1) {
2639 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2641 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2642 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* Chroma is only written on non-skipped (subsample-aligned) rows. */
2645 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2647 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2648 } else if (vChrFilterSize == 1) {
2649 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2650 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2652 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2653 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2654 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2655 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2659 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2660 if (vLumFilterSize == 1) {
2661 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2663 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2664 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* ... otherwise a packed destination: pick 1-tap / bilinear / general. */
2668 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2669 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2670 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2671 int chrAlpha = vChrFilter[2 * dstY + 1];
2672 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2673 alpPixBuf ? *alpSrcPtr : NULL,
2674 dest[0], dstW, chrAlpha, dstY);
2675 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2676 int lumAlpha = vLumFilter[2 * dstY + 1];
2677 int chrAlpha = vChrFilter[2 * dstY + 1];
2679 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2681 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2682 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2683 alpPixBuf ? alpSrcPtr : NULL,
2684 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2685 } else { //general RGB
2686 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2687 lumSrcPtr, vLumFilterSize,
2688 vChrFilter + dstY * vChrFilterSize,
2689 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2690 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA destination but no alpha input: fill the alpha plane opaque. */
2696 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2697 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* NOTE(review): sfence after MMX2 paths — presumably flushes
 * non-temporal stores issued by the assembly output code; verify. */
2700 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2701 __asm__ volatile("sfence":::"memory");
2705 /* store changed local vars back in the context */
2707 c->lumBufIndex= lumBufIndex;
2708 c->chrBufIndex= chrBufIndex;
2709 c->lastInLumBuf= lastInLumBuf;
2710 c->lastInChrBuf= lastInChrBuf;
2712 return dstY - lastDstY;
/**
 * Initialize the C function pointers of a SwsContext: the output-stage
 * writers (via find_c_packed_planar_out_funcs), the per-source-format
 * input unpackers (lumToYV12 / chrToYV12 / alpToYV12 / readLumPlanar /
 * readChrPlanar), the horizontal scalers, and range conversion.
 */
2715 static av_cold void sws_init_swScale_c(SwsContext *c)
2717 enum PixelFormat srcFormat = c->srcFormat;
2719 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2720 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input unpackers: convert the source's chroma into the planar
 * intermediate representation. */
2723 c->chrToYV12 = NULL;
2725 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2726 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2727 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2728 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2732 case PIX_FMT_BGR4_BYTE:
2733 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Planar RGB sources are read via the readChrPlanar callback instead. */
2734 case PIX_FMT_GBRP9LE:
2735 case PIX_FMT_GBRP10LE:
2736 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2737 case PIX_FMT_GBRP9BE:
2738 case PIX_FMT_GBRP10BE:
2739 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2740 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* NOTE(review): both the LE and the BE lists below map to bswap16UV_c;
 * an endianness preprocessor guard is presumably selecting which list
 * is compiled on a given host — confirm against the full file. */
2742 case PIX_FMT_YUV444P9LE:
2743 case PIX_FMT_YUV422P9LE:
2744 case PIX_FMT_YUV420P9LE:
2745 case PIX_FMT_YUV422P10LE:
2746 case PIX_FMT_YUV444P10LE:
2747 case PIX_FMT_YUV420P10LE:
2748 case PIX_FMT_YUV420P16LE:
2749 case PIX_FMT_YUV422P16LE:
2750 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2752 case PIX_FMT_YUV444P9BE:
2753 case PIX_FMT_YUV422P9BE:
2754 case PIX_FMT_YUV420P9BE:
2755 case PIX_FMT_YUV444P10BE:
2756 case PIX_FMT_YUV422P10BE:
2757 case PIX_FMT_YUV420P10BE:
2758 case PIX_FMT_YUV420P16BE:
2759 case PIX_FMT_YUV422P16BE:
2760 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* When chroma is horizontally subsampled, use the _half_ unpackers that
 * average adjacent pixels while converting RGB -> UV. */
2763 if (c->chrSrcHSubSample) {
2765 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2766 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2767 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2768 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2769 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2770 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2771 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2772 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2773 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2774 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2775 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2776 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2777 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2778 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2779 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2780 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2781 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2782 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2786 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2787 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2788 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2789 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2790 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2791 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2792 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2793 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2794 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2795 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2796 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2797 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2798 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2799 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2800 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2801 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2802 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2803 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma (and alpha) input unpackers. */
2807 c->lumToYV12 = NULL;
2808 c->alpToYV12 = NULL;
2809 switch (srcFormat) {
2810 case PIX_FMT_GBRP9LE:
2811 case PIX_FMT_GBRP10LE:
2812 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2813 case PIX_FMT_GBRP9BE:
2814 case PIX_FMT_GBRP10BE:
2815 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2816 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2818 case PIX_FMT_YUV444P9LE:
2819 case PIX_FMT_YUV422P9LE:
2820 case PIX_FMT_YUV420P9LE:
2821 case PIX_FMT_YUV444P10LE:
2822 case PIX_FMT_YUV422P10LE:
2823 case PIX_FMT_YUV420P10LE:
2824 case PIX_FMT_YUV420P16LE:
2825 case PIX_FMT_YUV422P16LE:
2826 case PIX_FMT_YUV444P16LE:
2827 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2829 case PIX_FMT_YUV444P9BE:
2830 case PIX_FMT_YUV422P9BE:
2831 case PIX_FMT_YUV420P9BE:
2832 case PIX_FMT_YUV444P10BE:
2833 case PIX_FMT_YUV422P10BE:
2834 case PIX_FMT_YUV420P10BE:
2835 case PIX_FMT_YUV420P16BE:
2836 case PIX_FMT_YUV422P16BE:
2837 case PIX_FMT_YUV444P16BE:
2838 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2840 case PIX_FMT_YUYV422 :
2841 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2842 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2843 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2844 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2845 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2846 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2847 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2848 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2849 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2850 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2851 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2852 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2856 case PIX_FMT_BGR4_BYTE:
2857 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2858 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2859 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2860 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2861 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2862 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2863 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2864 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2865 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2866 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2867 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha unpacker for formats carrying an alpha channel. */
2870 switch (srcFormat) {
2872 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2874 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2875 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scalers: chosen by source/destination bit depth; the fast
 * bilinear path is only available for 8-bit input at <=10-bit output. */
2879 if (c->srcBpc == 8) {
2880 if (c->dstBpc <= 10) {
2881 c->hyScale = c->hcScale = hScale8To15_c;
2882 if (c->flags & SWS_FAST_BILINEAR) {
2883 c->hyscale_fast = hyscale_fast_c;
2884 c->hcscale_fast = hcscale_fast_c;
2887 c->hyScale = c->hcScale = hScale8To19_c;
2890 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range (limited <-> full / JPEG) conversion, only for non-RGB
 * destinations; the 16-bit variants serve >10-bit pipelines. */
2893 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2894 if (c->dstBpc <= 10) {
2896 c->lumConvertRange = lumRangeFromJpeg_c;
2897 c->chrConvertRange = chrRangeFromJpeg_c;
2899 c->lumConvertRange = lumRangeToJpeg_c;
2900 c->chrConvertRange = chrRangeToJpeg_c;
2904 c->lumConvertRange = lumRangeFromJpeg16_c;
2905 c->chrConvertRange = chrRangeFromJpeg16_c;
2907 c->lumConvertRange = lumRangeToJpeg16_c;
2908 c->chrConvertRange = chrRangeToJpeg16_c;
/* Chroma scaling is skipped entirely for gray/mono conversions. */
2913 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2914 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2915 c->needs_hcscale = 1;
2918 SwsFunc ff_getSwsFunc(SwsContext *c)
2920 sws_init_swScale_c(c);
2923 ff_sws_init_swScale_mmx(c);
2925 ff_sws_init_swScale_altivec(c);