2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
// Fixed-point RGB->YUV coefficients with RGB2YUV_SHIFT fractional bits.
// The 219/255 (luma) and 224/255 (chroma) factors rescale full-range RGB to
// limited-range YCbCr; 0.299/0.587/0.114 etc. match the ITU601 row of
// rgb2yuv_table below.
// NOTE(review): each line starts with a stray decimal number — these look like
// original-file line numbers embedded by extraction; confirm against pristine
// source before building.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
// Per-colorspace RGB->YUV coefficient rows (9 doubles each: G,B,R weights for
// Y, then for U, then for V — see the BY/GU/RV macros above for the ordering).
// Row index presumably matches the SWS_CS_* colorspace selector — TODO confirm
// against swscale_internal.h.
// NOTE(review): the closing "};" of this array is not visible in this
// extraction.
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
// Ordered-dither matrices used when reducing bit depth on output; 8-byte
// aligned so a row can be loaded as one 64-bit value.
// NOTE(review): the closing "};" of each array below is not visible in this
// extraction.
// 2x2 pattern, entries 0..3.
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 pattern, entries 0..6 (step 2).
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 pattern, entries 0..15 (non-static: referenced outside this file).
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 pattern, entries 0..31.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 pattern, entries 0..72.
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 pattern, entries up to 217; used by the mono output paths below
// (indexed as dither_8x8_220[y&7]).
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
// NOTE(review): dither_8x8_220 is (re)defined three more times below with
// gamma-compensated values. In the upstream file these alternatives are
// selected by #if/#elif preprocessor lines that are missing from this
// extraction (only one definition is ever compiled); the closing "};" lines
// are also not visible.
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
// dithers[n]: 8x8 dither matrix whose entries fit in n+1 bits, one per
// possible amount of depth reduction.
// NOTE(review): the inner "{"/"}," brace lines separating the eight 8x8
// sub-arrays, and the final "};", are not visible in this extraction — the
// rows below run together.
197 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
199 { 0, 1, 0, 1, 0, 1, 0, 1,},
200 { 1, 0, 1, 0, 1, 0, 1, 0,},
201 { 0, 1, 0, 1, 0, 1, 0, 1,},
202 { 1, 0, 1, 0, 1, 0, 1, 0,},
203 { 0, 1, 0, 1, 0, 1, 0, 1,},
204 { 1, 0, 1, 0, 1, 0, 1, 0,},
205 { 0, 1, 0, 1, 0, 1, 0, 1,},
206 { 1, 0, 1, 0, 1, 0, 1, 0,},
208 { 1, 2, 1, 2, 1, 2, 1, 2,},
209 { 3, 0, 3, 0, 3, 0, 3, 0,},
210 { 1, 2, 1, 2, 1, 2, 1, 2,},
211 { 3, 0, 3, 0, 3, 0, 3, 0,},
212 { 1, 2, 1, 2, 1, 2, 1, 2,},
213 { 3, 0, 3, 0, 3, 0, 3, 0,},
214 { 1, 2, 1, 2, 1, 2, 1, 2,},
215 { 3, 0, 3, 0, 3, 0, 3, 0,},
217 { 2, 4, 3, 5, 2, 4, 3, 5,},
218 { 6, 0, 7, 1, 6, 0, 7, 1,},
219 { 3, 5, 2, 4, 3, 5, 2, 4,},
220 { 7, 1, 6, 0, 7, 1, 6, 0,},
221 { 2, 4, 3, 5, 2, 4, 3, 5,},
222 { 6, 0, 7, 1, 6, 0, 7, 1,},
223 { 3, 5, 2, 4, 3, 5, 2, 4,},
224 { 7, 1, 6, 0, 7, 1, 6, 0,},
226 { 4, 8, 7, 11, 4, 8, 7, 11,},
227 { 12, 0, 15, 3, 12, 0, 15, 3,},
228 { 6, 10, 5, 9, 6, 10, 5, 9,},
229 { 14, 2, 13, 1, 14, 2, 13, 1,},
230 { 4, 8, 7, 11, 4, 8, 7, 11,},
231 { 12, 0, 15, 3, 12, 0, 15, 3,},
232 { 6, 10, 5, 9, 6, 10, 5, 9,},
233 { 14, 2, 13, 1, 14, 2, 13, 1,},
235 { 9, 17, 15, 23, 8, 16, 14, 22,},
236 { 25, 1, 31, 7, 24, 0, 30, 6,},
237 { 13, 21, 11, 19, 12, 20, 10, 18,},
238 { 29, 5, 27, 3, 28, 4, 26, 2,},
239 { 8, 16, 14, 22, 9, 17, 15, 23,},
240 { 24, 0, 30, 6, 25, 1, 31, 7,},
241 { 12, 20, 10, 18, 13, 21, 11, 19,},
242 { 28, 4, 26, 2, 29, 5, 27, 3,},
244 { 18, 34, 30, 46, 17, 33, 29, 45,},
245 { 50, 2, 62, 14, 49, 1, 61, 13,},
246 { 26, 42, 22, 38, 25, 41, 21, 37,},
247 { 58, 10, 54, 6, 57, 9, 53, 5,},
248 { 16, 32, 28, 44, 19, 35, 31, 47,},
249 { 48, 0, 60, 12, 51, 3, 63, 15,},
250 { 24, 40, 20, 36, 27, 43, 23, 39,},
251 { 56, 8, 52, 4, 59, 11, 55, 7,},
253 { 18, 34, 30, 46, 17, 33, 29, 45,},
254 { 50, 2, 62, 14, 49, 1, 61, 13,},
255 { 26, 42, 22, 38, 25, 41, 21, 37,},
256 { 58, 10, 54, 6, 57, 9, 53, 5,},
257 { 16, 32, 28, 44, 19, 35, 31, 47,},
258 { 48, 0, 60, 12, 51, 3, 63, 15,},
259 { 24, 40, 20, 36, 27, 43, 23, 39,},
260 { 56, 8, 52, 4, 59, 11, 55, 7,},
262 { 36, 68, 60, 92, 34, 66, 58, 90,},
263 { 100, 4,124, 28, 98, 2,122, 26,},
264 { 52, 84, 44, 76, 50, 82, 42, 74,},
265 { 116, 20,108, 12,114, 18,106, 10,},
266 { 32, 64, 56, 88, 38, 70, 62, 94,},
267 { 96, 0,120, 24,102, 6,126, 30,},
268 { 48, 80, 40, 72, 54, 86, 46, 78,},
269 { 112, 16,104, 8,118, 22,110, 14,},
// flat64: an all-64 row, presumably used as a "no dither" pattern — TODO
// confirm against callers.
272 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
// dither_scale: per (source-depth, dest-depth) scaling factors for dithered
// depth reduction. Exact indexing convention is not visible here — verify
// against the code that consumes this table.
// NOTE(review): the closing "};" is not visible in this extraction.
274 const uint16_t dither_scale[15][16]={
275 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
276 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
277 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
278 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
279 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
280 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
281 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
282 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
283 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
284 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
285 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
286 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
287 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
288 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
289 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
/**
 * Vertical scaling to planar YUV with >8 bits per component.
 *
 * For each output sample it accumulates lumFilterSize (luma/alpha) or
 * chrFilterSize (chroma) taps of 32-bit input lines weighted by 16-bit filter
 * coefficients, shifts away shift = 15 + 16 - output_bits fractional bits and
 * clips to output_bits (av_clip_uint16 for 16-bit, av_clip_uintp2 otherwise).
 * big_endian selects AV_WB16 vs AV_WL16 stores; dest[0..3] are the Y,U,V,A
 * planes, alpha written only when CONFIG_SWSCALE_ALPHA and dest[3] != NULL.
 *
 * NOTE(review): this extraction is missing structural lines — the function's
 * opening brace, loop-variable declarations (i, j), and the "} else {" /
 * closing arms of the output_pixel macro. Do not build from this text.
 */
292 static av_always_inline void
293 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
294 int lumFilterSize, const int16_t *chrFilter,
295 const int32_t **chrUSrc, const int32_t **chrVSrc,
296 int chrFilterSize, const int32_t **alpSrc,
297 uint16_t *dest[4], int dstW, int chrDstW,
298 int big_endian, int output_bits)
300 //FIXME Optimize (just quickly written not optimized..)
302 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
303 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
304 int shift = 15 + 16 - output_bits;
306 #define output_pixel(pos, val) \
308 if (output_bits == 16) { \
309 AV_WB16(pos, av_clip_uint16(val >> shift)); \
311 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
314 if (output_bits == 16) { \
315 AV_WL16(pos, av_clip_uint16(val >> shift)); \
317 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
320 for (i = 0; i < dstW; i++) {
321 int val = 1 << (30-output_bits);
324 for (j = 0; j < lumFilterSize; j++)
325 val += lumSrc[j][i] * lumFilter[j];
327 output_pixel(&yDest[i], val);
// Chroma: U and V accumulated in one pass over the chroma filter taps.
331 for (i = 0; i < chrDstW; i++) {
332 int u = 1 << (30-output_bits);
333 int v = 1 << (30-output_bits);
336 for (j = 0; j < chrFilterSize; j++) {
337 u += chrUSrc[j][i] * chrFilter[j];
338 v += chrVSrc[j][i] * chrFilter[j];
341 output_pixel(&uDest[i], u);
342 output_pixel(&vDest[i], v);
// Alpha plane, same filtering as luma, only when compiled in and requested.
346 if (CONFIG_SWSCALE_ALPHA && aDest) {
347 for (i = 0; i < dstW; i++) {
348 int val = 1 << (30-output_bits);
351 for (j = 0; j < lumFilterSize; j++)
352 val += alpSrc[j][i] * lumFilter[j];
354 output_pixel(&aDest[i], val);
// yuv2NBPS(bits, BE_LE, is_be): instantiates yuv2yuvX<bits><BE_LE>_c, a thin
// wrapper that reinterprets the generic int16_t** plane pointers as int32_t**
// and forwards to yuv2yuvX16_c_template with the given depth and endianness.
// NOTE(review): the invocations of this macro (yuv2NBPS(9,BE,1) etc.) and the
// wrapper's closing brace are not visible in this extraction.
360 #define yuv2NBPS(bits, BE_LE, is_be) \
361 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
362 const int16_t **_lumSrc, int lumFilterSize, \
363 const int16_t *chrFilter, const int16_t **_chrUSrc, \
364 const int16_t **_chrVSrc, \
365 int chrFilterSize, const int16_t **_alpSrc, \
366 uint8_t *_dest[4], int dstW, int chrDstW) \
368 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
369 **chrUSrc = (const int32_t **) _chrUSrc, \
370 **chrVSrc = (const int32_t **) _chrVSrc, \
371 **alpSrc = (const int32_t **) _alpSrc; \
372 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
373 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
374 alpSrc, (uint16_t **) _dest, \
375 dstW, chrDstW, is_be, bits); \
/**
 * Vertical scaling to 8-bit planar YUV (+ optional alpha).
 *
 * Accumulates filter taps into a 32-bit value seeded with the per-column
 * dither (lumDither/chrDither, indexed mod 8, shifted to the accumulator's
 * fixed-point position), then >>19 and av_clip_uint8 to produce each sample.
 * The chroma dither for V uses (i+3)&7, offsetting the pattern against U.
 *
 * NOTE(review): the function's opening brace, loop-variable declarations and
 * several closing braces are not visible in this extraction.
 */
384 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
385 const int16_t **lumSrc, int lumFilterSize,
386 const int16_t *chrFilter, const int16_t **chrUSrc,
387 const int16_t **chrVSrc,
388 int chrFilterSize, const int16_t **alpSrc,
389 uint8_t *dest[4], int dstW, int chrDstW,
390 const uint8_t *lumDither, const uint8_t *chrDither)
392 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
393 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
396 //FIXME Optimize (just quickly written not optimized..)
397 for (i=0; i<dstW; i++) {
398 int val = lumDither[i&7] << 12;
400 for (j=0; j<lumFilterSize; j++)
401 val += lumSrc[j][i] * lumFilter[j];
403 yDest[i]= av_clip_uint8(val>>19);
// Chroma plane pass: U and V filtered together per output column.
407 for (i=0; i<chrDstW; i++) {
408 int u = chrDither[i&7] << 12;
409 int v = chrDither[(i+3)&7] << 12;
411 for (j=0; j<chrFilterSize; j++) {
412 u += chrUSrc[j][i] * chrFilter[j];
413 v += chrVSrc[j][i] * chrFilter[j];
416 uDest[i]= av_clip_uint8(u>>19);
417 vDest[i]= av_clip_uint8(v>>19);
// Alpha plane reuses the luma filter and dither.
420 if (CONFIG_SWSCALE_ALPHA && aDest)
421 for (i=0; i<dstW; i++) {
422 int val = lumDither[i&7] << 12;
424 for (j=0; j<lumFilterSize; j++)
425 val += alpSrc[j][i] * lumFilter[j];
427 aDest[i]= av_clip_uint8(val>>19);
/**
 * Unfiltered (single-tap) vertical path to 8-bit planar YUV: each output
 * sample is (src + dither) >> 7 clipped to 0..255. Used when no vertical
 * filtering is needed. V's dither index is offset by 3 like in yuv2yuvX_c.
 *
 * NOTE(review): opening brace, loop-variable declarations and closing braces
 * are not visible in this extraction.
 */
431 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
432 const int16_t *chrUSrc, const int16_t *chrVSrc,
433 const int16_t *alpSrc,
434 uint8_t *dest[4], int dstW, int chrDstW,
435 const uint8_t *lumDither, const uint8_t *chrDither)
437 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
438 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
441 for (i=0; i<dstW; i++) {
442 int val= (lumSrc[i]+lumDither[i&7])>>7;
443 yDest[i]= av_clip_uint8(val);
447 for (i=0; i<chrDstW; i++) {
448 int u=(chrUSrc[i]+chrDither[i&7])>>7;
449 int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
450 uDest[i]= av_clip_uint8(u);
451 vDest[i]= av_clip_uint8(v);
454 if (CONFIG_SWSCALE_ALPHA && aDest)
455 for (i=0; i<dstW; i++) {
456 int val= (alpSrc[i]+lumDither[i&7])>>7;
457 aDest[i]= av_clip_uint8(val);
/**
 * Vertical scaling to NV12/NV21: planar 8-bit luma plus one interleaved
 * chroma plane. Luma is filtered exactly as in yuv2yuvX_c; chroma is filtered
 * then stored interleaved — U,V order for PIX_FMT_NV12, V,U order in the
 * other branch (presumably NV21 — the `else` line itself is not visible).
 *
 * NOTE(review): the function's opening brace, loop-variable declarations, the
 * `else` between the two chroma loops and closing braces are missing from
 * this extraction.
 */
461 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
462 const int16_t **lumSrc, int lumFilterSize,
463 const int16_t *chrFilter, const int16_t **chrUSrc,
464 const int16_t **chrVSrc, int chrFilterSize,
465 const int16_t **alpSrc, uint8_t *dest[4],
466 int dstW, int chrDstW,
467 const uint8_t *lumDither, const uint8_t *chrDither)
469 uint8_t *yDest = dest[0], *uDest = dest[1];
470 enum PixelFormat dstFormat = c->dstFormat;
472 //FIXME Optimize (just quickly written not optimized..)
474 for (i=0; i<dstW; i++) {
475 int val = lumDither[i&7]<<12;
477 for (j=0; j<lumFilterSize; j++)
478 val += lumSrc[j][i] * lumFilter[j];
480 yDest[i]= av_clip_uint8(val>>19);
// NV12: chroma interleaved as U at even, V at odd byte offsets.
486 if (dstFormat == PIX_FMT_NV12)
487 for (i=0; i<chrDstW; i++) {
488 int u = chrDither[i&7]<<12;
489 int v = chrDither[(i+3)&7]<<12;
491 for (j=0; j<chrFilterSize; j++) {
492 u += chrUSrc[j][i] * chrFilter[j];
493 v += chrVSrc[j][i] * chrFilter[j];
496 uDest[2*i]= av_clip_uint8(u>>19);
497 uDest[2*i+1]= av_clip_uint8(v>>19);
500 for (i=0; i<chrDstW; i++) {
501 int u = chrDither[i&7]<<12;
502 int v = chrDither[(i+3)&7]<<12;
504 for (j=0; j<chrFilterSize; j++) {
505 u += chrUSrc[j][i] * chrFilter[j];
506 v += chrVSrc[j][i] * chrFilter[j];
509 uDest[2*i]= av_clip_uint8(v>>19);
510 uDest[2*i+1]= av_clip_uint8(u>>19);
// output_pixel: stores a 16-bit gray sample in the byte order implied by
// `target` (GRAY16BE vs LE).
// NOTE(review): the body of this macro (the store expressions, else arm and
// closing braces) is almost entirely missing from this extraction; likewise
// the function below lacks its opening brace, loop-variable declarations and
// the per-pixel accumulator initializations before the filter loop.
//
// yuv2gray16_X_c_template: vertical scaling to 16-bit grayscale, two pixels
// per iteration; Y1/Y2 accumulate lumFilterSize taps, then are clipped to
// 16 bits (the `& 0x10000` test skips clipping when no overflow occurred)
// and stored via output_pixel. Chroma/alpha inputs are accepted but unused
// by the visible code.
514 #define output_pixel(pos, val) \
515 if (target == PIX_FMT_GRAY16BE) { \
521 static av_always_inline void
522 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
523 const int32_t **lumSrc, int lumFilterSize,
524 const int16_t *chrFilter, const int32_t **chrUSrc,
525 const int32_t **chrVSrc, int chrFilterSize,
526 const int32_t **alpSrc, uint16_t *dest, int dstW,
527 int y, enum PixelFormat target)
531 for (i = 0; i < (dstW >> 1); i++) {
536 for (j = 0; j < lumFilterSize; j++) {
537 Y1 += lumSrc[j][i * 2] * lumFilter[j];
538 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
542 if ((Y1 | Y2) & 0x10000) {
543 Y1 = av_clip_uint16(Y1);
544 Y2 = av_clip_uint16(Y2);
546 output_pixel(&dest[i * 2 + 0], Y1);
547 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Two-line interpolating path to 16-bit grayscale: each output pixel blends
 * buf[0] and buf[1] with weights (4095-yalpha)/yalpha, >>15, two pixels per
 * iteration. Chroma/alpha buffers are accepted but unused by the visible code.
 *
 * NOTE(review): opening brace, the loop variable declaration and closing
 * braces are not visible in this extraction.
 */
551 static av_always_inline void
552 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
553 const int32_t *ubuf[2], const int32_t *vbuf[2],
554 const int32_t *abuf[2], uint16_t *dest, int dstW,
555 int yalpha, int uvalpha, int y,
556 enum PixelFormat target)
558 int yalpha1 = 4095 - yalpha;
560 const int32_t *buf0 = buf[0], *buf1 = buf[1];
562 for (i = 0; i < (dstW >> 1); i++) {
563 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
564 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
566 output_pixel(&dest[i * 2 + 0], Y1);
567 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Single-line (no interpolation) path to 16-bit grayscale: each output pixel
 * is buf0[x] << 1, two pixels per iteration. Chroma/alpha inputs are accepted
 * but unused by the visible code.
 *
 * NOTE(review): opening brace, loop variable declaration and closing braces
 * are not visible in this extraction.
 */
571 static av_always_inline void
572 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
573 const int32_t *ubuf[2], const int32_t *vbuf[2],
574 const int32_t *abuf0, uint16_t *dest, int dstW,
575 int uvalpha, int y, enum PixelFormat target)
579 for (i = 0; i < (dstW >> 1); i++) {
580 int Y1 = buf0[i * 2 ] << 1;
581 int Y2 = buf0[i * 2 + 1] << 1;
583 output_pixel(&dest[i * 2 + 0], Y1);
584 output_pixel(&dest[i * 2 + 1], Y2);
// YUV2PACKED16WRAPPER(name, base, ext, fmt): generates the three standard
// output entry points (_X_c filtered, _2_c two-line blend, _1_c single-line)
// for a 16-bit-per-component packed format. Each wrapper reinterprets the
// generic int16_t pointers as int32_t and the destination as uint16_t, then
// calls the corresponding <name><base>_*_c_template with `fmt` as the
// compile-time target so the template specializes per pixel format.
// NOTE(review): several backslash-continued lines (closing braces of each
// generated function, the `int y` parameter line of _X_c) are missing from
// this extraction.
590 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
591 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
592 const int16_t **_lumSrc, int lumFilterSize, \
593 const int16_t *chrFilter, const int16_t **_chrUSrc, \
594 const int16_t **_chrVSrc, int chrFilterSize, \
595 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
598 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
599 **chrUSrc = (const int32_t **) _chrUSrc, \
600 **chrVSrc = (const int32_t **) _chrVSrc, \
601 **alpSrc = (const int32_t **) _alpSrc; \
602 uint16_t *dest = (uint16_t *) _dest; \
603 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
604 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
605 alpSrc, dest, dstW, y, fmt); \
608 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
609 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
610 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
611 int yalpha, int uvalpha, int y) \
613 const int32_t **buf = (const int32_t **) _buf, \
614 **ubuf = (const int32_t **) _ubuf, \
615 **vbuf = (const int32_t **) _vbuf, \
616 **abuf = (const int32_t **) _abuf; \
617 uint16_t *dest = (uint16_t *) _dest; \
618 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
619 dest, dstW, yalpha, uvalpha, y, fmt); \
622 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
623 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
624 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
625 int uvalpha, int y) \
627 const int32_t *buf0 = (const int32_t *) _buf0, \
628 **ubuf = (const int32_t **) _ubuf, \
629 **vbuf = (const int32_t **) _vbuf, \
630 *abuf0 = (const int32_t *) _abuf0; \
631 uint16_t *dest = (uint16_t *) _dest; \
632 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
633 dstW, uvalpha, y, fmt); \
636 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
637 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// output_pixel(pos, acc): writes one packed byte of 8 monochrome pixels,
// inverted or not depending on target (MONOBLACK vs MONOWHITE).
// NOTE(review): the macro body past the first line is missing from this
// extraction, as are the function's opening brace, loop-variable
// declarations, the Y1/Y2 accumulator initializations and several closing
// braces.
//
// yuv2mono_X_c_template: filtered vertical scaling to 1 bpp monochrome.
// Uses the dither_8x8_220 row for this output line (y&7) and the gamma table
// g = table_gU[128] + table_gV[128] (neutral chroma) to threshold each luma
// sample; bits are shifted into `acc` two pixels per iteration and flushed a
// byte at a time via output_pixel. The `& 0x100` test clips only on overflow.
639 #define output_pixel(pos, acc) \
640 if (target == PIX_FMT_MONOBLACK) { \
646 static av_always_inline void
647 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
648 const int16_t **lumSrc, int lumFilterSize,
649 const int16_t *chrFilter, const int16_t **chrUSrc,
650 const int16_t **chrVSrc, int chrFilterSize,
651 const int16_t **alpSrc, uint8_t *dest, int dstW,
652 int y, enum PixelFormat target)
654 const uint8_t * const d128=dither_8x8_220[y&7];
655 uint8_t *g = c->table_gU[128] + c->table_gV[128];
659 for (i = 0; i < dstW - 1; i += 2) {
664 for (j = 0; j < lumFilterSize; j++) {
665 Y1 += lumSrc[j][i] * lumFilter[j];
666 Y2 += lumSrc[j][i+1] * lumFilter[j];
670 if ((Y1 | Y2) & 0x100) {
671 Y1 = av_clip_uint8(Y1);
672 Y2 = av_clip_uint8(Y2);
674 acc += acc + g[Y1 + d128[(i + 0) & 7]];
675 acc += acc + g[Y2 + d128[(i + 1) & 7]];
677 output_pixel(*dest++, acc);
/**
 * Two-line interpolating path to 1 bpp monochrome: blends buf0/buf1 with
 * yalpha weights (>>19 to 8 bits), adds the per-column dither and looks up
 * the threshold table g, packing 8 pixels into one byte per iteration
 * (acc += acc + bit shifts bits in MSB-first).
 *
 * NOTE(review): opening brace, the `acc` declaration/loop variable lines and
 * closing braces are not visible in this extraction.
 */
682 static av_always_inline void
683 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
684 const int16_t *ubuf[2], const int16_t *vbuf[2],
685 const int16_t *abuf[2], uint8_t *dest, int dstW,
686 int yalpha, int uvalpha, int y,
687 enum PixelFormat target)
689 const int16_t *buf0 = buf[0], *buf1 = buf[1];
690 const uint8_t * const d128 = dither_8x8_220[y & 7];
691 uint8_t *g = c->table_gU[128] + c->table_gV[128];
692 int yalpha1 = 4095 - yalpha;
695 for (i = 0; i < dstW - 7; i += 8) {
696 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
697 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
698 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
699 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
700 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
701 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
702 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
703 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
704 output_pixel(*dest++, acc);
/**
 * Single-line (no interpolation) path to 1 bpp monochrome: luma >>7 to
 * 8 bits, dithered via d128 and thresholded through g; 8 pixels packed per
 * output byte, MSB first (acc += acc + bit).
 *
 * NOTE(review): opening brace, loop variable declaration and closing braces
 * are not visible in this extraction.
 */
708 static av_always_inline void
709 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
710 const int16_t *ubuf[2], const int16_t *vbuf[2],
711 const int16_t *abuf0, uint8_t *dest, int dstW,
712 int uvalpha, int y, enum PixelFormat target)
714 const uint8_t * const d128 = dither_8x8_220[y & 7];
715 uint8_t *g = c->table_gU[128] + c->table_gV[128];
718 for (i = 0; i < dstW - 7; i += 8) {
719 int acc = g[(buf0[i ] >> 7) + d128[0]];
720 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
721 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
722 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
723 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
724 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
725 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
726 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
727 output_pixel(*dest++, acc);
// YUV2PACKEDWRAPPER(name, base, ext, fmt): like YUV2PACKED16WRAPPER but for
// 8-bit packed outputs — generates <name><ext>_{X,2,1}_c entry points that
// forward their int16_t planes unchanged to <name><base>_*_c_template with
// `fmt` baked in as the compile-time target.
// NOTE(review): backslash-continued lines are missing here too (the `int y`
// parameter line of _X_c, the `fmt` argument line of _1_c, and the closing
// braces of each generated function).
733 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
734 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
735 const int16_t **lumSrc, int lumFilterSize, \
736 const int16_t *chrFilter, const int16_t **chrUSrc, \
737 const int16_t **chrVSrc, int chrFilterSize, \
738 const int16_t **alpSrc, uint8_t *dest, int dstW, \
741 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
742 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
743 alpSrc, dest, dstW, y, fmt); \
746 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
747 const int16_t *ubuf[2], const int16_t *vbuf[2], \
748 const int16_t *abuf[2], uint8_t *dest, int dstW, \
749 int yalpha, int uvalpha, int y) \
751 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
752 dest, dstW, yalpha, uvalpha, y, fmt); \
755 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
756 const int16_t *ubuf[2], const int16_t *vbuf[2], \
757 const int16_t *abuf0, uint8_t *dest, int dstW, \
758 int uvalpha, int y) \
760 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
761 abuf0, dest, dstW, uvalpha, \
765 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
766 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// output_pixels(pos, Y1, U, Y2, V): stores one 2-pixel group of packed 4:2:2.
// YUYV order (Y,U,Y,V) for PIX_FMT_YUYV422; the other arm writes Y at odd
// offsets (UYVY order). The U/V store lines and the else keyword are missing
// from this extraction.
//
// yuv2422_X_c_template: filtered vertical scaling to packed 4:2:2
// (YUYV/UYVY). Two luma and one U/V pair per iteration; accumulators are
// filtered, >>19'd to 8 bits (shift not visible — the accumulator-init and
// shift lines were dropped by extraction), clipped only when the `& 0x100`
// overflow test fires, then packed via output_pixels.
// NOTE(review): opening brace, loop-variable declarations, accumulator
// initializations and several closing braces are not visible.
768 #define output_pixels(pos, Y1, U, Y2, V) \
769 if (target == PIX_FMT_YUYV422) { \
770 dest[pos + 0] = Y1; \
772 dest[pos + 2] = Y2; \
776 dest[pos + 1] = Y1; \
778 dest[pos + 3] = Y2; \
781 static av_always_inline void
782 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
783 const int16_t **lumSrc, int lumFilterSize,
784 const int16_t *chrFilter, const int16_t **chrUSrc,
785 const int16_t **chrVSrc, int chrFilterSize,
786 const int16_t **alpSrc, uint8_t *dest, int dstW,
787 int y, enum PixelFormat target)
791 for (i = 0; i < (dstW >> 1); i++) {
798 for (j = 0; j < lumFilterSize; j++) {
799 Y1 += lumSrc[j][i * 2] * lumFilter[j];
800 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
802 for (j = 0; j < chrFilterSize; j++) {
803 U += chrUSrc[j][i] * chrFilter[j];
804 V += chrVSrc[j][i] * chrFilter[j];
810 if ((Y1 | Y2 | U | V) & 0x100) {
811 Y1 = av_clip_uint8(Y1);
812 Y2 = av_clip_uint8(Y2);
813 U = av_clip_uint8(U);
814 V = av_clip_uint8(V);
816 output_pixels(4*i, Y1, U, Y2, V);
/**
 * Two-line interpolating path to packed 4:2:2: luma and chroma are each
 * blended between line 0 and line 1 with yalpha/uvalpha weights and >>19 to
 * 8 bits, then packed two pixels at a time via output_pixels.
 *
 * NOTE(review): opening brace, loop variable declaration and closing braces
 * are not visible in this extraction; no explicit clipping is visible in this
 * path.
 */
820 static av_always_inline void
821 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
822 const int16_t *ubuf[2], const int16_t *vbuf[2],
823 const int16_t *abuf[2], uint8_t *dest, int dstW,
824 int yalpha, int uvalpha, int y,
825 enum PixelFormat target)
827 const int16_t *buf0 = buf[0], *buf1 = buf[1],
828 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
829 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
830 int yalpha1 = 4095 - yalpha;
831 int uvalpha1 = 4095 - uvalpha;
834 for (i = 0; i < (dstW >> 1); i++) {
835 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
836 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
837 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
838 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
840 output_pixels(i * 4, Y1, U, Y2, V);
/**
 * Single-line path to packed 4:2:2. When uvalpha < 2048 the chroma of one
 * line is used directly (>>7); otherwise the two chroma lines are averaged
 * ((a+b)>>8). Luma is always buf0 >> 7.
 *
 * NOTE(review): opening brace, loop variable declaration, the "} else {"
 * between the two loops and closing braces are not visible in this
 * extraction. Also note the first branch reads ubuf1/vbuf1 while the second
 * averages ubuf0+ubuf1 — matches upstream, but verify against pristine
 * source.
 */
844 static av_always_inline void
845 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
846 const int16_t *ubuf[2], const int16_t *vbuf[2],
847 const int16_t *abuf0, uint8_t *dest, int dstW,
848 int uvalpha, int y, enum PixelFormat target)
850 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
851 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
854 if (uvalpha < 2048) {
855 for (i = 0; i < (dstW >> 1); i++) {
856 int Y1 = buf0[i * 2] >> 7;
857 int Y2 = buf0[i * 2 + 1] >> 7;
858 int U = ubuf1[i] >> 7;
859 int V = vbuf1[i] >> 7;
861 output_pixels(i * 4, Y1, U, Y2, V);
864 for (i = 0; i < (dstW >> 1); i++) {
865 int Y1 = buf0[i * 2] >> 7;
866 int Y2 = buf0[i * 2 + 1] >> 7;
867 int U = (ubuf0[i] + ubuf1[i]) >> 8;
868 int V = (vbuf0[i] + vbuf1[i]) >> 8;
870 output_pixels(i * 4, Y1, U, Y2, V);
877 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
878 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// R_B / B_R: select component order at compile time — RGB48 formats put R
// first, BGR48 formats put B first.
880 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
881 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
// output_pixel: 16-bit component store honoring the target's endianness.
// NOTE(review): the macro body past the isBE(target) test is missing from
// this extraction, as are the function's opening brace, loop-variable
// declarations, the Y1/Y2/V accumulator initializations, the post-filter
// shift lines and several closing braces.
//
// yuv2rgb48_X_c_template: filtered vertical scaling to 48-bit RGB/BGR.
// Per 2-pixel group: accumulate filter taps for Y1/Y2/U/V (U seeded with the
// -128 chroma offset at bit 23), subtract the luma offset and scale by the
// yuv2rgb coefficients from the SwsContext, form R/G/B contributions, then
// clip to 30 bits and >>14 to 16-bit components via output_pixel.
889 static av_always_inline void
890 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
891 const int32_t **lumSrc, int lumFilterSize,
892 const int16_t *chrFilter, const int32_t **chrUSrc,
893 const int32_t **chrVSrc, int chrFilterSize,
894 const int32_t **alpSrc, uint16_t *dest, int dstW,
895 int y, enum PixelFormat target)
899 for (i = 0; i < (dstW >> 1); i++) {
903 int U = -128 << 23; // 19
907 for (j = 0; j < lumFilterSize; j++) {
908 Y1 += lumSrc[j][i * 2] * lumFilter[j];
909 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
911 for (j = 0; j < chrFilterSize; j++) {
912 U += chrUSrc[j][i] * chrFilter[j];
913 V += chrVSrc[j][i] * chrFilter[j];
916 // 8bit: 12+15=27; 16-bit: 12+19=31
922 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
923 Y1 -= c->yuv2rgb_y_offset;
924 Y2 -= c->yuv2rgb_y_offset;
925 Y1 *= c->yuv2rgb_y_coeff;
926 Y2 *= c->yuv2rgb_y_coeff;
929 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
931 R = V * c->yuv2rgb_v2r_coeff;
932 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
933 B = U * c->yuv2rgb_u2b_coeff;
935 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
936 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
937 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
938 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
939 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
940 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
941 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Two-line interpolating path to 48-bit RGB/BGR: luma/chroma blended with
 * yalpha/uvalpha weights and >>14 (chroma gets the -128<<23 offset folded in
 * before the shift), then the same offset/coefficient/clip/output pipeline as
 * yuv2rgb48_X_c_template.
 *
 * NOTE(review): opening brace, loop variable and R/G/B declarations, the
 * `dest += 6` advance and closing braces are not visible in this extraction.
 */
946 static av_always_inline void
947 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
948 const int32_t *ubuf[2], const int32_t *vbuf[2],
949 const int32_t *abuf[2], uint16_t *dest, int dstW,
950 int yalpha, int uvalpha, int y,
951 enum PixelFormat target)
953 const int32_t *buf0 = buf[0], *buf1 = buf[1],
954 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
955 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
956 int yalpha1 = 4095 - yalpha;
957 int uvalpha1 = 4095 - uvalpha;
960 for (i = 0; i < (dstW >> 1); i++) {
961 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
962 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
963 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
964 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
967 Y1 -= c->yuv2rgb_y_offset;
968 Y2 -= c->yuv2rgb_y_offset;
969 Y1 *= c->yuv2rgb_y_coeff;
970 Y2 *= c->yuv2rgb_y_coeff;
974 R = V * c->yuv2rgb_v2r_coeff;
975 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
976 B = U * c->yuv2rgb_u2b_coeff;
978 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
979 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
980 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
981 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
982 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
983 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Single-line path to 48-bit RGB/BGR. Luma is buf0 >> 2; chroma either comes
 * from one line (>>2, with -128<<11 offset) when uvalpha < 2048, or is the
 * average of both lines (>>3). Both branches then run the shared
 * offset/coefficient/clip/output pipeline used by the other rgb48 paths.
 *
 * NOTE(review): opening brace, loop variable and R/G/B declarations, the
 * "} else {" between the two branches, `dest += 6` advances and closing
 * braces are not visible in this extraction.
 */
988 static av_always_inline void
989 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
990 const int32_t *ubuf[2], const int32_t *vbuf[2],
991 const int32_t *abuf0, uint16_t *dest, int dstW,
992 int uvalpha, int y, enum PixelFormat target)
994 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
995 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
998 if (uvalpha < 2048) {
999 for (i = 0; i < (dstW >> 1); i++) {
1000 int Y1 = (buf0[i * 2] ) >> 2;
1001 int Y2 = (buf0[i * 2 + 1]) >> 2;
1002 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1003 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1006 Y1 -= c->yuv2rgb_y_offset;
1007 Y2 -= c->yuv2rgb_y_offset;
1008 Y1 *= c->yuv2rgb_y_coeff;
1009 Y2 *= c->yuv2rgb_y_coeff;
1013 R = V * c->yuv2rgb_v2r_coeff;
1014 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1015 B = U * c->yuv2rgb_u2b_coeff;
1017 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1018 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1019 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1020 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1021 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1022 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1026 for (i = 0; i < (dstW >> 1); i++) {
1027 int Y1 = (buf0[i * 2] ) >> 2;
1028 int Y2 = (buf0[i * 2 + 1]) >> 2;
1029 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
1030 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
1033 Y1 -= c->yuv2rgb_y_offset;
1034 Y2 -= c->yuv2rgb_y_offset;
1035 Y1 *= c->yuv2rgb_y_coeff;
1036 Y2 *= c->yuv2rgb_y_coeff;
1040 R = V * c->yuv2rgb_v2r_coeff;
1041 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1042 B = U * c->yuv2rgb_u2b_coeff;
1044 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1045 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1046 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1047 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1048 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1049 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1059 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1060 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1061 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1062 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
1064 static av_always_inline void
1065 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1066 int U, int V, int A1, int A2,
1067 const void *_r, const void *_g, const void *_b, int y,
1068 enum PixelFormat target, int hasAlpha)
1070 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1071 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1072 uint32_t *dest = (uint32_t *) _dest;
1073 const uint32_t *r = (const uint32_t *) _r;
1074 const uint32_t *g = (const uint32_t *) _g;
1075 const uint32_t *b = (const uint32_t *) _b;
1078 int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1080 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1081 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1084 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1086 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1087 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1089 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1090 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1093 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1094 uint8_t *dest = (uint8_t *) _dest;
1095 const uint8_t *r = (const uint8_t *) _r;
1096 const uint8_t *g = (const uint8_t *) _g;
1097 const uint8_t *b = (const uint8_t *) _b;
1099 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1100 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1102 dest[i * 6 + 0] = r_b[Y1];
1103 dest[i * 6 + 1] = g[Y1];
1104 dest[i * 6 + 2] = b_r[Y1];
1105 dest[i * 6 + 3] = r_b[Y2];
1106 dest[i * 6 + 4] = g[Y2];
1107 dest[i * 6 + 5] = b_r[Y2];
1110 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1111 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1112 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1113 uint16_t *dest = (uint16_t *) _dest;
1114 const uint16_t *r = (const uint16_t *) _r;
1115 const uint16_t *g = (const uint16_t *) _g;
1116 const uint16_t *b = (const uint16_t *) _b;
1117 int dr1, dg1, db1, dr2, dg2, db2;
1119 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1120 dr1 = dither_2x2_8[ y & 1 ][0];
1121 dg1 = dither_2x2_4[ y & 1 ][0];
1122 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1123 dr2 = dither_2x2_8[ y & 1 ][1];
1124 dg2 = dither_2x2_4[ y & 1 ][1];
1125 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1126 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1127 dr1 = dither_2x2_8[ y & 1 ][0];
1128 dg1 = dither_2x2_8[ y & 1 ][1];
1129 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1130 dr2 = dither_2x2_8[ y & 1 ][1];
1131 dg2 = dither_2x2_8[ y & 1 ][0];
1132 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1134 dr1 = dither_4x4_16[ y & 3 ][0];
1135 dg1 = dither_4x4_16[ y & 3 ][1];
1136 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1137 dr2 = dither_4x4_16[ y & 3 ][1];
1138 dg2 = dither_4x4_16[ y & 3 ][0];
1139 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1142 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1143 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1144 } else /* 8/4-bit */ {
1145 uint8_t *dest = (uint8_t *) _dest;
1146 const uint8_t *r = (const uint8_t *) _r;
1147 const uint8_t *g = (const uint8_t *) _g;
1148 const uint8_t *b = (const uint8_t *) _b;
1149 int dr1, dg1, db1, dr2, dg2, db2;
1151 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1152 const uint8_t * const d64 = dither_8x8_73[y & 7];
1153 const uint8_t * const d32 = dither_8x8_32[y & 7];
1154 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1155 db1 = d64[(i * 2 + 0) & 7];
1156 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1157 db2 = d64[(i * 2 + 1) & 7];
1159 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1160 const uint8_t * const d128 = dither_8x8_220[y & 7];
1161 dr1 = db1 = d128[(i * 2 + 0) & 7];
1162 dg1 = d64[(i * 2 + 0) & 7];
1163 dr2 = db2 = d128[(i * 2 + 1) & 7];
1164 dg2 = d64[(i * 2 + 1) & 7];
1167 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1168 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1169 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1171 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1172 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1177 static av_always_inline void
1178 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1179 const int16_t **lumSrc, int lumFilterSize,
1180 const int16_t *chrFilter, const int16_t **chrUSrc,
1181 const int16_t **chrVSrc, int chrFilterSize,
1182 const int16_t **alpSrc, uint8_t *dest, int dstW,
1183 int y, enum PixelFormat target, int hasAlpha)
1187 for (i = 0; i < (dstW >> 1); i++) {
1193 int av_unused A1, A2;
1194 const void *r, *g, *b;
1196 for (j = 0; j < lumFilterSize; j++) {
1197 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1198 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1200 for (j = 0; j < chrFilterSize; j++) {
1201 U += chrUSrc[j][i] * chrFilter[j];
1202 V += chrVSrc[j][i] * chrFilter[j];
1208 if ((Y1 | Y2 | U | V) & 0x100) {
1209 Y1 = av_clip_uint8(Y1);
1210 Y2 = av_clip_uint8(Y2);
1211 U = av_clip_uint8(U);
1212 V = av_clip_uint8(V);
1217 for (j = 0; j < lumFilterSize; j++) {
1218 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1219 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1223 if ((A1 | A2) & 0x100) {
1224 A1 = av_clip_uint8(A1);
1225 A2 = av_clip_uint8(A2);
1229 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1231 g = (c->table_gU[U] + c->table_gV[V]);
1234 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1235 r, g, b, y, target, hasAlpha);
1239 static av_always_inline void
1240 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1241 const int16_t *ubuf[2], const int16_t *vbuf[2],
1242 const int16_t *abuf[2], uint8_t *dest, int dstW,
1243 int yalpha, int uvalpha, int y,
1244 enum PixelFormat target, int hasAlpha)
1246 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1247 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1248 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1249 *abuf0 = abuf[0], *abuf1 = abuf[1];
1250 int yalpha1 = 4095 - yalpha;
1251 int uvalpha1 = 4095 - uvalpha;
1254 for (i = 0; i < (dstW >> 1); i++) {
1255 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1256 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1257 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1258 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1260 const void *r = c->table_rV[V],
1261 *g = (c->table_gU[U] + c->table_gV[V]),
1262 *b = c->table_bU[U];
1265 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1266 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1269 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1270 r, g, b, y, target, hasAlpha);
1274 static av_always_inline void
1275 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1276 const int16_t *ubuf[2], const int16_t *vbuf[2],
1277 const int16_t *abuf0, uint8_t *dest, int dstW,
1278 int uvalpha, int y, enum PixelFormat target,
1281 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1282 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1285 if (uvalpha < 2048) {
1286 for (i = 0; i < (dstW >> 1); i++) {
1287 int Y1 = buf0[i * 2] >> 7;
1288 int Y2 = buf0[i * 2 + 1] >> 7;
1289 int U = ubuf1[i] >> 7;
1290 int V = vbuf1[i] >> 7;
1292 const void *r = c->table_rV[V],
1293 *g = (c->table_gU[U] + c->table_gV[V]),
1294 *b = c->table_bU[U];
1297 A1 = abuf0[i * 2 ] >> 7;
1298 A2 = abuf0[i * 2 + 1] >> 7;
1301 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1302 r, g, b, y, target, hasAlpha);
1305 for (i = 0; i < (dstW >> 1); i++) {
1306 int Y1 = buf0[i * 2] >> 7;
1307 int Y2 = buf0[i * 2 + 1] >> 7;
1308 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1309 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1311 const void *r = c->table_rV[V],
1312 *g = (c->table_gU[U] + c->table_gV[V]),
1313 *b = c->table_bU[U];
1316 A1 = abuf0[i * 2 ] >> 7;
1317 A2 = abuf0[i * 2 + 1] >> 7;
1320 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1321 r, g, b, y, target, hasAlpha);
1326 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1327 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1328 const int16_t **lumSrc, int lumFilterSize, \
1329 const int16_t *chrFilter, const int16_t **chrUSrc, \
1330 const int16_t **chrVSrc, int chrFilterSize, \
1331 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1334 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1335 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1336 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1338 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1339 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1340 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1341 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1342 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1343 int yalpha, int uvalpha, int y) \
1345 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1346 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1349 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1350 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1351 const int16_t *abuf0, uint8_t *dest, int dstW, \
1352 int uvalpha, int y) \
1354 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1355 dstW, uvalpha, y, fmt, hasAlpha); \
1359 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1360 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1362 #if CONFIG_SWSCALE_ALPHA
1363 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1364 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1366 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1367 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1369 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1370 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1371 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1372 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1373 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1374 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1375 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1376 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
1378 static av_always_inline void
1379 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1380 const int16_t **lumSrc, int lumFilterSize,
1381 const int16_t *chrFilter, const int16_t **chrUSrc,
1382 const int16_t **chrVSrc, int chrFilterSize,
1383 const int16_t **alpSrc, uint8_t *dest,
1384 int dstW, int y, enum PixelFormat target, int hasAlpha)
1387 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1389 for (i = 0; i < dstW; i++) {
1392 int U = (1<<9)-(128 << 19);
1393 int V = (1<<9)-(128 << 19);
1397 for (j = 0; j < lumFilterSize; j++) {
1398 Y += lumSrc[j][i] * lumFilter[j];
1400 for (j = 0; j < chrFilterSize; j++) {
1401 U += chrUSrc[j][i] * chrFilter[j];
1402 V += chrVSrc[j][i] * chrFilter[j];
1409 for (j = 0; j < lumFilterSize; j++) {
1410 A += alpSrc[j][i] * lumFilter[j];
1414 A = av_clip_uint8(A);
1416 Y -= c->yuv2rgb_y_offset;
1417 Y *= c->yuv2rgb_y_coeff;
1419 R = Y + V*c->yuv2rgb_v2r_coeff;
1420 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1421 B = Y + U*c->yuv2rgb_u2b_coeff;
1422 if ((R | G | B) & 0xC0000000) {
1423 R = av_clip_uintp2(R, 30);
1424 G = av_clip_uintp2(G, 30);
1425 B = av_clip_uintp2(B, 30);
1430 dest[0] = hasAlpha ? A : 255;
1444 dest[3] = hasAlpha ? A : 255;
1447 dest[0] = hasAlpha ? A : 255;
1461 dest[3] = hasAlpha ? A : 255;
1469 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1470 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1471 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1472 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1474 #if CONFIG_SWSCALE_ALPHA
1475 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1476 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1477 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1478 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1480 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1481 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1482 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1483 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1485 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1486 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1488 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1489 int width, int height,
1493 uint8_t *ptr = plane + stride*y;
1494 for (i=0; i<height; i++) {
1495 memset(ptr, val, width);
1500 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1502 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1503 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1505 static av_always_inline void
1506 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1507 enum PixelFormat origin)
1510 for (i = 0; i < width; i++) {
1511 unsigned int r_b = input_pixel(&src[i*3+0]);
1512 unsigned int g = input_pixel(&src[i*3+1]);
1513 unsigned int b_r = input_pixel(&src[i*3+2]);
1515 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1519 static av_always_inline void
1520 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1521 const uint16_t *src1, const uint16_t *src2,
1522 int width, enum PixelFormat origin)
1526 for (i = 0; i < width; i++) {
1527 int r_b = input_pixel(&src1[i*3+0]);
1528 int g = input_pixel(&src1[i*3+1]);
1529 int b_r = input_pixel(&src1[i*3+2]);
1531 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1532 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1536 static av_always_inline void
1537 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1538 const uint16_t *src1, const uint16_t *src2,
1539 int width, enum PixelFormat origin)
1543 for (i = 0; i < width; i++) {
1544 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1545 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1546 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1548 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1549 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1557 #define rgb48funcs(pattern, BE_LE, origin) \
1558 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1559 int width, uint32_t *unused) \
1561 const uint16_t *src = (const uint16_t *) _src; \
1562 uint16_t *dst = (uint16_t *) _dst; \
1563 rgb48ToY_c_template(dst, src, width, origin); \
1566 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1567 const uint8_t *_src1, const uint8_t *_src2, \
1568 int width, uint32_t *unused) \
1570 const uint16_t *src1 = (const uint16_t *) _src1, \
1571 *src2 = (const uint16_t *) _src2; \
1572 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1573 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1576 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1577 const uint8_t *_src1, const uint8_t *_src2, \
1578 int width, uint32_t *unused) \
1580 const uint16_t *src1 = (const uint16_t *) _src1, \
1581 *src2 = (const uint16_t *) _src2; \
1582 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1583 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1586 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1587 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1588 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1589 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
1591 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1592 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1593 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1595 static av_always_inline void
1596 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1597 int width, enum PixelFormat origin,
1598 int shr, int shg, int shb, int shp,
1599 int maskr, int maskg, int maskb,
1600 int rsh, int gsh, int bsh, int S)
1602 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1603 rnd = (32<<((S)-1)) + (1<<(S-7));
1606 for (i = 0; i < width; i++) {
1607 int px = input_pixel(i) >> shp;
1608 int b = (px & maskb) >> shb;
1609 int g = (px & maskg) >> shg;
1610 int r = (px & maskr) >> shr;
1612 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
1616 static av_always_inline void
1617 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1618 const uint8_t *src, int width,
1619 enum PixelFormat origin,
1620 int shr, int shg, int shb, int shp,
1621 int maskr, int maskg, int maskb,
1622 int rsh, int gsh, int bsh, int S)
1624 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1625 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1626 rnd = (256<<((S)-1)) + (1<<(S-7));
1629 for (i = 0; i < width; i++) {
1630 int px = input_pixel(i) >> shp;
1631 int b = (px & maskb) >> shb;
1632 int g = (px & maskg) >> shg;
1633 int r = (px & maskr) >> shr;
1635 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1636 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
1640 static av_always_inline void
1641 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1642 const uint8_t *src, int width,
1643 enum PixelFormat origin,
1644 int shr, int shg, int shb, int shp,
1645 int maskr, int maskg, int maskb,
1646 int rsh, int gsh, int bsh, int S)
1648 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1649 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1650 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
1653 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1654 for (i = 0; i < width; i++) {
1655 int px0 = input_pixel(2 * i + 0) >> shp;
1656 int px1 = input_pixel(2 * i + 1) >> shp;
1657 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1658 int rb = px0 + px1 - g;
1660 b = (rb & maskb) >> shb;
1661 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1662 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1665 g = (g & maskg) >> shg;
1667 r = (rb & maskr) >> shr;
1669 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1670 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
1676 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1677 maskg, maskb, rsh, gsh, bsh, S) \
1678 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1679 int width, uint32_t *unused) \
1681 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1682 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1685 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1686 const uint8_t *src, const uint8_t *dummy, \
1687 int width, uint32_t *unused) \
1689 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1690 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1693 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1694 const uint8_t *src, const uint8_t *dummy, \
1695 int width, uint32_t *unused) \
1697 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1698 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1701 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1702 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1703 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1704 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1705 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1706 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1707 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1708 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1709 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1710 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1711 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1712 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1714 static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1717 for (i=0; i<width; i++) {
1718 dst[i]= src[4*i]<<6;
1722 static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1725 for (i=0; i<width; i++) {
1726 dst[i]= src[4*i+3]<<6;
1730 static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
1733 for (i=0; i<width; i++) {
1736 dst[i]= (pal[d] >> 24)<<6;
1740 static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
1743 for (i=0; i<width; i++) {
1746 dst[i]= (pal[d] & 0xFF)<<6;
1750 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1751 const uint8_t *src1, const uint8_t *src2,
1752 int width, uint32_t *pal)
1755 assert(src1 == src2);
1756 for (i=0; i<width; i++) {
1757 int p= pal[src1[i]];
1759 dstU[i]= (uint8_t)(p>> 8)<<6;
1760 dstV[i]= (uint8_t)(p>>16)<<6;
1764 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1767 for (i=0; i<width/8; i++) {
1770 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1774 for(j=0; j<(width&7); j++)
1775 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1779 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1782 for (i=0; i<width/8; i++) {
1785 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1789 for(j=0; j<(width&7); j++)
1790 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1794 //FIXME yuy2* can read up to 7 samples too much
1796 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1800 for (i=0; i<width; i++)
1804 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1805 const uint8_t *src2, int width, uint32_t *unused)
1808 for (i=0; i<width; i++) {
1809 dstU[i]= src1[4*i + 1];
1810 dstV[i]= src1[4*i + 3];
1812 assert(src1 == src2);
1815 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1818 const uint16_t *src = (const uint16_t *) _src;
1819 uint16_t *dst = (uint16_t *) _dst;
1820 for (i=0; i<width; i++) {
1821 dst[i] = av_bswap16(src[i]);
1825 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1826 const uint8_t *_src2, int width, uint32_t *unused)
1829 const uint16_t *src1 = (const uint16_t *) _src1,
1830 *src2 = (const uint16_t *) _src2;
1831 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1832 for (i=0; i<width; i++) {
1833 dstU[i] = av_bswap16(src1[i]);
1834 dstV[i] = av_bswap16(src2[i]);
1838 /* This is almost identical to the previous, and exists only because
1839 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1840 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1844 for (i=0; i<width; i++)
1848 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1849 const uint8_t *src2, int width, uint32_t *unused)
1852 for (i=0; i<width; i++) {
1853 dstU[i]= src1[4*i + 0];
1854 dstV[i]= src1[4*i + 2];
1856 assert(src1 == src2);
1859 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1860 const uint8_t *src, int width)
1863 for (i = 0; i < width; i++) {
1864 dst1[i] = src[2*i+0];
1865 dst2[i] = src[2*i+1];
1869 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1870 const uint8_t *src1, const uint8_t *src2,
1871 int width, uint32_t *unused)
1873 nvXXtoUV_c(dstU, dstV, src1, width);
1876 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1877 const uint8_t *src1, const uint8_t *src2,
1878 int width, uint32_t *unused)
1880 nvXXtoUV_c(dstV, dstU, src1, width);
1883 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1885 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1886 int width, uint32_t *unused)
1889 for (i=0; i<width; i++) {
1894 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1898 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1899 const uint8_t *src2, int width, uint32_t *unused)
1902 for (i=0; i<width; i++) {
1903 int b= src1[3*i + 0];
1904 int g= src1[3*i + 1];
1905 int r= src1[3*i + 2];
1907 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1908 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1910 assert(src1 == src2);
1913 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1914 const uint8_t *src2, int width, uint32_t *unused)
1917 for (i=0; i<width; i++) {
1918 int b= src1[6*i + 0] + src1[6*i + 3];
1919 int g= src1[6*i + 1] + src1[6*i + 4];
1920 int r= src1[6*i + 2] + src1[6*i + 5];
1922 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1923 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1925 assert(src1 == src2);
1928 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
1932 for (i=0; i<width; i++) {
1937 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1941 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1942 const uint8_t *src2, int width, uint32_t *unused)
1946 for (i=0; i<width; i++) {
1947 int r= src1[3*i + 0];
1948 int g= src1[3*i + 1];
1949 int b= src1[3*i + 2];
1951 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1952 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1956 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1957 const uint8_t *src2, int width, uint32_t *unused)
1961 for (i=0; i<width; i++) {
1962 int r= src1[6*i + 0] + src1[6*i + 3];
1963 int g= src1[6*i + 1] + src1[6*i + 4];
1964 int b= src1[6*i + 2] + src1[6*i + 5];
1966 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1967 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1971 static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1972 const int16_t *filter,
1973 const int16_t *filterPos, int filterSize)
1976 int32_t *dst = (int32_t *) _dst;
1977 const uint16_t *src = (const uint16_t *) _src;
1978 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1979 int sh = (bits <= 7) ? 11 : (bits - 4);
1981 for (i = 0; i < dstW; i++) {
1983 int srcPos = filterPos[i];
1986 for (j = 0; j < filterSize; j++) {
1987 val += src[srcPos + j] * filter[filterSize * i + j];
1989 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1990 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1994 // bilinear / bicubic scaling
1995 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1996 const int16_t *filter, const int16_t *filterPos,
2000 for (i=0; i<dstW; i++) {
2002 int srcPos= filterPos[i];
2004 for (j=0; j<filterSize; j++) {
2005 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2007 //filter += hFilterSize;
2008 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
2013 static inline void hScale16N_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
2014 const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
2018 for (i=0; i<dstW; i++) {
2019 int srcPos= filterPos[i];
2021 for (j=0; j<filterSize; j++) {
2022 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2024 dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
2028 static inline void hScale16NX_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
2029 const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
2032 for (i=0; i<dstW; i++) {
2033 int srcPos= filterPos[i];
2035 for (j=0; j<filterSize; j++) {
2036 val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
2038 dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
2042 //FIXME all pal and rgb srcFormats could do this conversion as well
2043 //FIXME all scalers more complex than bilinear could do half of this transform
2044 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2047 for (i = 0; i < width; i++) {
2048 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2049 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
2052 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2055 for (i = 0; i < width; i++) {
2056 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2057 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
2060 static void lumRangeToJpeg_c(int16_t *dst, int width)
2063 for (i = 0; i < width; i++)
2064 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
2066 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2069 for (i = 0; i < width; i++)
2070 dst[i] = (dst[i]*14071 + 33561947)>>14;
2073 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2076 int32_t *dstU = (int32_t *) _dstU;
2077 int32_t *dstV = (int32_t *) _dstV;
2078 for (i = 0; i < width; i++) {
2079 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2080 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2083 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2086 int32_t *dstU = (int32_t *) _dstU;
2087 int32_t *dstV = (int32_t *) _dstV;
2088 for (i = 0; i < width; i++) {
2089 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2090 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2093 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2096 int32_t *dst = (int32_t *) _dst;
2097 for (i = 0; i < width; i++)
2098 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2100 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2103 int32_t *dst = (int32_t *) _dst;
2104 for (i = 0; i < width; i++)
2105 dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
2108 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2109 const uint8_t *src, int srcW, int xInc)
2112 unsigned int xpos=0;
2113 for (i=0;i<dstWidth;i++) {
2114 register unsigned int xx=xpos>>16;
2115 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2116 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2119 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2120 dst[i] = src[srcW-1]*128;
2123 static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
2126 uint8_t *dst = (uint8_t *) _dst;
2127 for (i = len - 1; i >= 0; i--) {
2128 dst[i * 2] = dst[i * 2 + 1] = src[i];
2132 static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
2135 for (i = 0; i < len; i++) {
2136 dst[i] = src[i] >> 4;
// *** horizontal scale Y line to temp buffer
/*
 * Horizontally scale one luma (or alpha, if isAlpha) source line into
 * dst using the precomputed filter (hLumFilter/Pos/Size), going through
 * format conversion, optional 8->16 bit widening, and optional
 * range conversion.
 *
 * NOTE(review): several structural lines (opening brace, the
 * `if (toYV12)` / `if (c->hScale16)` / `if (convertRange)` guards and
 * matching braces) are missing from this copy of the file — restore
 * from pristine upstream before building.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    /* pick the per-format input converter; alpha has its own entry point */
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    /* convert the input line to planar Y and continue from the temp buffer */
    toYV12(formatConvBuffer, src, srcW, pal);
    src= formatConvBuffer;
    /* widen <=8-bit input in place when scaling at 16 bpp precision */
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
        src = formatConvBuffer;
    /* high-bit-depth path: shift selects the input sample depth (13 for
       RGB/PAL8 since those were widened above) */
    int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
    } else if (!c->hyscale_fast) {
        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
    /* MPEG<->JPEG range conversion on the scaled line (luma only) */
    convertRange(dst, dstWidth);
    /* narrow back to 15-bit intermediates when the output is <=8 bit */
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
2178 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2179 int dstWidth, const uint8_t *src1,
2180 const uint8_t *src2, int srcW, int xInc)
2183 unsigned int xpos=0;
2184 for (i=0;i<dstWidth;i++) {
2185 register unsigned int xx=xpos>>16;
2186 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2187 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2188 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2191 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2192 dst1[i] = src1[srcW-1]*128;
2193 dst2[i] = src2[srcW-1]*128;
/*
 * Horizontally scale one pair of chroma lines (U into dst1, V into
 * dst2), mirroring hyscale(): per-format conversion, optional 8->16 bit
 * widening, scaling, range conversion, optional 19->15 bit narrowing.
 * The V temp line lives at buf2, 16-byte aligned past the U line inside
 * formatConvBuffer.
 *
 * NOTE(review): structural lines (opening brace, `if (c->chrToYV12)` /
 * `if (c->hScale16)` guards, `src2 = buf2;` assignments and matching
 * braces) are missing from this copy of the file — restore from
 * pristine upstream before building.
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
        /* V plane temp buffer starts after the (aligned) U temp line */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
    /* widen <=8-bit chroma in place when scaling at 16 bpp precision */
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2+78, 16));
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
        c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
        src1 = formatConvBuffer;
        /* shift selects input depth; 13 for widened RGB/PAL8 input */
        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
        c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
        c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
    } else if (!c->hcscale_fast) {
        c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    /* MPEG<->JPEG range conversion on both chroma lines */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
    /* narrow back to 15-bit intermediates when the output is <=8 bit */
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
        c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/*
 * Select the C output functions for the destination format:
 * - yuv2yuv1/yuv2yuvX for planar output (1-tap vs. N-tap vertical filter),
 * - yuv2packed1/2/X for packed output (1-tap / 2-tap bilinear / N-tap),
 * with a separate table when SWS_FULL_CHR_H_INT (full horizontal chroma
 * interpolation) is requested for 32/24-bit RGB outputs.
 *
 * NOTE(review): this copy of the file is missing many lines — `case`
 * labels (e.g. PIX_FMT_RGBA/ARGB/BGRA/ABGR, RGB24/BGR24, RGB32/BGR32,
 * RGB8/BGR8, RGB4/BGR4), `#if CONFIG_SMALL` / `#else` lines, `break`s
 * and closing braces.  As written the switches are ill-formed; restore
 * from pristine upstream before building.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;
    /* planar / semi-planar writers */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        /* depth_minus1 == 8 means 9-bit output, else 10-bit */
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;
    /* full-chroma-interpolation packed RGB writers */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;
    /* standard packed writers (1-tap / 2-tap / N-tap per format) */
    switch (dstFormat) {
    case PIX_FMT_GRAY16BE:
        *yuv2packed1 = yuv2gray16BE_1_c;
        *yuv2packed2 = yuv2gray16BE_2_c;
        *yuv2packedX = yuv2gray16BE_X_c;
    case PIX_FMT_GRAY16LE:
        *yuv2packed1 = yuv2gray16LE_1_c;
        *yuv2packed2 = yuv2gray16LE_2_c;
        *yuv2packedX = yuv2gray16LE_X_c;
    case PIX_FMT_MONOWHITE:
        *yuv2packed1 = yuv2monowhite_1_c;
        *yuv2packed2 = yuv2monowhite_2_c;
        *yuv2packedX = yuv2monowhite_X_c;
    case PIX_FMT_MONOBLACK:
        *yuv2packed1 = yuv2monoblack_1_c;
        *yuv2packed2 = yuv2monoblack_2_c;
        *yuv2packedX = yuv2monoblack_X_c;
    case PIX_FMT_YUYV422:
        *yuv2packed1 = yuv2yuyv422_1_c;
        *yuv2packed2 = yuv2yuyv422_2_c;
        *yuv2packedX = yuv2yuyv422_X_c;
    case PIX_FMT_UYVY422:
        *yuv2packed1 = yuv2uyvy422_1_c;
        *yuv2packed2 = yuv2uyvy422_2_c;
        *yuv2packedX = yuv2uyvy422_X_c;
    case PIX_FMT_RGB48LE:
        *yuv2packed1 = yuv2rgb48le_1_c;
        *yuv2packed2 = yuv2rgb48le_2_c;
        *yuv2packedX = yuv2rgb48le_X_c;
    case PIX_FMT_RGB48BE:
        *yuv2packed1 = yuv2rgb48be_1_c;
        *yuv2packed2 = yuv2rgb48be_2_c;
        *yuv2packedX = yuv2rgb48be_X_c;
    case PIX_FMT_BGR48LE:
        *yuv2packed1 = yuv2bgr48le_1_c;
        *yuv2packed2 = yuv2bgr48le_2_c;
        *yuv2packedX = yuv2bgr48le_X_c;
    case PIX_FMT_BGR48BE:
        *yuv2packed1 = yuv2bgr48be_1_c;
        *yuv2packed2 = yuv2bgr48be_2_c;
        *yuv2packedX = yuv2bgr48be_X_c;
        *yuv2packed1 = yuv2rgb32_1_c;
        *yuv2packed2 = yuv2rgb32_2_c;
        *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_c;
            *yuv2packed2 = yuv2rgba32_2_c;
            *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_c;
            *yuv2packed2 = yuv2rgbx32_2_c;
            *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
    case PIX_FMT_RGB32_1:
    case PIX_FMT_BGR32_1:
        *yuv2packed1 = yuv2rgb32_1_1_c;
        *yuv2packed2 = yuv2rgb32_1_2_c;
        *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_1_c;
            *yuv2packed2 = yuv2rgba32_1_2_c;
            *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_1_c;
            *yuv2packed2 = yuv2rgbx32_1_2_c;
            *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
        *yuv2packed1 = yuv2rgb24_1_c;
        *yuv2packed2 = yuv2rgb24_2_c;
        *yuv2packedX = yuv2rgb24_X_c;
        *yuv2packed1 = yuv2bgr24_1_c;
        *yuv2packed2 = yuv2bgr24_2_c;
        *yuv2packedX = yuv2bgr24_X_c;
    case PIX_FMT_RGB565LE:
    case PIX_FMT_RGB565BE:
    case PIX_FMT_BGR565LE:
    case PIX_FMT_BGR565BE:
        *yuv2packed1 = yuv2rgb16_1_c;
        *yuv2packed2 = yuv2rgb16_2_c;
        *yuv2packedX = yuv2rgb16_X_c;
    case PIX_FMT_RGB555LE:
    case PIX_FMT_RGB555BE:
    case PIX_FMT_BGR555LE:
    case PIX_FMT_BGR555BE:
        *yuv2packed1 = yuv2rgb15_1_c;
        *yuv2packed2 = yuv2rgb15_2_c;
        *yuv2packedX = yuv2rgb15_X_c;
    case PIX_FMT_RGB444LE:
    case PIX_FMT_RGB444BE:
    case PIX_FMT_BGR444LE:
    case PIX_FMT_BGR444BE:
        *yuv2packed1 = yuv2rgb12_1_c;
        *yuv2packed2 = yuv2rgb12_2_c;
        *yuv2packedX = yuv2rgb12_X_c;
        *yuv2packed1 = yuv2rgb8_1_c;
        *yuv2packed2 = yuv2rgb8_2_c;
        *yuv2packedX = yuv2rgb8_X_c;
        *yuv2packed1 = yuv2rgb4_1_c;
        *yuv2packed2 = yuv2rgb4_2_c;
        *yuv2packedX = yuv2rgb4_X_c;
    case PIX_FMT_RGB4_BYTE:
    case PIX_FMT_BGR4_BYTE:
        *yuv2packed1 = yuv2rgb4b_1_c;
        *yuv2packed2 = yuv2rgb4b_2_c;
        *yuv2packedX = yuv2rgb4b_X_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 to log ring-buffer state inside swScale().
 * With the default 0 the compiler drops the av_log calls entirely. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main C scaling entry point for one source slice.
 *
 * Horizontally scales the needed source lines into ring buffers
 * (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf), then for each
 * output line applies the vertical filter and the per-format output
 * function.  Returns the number of destination lines written for this
 * slice (dstY - lastDstY).
 *
 * NOTE(review): this copy of the file is missing lines — declarations of
 * dstY / lastDstY / enough_lines, the src[]/srcStride[] packed-input
 * setup, several braces and `c->dstY` store-back.  Restore from pristine
 * upstream before building.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* negate-shift-negate rounds the subsampled slice height UP, not down */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    /* dither when the source has more precision than 8 bits */
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;
    /* packed input: all planes alias plane 0 (NOTE(review): the
       src[]/srcStride[] duplication lines are missing from this copy) */
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    /* vChrDrop discards chroma lines by widening the stride */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
    /* warn once if the caller's strides defeat aligned access */
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        /* per-line dither tables; flat64 == no dithering */
        const uint8_t *lumDither= should_dither ? dithers[7][dstY &7] : flat64;
        const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        if (!enough_lines) {
            /* clamp to what the slice can provide; the rest is buffered */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            /* scale the alpha plane through the same luma path */
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            /* gray/mono destinations need no chroma scaling */
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
            /* ring-buffer pointers for the lines feeding the vertical filter */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat))
                    dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
                             dest, dstW, chrDstW, lumDither, chrDither);
                } else { //General YV12
                    yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter + chrDstY * vChrFilterSize,
                             chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, dstW, chrDstW, lumDither, chrDither);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    /* duplicate 16-bit coefficients for the MMX filter tables */
                    lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);
    /* destination wants an alpha plane but the source had none: opaque fill */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
    /* flush non-temporal stores before returning to the caller */
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;
    return dstY - lastDstY;
/*
 * Initialize the C function pointers in the context: output writers,
 * per-format input converters (chrToYV12 / lumToYV12 / alpToYV12),
 * horizontal scalers, fast-bilinear paths and range converters, based
 * on src/dst format, scalingBpp and flags.
 *
 * NOTE(review): this copy of the file is missing lines — the
 * `switch(srcFormat) {` headers, `#if HAVE_BIGENDIAN` / `#else` lines,
 * several case labels and closing braces.  Restore from pristine
 * upstream before building.
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;
    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,
    /* ---- chroma input converters ---- */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12    : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21    : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    /* 9/10-bit planar: pick native vs byte-swapping 16-bit scaler */
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE: c->hScale16= HAVE_BIGENDIAN ? hScale16N_c : hScale16NX_c; break;
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE: c->hScale16= HAVE_BIGENDIAN ? hScale16NX_c : hScale16N_c; break;
    /* 16-bit planar with non-native endianness: byte-swap chroma */
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
    /* horizontally subsampled chroma: use the averaging "_half" readers */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
        /* full-resolution chroma readers */
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
    /* ---- luma / alpha input converters ---- */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUYV422 :
    case PIX_FMT_Y400A   : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24   : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
    case PIX_FMT_RGB24   : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    /* alpha channel extractor, when the source carries alpha */
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
    /* low-depth RGB and PAL8 go through the generic 16-bit scaler */
    if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
       || c->srcFormat == PIX_FMT_PAL8)
        c->hScale16= hScale16N_c;
    /* ---- horizontal scaler + range conversion, by precision ---- */
    if (c->scalingBpp == 8) {
        c->hScale       = hScale_c;
        if (c->flags & SWS_FAST_BILINEAR) {
            c->hyscale_fast = hyscale_fast_c;
            c->hcscale_fast = hcscale_fast_c;
        /* 8-bit range converters (direction selected by missing
           NOTE(review): the srcRange test line is lost in this copy) */
        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;
        c->hScale = hScale16_c;
        c->scale19To15Fw = scale19To15Fw_c;
        c->scale8To16Rv  = scale8To16Rv_c;
        /* 16-bit-precision range converters */
        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;
    /* chroma scaling is skipped entirely for gray/mono conversions */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2921 SwsFunc ff_getSwsFunc(SwsContext *c)
2923 sws_init_swScale_c(c);
2926 ff_sws_init_swScale_mmx(c);
2928 ff_sws_init_swScale_altivec(c);