2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion constants in Q15 (RGB2YUV_SHIFT).
 * The Y coefficients are scaled by 219/255 (limited-range luma) and the
 * U/V coefficients by 224/255 (limited-range chroma); "+0.5" rounds to
 * nearest before the truncating (int) cast.  The base coefficients
 * (0.299/0.587/0.114 etc.) are the ITU601 values used in the default
 * rows of rgb2yuv_table below. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Per-colorspace RGB->YUV coefficient tables.  Each row is laid out as
 * { GY, BY, RY,  GU, BU, RU,  GV, BV, RV } — the ITU601 rows match the
 * signs/magnitudes of the GY/BY/... macros above.
 * NOTE(review): the row index presumably corresponds to the colorspace
 * constants used by sws_setColorspaceDetails — confirm against callers. */
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
/* 2x2 ordered-dither matrix, values 0..3, replicated to an 8-byte row. */
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 { 1, 3, 1, 3, 1, 3, 1, 3, },
111 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither matrix, values 0..6 in steps of 2, replicated to an
 * 8-byte row. */
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 { 6, 2, 6, 2, 6, 2, 6, 2, },
116 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither matrix, values 0..15, replicated to an 8-byte row.
 * Non-static: referenced from other translation units. */
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 { 8, 4, 11, 7, 8, 4, 11, 7, },
121 { 2, 14, 1, 13, 2, 14, 1, 13, },
122 { 10, 6, 9, 5, 10, 6, 9, 5, },
123 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither matrix, values 0..31.  Non-static: referenced from
 * other translation units. */
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17, 9, 23, 15, 16, 8, 22, 14, },
128 { 5, 29, 3, 27, 4, 28, 2, 26, },
129 { 21, 13, 19, 11, 20, 12, 18, 10, },
130 { 0, 24, 6, 30, 1, 25, 7, 31, },
131 { 16, 8, 22, 14, 17, 9, 23, 15, },
132 { 4, 28, 2, 26, 5, 29, 3, 27, },
133 { 20, 12, 18, 10, 21, 13, 19, 11, },
134 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither matrix, values 0..72 (73 levels). */
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 { 0, 55, 14, 68, 3, 58, 17, 72, },
139 { 37, 18, 50, 32, 40, 22, 54, 35, },
140 { 9, 64, 5, 59, 13, 67, 8, 63, },
141 { 46, 27, 41, 23, 49, 31, 44, 26, },
142 { 2, 57, 16, 71, 1, 56, 15, 70, },
143 { 39, 21, 52, 34, 38, 19, 51, 33, },
144 { 11, 66, 7, 62, 10, 65, 6, 60, },
145 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither matrix with amplitude ~220 (values 0..217).  Also
 * used as the per-line dither row by the monochrome output writers. */
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117, 62, 158, 103, 113, 58, 155, 100, },
151 { 34, 199, 21, 186, 31, 196, 17, 182, },
152 {144, 89, 131, 76, 141, 86, 127, 72, },
153 { 0, 165, 41, 206, 10, 175, 52, 217, },
154 {110, 55, 151, 96, 120, 65, 162, 107, },
155 { 28, 193, 14, 179, 38, 203, 24, 189, },
156 {138, 83, 124, 69, 148, 93, 134, 79, },
157 { 7, 172, 48, 213, 3, 168, 45, 210, },
/* Gamma-corrected (gamma 1.5) variant of the 8x8/220 dither matrix.
 * NOTE(review): this re-declares dither_8x8_220; in the upstream file the
 * variants are selected by preprocessor conditionals that are not visible
 * here — confirm exactly one variant is compiled in. */
160 // tries to correct a gamma of 1.5
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 { 0, 143, 18, 200, 2, 156, 25, 215, },
163 { 78, 28, 125, 64, 89, 36, 138, 74, },
164 { 10, 180, 3, 161, 16, 195, 8, 175, },
165 {109, 51, 93, 38, 121, 60, 105, 47, },
166 { 1, 152, 23, 210, 0, 147, 20, 205, },
167 { 85, 33, 134, 71, 81, 30, 130, 67, },
168 { 14, 190, 6, 171, 12, 185, 5, 166, },
169 {117, 57, 101, 44, 113, 54, 97, 41, },
/* Gamma-corrected (gamma 2.0) variant of the 8x8/220 dither matrix.
 * NOTE(review): duplicate declaration of dither_8x8_220 — see the note on
 * the gamma-1.5 variant; the selecting #if guards are not visible here. */
172 // tries to correct a gamma of 2.0
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 { 0, 124, 8, 193, 0, 140, 12, 213, },
175 { 55, 14, 104, 42, 66, 19, 119, 52, },
176 { 3, 168, 1, 145, 6, 187, 3, 162, },
177 { 86, 31, 70, 21, 99, 39, 82, 28, },
178 { 0, 134, 11, 206, 0, 129, 9, 200, },
179 { 62, 17, 114, 48, 58, 16, 109, 45, },
180 { 5, 181, 2, 157, 4, 175, 1, 151, },
181 { 95, 36, 78, 26, 90, 34, 74, 24, },
/* Gamma-corrected (gamma 2.5) variant of the 8x8/220 dither matrix.
 * NOTE(review): duplicate declaration of dither_8x8_220 — see the note on
 * the gamma-1.5 variant; the selecting #if guards are not visible here. */
184 // tries to correct a gamma of 2.5
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 { 0, 107, 3, 187, 0, 125, 6, 212, },
187 { 39, 7, 86, 28, 49, 11, 102, 36, },
188 { 1, 158, 0, 131, 3, 180, 1, 151, },
189 { 68, 19, 52, 12, 81, 25, 64, 17, },
190 { 0, 119, 5, 203, 0, 113, 4, 195, },
191 { 45, 9, 96, 33, 42, 8, 91, 30, },
192 { 2, 172, 1, 144, 2, 165, 0, 137, },
193 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* dithers[i]: 8x8 ordered-dither matrix for progressively finer
 * quantization — the amplitude roughly doubles per index (values 0..1,
 * 0..3, 0..7, 0..15, 0..31, 0..63, ...).
 * NOTE(review): two adjacent 64-level sub-tables below are byte-for-byte
 * identical — confirm that duplication is intentional. */
197 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
199 { 0, 1, 0, 1, 0, 1, 0, 1,},
200 { 1, 0, 1, 0, 1, 0, 1, 0,},
201 { 0, 1, 0, 1, 0, 1, 0, 1,},
202 { 1, 0, 1, 0, 1, 0, 1, 0,},
203 { 0, 1, 0, 1, 0, 1, 0, 1,},
204 { 1, 0, 1, 0, 1, 0, 1, 0,},
205 { 0, 1, 0, 1, 0, 1, 0, 1,},
206 { 1, 0, 1, 0, 1, 0, 1, 0,},
208 { 1, 2, 1, 2, 1, 2, 1, 2,},
209 { 3, 0, 3, 0, 3, 0, 3, 0,},
210 { 1, 2, 1, 2, 1, 2, 1, 2,},
211 { 3, 0, 3, 0, 3, 0, 3, 0,},
212 { 1, 2, 1, 2, 1, 2, 1, 2,},
213 { 3, 0, 3, 0, 3, 0, 3, 0,},
214 { 1, 2, 1, 2, 1, 2, 1, 2,},
215 { 3, 0, 3, 0, 3, 0, 3, 0,},
217 { 2, 4, 3, 5, 2, 4, 3, 5,},
218 { 6, 0, 7, 1, 6, 0, 7, 1,},
219 { 3, 5, 2, 4, 3, 5, 2, 4,},
220 { 7, 1, 6, 0, 7, 1, 6, 0,},
221 { 2, 4, 3, 5, 2, 4, 3, 5,},
222 { 6, 0, 7, 1, 6, 0, 7, 1,},
223 { 3, 5, 2, 4, 3, 5, 2, 4,},
224 { 7, 1, 6, 0, 7, 1, 6, 0,},
226 { 4, 8, 7, 11, 4, 8, 7, 11,},
227 { 12, 0, 15, 3, 12, 0, 15, 3,},
228 { 6, 10, 5, 9, 6, 10, 5, 9,},
229 { 14, 2, 13, 1, 14, 2, 13, 1,},
230 { 4, 8, 7, 11, 4, 8, 7, 11,},
231 { 12, 0, 15, 3, 12, 0, 15, 3,},
232 { 6, 10, 5, 9, 6, 10, 5, 9,},
233 { 14, 2, 13, 1, 14, 2, 13, 1,},
235 { 9, 17, 15, 23, 8, 16, 14, 22,},
236 { 25, 1, 31, 7, 24, 0, 30, 6,},
237 { 13, 21, 11, 19, 12, 20, 10, 18,},
238 { 29, 5, 27, 3, 28, 4, 26, 2,},
239 { 8, 16, 14, 22, 9, 17, 15, 23,},
240 { 24, 0, 30, 6, 25, 1, 31, 7,},
241 { 12, 20, 10, 18, 13, 21, 11, 19,},
242 { 28, 4, 26, 2, 29, 5, 27, 3,},
244 { 18, 34, 30, 46, 17, 33, 29, 45,},
245 { 50, 2, 62, 14, 49, 1, 61, 13,},
246 { 26, 42, 22, 38, 25, 41, 21, 37,},
247 { 58, 10, 54, 6, 57, 9, 53, 5,},
248 { 16, 32, 28, 44, 19, 35, 31, 47,},
249 { 48, 0, 60, 12, 51, 3, 63, 15,},
250 { 24, 40, 20, 36, 27, 43, 23, 39,},
251 { 56, 8, 52, 4, 59, 11, 55, 7,},
253 { 18, 34, 30, 46, 17, 33, 29, 45,},
254 { 50, 2, 62, 14, 49, 1, 61, 13,},
255 { 26, 42, 22, 38, 25, 41, 21, 37,},
256 { 58, 10, 54, 6, 57, 9, 53, 5,},
257 { 16, 32, 28, 44, 19, 35, 31, 47,},
258 { 48, 0, 60, 12, 51, 3, 63, 15,},
259 { 24, 40, 20, 36, 27, 43, 23, 39,},
260 { 56, 8, 52, 4, 59, 11, 55, 7,},
262 { 36, 68, 60, 92, 34, 66, 58, 90,},
263 { 100, 4,124, 28, 98, 2,122, 26,},
264 { 52, 84, 44, 76, 50, 82, 42, 74,},
265 { 116, 20,108, 12,114, 18,106, 10,},
266 { 32, 64, 56, 88, 38, 70, 62, 94,},
267 { 96, 0,120, 24,102, 6,126, 30,},
268 { 48, 80, 40, 72, 54, 86, 46, 78,},
269 { 112, 16,104, 8,118, 22,110, 14,},
272 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Dither scaling factors, indexed [depth-ish][position-ish].  Non-static:
 * referenced from other translation units.
 * NOTE(review): the exact row/column semantics are not evident from this
 * extract — presumably scale factors mapping dither amplitudes between
 * bit depths; confirm against the code that indexes dither_scale. */
274 const uint16_t dither_scale[15][16]={
275 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
276 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
277 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
278 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
279 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
280 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
281 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
282 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
283 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
284 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
285 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
286 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
287 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
288 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
289 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
/**
 * Vertical multi-tap filtering to planar YUV(A) with >8 bits per sample.
 *
 * Each output sample is the sum of lumFilterSize (resp. chrFilterSize)
 * source rows weighted by 16-bit filter coefficients, then clipped and
 * stored as a 16-bit value by the local output_pixel() macro, big- or
 * little-endian according to big_endian.
 *
 * For output_bits == 16 ("dword" path) the source rows are read through
 * the int32_t pointers as-is; for smaller depths the same pointers are
 * reinterpreted as int16_t** — callers must pass matching data.
 * dest[0..3] are the Y, U, V and (optional) A planes; alpha is written
 * only when CONFIG_SWSCALE_ALPHA and dest[3] are set.
 */
292 static av_always_inline void
293 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
294 int lumFilterSize, const int16_t *chrFilter,
295 const int32_t **chrUSrc, const int32_t **chrVSrc,
296 int chrFilterSize, const int32_t **alpSrc,
297 uint16_t *dest[4], int dstW, int chrDstW,
298 int big_endian, int output_bits)
300 //FIXME Optimize (just quickly written not optimized..)
302 int dword= output_bits == 16;
303 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
304 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* Right-shift that converts the accumulated filter sum down to
 * output_bits; the trailing "- 1" pairs with the ">> 1" applied to each
 * product below to keep the sums within int range. */
305 int shift = 11 + 4*dword + 16 - output_bits - 1;
307 #define output_pixel(pos, val) \
309 if (output_bits == 16) { \
310 AV_WB16(pos, av_clip_uint16(val >> shift)); \
312 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
315 if (output_bits == 16) { \
316 AV_WL16(pos, av_clip_uint16(val >> shift)); \
318 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
321 for (i = 0; i < dstW; i++) {
/* Initialize with the rounding constant for the chosen depth. */
322 int val = 1 << (26-output_bits + 4*dword - 1);
325 for (j = 0; j < lumFilterSize; j++)
326 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
328 output_pixel(&yDest[i], val);
332 for (i = 0; i < chrDstW; i++) {
333 int u = 1 << (26-output_bits + 4*dword - 1);
334 int v = 1 << (26-output_bits + 4*dword - 1);
337 for (j = 0; j < chrFilterSize; j++) {
338 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
339 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
342 output_pixel(&uDest[i], u);
343 output_pixel(&vDest[i], v);
347 if (CONFIG_SWSCALE_ALPHA && aDest) {
348 for (i = 0; i < dstW; i++) {
349 int val = 1 << (26-output_bits + 4*dword - 1);
352 for (j = 0; j < lumFilterSize; j++)
353 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
355 output_pixel(&aDest[i], val);
/* yuv2NBPS(bits, BE_LE, is_be): stamps out a yuv2yuvX<bits><BE|LE>_c
 * output function.  The wrapper reinterprets the generic int16_t** row
 * pointers as int32_t** (as yuv2yuvX16_c_template expects) and forwards
 * with the compile-time endianness and bit depth, letting the template's
 * branches fold away. */
361 #define yuv2NBPS(bits, BE_LE, is_be) \
362 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
363 const int16_t **_lumSrc, int lumFilterSize, \
364 const int16_t *chrFilter, const int16_t **_chrUSrc, \
365 const int16_t **_chrVSrc, \
366 int chrFilterSize, const int16_t **_alpSrc, \
367 uint8_t *_dest[4], int dstW, int chrDstW) \
369 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
370 **chrUSrc = (const int32_t **) _chrUSrc, \
371 **chrVSrc = (const int32_t **) _chrVSrc, \
372 **alpSrc = (const int32_t **) _alpSrc; \
373 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
374 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
375 alpSrc, (uint16_t **) _dest, \
376 dstW, chrDstW, is_be, bits); \
/**
 * Vertical multi-tap filtering to 8-bit planar YUV(A).
 *
 * The per-pixel dither value (lumDither/chrDither, indexed mod 8; chroma
 * V uses an offset of 3 into the dither row) is pre-shifted into the
 * rounding bits (<<12) before the weighted sum, and results are clipped
 * to 8 bits after >>19.  Alpha is written only when CONFIG_SWSCALE_ALPHA
 * and dest[3] are set.
 */
385 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
386 const int16_t **lumSrc, int lumFilterSize,
387 const int16_t *chrFilter, const int16_t **chrUSrc,
388 const int16_t **chrVSrc,
389 int chrFilterSize, const int16_t **alpSrc,
390 uint8_t *dest[4], int dstW, int chrDstW,
391 const uint8_t *lumDither, const uint8_t *chrDither)
393 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
394 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
397 //FIXME Optimize (just quickly written not optimized..)
398 for (i=0; i<dstW; i++) {
399 int val = lumDither[i&7] << 12;
401 for (j=0; j<lumFilterSize; j++)
402 val += lumSrc[j][i] * lumFilter[j];
404 yDest[i]= av_clip_uint8(val>>19);
408 for (i=0; i<chrDstW; i++) {
409 int u = chrDither[i&7] << 12;
410 int v = chrDither[(i+3)&7] << 12;
412 for (j=0; j<chrFilterSize; j++) {
413 u += chrUSrc[j][i] * chrFilter[j];
414 v += chrVSrc[j][i] * chrFilter[j];
417 uDest[i]= av_clip_uint8(u>>19);
418 vDest[i]= av_clip_uint8(v>>19);
421 if (CONFIG_SWSCALE_ALPHA && aDest)
422 for (i=0; i<dstW; i++) {
423 int val = lumDither[i&7] << 12;
425 for (j=0; j<lumFilterSize; j++)
426 val += alpSrc[j][i] * lumFilter[j];
428 aDest[i]= av_clip_uint8(val>>19);
/**
 * 1-tap (no vertical filtering) output to 8-bit planar YUV(A): add the
 * dither value to each sample, drop 7 fractional bits and clip.  Chroma V
 * uses the dither row offset by 3, as in yuv2yuvX_c.
 */
432 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
433 const int16_t *chrUSrc, const int16_t *chrVSrc,
434 const int16_t *alpSrc,
435 uint8_t *dest[4], int dstW, int chrDstW,
436 const uint8_t *lumDither, const uint8_t *chrDither)
438 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
439 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
442 for (i=0; i<dstW; i++) {
443 int val= (lumSrc[i]+lumDither[i&7])>>7;
444 yDest[i]= av_clip_uint8(val);
448 for (i=0; i<chrDstW; i++) {
449 int u=(chrUSrc[i]+chrDither[i&7])>>7;
450 int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
451 uDest[i]= av_clip_uint8(u);
452 vDest[i]= av_clip_uint8(v);
455 if (CONFIG_SWSCALE_ALPHA && aDest)
456 for (i=0; i<dstW; i++) {
457 int val= (alpSrc[i]+lumDither[i&7])>>7;
458 aDest[i]= av_clip_uint8(val);
/**
 * Vertical multi-tap filtering to semi-planar NV12/NV21: luma exactly as
 * in yuv2yuvX_c, chroma written interleaved into dest[1] — U,V order for
 * PIX_FMT_NV12, swapped (V,U) otherwise (i.e. NV21).
 */
462 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
463 const int16_t **lumSrc, int lumFilterSize,
464 const int16_t *chrFilter, const int16_t **chrUSrc,
465 const int16_t **chrVSrc, int chrFilterSize,
466 const int16_t **alpSrc, uint8_t *dest[4],
467 int dstW, int chrDstW,
468 const uint8_t *lumDither, const uint8_t *chrDither)
470 uint8_t *yDest = dest[0], *uDest = dest[1];
471 enum PixelFormat dstFormat = c->dstFormat;
473 //FIXME Optimize (just quickly written not optimized..)
475 for (i=0; i<dstW; i++) {
476 int val = lumDither[i&7]<<12;
478 for (j=0; j<lumFilterSize; j++)
479 val += lumSrc[j][i] * lumFilter[j];
481 yDest[i]= av_clip_uint8(val>>19);
487 if (dstFormat == PIX_FMT_NV12)
488 for (i=0; i<chrDstW; i++) {
489 int u = chrDither[i&7]<<12;
490 int v = chrDither[(i+3)&7]<<12;
492 for (j=0; j<chrFilterSize; j++) {
493 u += chrUSrc[j][i] * chrFilter[j];
494 v += chrVSrc[j][i] * chrFilter[j];
497 uDest[2*i]= av_clip_uint8(u>>19);
498 uDest[2*i+1]= av_clip_uint8(v>>19);
/* NV21 path: identical computation, but V comes first in the pair. */
501 for (i=0; i<chrDstW; i++) {
502 int u = chrDither[i&7]<<12;
503 int v = chrDither[(i+3)&7]<<12;
505 for (j=0; j<chrFilterSize; j++) {
506 u += chrUSrc[j][i] * chrFilter[j];
507 v += chrVSrc[j][i] * chrFilter[j];
510 uDest[2*i]= av_clip_uint8(v>>19);
511 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel(): store one 16-bit gray sample, byteswapped when the
 * target is PIX_FMT_GRAY16BE (rest of the macro body not visible here).
 *
 * yuv2gray16_X_c_template(): vertical multi-tap filtering to 16-bit gray.
 * Two pixels are produced per iteration; the "(Y1 | Y2) & 0x10000" test
 * detects the overflow bit so the clip is only paid when needed. */
515 #define output_pixel(pos, val) \
516 if (target == PIX_FMT_GRAY16BE) { \
522 static av_always_inline void
523 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
524 const int32_t **lumSrc, int lumFilterSize,
525 const int16_t *chrFilter, const int32_t **chrUSrc,
526 const int32_t **chrVSrc, int chrFilterSize,
527 const int32_t **alpSrc, uint16_t *dest, int dstW,
528 int y, enum PixelFormat target)
532 for (i = 0; i < (dstW >> 1); i++) {
537 for (j = 0; j < lumFilterSize; j++) {
538 Y1 += lumSrc[j][i * 2] * lumFilter[j];
539 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
543 if ((Y1 | Y2) & 0x10000) {
544 Y1 = av_clip_uint16(Y1);
545 Y2 = av_clip_uint16(Y2);
547 output_pixel(&dest[i * 2 + 0], Y1);
548 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Bilinear (two source rows) vertical interpolation to 16-bit gray:
 * blend buf[0] and buf[1] with weights (4095 - yalpha) / yalpha and
 * drop 15 fractional bits.  Chroma/alpha inputs are unused for gray.
 */
552 static av_always_inline void
553 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
554 const int32_t *ubuf[2], const int32_t *vbuf[2],
555 const int32_t *abuf[2], uint16_t *dest, int dstW,
556 int yalpha, int uvalpha, int y,
557 enum PixelFormat target)
559 int yalpha1 = 4095 - yalpha;
561 const int32_t *buf0 = buf[0], *buf1 = buf[1];
563 for (i = 0; i < (dstW >> 1); i++) {
564 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
565 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
567 output_pixel(&dest[i * 2 + 0], Y1);
568 output_pixel(&dest[i * 2 + 1], Y2);
/**
 * Single-row (no vertical interpolation) output to 16-bit gray with
 * round-to-nearest: (x + 4) >> 3.
 */
572 static av_always_inline void
573 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
574 const int32_t *ubuf[2], const int32_t *vbuf[2],
575 const int32_t *abuf0, uint16_t *dest, int dstW,
576 int uvalpha, int y, enum PixelFormat target)
580 for (i = 0; i < (dstW >> 1); i++) {
581 int Y1 = (buf0[i * 2 ]+4)>>3;
582 int Y2 = (buf0[i * 2 + 1]+4)>>3;
584 output_pixel(&dest[i * 2 + 0], Y1);
585 output_pixel(&dest[i * 2 + 1], Y2);
/* YUV2PACKED16WRAPPER(name, base, ext, fmt): instantiates the three
 * output entry points (_X_c multi-tap, _2_c bilinear, _1_c single-row)
 * for a 16-bit-per-component packed format.  Each wrapper only casts the
 * generic int16_t-based pointer arguments to the int32_t-based ones the
 * corresponding template expects (and dest to uint16_t*), passing `fmt`
 * as the compile-time target so the template's branches fold away.
 * Instantiated right below for gray16 LE/BE, and further down for the
 * rgb48/bgr48 variants. */
591 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
592 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
593 const int16_t **_lumSrc, int lumFilterSize, \
594 const int16_t *chrFilter, const int16_t **_chrUSrc, \
595 const int16_t **_chrVSrc, int chrFilterSize, \
596 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
599 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
600 **chrUSrc = (const int32_t **) _chrUSrc, \
601 **chrVSrc = (const int32_t **) _chrVSrc, \
602 **alpSrc = (const int32_t **) _alpSrc; \
603 uint16_t *dest = (uint16_t *) _dest; \
604 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
605 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
606 alpSrc, dest, dstW, y, fmt); \
609 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
610 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
611 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
612 int yalpha, int uvalpha, int y) \
614 const int32_t **buf = (const int32_t **) _buf, \
615 **ubuf = (const int32_t **) _ubuf, \
616 **vbuf = (const int32_t **) _vbuf, \
617 **abuf = (const int32_t **) _abuf; \
618 uint16_t *dest = (uint16_t *) _dest; \
619 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
620 dest, dstW, yalpha, uvalpha, y, fmt); \
623 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
624 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
625 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
626 int uvalpha, int y) \
628 const int32_t *buf0 = (const int32_t *) _buf0, \
629 **ubuf = (const int32_t **) _ubuf, \
630 **vbuf = (const int32_t **) _vbuf, \
631 *abuf0 = (const int32_t *) _abuf0; \
632 uint16_t *dest = (uint16_t *) _dest; \
633 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
634 dstW, uvalpha, y, fmt); \
637 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
638 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel(pos, acc): store one byte of 8 packed 1-bpp pixels; the
 * macro distinguishes PIX_FMT_MONOBLACK from the other mono target (rest
 * of the macro body not visible here).
 *
 * yuv2mono_X_c_template(): vertical multi-tap filtering to 1-bpp
 * monochrome.  d128 is the 220-level 8x8 dither row selected by the
 * output line (y & 7); g is a lookup built from the context's
 * table_gU/table_gV at neutral chroma (128) mapping dithered luma to a
 * bit value.  Bits are packed MSB-first via the doubling accumulator
 * (acc += acc + bit).  The "& 0x100" test clips only on overflow. */
640 #define output_pixel(pos, acc) \
641 if (target == PIX_FMT_MONOBLACK) { \
647 static av_always_inline void
648 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
649 const int16_t **lumSrc, int lumFilterSize,
650 const int16_t *chrFilter, const int16_t **chrUSrc,
651 const int16_t **chrVSrc, int chrFilterSize,
652 const int16_t **alpSrc, uint8_t *dest, int dstW,
653 int y, enum PixelFormat target)
655 const uint8_t * const d128=dither_8x8_220[y&7];
656 uint8_t *g = c->table_gU[128] + c->table_gV[128];
660 for (i = 0; i < dstW - 1; i += 2) {
665 for (j = 0; j < lumFilterSize; j++) {
666 Y1 += lumSrc[j][i] * lumFilter[j];
667 Y2 += lumSrc[j][i+1] * lumFilter[j];
671 if ((Y1 | Y2) & 0x100) {
672 Y1 = av_clip_uint8(Y1);
673 Y2 = av_clip_uint8(Y2);
675 acc += acc + g[Y1 + d128[(i + 0) & 7]];
676 acc += acc + g[Y2 + d128[(i + 1) & 7]];
678 output_pixel(*dest++, acc);
/**
 * Bilinear two-row blend to 1-bpp monochrome: eight pixels per iteration
 * are blended with (4095 - yalpha) / yalpha weights, dithered with d128,
 * looked up through g and packed MSB-first into one output byte via the
 * doubling accumulator.
 */
683 static av_always_inline void
684 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
685 const int16_t *ubuf[2], const int16_t *vbuf[2],
686 const int16_t *abuf[2], uint8_t *dest, int dstW,
687 int yalpha, int uvalpha, int y,
688 enum PixelFormat target)
690 const int16_t *buf0 = buf[0], *buf1 = buf[1];
691 const uint8_t * const d128 = dither_8x8_220[y & 7];
692 uint8_t *g = c->table_gU[128] + c->table_gV[128];
693 int yalpha1 = 4095 - yalpha;
696 for (i = 0; i < dstW - 7; i += 8) {
697 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
698 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
699 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
700 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
701 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
702 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
703 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
704 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
705 output_pixel(*dest++, acc);
/**
 * Single-row output to 1-bpp monochrome: same packing as the _2_
 * template but reading one source row (>> 7, no blend).
 */
709 static av_always_inline void
710 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
711 const int16_t *ubuf[2], const int16_t *vbuf[2],
712 const int16_t *abuf0, uint8_t *dest, int dstW,
713 int uvalpha, int y, enum PixelFormat target)
715 const uint8_t * const d128 = dither_8x8_220[y & 7];
716 uint8_t *g = c->table_gU[128] + c->table_gV[128];
719 for (i = 0; i < dstW - 7; i += 8) {
720 int acc = g[(buf0[i ] >> 7) + d128[0]];
721 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
722 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
723 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
724 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
725 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
726 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
727 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
728 output_pixel(*dest++, acc);
/* YUV2PACKEDWRAPPER(name, base, ext, fmt): like YUV2PACKED16WRAPPER but
 * for 8-bit packed outputs — no pointer reinterpretation needed, the
 * wrappers simply forward to the _X/_2/_1 templates with the
 * compile-time `fmt` target.  Instantiated below for monowhite /
 * monoblack, and further down for yuyv422 / uyvy422. */
734 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
735 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
736 const int16_t **lumSrc, int lumFilterSize, \
737 const int16_t *chrFilter, const int16_t **chrUSrc, \
738 const int16_t **chrVSrc, int chrFilterSize, \
739 const int16_t **alpSrc, uint8_t *dest, int dstW, \
742 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
743 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
744 alpSrc, dest, dstW, y, fmt); \
747 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
748 const int16_t *ubuf[2], const int16_t *vbuf[2], \
749 const int16_t *abuf[2], uint8_t *dest, int dstW, \
750 int yalpha, int uvalpha, int y) \
752 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
753 dest, dstW, yalpha, uvalpha, y, fmt); \
756 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
757 const int16_t *ubuf[2], const int16_t *vbuf[2], \
758 const int16_t *abuf0, uint8_t *dest, int dstW, \
759 int uvalpha, int y) \
761 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
762 abuf0, dest, dstW, uvalpha, \
766 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
767 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels(pos, Y1, U, Y2, V): interleave two luma samples and one
 * shared U/V pair in YUYV byte order for PIX_FMT_YUYV422, UYVY order
 * otherwise (some interleaving lines not visible here).
 *
 * yuv2422_X_c_template(): vertical multi-tap filtering to packed 4:2:2;
 * two luma samples share one chroma pair per iteration.  The
 * "(Y1 | Y2 | U | V) & 0x100" test clips only when some component
 * overflowed 8 bits. */
769 #define output_pixels(pos, Y1, U, Y2, V) \
770 if (target == PIX_FMT_YUYV422) { \
771 dest[pos + 0] = Y1; \
773 dest[pos + 2] = Y2; \
777 dest[pos + 1] = Y1; \
779 dest[pos + 3] = Y2; \
782 static av_always_inline void
783 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
784 const int16_t **lumSrc, int lumFilterSize,
785 const int16_t *chrFilter, const int16_t **chrUSrc,
786 const int16_t **chrVSrc, int chrFilterSize,
787 const int16_t **alpSrc, uint8_t *dest, int dstW,
788 int y, enum PixelFormat target)
792 for (i = 0; i < (dstW >> 1); i++) {
799 for (j = 0; j < lumFilterSize; j++) {
800 Y1 += lumSrc[j][i * 2] * lumFilter[j];
801 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
803 for (j = 0; j < chrFilterSize; j++) {
804 U += chrUSrc[j][i] * chrFilter[j];
805 V += chrVSrc[j][i] * chrFilter[j];
811 if ((Y1 | Y2 | U | V) & 0x100) {
812 Y1 = av_clip_uint8(Y1);
813 Y2 = av_clip_uint8(Y2);
814 U = av_clip_uint8(U);
815 V = av_clip_uint8(V);
817 output_pixels(4*i, Y1, U, Y2, V);
/**
 * Bilinear two-row blend to packed 4:2:2: luma blended with
 * (4095 - yalpha) / yalpha, chroma with (4095 - uvalpha) / uvalpha,
 * both reduced with >> 19.
 */
821 static av_always_inline void
822 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
823 const int16_t *ubuf[2], const int16_t *vbuf[2],
824 const int16_t *abuf[2], uint8_t *dest, int dstW,
825 int yalpha, int uvalpha, int y,
826 enum PixelFormat target)
828 const int16_t *buf0 = buf[0], *buf1 = buf[1],
829 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
830 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
831 int yalpha1 = 4095 - yalpha;
832 int uvalpha1 = 4095 - uvalpha;
835 for (i = 0; i < (dstW >> 1); i++) {
836 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
837 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
838 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
839 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
841 output_pixels(i * 4, Y1, U, Y2, V);
/**
 * Single-luma-row output to packed 4:2:2.  For uvalpha < 2048 chroma is
 * taken from a single row (>> 7); otherwise the two chroma rows are
 * averaged ((a + b) >> 8).
 * NOTE(review): the single-row branch reads ubuf1/vbuf1 rather than
 * ubuf0/vbuf0 — confirm this is the intended row.
 */
845 static av_always_inline void
846 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
847 const int16_t *ubuf[2], const int16_t *vbuf[2],
848 const int16_t *abuf0, uint8_t *dest, int dstW,
849 int uvalpha, int y, enum PixelFormat target)
851 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
852 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
855 if (uvalpha < 2048) {
856 for (i = 0; i < (dstW >> 1); i++) {
857 int Y1 = buf0[i * 2] >> 7;
858 int Y2 = buf0[i * 2 + 1] >> 7;
859 int U = ubuf1[i] >> 7;
860 int V = vbuf1[i] >> 7;
862 output_pixels(i * 4, Y1, U, Y2, V);
865 for (i = 0; i < (dstW >> 1); i++) {
866 int Y1 = buf0[i * 2] >> 7;
867 int Y2 = buf0[i * 2 + 1] >> 7;
868 int U = (ubuf0[i] + ubuf1[i]) >> 8;
869 int V = (vbuf0[i] + vbuf1[i]) >> 8;
871 output_pixels(i * 4, Y1, U, Y2, V);
878 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
879 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B / B_R: swap red and blue depending on whether the target is RGB48
 * or BGR48.  output_pixel(): store one 16-bit component with the target's
 * endianness (rest of the macro body not visible here).
 *
 * yuv2rgb48_X_c_template(): vertical multi-tap filtering directly to
 * 16-bit-per-component packed RGB/BGR using the per-context yuv2rgb
 * coefficients (c->yuv2rgb_*).  U/V accumulators start at -128 << 23 to
 * remove the chroma bias; each component is clipped to 30 bits and then
 * reduced with >> 14 to 16 bits. */
881 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
882 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
883 #define output_pixel(pos, val) \
884 if (isBE(target)) { \
890 static av_always_inline void
891 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
892 const int32_t **lumSrc, int lumFilterSize,
893 const int16_t *chrFilter, const int32_t **chrUSrc,
894 const int32_t **chrVSrc, int chrFilterSize,
895 const int32_t **alpSrc, uint16_t *dest, int dstW,
896 int y, enum PixelFormat target)
900 for (i = 0; i < (dstW >> 1); i++) {
904 int U = -128 << 23; // 19
908 for (j = 0; j < lumFilterSize; j++) {
909 Y1 += lumSrc[j][i * 2] * lumFilter[j];
910 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
912 for (j = 0; j < chrFilterSize; j++) {
913 U += chrUSrc[j][i] * chrFilter[j];
914 V += chrVSrc[j][i] * chrFilter[j];
917 // 8bit: 12+15=27; 16-bit: 12+19=31
923 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
924 Y1 -= c->yuv2rgb_y_offset;
925 Y2 -= c->yuv2rgb_y_offset;
926 Y1 *= c->yuv2rgb_y_coeff;
927 Y2 *= c->yuv2rgb_y_coeff;
930 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
932 R = V * c->yuv2rgb_v2r_coeff;
933 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
934 B = U * c->yuv2rgb_u2b_coeff;
936 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
937 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
938 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
939 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
940 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
941 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
942 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Bilinear two-row blend to 16-bit-per-component packed RGB/BGR: luma
 * and chroma blended with 4095-based weights (>> 14), the chroma bias
 * (-128 << 23) removed before the shift, then converted with the same
 * c->yuv2rgb_* coefficient / clip-to-30-bit / >> 14 pipeline as the
 * _X_ template.
 */
947 static av_always_inline void
948 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
949 const int32_t *ubuf[2], const int32_t *vbuf[2],
950 const int32_t *abuf[2], uint16_t *dest, int dstW,
951 int yalpha, int uvalpha, int y,
952 enum PixelFormat target)
954 const int32_t *buf0 = buf[0], *buf1 = buf[1],
955 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
956 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
957 int yalpha1 = 4095 - yalpha;
958 int uvalpha1 = 4095 - uvalpha;
961 for (i = 0; i < (dstW >> 1); i++) {
962 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
963 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
964 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
965 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
968 Y1 -= c->yuv2rgb_y_offset;
969 Y2 -= c->yuv2rgb_y_offset;
970 Y1 *= c->yuv2rgb_y_coeff;
971 Y2 *= c->yuv2rgb_y_coeff;
975 R = V * c->yuv2rgb_v2r_coeff;
976 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
977 B = U * c->yuv2rgb_u2b_coeff;
979 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
980 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
981 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
982 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
983 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
984 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Single-luma-row output to 16-bit-per-component packed RGB/BGR.  For
 * uvalpha < 2048 chroma comes from one row (bias -128 << 11, >> 2);
 * otherwise the two chroma rows are averaged (bias -128 << 12, >> 3).
 * Conversion then follows the same c->yuv2rgb_* coefficient /
 * clip-to-30-bit / >> 14 pipeline as the other rgb48 templates.
 */
989 static av_always_inline void
990 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
991 const int32_t *ubuf[2], const int32_t *vbuf[2],
992 const int32_t *abuf0, uint16_t *dest, int dstW,
993 int uvalpha, int y, enum PixelFormat target)
995 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
996 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
999 if (uvalpha < 2048) {
1000 for (i = 0; i < (dstW >> 1); i++) {
1001 int Y1 = (buf0[i * 2] ) >> 2;
1002 int Y2 = (buf0[i * 2 + 1]) >> 2;
1003 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1004 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1007 Y1 -= c->yuv2rgb_y_offset;
1008 Y2 -= c->yuv2rgb_y_offset;
1009 Y1 *= c->yuv2rgb_y_coeff;
1010 Y2 *= c->yuv2rgb_y_coeff;
1014 R = V * c->yuv2rgb_v2r_coeff;
1015 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1016 B = U * c->yuv2rgb_u2b_coeff;
1018 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1019 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1020 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1021 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1022 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1023 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Averaged-chroma branch (uvalpha >= 2048). */
1027 for (i = 0; i < (dstW >> 1); i++) {
1028 int Y1 = (buf0[i * 2] ) >> 2;
1029 int Y2 = (buf0[i * 2 + 1]) >> 2;
1030 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
1031 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
1034 Y1 -= c->yuv2rgb_y_offset;
1035 Y2 -= c->yuv2rgb_y_offset;
1036 Y1 *= c->yuv2rgb_y_coeff;
1037 Y2 *= c->yuv2rgb_y_coeff;
1041 R = V * c->yuv2rgb_v2r_coeff;
1042 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1043 B = U * c->yuv2rgb_u2b_coeff;
1045 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1046 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1047 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1048 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1049 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1050 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1060 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1061 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1062 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1063 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/*
 * Store one pair of output pixels through the per-context yuv2rgb lookup
 * tables.  Y1/Y2 (and A1/A2 when hasAlpha) index into the r/g/b tables;
 * the branch taken depends on the compile-time 'target' pixel format, so
 * each instantiation collapses to a single path.  The y coordinate is
 * used only for the ordered-dither offsets of the <=16bpp formats.
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
int U, int V, int A1, int A2,
const void *_r, const void *_g, const void *_b, int y,
enum PixelFormat target, int hasAlpha)
/* 32-bit formats: table entries already hold shifted components, sum them */
if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
uint32_t *dest = (uint32_t *) _dest;
const uint32_t *r = (const uint32_t *) _r;
const uint32_t *g = (const uint32_t *) _g;
const uint32_t *b = (const uint32_t *) _b;
/* alpha goes to byte 0 for the *32_1 variants, byte 3 otherwise */
int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24-bit formats: three bytes per pixel, component order from r_b/b_r */
} else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
uint8_t *dest = (uint8_t *) _dest;
const uint8_t *r = (const uint8_t *) _r;
const uint8_t *g = (const uint8_t *) _g;
const uint8_t *b = (const uint8_t *) _b;
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
dest[i * 6 + 0] = r_b[Y1];
dest[i * 6 + 1] = g[Y1];
dest[i * 6 + 2] = b_r[Y1];
dest[i * 6 + 3] = r_b[Y2];
dest[i * 6 + 4] = g[Y2];
dest[i * 6 + 5] = b_r[Y2];
/* 16/15/12-bit formats: table lookup with per-component ordered dither */
} else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
uint16_t *dest = (uint16_t *) _dest;
const uint16_t *r = (const uint16_t *) _r;
const uint16_t *g = (const uint16_t *) _g;
const uint16_t *b = (const uint16_t *) _b;
int dr1, dg1, db1, dr2, dg2, db2;
if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
dr1 = dither_2x2_8[ y & 1 ][0];
dg1 = dither_2x2_4[ y & 1 ][0];
db1 = dither_2x2_8[(y & 1) ^ 1][0];
dr2 = dither_2x2_8[ y & 1 ][1];
dg2 = dither_2x2_4[ y & 1 ][1];
db2 = dither_2x2_8[(y & 1) ^ 1][1];
} else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
dr1 = dither_2x2_8[ y & 1 ][0];
dg1 = dither_2x2_8[ y & 1 ][1];
db1 = dither_2x2_8[(y & 1) ^ 1][0];
dr2 = dither_2x2_8[ y & 1 ][1];
dg2 = dither_2x2_8[ y & 1 ][0];
db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* 444: 4 bits per component, use the 4x4/16 matrix */
dr1 = dither_4x4_16[ y & 3 ][0];
dg1 = dither_4x4_16[ y & 3 ][1];
db1 = dither_4x4_16[(y & 3) ^ 3][0];
dr2 = dither_4x4_16[ y & 3 ][1];
dg2 = dither_4x4_16[ y & 3 ][0];
db2 = dither_4x4_16[(y & 3) ^ 3][1];
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
} else /* 8/4-bit */ {
uint8_t *dest = (uint8_t *) _dest;
const uint8_t *r = (const uint8_t *) _r;
const uint8_t *g = (const uint8_t *) _g;
const uint8_t *b = (const uint8_t *) _b;
int dr1, dg1, db1, dr2, dg2, db2;
if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
const uint8_t * const d64 = dither_8x8_73[y & 7];
const uint8_t * const d32 = dither_8x8_32[y & 7];
dr1 = dg1 = d32[(i * 2 + 0) & 7];
db1 = d64[(i * 2 + 0) & 7];
dr2 = dg2 = d32[(i * 2 + 1) & 7];
db2 = d64[(i * 2 + 1) & 7];
const uint8_t * const d64 = dither_8x8_73 [y & 7];
const uint8_t * const d128 = dither_8x8_220[y & 7];
dr1 = db1 = d128[(i * 2 + 0) & 7];
dg1 = d64[(i * 2 + 0) & 7];
dr2 = db2 = d128[(i * 2 + 1) & 7];
dg2 = d64[(i * 2 + 1) & 7];
/* 4-bit packed: two pixels per output byte */
if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Vertically scale (multi-tap filter) luma/chroma/alpha into one packed
 * RGB output line: accumulate filter taps at 19-bit precision, reduce to
 * 8 bits, clip only when out of range, then emit pixel pairs through the
 * yuv2rgb lookup tables.
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest, int dstW,
int y, enum PixelFormat target, int hasAlpha)
for (i = 0; i < (dstW >> 1); i++) {
int av_unused A1, A2;
const void *r, *g, *b;
for (j = 0; j < lumFilterSize; j++) {
Y1 += lumSrc[j][i * 2] * lumFilter[j];
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
for (j = 0; j < chrFilterSize; j++) {
U += chrUSrc[j][i] * chrFilter[j];
V += chrVSrc[j][i] * chrFilter[j];
/* clip only if any value left the 8-bit range (cheap common case) */
if ((Y1 | Y2 | U | V) & 0x100) {
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
U = av_clip_uint8(U);
V = av_clip_uint8(V);
for (j = 0; j < lumFilterSize; j++) {
A1 += alpSrc[j][i * 2 ] * lumFilter[j];
A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
if ((A1 | A2) & 0x100) {
A1 = av_clip_uint8(A1);
A2 = av_clip_uint8(A2);
/* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
g = (c->table_gU[U] + c->table_gV[V]);
yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
/*
 * Two-line vertical blend (bilinear) into one packed RGB output line:
 * yalpha/uvalpha are 12-bit weights for the second line, their
 * complements (4095 - x) weight the first; >>19 reduces back to 8 bits.
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
const int16_t *ubuf[2], const int16_t *vbuf[2],
const int16_t *abuf[2], uint8_t *dest, int dstW,
int yalpha, int uvalpha, int y,
enum PixelFormat target, int hasAlpha)
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
*abuf0 = hasAlpha ? abuf[0] : NULL,
*abuf1 = hasAlpha ? abuf[1] : NULL;
int yalpha1 = 4095 - yalpha;
int uvalpha1 = 4095 - uvalpha;
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
const void *r = c->table_rV[V],
*g = (c->table_gU[U] + c->table_gV[V]),
*b = c->table_bU[U];
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
1276 static av_always_inline void
1277 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1278 const int16_t *ubuf[2], const int16_t *vbuf[2],
1279 const int16_t *abuf0, uint8_t *dest, int dstW,
1280 int uvalpha, int y, enum PixelFormat target,
1283 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1284 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1287 if (uvalpha < 2048) {
1288 for (i = 0; i < (dstW >> 1); i++) {
1289 int Y1 = buf0[i * 2] >> 7;
1290 int Y2 = buf0[i * 2 + 1] >> 7;
1291 int U = ubuf1[i] >> 7;
1292 int V = vbuf1[i] >> 7;
1294 const void *r = c->table_rV[V],
1295 *g = (c->table_gU[U] + c->table_gV[V]),
1296 *b = c->table_bU[U];
1299 A1 = abuf0[i * 2 ] >> 7;
1300 A2 = abuf0[i * 2 + 1] >> 7;
1303 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1304 r, g, b, y, target, hasAlpha);
1307 for (i = 0; i < (dstW >> 1); i++) {
1308 int Y1 = buf0[i * 2] >> 7;
1309 int Y2 = buf0[i * 2 + 1] >> 7;
1310 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1311 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1313 const void *r = c->table_rV[V],
1314 *g = (c->table_gU[U] + c->table_gV[V]),
1315 *b = c->table_bU[U];
1318 A1 = abuf0[i * 2 ] >> 7;
1319 A2 = abuf0[i * 2 + 1] >> 7;
1322 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1323 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: stamp out non-template entry points (_X_c multi-tap,
 * _2_c two-line blend, _1_c single-line) that bind a fixed pixel format
 * and alpha mode to the shared *_c_template implementations above.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
const int16_t **lumSrc, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **chrUSrc, \
const int16_t **chrVSrc, int chrFilterSize, \
const int16_t **alpSrc, uint8_t *dest, int dstW, \
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
alpSrc, dest, dstW, y, fmt, hasAlpha); \
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
const int16_t *ubuf[2], const int16_t *vbuf[2], \
const int16_t *abuf[2], uint8_t *dest, int dstW, \
int yalpha, int uvalpha, int y) \
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
const int16_t *ubuf[2], const int16_t *vbuf[2], \
const int16_t *abuf0, uint8_t *dest, int dstW, \
int uvalpha, int y) \
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
dstW, uvalpha, y, fmt, hasAlpha); \
YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
/* with alpha support compiled in: separate alpha (a32*) and no-alpha (x32*) writers */
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * "Full chroma" variant: one chroma sample per output pixel (no pairwise
 * reuse), computed arithmetically from the context coefficients instead
 * of via lookup tables.  step is the output bytes-per-pixel (3 or 4).
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest,
int dstW, int y, enum PixelFormat target, int hasAlpha)
int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
for (i = 0; i < dstW; i++) {
/* rounding bias plus re-centering of chroma (128 at the 19-bit scale) */
int U = (1<<9)-(128 << 19);
int V = (1<<9)-(128 << 19);
for (j = 0; j < lumFilterSize; j++) {
Y += lumSrc[j][i] * lumFilter[j];
for (j = 0; j < chrFilterSize; j++) {
U += chrUSrc[j][i] * chrFilter[j];
V += chrVSrc[j][i] * chrFilter[j];
for (j = 0; j < lumFilterSize; j++) {
A += alpSrc[j][i] * lumFilter[j];
A = av_clip_uint8(A);
Y -= c->yuv2rgb_y_offset;
Y *= c->yuv2rgb_y_coeff;
R = Y + V*c->yuv2rgb_v2r_coeff;
G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when any component overflowed the 30-bit range */
if ((R | G | B) & 0xC0000000) {
R = av_clip_uintp2(R, 30);
G = av_clip_uintp2(G, 30);
B = av_clip_uintp2(B, 30);
dest[0] = hasAlpha ? A : 255;
dest[3] = hasAlpha ? A : 255;
dest[0] = hasAlpha ? A : 255;
dest[3] = hasAlpha ? A : 255;
/* instantiate full-chroma writers for the 32-bit and 24-bit formats */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1490 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1491 int width, int height,
1495 uint8_t *ptr = plane + stride*y;
1496 for (i=0; i<height; i++) {
1497 memset(ptr, val, width);
/* read one 16-bit component with the endianness of the source format */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* map r/b onto the first/last component depending on RGB vs BGR order */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* 48-bit RGB/BGR -> 16-bit luma, one sample per input pixel */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
enum PixelFormat origin)
for (i = 0; i < width; i++) {
unsigned int r_b = input_pixel(&src[i*3+0]);
unsigned int g = input_pixel(&src[i*3+1]);
unsigned int b_r = input_pixel(&src[i*3+2]);
dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> 16-bit chroma at full horizontal resolution
 * (src2 is unused here; presumably it aliases src1 — see callers) */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
const uint16_t *src1, const uint16_t *src2,
int width, enum PixelFormat origin)
for (i = 0; i < width; i++) {
int r_b = input_pixel(&src1[i*3+0]);
int g = input_pixel(&src1[i*3+1]);
int b_r = input_pixel(&src1[i*3+2]);
dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* same, but average horizontal pixel pairs (2x chroma subsampling) */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
const uint16_t *src1, const uint16_t *src2,
int width, enum PixelFormat origin)
for (i = 0; i < width; i++) {
int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* generate byte-pointer entry points that cast to uint16_t and dispatch
 * into the templates above for each of the four 48-bit formats */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
int width, uint32_t *unused) \
const uint16_t *src = (const uint16_t *) _src; \
uint16_t *dst = (uint16_t *) _dst; \
rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
const uint8_t *_src1, const uint8_t *_src2, \
int width, uint32_t *unused) \
const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
const uint8_t *_src1, const uint8_t *_src2, \
int width, uint32_t *unused) \
const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* fetch one source pixel: native 32-bit load for the 32bpp formats,
 * endian-aware 16-bit load otherwise */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
(isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic 16/32-bit packed RGB -> luma.  shr/shg/shb/shp and the masks
 * describe the component layout; rsh/gsh/bsh/S position the coefficients
 * so all layouts share one rounding/shift scheme.
 */
static av_always_inline void
rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
int width, enum PixelFormat origin,
int shr, int shg, int shb, int shp,
int maskr, int maskg, int maskb,
int rsh, int gsh, int bsh, int S)
const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
rnd = (32<<((S)-1)) + (1<<(S-7));
for (i = 0; i < width; i++) {
int px = input_pixel(i) >> shp;
int b = (px & maskb) >> shb;
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* generic 16/32-bit packed RGB -> chroma, full horizontal resolution */
static av_always_inline void
rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
const uint8_t *src, int width,
enum PixelFormat origin,
int shr, int shg, int shb, int shp,
int maskr, int maskg, int maskb,
int rsh, int gsh, int bsh, int S)
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
rnd = (256<<((S)-1)) + (1<<(S-7));
for (i = 0; i < width; i++) {
int px = input_pixel(i) >> shp;
int b = (px & maskb) >> shb;
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* as above, but sum horizontal pixel pairs; the widened masks let the
 * pair sums be separated without unpacking each pixel first */
static av_always_inline void
rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
const uint8_t *src, int width,
enum PixelFormat origin,
int shr, int shg, int shb, int shp,
int maskr, int maskg, int maskb,
int rsh, int gsh, int bsh, int S)
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
for (i = 0; i < width; i++) {
int px0 = input_pixel(2 * i + 0) >> shp;
int px1 = input_pixel(2 * i + 1) >> shp;
int b, r, g = (px0 & maskgx) + (px1 & maskgx);
int rb = px0 + px1 - g;
b = (rb & maskb) >> shb;
/* 565 green straddles the r/b fields; recover it separately */
if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
g = (g & maskg) >> shg;
r = (rb & maskr) >> shr;
dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* stamp out ToY/ToUV/ToUV_half entry points for one packed layout */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
int width, uint32_t *unused) \
rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
const uint8_t *src, const uint8_t *dummy, \
int width, uint32_t *unused) \
rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
const uint8_t *src, const uint8_t *dummy, \
int width, uint32_t *unused) \
rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
maskr, maskg, maskb, rsh, gsh, bsh, S); \
rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1716 static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1719 for (i=0; i<width; i++) {
1720 dst[i]= src[4*i]<<6;
1724 static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1727 for (i=0; i<width; i++) {
1728 dst[i]= src[4*i+3]<<6;
1732 static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
1735 for (i=0; i<width; i++) {
1738 dst[i]= (pal[d] >> 24)<<6;
1742 static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
1745 for (i=0; i<width; i++) {
1748 dst[i]= (pal[d] & 0xFF)<<6;
1752 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1753 const uint8_t *src1, const uint8_t *src2,
1754 int width, uint32_t *pal)
1757 assert(src1 == src2);
1758 for (i=0; i<width; i++) {
1759 int p= pal[src1[i]];
1761 dstU[i]= (uint8_t)(p>> 8)<<6;
1762 dstV[i]= (uint8_t)(p>>16)<<6;
1766 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1769 for (i=0; i<width/8; i++) {
1772 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1776 for(j=0; j<(width&7); j++)
1777 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1781 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1784 for (i=0; i<width/8; i++) {
1787 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1791 for(j=0; j<(width&7); j++)
1792 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1796 //FIXME yuy2* can read up to 7 samples too much
1798 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1802 for (i=0; i<width; i++)
1806 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1807 const uint8_t *src2, int width, uint32_t *unused)
1810 for (i=0; i<width; i++) {
1811 dstU[i]= src1[4*i + 1];
1812 dstV[i]= src1[4*i + 3];
1814 assert(src1 == src2);
1817 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1820 const uint16_t *src = (const uint16_t *) _src;
1821 uint16_t *dst = (uint16_t *) _dst;
1822 for (i=0; i<width; i++) {
1823 dst[i] = av_bswap16(src[i]);
1827 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1828 const uint8_t *_src2, int width, uint32_t *unused)
1831 const uint16_t *src1 = (const uint16_t *) _src1,
1832 *src2 = (const uint16_t *) _src2;
1833 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1834 for (i=0; i<width; i++) {
1835 dstU[i] = av_bswap16(src1[i]);
1836 dstV[i] = av_bswap16(src2[i]);
1840 /* This is almost identical to the previous, end exists only because
1841 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1842 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1846 for (i=0; i<width; i++)
1850 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1851 const uint8_t *src2, int width, uint32_t *unused)
1854 for (i=0; i<width; i++) {
1855 dstU[i]= src1[4*i + 0];
1856 dstV[i]= src1[4*i + 2];
1858 assert(src1 == src2);
1861 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1862 const uint8_t *src, int width)
1865 for (i = 0; i < width; i++) {
1866 dst1[i] = src[2*i+0];
1867 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — even bytes are U. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — swap the destinations. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
nvXXtoUV_c(dstV, dstU, src1, width);
/* NOTE(review): this input_pixel definition appears unused by the 24-bit
 * readers below — presumably a leftover; verify before removing. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* packed BGR24 -> luma at the internal 15-bit-ish scale (output <<6) */
static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
int width, uint32_t *unused)
for (i=0; i<width; i++) {
dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* packed BGR24 -> chroma, full horizontal resolution (sources must alias) */
static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
for (i=0; i<width; i++) {
int b= src1[3*i + 0];
int g= src1[3*i + 1];
int r= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
assert(src1 == src2);
/* packed BGR24 -> chroma, horizontal pixel pairs summed (2x subsampling) */
static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
for (i=0; i<width; i++) {
int b= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
assert(src1 == src2);
/* packed RGB24 -> luma (component order reversed vs. bgr24ToY_c) */
static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
for (i=0; i<width; i++) {
dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* packed RGB24 -> chroma, full horizontal resolution */
static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
for (i=0; i<width; i++) {
int r= src1[3*i + 0];
int g= src1[3*i + 1];
int b= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* packed RGB24 -> chroma, horizontal pixel pairs summed */
static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
for (i=0; i<width; i++) {
int r= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/*
 * Horizontal scaling of 16-bit input with a 14-bit filter; output is
 * 19-bit int32.  The shift depends on the source bit depth (and is
 * forced for low-depth RGB/PAL sources expanded earlier).
 */
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
const int16_t *filter,
const int16_t *filterPos, int filterSize)
int32_t *dst = (int32_t *) _dst;
const uint16_t *src = (const uint16_t *) _src;
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
int sh = (bits <= 7) ? 11 : (bits - 4);
if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
for (i = 0; i < dstW; i++) {
int srcPos = filterPos[i];
for (j = 0; j < filterSize; j++) {
val += src[srcPos + j] * filter[filterSize * i + j];
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
// bilinear / bicubic scaling
/* horizontal scaling of 8-bit input with a 14-bit filter -> 15-bit output */
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
const int16_t *filter, const int16_t *filterPos,
for (i=0; i<dstW; i++) {
int srcPos= filterPos[i];
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
//filter += hFilterSize;
dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 16-bit native-endian variant with caller-supplied shift */
static inline void hScale16N_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
for (i=0; i<dstW; i++) {
int srcPos= filterPos[i];
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
/* opposite-endian variant: byte-swap each sample before filtering */
static inline void hScale16NX_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
for (i=0; i<dstW; i++) {
int srcPos= filterPos[i];
for (j=0; j<filterSize; j++) {
val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
2047 //FIXME all pal and rgb srcFormats could do this convertion as well
2048 //FIXME all scalers more complex than bilinear could do half of this transform
2049 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2052 for (i = 0; i < width; i++) {
2053 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2054 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
2057 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2060 for (i = 0; i < width; i++) {
2061 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2062 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
2065 static void lumRangeToJpeg_c(int16_t *dst, int width)
2068 for (i = 0; i < width; i++)
2069 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
2071 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2074 for (i = 0; i < width; i++)
2075 dst[i] = (dst[i]*14071 + 33561947)>>14;
2078 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2081 int32_t *dstU = (int32_t *) _dstU;
2082 int32_t *dstV = (int32_t *) _dstV;
2083 for (i = 0; i < width; i++) {
2084 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2085 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2088 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2091 int32_t *dstU = (int32_t *) _dstU;
2092 int32_t *dstV = (int32_t *) _dstV;
2093 for (i = 0; i < width; i++) {
2094 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2095 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2098 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2101 int32_t *dst = (int32_t *) _dst;
2102 for (i = 0; i < width; i++)
2103 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2105 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2108 int32_t *dst = (int32_t *) _dst;
2109 for (i = 0; i < width; i++)
2110 dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
/*
 * Fast bilinear horizontal luma scale: 16.16 fixed-point source position
 * (xpos), 7-bit blend weight between neighbouring samples; the trailing
 * loop re-fills positions that would read past the last source sample.
 */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc)
unsigned int xpos=0;
for (i=0;i<dstWidth;i++) {
register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
/* clamp the tail: replicate the last source sample at full weight */
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
dst[i] = src[srcW-1]*128;
2128 static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
2131 uint8_t *dst = (uint8_t *) _dst;
2132 for (i = len - 1; i >= 0; i--) {
2133 dst[i * 2] = dst[i * 2 + 1] = src[i];
2137 static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
2140 for (i = 0; i < len; i++) {
2141 dst[i] = src[i] >> 4;
// *** horizontal scale Y line to temp buffer
/*
 * Horizontal scaling driver for one luma (or alpha) line: convert the
 * input to YV12-style planes if needed, widen low-depth input for the
 * 16-bit path, run the appropriate horizontal scaler, then apply the
 * optional range conversion and 19->15 bit narrowing.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc,
const int16_t *hLumFilter,
const int16_t *hLumFilterPos, int hLumFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* input format conversion into the scratch buffer, if required */
toYV12(formatConvBuffer, src, srcW, pal);
src= formatConvBuffer;
/* widen sub-8-bit planar input for the 16-bit scaling path */
if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16 && !isAnyRGB(c->srcFormat)) {
c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
src = formatConvBuffer;
int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
} else if (!c->hyscale_fast) {
c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
convertRange(dst, dstWidth);
/* narrow 19-bit intermediates when the output path is not 16-bit */
if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 15 && c->scalingBpp == 16) {
c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/*
 * Fast bilinear horizontal chroma scale: same 16.16 fixed-point walk as
 * hyscale_fast_c, applied to the two chroma planes in lockstep.
 */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
int dstWidth, const uint8_t *src1,
const uint8_t *src2, int srcW, int xInc)
unsigned int xpos=0;
for (i=0;i<dstWidth;i++) {
register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* clamp the tail: replicate the last source samples at full weight */
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
dst1[i] = src1[srcW-1]*128;
dst2[i] = src2[srcW-1]*128;
/* Horizontally scale one pair of chroma input lines (U and V) into the
 * intermediate buffers dst1/dst2, with optional input conversion via
 * chrToYV12, 8->16 bit widening, range conversion and 19->15 narrowing —
 * the chroma counterpart of hyscale().
 * NOTE(review): several source lines (braces, enclosing if-conditions) were
 * lost in extraction here; code left byte-identical. */
2202 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2203 const uint8_t *src1, const uint8_t *src2,
2204 int srcW, int xInc, const int16_t *hChrFilter,
2205 const int16_t *hChrFilterPos, int hChrFilterSize,
2206 uint8_t *formatConvBuffer, uint32_t *pal)
/* U lands in formatConvBuffer, V in a second aligned region behind it */
2209 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2210 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2211 src1= formatConvBuffer;
/* narrow inputs feeding a 16-bit scaling pipeline must be widened first */
2215 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16 && !isAnyRGB(c->srcFormat)) {
2216 uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2+78, 16));
2217 c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2218 c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2219 src1 = formatConvBuffer;
/* 16-bit path: shift encodes the effective input depth (13 for RGB/PAL8) */
2224 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2225 c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2226 c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2227 } else if (!c->hcscale_fast) {
2228 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2229 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2230 } else { // fast bilinear upscale / crap downscale
2231 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
/* MPEG<->JPEG range conversion, when configured */
2234 if (c->chrConvertRange)
2235 c->chrConvertRange(dst1, dst2, dstWidth);
/* narrow 19-bit results back to 15 bits for shallow destinations */
2237 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 15 && c->scalingBpp == 16) {
2238 c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2239 c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/* Select the C output functions (planar 1-tap, planar N-tap, packed 1-tap,
 * packed 2-tap bilinear and packed N-tap) for the destination format of c
 * and store them through the five out-parameters.  Pointers not matching
 * the destination format are left untouched by the selection branches.
 * NOTE(review): many source lines (case labels, braces, #if lines) were
 * lost in extraction here, so some handlers below appear without their
 * case label; code left byte-identical. */
2243 static av_always_inline void
2244 find_c_packed_planar_out_funcs(SwsContext *c,
2245 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2246 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2247 yuv2packedX_fn *yuv2packedX)
2249 enum PixelFormat dstFormat = c->dstFormat;
/* planar output selection: NV12/21, 16-bit, 9/10-bit, then plain 8-bit */
2251 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2252 *yuv2yuvX = yuv2nv12X_c;
2253 } else if (is16BPS(dstFormat)) {
2254 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2255 } else if (is9_OR_10BPS(dstFormat)) {
2256 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2257 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2259 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2262 *yuv2yuv1 = yuv2yuv1_c;
2263 *yuv2yuvX = yuv2yuvX_c;
/* full-chroma-interpolation RGB writers (X-tap only) */
2265 if(c->flags & SWS_FULL_CHR_H_INT) {
2266 switch (dstFormat) {
2269 *yuv2packedX = yuv2rgba32_full_X_c;
2271 #if CONFIG_SWSCALE_ALPHA
2273 *yuv2packedX = yuv2rgba32_full_X_c;
2275 #endif /* CONFIG_SWSCALE_ALPHA */
2277 *yuv2packedX = yuv2rgbx32_full_X_c;
2279 #endif /* !CONFIG_SMALL */
2283 *yuv2packedX = yuv2argb32_full_X_c;
2285 #if CONFIG_SWSCALE_ALPHA
2287 *yuv2packedX = yuv2argb32_full_X_c;
2289 #endif /* CONFIG_SWSCALE_ALPHA */
2291 *yuv2packedX = yuv2xrgb32_full_X_c;
2293 #endif /* !CONFIG_SMALL */
2297 *yuv2packedX = yuv2bgra32_full_X_c;
2299 #if CONFIG_SWSCALE_ALPHA
2301 *yuv2packedX = yuv2bgra32_full_X_c;
2303 #endif /* CONFIG_SWSCALE_ALPHA */
2305 *yuv2packedX = yuv2bgrx32_full_X_c;
2307 #endif /* !CONFIG_SMALL */
2311 *yuv2packedX = yuv2abgr32_full_X_c;
2313 #if CONFIG_SWSCALE_ALPHA
2315 *yuv2packedX = yuv2abgr32_full_X_c;
2317 #endif /* CONFIG_SWSCALE_ALPHA */
2319 *yuv2packedX = yuv2xbgr32_full_X_c;
2321 #endif /* !CONFIG_SMALL */
2324 *yuv2packedX = yuv2rgb24_full_X_c;
2327 *yuv2packedX = yuv2bgr24_full_X_c;
/* normal (non-full-chroma) packed writers: 1-, 2- and X-tap per format */
2334 switch (dstFormat) {
2335 case PIX_FMT_GRAY16BE:
2336 *yuv2packed1 = yuv2gray16BE_1_c;
2337 *yuv2packed2 = yuv2gray16BE_2_c;
2338 *yuv2packedX = yuv2gray16BE_X_c;
2340 case PIX_FMT_GRAY16LE:
2341 *yuv2packed1 = yuv2gray16LE_1_c;
2342 *yuv2packed2 = yuv2gray16LE_2_c;
2343 *yuv2packedX = yuv2gray16LE_X_c;
2345 case PIX_FMT_MONOWHITE:
2346 *yuv2packed1 = yuv2monowhite_1_c;
2347 *yuv2packed2 = yuv2monowhite_2_c;
2348 *yuv2packedX = yuv2monowhite_X_c;
2350 case PIX_FMT_MONOBLACK:
2351 *yuv2packed1 = yuv2monoblack_1_c;
2352 *yuv2packed2 = yuv2monoblack_2_c;
2353 *yuv2packedX = yuv2monoblack_X_c;
2355 case PIX_FMT_YUYV422:
2356 *yuv2packed1 = yuv2yuyv422_1_c;
2357 *yuv2packed2 = yuv2yuyv422_2_c;
2358 *yuv2packedX = yuv2yuyv422_X_c;
2360 case PIX_FMT_UYVY422:
2361 *yuv2packed1 = yuv2uyvy422_1_c;
2362 *yuv2packed2 = yuv2uyvy422_2_c;
2363 *yuv2packedX = yuv2uyvy422_X_c;
2365 case PIX_FMT_RGB48LE:
2366 *yuv2packed1 = yuv2rgb48le_1_c;
2367 *yuv2packed2 = yuv2rgb48le_2_c;
2368 *yuv2packedX = yuv2rgb48le_X_c;
2370 case PIX_FMT_RGB48BE:
2371 *yuv2packed1 = yuv2rgb48be_1_c;
2372 *yuv2packed2 = yuv2rgb48be_2_c;
2373 *yuv2packedX = yuv2rgb48be_X_c;
2375 case PIX_FMT_BGR48LE:
2376 *yuv2packed1 = yuv2bgr48le_1_c;
2377 *yuv2packed2 = yuv2bgr48le_2_c;
2378 *yuv2packedX = yuv2bgr48le_X_c;
2380 case PIX_FMT_BGR48BE:
2381 *yuv2packed1 = yuv2bgr48be_1_c;
2382 *yuv2packed2 = yuv2bgr48be_2_c;
2383 *yuv2packedX = yuv2bgr48be_X_c;
2388 *yuv2packed1 = yuv2rgb32_1_c;
2389 *yuv2packed2 = yuv2rgb32_2_c;
2390 *yuv2packedX = yuv2rgb32_X_c;
2392 #if CONFIG_SWSCALE_ALPHA
2394 *yuv2packed1 = yuv2rgba32_1_c;
2395 *yuv2packed2 = yuv2rgba32_2_c;
2396 *yuv2packedX = yuv2rgba32_X_c;
2398 #endif /* CONFIG_SWSCALE_ALPHA */
2400 *yuv2packed1 = yuv2rgbx32_1_c;
2401 *yuv2packed2 = yuv2rgbx32_2_c;
2402 *yuv2packedX = yuv2rgbx32_X_c;
2404 #endif /* !CONFIG_SMALL */
2406 case PIX_FMT_RGB32_1:
2407 case PIX_FMT_BGR32_1:
2409 *yuv2packed1 = yuv2rgb32_1_1_c;
2410 *yuv2packed2 = yuv2rgb32_1_2_c;
2411 *yuv2packedX = yuv2rgb32_1_X_c;
2413 #if CONFIG_SWSCALE_ALPHA
2415 *yuv2packed1 = yuv2rgba32_1_1_c;
2416 *yuv2packed2 = yuv2rgba32_1_2_c;
2417 *yuv2packedX = yuv2rgba32_1_X_c;
2419 #endif /* CONFIG_SWSCALE_ALPHA */
2421 *yuv2packed1 = yuv2rgbx32_1_1_c;
2422 *yuv2packed2 = yuv2rgbx32_1_2_c;
2423 *yuv2packedX = yuv2rgbx32_1_X_c;
2425 #endif /* !CONFIG_SMALL */
2428 *yuv2packed1 = yuv2rgb24_1_c;
2429 *yuv2packed2 = yuv2rgb24_2_c;
2430 *yuv2packedX = yuv2rgb24_X_c;
2433 *yuv2packed1 = yuv2bgr24_1_c;
2434 *yuv2packed2 = yuv2bgr24_2_c;
2435 *yuv2packedX = yuv2bgr24_X_c;
2437 case PIX_FMT_RGB565LE:
2438 case PIX_FMT_RGB565BE:
2439 case PIX_FMT_BGR565LE:
2440 case PIX_FMT_BGR565BE:
2441 *yuv2packed1 = yuv2rgb16_1_c;
2442 *yuv2packed2 = yuv2rgb16_2_c;
2443 *yuv2packedX = yuv2rgb16_X_c;
2445 case PIX_FMT_RGB555LE:
2446 case PIX_FMT_RGB555BE:
2447 case PIX_FMT_BGR555LE:
2448 case PIX_FMT_BGR555BE:
2449 *yuv2packed1 = yuv2rgb15_1_c;
2450 *yuv2packed2 = yuv2rgb15_2_c;
2451 *yuv2packedX = yuv2rgb15_X_c;
2453 case PIX_FMT_RGB444LE:
2454 case PIX_FMT_RGB444BE:
2455 case PIX_FMT_BGR444LE:
2456 case PIX_FMT_BGR444BE:
2457 *yuv2packed1 = yuv2rgb12_1_c;
2458 *yuv2packed2 = yuv2rgb12_2_c;
2459 *yuv2packedX = yuv2rgb12_X_c;
2463 *yuv2packed1 = yuv2rgb8_1_c;
2464 *yuv2packed2 = yuv2rgb8_2_c;
2465 *yuv2packedX = yuv2rgb8_X_c;
2469 *yuv2packed1 = yuv2rgb4_1_c;
2470 *yuv2packed2 = yuv2rgb4_2_c;
2471 *yuv2packedX = yuv2rgb4_X_c;
2473 case PIX_FMT_RGB4_BYTE:
2474 case PIX_FMT_BGR4_BYTE:
2475 *yuv2packed1 = yuv2rgb4b_1_c;
2476 *yuv2packed2 = yuv2rgb4b_2_c;
2477 *yuv2packedX = yuv2rgb4b_X_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 for verbose ring-buffer tracing in
 * swScale().  DEBUG_BUFFERS() relies on a variable `c` (the SwsContext)
 * being in scope at every call site for av_log(). */
2483 #define DEBUG_SWSCALE_BUFFERS 0
2484 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling driver: consume srcSliceH input lines starting at
 * srcSliceY, horizontally scale them into the luma/chroma ring buffers,
 * then vertically scale and output as many destination lines as the slice
 * allows.  Returns the number of destination lines written.
 * NOTE(review): many source lines (braces, declarations such as dstY /
 * lastDstY / enough_lines, some conditions) were lost in extraction;
 * code left byte-identical. */
2486 static int swScale(SwsContext *c, const uint8_t* src[],
2487 int srcStride[], int srcSliceY,
2488 int srcSliceH, uint8_t* dst[], int dstStride[])
2490 /* load a few things into local vars to make the code more readable? and faster */
2491 const int srcW= c->srcW;
2492 const int dstW= c->dstW;
2493 const int dstH= c->dstH;
2494 const int chrDstW= c->chrDstW;
2495 const int chrSrcW= c->chrSrcW;
2496 const int lumXInc= c->lumXInc;
2497 const int chrXInc= c->chrXInc;
2498 const enum PixelFormat dstFormat= c->dstFormat;
2499 const int flags= c->flags;
2500 int16_t *vLumFilterPos= c->vLumFilterPos;
2501 int16_t *vChrFilterPos= c->vChrFilterPos;
2502 int16_t *hLumFilterPos= c->hLumFilterPos;
2503 int16_t *hChrFilterPos= c->hChrFilterPos;
2504 int16_t *vLumFilter= c->vLumFilter;
2505 int16_t *vChrFilter= c->vChrFilter;
2506 int16_t *hLumFilter= c->hLumFilter;
2507 int16_t *hChrFilter= c->hChrFilter;
2508 int32_t *lumMmxFilter= c->lumMmxFilter;
2509 int32_t *chrMmxFilter= c->chrMmxFilter;
2510 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2511 const int vLumFilterSize= c->vLumFilterSize;
2512 const int vChrFilterSize= c->vChrFilterSize;
2513 const int hLumFilterSize= c->hLumFilterSize;
2514 const int hChrFilterSize= c->hChrFilterSize;
2515 int16_t **lumPixBuf= c->lumPixBuf;
2516 int16_t **chrUPixBuf= c->chrUPixBuf;
2517 int16_t **chrVPixBuf= c->chrVPixBuf;
2518 int16_t **alpPixBuf= c->alpPixBuf;
2519 const int vLumBufSize= c->vLumBufSize;
2520 const int vChrBufSize= c->vChrBufSize;
2521 uint8_t *formatConvBuffer= c->formatConvBuffer;
2522 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2523 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2525 uint32_t *pal=c->pal_yuv;
2526 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2527 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2528 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2529 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2530 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2531 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2533 /* vars which will change and which we need to store back in the context */
2535 int lumBufIndex= c->lumBufIndex;
2536 int chrBufIndex= c->chrBufIndex;
2537 int lastInLumBuf= c->lastInLumBuf;
2538 int lastInChrBuf= c->lastInChrBuf;
/* packed input: all planes share plane 0's data and stride */
2540 if (isPacked(c->srcFormat)) {
2548 srcStride[3]= srcStride[0];
2550 srcStride[1]<<= c->vChrDrop;
2551 srcStride[2]<<= c->vChrDrop;
2553 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2554 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2555 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2556 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2557 srcSliceY, srcSliceH, dstY, dstH);
2558 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2559 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2561 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2562 static int warnedAlready=0; //FIXME move this into the context perhaps
2563 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2564 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2565 " ->cannot do aligned memory accesses anymore\n");
2570 /* Note the user might start scaling the picture in the middle so this
2571 will not get executed. This is not really intended but works
2572 currently, so people might do it. */
2573 if (srcSliceY ==0) {
/* main loop: emit one destination line per iteration */
2583 for (;dstY < dstH; dstY++) {
2584 const int chrDstY= dstY>>c->chrDstVSubSample;
2585 uint8_t *dest[4] = {
2586 dst[0] + dstStride[0] * dstY,
2587 dst[1] + dstStride[1] * chrDstY,
2588 dst[2] + dstStride[2] * chrDstY,
2589 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2591 const uint8_t *lumDither= should_dither ? dithers[7][dstY &7] : flat64;
2592 const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
2594 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2595 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2596 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2597 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2598 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2599 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2602 //handle holes (FAST_BILINEAR & weird filters)
2603 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2604 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2605 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2606 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2608 DEBUG_BUFFERS("dstY: %d\n", dstY);
2609 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2610 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2611 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2612 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2614 // Do we have enough lines in this slice to output the dstY line
2615 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2617 if (!enough_lines) {
2618 lastLumSrcY = srcSliceY + srcSliceH - 1;
2619 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2620 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2621 lastLumSrcY, lastChrSrcY);
2624 //Do horizontal scaling
2625 while(lastInLumBuf < lastLumSrcY) {
2626 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2627 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2629 assert(lumBufIndex < 2*vLumBufSize);
2630 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2631 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2632 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2633 hLumFilter, hLumFilterPos, hLumFilterSize,
2636 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2637 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2638 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2642 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2643 lumBufIndex, lastInLumBuf);
2645 while(lastInChrBuf < lastChrSrcY) {
2646 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2647 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2649 assert(chrBufIndex < 2*vChrBufSize);
2650 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2651 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2652 //FIXME replace parameters through context struct (some at least)
2654 if (c->needs_hcscale)
2655 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2656 chrDstW, src1, src2, chrSrcW, chrXInc,
2657 hChrFilter, hChrFilterPos, hChrFilterSize,
2658 formatConvBuffer, pal);
2660 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2661 chrBufIndex, lastInChrBuf);
2663 //wrap buf index around to stay inside the ring buffer
2664 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2665 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2667 break; //we can't output a dstY line so let's try with the next slice
2670 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2672 if (dstY >= dstH-2) {
2673 // hmm looks like we can't use MMX here without overwriting this array's tail
2674 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2675 &yuv2packed1, &yuv2packed2,
/* build pointers into the ring buffers for the vertical filter taps */
2680 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2681 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2682 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2683 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2685 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2686 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2687 if ((dstY&chrSkipMask) || isGray(dstFormat))
2688 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2689 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2690 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2691 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2692 dest, dstW, chrDstW, lumDither, chrDither);
2693 } else { //General YV12
2694 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2695 lumSrcPtr, vLumFilterSize,
2696 vChrFilter + chrDstY * vChrFilterSize,
2697 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2698 alpSrcPtr, dest, dstW, chrDstW, lumDither, chrDither);
2701 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2702 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2703 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2704 int chrAlpha = vChrFilter[2 * dstY + 1];
2705 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2706 alpPixBuf ? *alpSrcPtr : NULL,
2707 dest[0], dstW, chrAlpha, dstY);
2708 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2709 int lumAlpha = vLumFilter[2 * dstY + 1];
2710 int chrAlpha = vChrFilter[2 * dstY + 1];
2712 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2714 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2715 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2716 alpPixBuf ? alpSrcPtr : NULL,
2717 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2718 } else { //general RGB
2719 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2720 lumSrcPtr, vLumFilterSize,
2721 vChrFilter + dstY * vChrFilterSize,
2722 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2723 alpSrcPtr, dest[0], dstW, dstY);
/* fill the alpha plane with opaque if the source had none */
2729 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2730 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2733 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2734 __asm__ volatile("sfence":::"memory");
2738 /* store changed local vars back in the context */
2740 c->lumBufIndex= lumBufIndex;
2741 c->chrBufIndex= chrBufIndex;
2742 c->lastInLumBuf= lastInLumBuf;
2743 c->lastInChrBuf= lastInChrBuf;
2745 return dstY - lastDstY;
/* One-time initialisation of the C function pointers in the context:
 * output writers, per-format input converters (chrToYV12 / lumToYV12 /
 * alpToYV12), horizontal scalers and range-conversion hooks, chosen from
 * c->srcFormat / c->dstFormat / c->flags / c->scalingBpp.
 * NOTE(review): many source lines (case labels, braces, if/else lines)
 * were lost in extraction, so some handlers below appear without their
 * case label; code left byte-identical. */
2748 static av_cold void sws_init_swScale_c(SwsContext *c)
2750 enum PixelFormat srcFormat = c->srcFormat;
2752 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2753 &c->yuv2packed1, &c->yuv2packed2,
/* chroma input converter selection */
2756 c->chrToYV12 = NULL;
2758 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2759 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2760 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2761 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2765 case PIX_FMT_BGR4_BYTE:
2766 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2767 case PIX_FMT_YUV444P9BE:
2768 case PIX_FMT_YUV420P9BE:
2769 case PIX_FMT_YUV444P10BE:
2770 case PIX_FMT_YUV422P10BE:
2771 case PIX_FMT_YUV420P10BE: c->hScale16= HAVE_BIGENDIAN ? hScale16N_c : hScale16NX_c; break;
2772 case PIX_FMT_YUV444P9LE:
2773 case PIX_FMT_YUV420P9LE:
2774 case PIX_FMT_YUV422P10LE:
2775 case PIX_FMT_YUV420P10LE:
2776 case PIX_FMT_YUV444P10LE: c->hScale16= HAVE_BIGENDIAN ? hScale16NX_c : hScale16N_c; break;
2778 case PIX_FMT_YUV420P16LE:
2779 case PIX_FMT_YUV422P16LE:
2780 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2782 case PIX_FMT_YUV420P16BE:
2783 case PIX_FMT_YUV422P16BE:
2784 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* horizontally subsampled chroma uses the _half_ variants that average
 * two input pixels per output chroma sample */
2787 if (c->chrSrcHSubSample) {
2789 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2790 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2791 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2792 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2793 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2794 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2795 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2796 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2797 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2798 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2799 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2800 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2801 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2802 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2803 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2804 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2805 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2806 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2810 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2811 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2812 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2813 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2814 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2815 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2816 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2817 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2818 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2819 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2820 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2821 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2822 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2823 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2824 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2825 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2826 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2827 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* luma and alpha input converter selection */
2831 c->lumToYV12 = NULL;
2832 c->alpToYV12 = NULL;
2833 switch (srcFormat) {
2835 case PIX_FMT_YUV420P16LE:
2836 case PIX_FMT_YUV422P16LE:
2837 case PIX_FMT_YUV444P16LE:
2838 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2840 case PIX_FMT_YUV420P16BE:
2841 case PIX_FMT_YUV422P16BE:
2842 case PIX_FMT_YUV444P16BE:
2843 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2845 case PIX_FMT_YUYV422 :
2846 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2847 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2848 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2849 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2850 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2851 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2852 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2853 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2854 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2855 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2856 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2857 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2861 case PIX_FMT_BGR4_BYTE:
2862 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2863 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2864 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2865 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2866 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2867 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2868 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2869 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2870 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2871 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2872 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2875 switch (srcFormat) {
2877 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2879 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2880 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2881 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
/* horizontal scaler + range-conversion selection, by scaling bit depth */
2885 if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2886 || c->srcFormat == PIX_FMT_PAL8)
2887 c->hScale16= hScale16N_c;
2889 if (c->scalingBpp == 8) {
2890 c->hScale = hScale_c;
2891 if (c->flags & SWS_FAST_BILINEAR) {
2892 c->hyscale_fast = hyscale_fast_c;
2893 c->hcscale_fast = hcscale_fast_c;
2896 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2898 c->lumConvertRange = lumRangeFromJpeg_c;
2899 c->chrConvertRange = chrRangeFromJpeg_c;
2901 c->lumConvertRange = lumRangeToJpeg_c;
2902 c->chrConvertRange = chrRangeToJpeg_c;
/* 16-bit scaling path: byteswapped planar input is normalised here */
2906 if(c->hScale16 == hScale16NX_c && !isAnyRGB(c->srcFormat)){
2907 c->chrToYV12 = bswap16UV_c;
2908 c->lumToYV12 = bswap16Y_c;
2911 c->hScale = hScale16_c;
2912 c->scale19To15Fw = scale19To15Fw_c;
2913 c->scale8To16Rv = scale8To16Rv_c;
2915 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2917 c->lumConvertRange = lumRangeFromJpeg16_c;
2918 c->chrConvertRange = chrRangeFromJpeg16_c;
2920 c->lumConvertRange = lumRangeToJpeg16_c;
2921 c->chrConvertRange = chrRangeToJpeg16_c;
/* chroma scaling can be skipped entirely for gray / mono formats */
2926 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2927 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2928 c->needs_hcscale = 1;
2931 SwsFunc ff_getSwsFunc(SwsContext *c)
2933 sws_init_swScale_c(c);
2936 ff_sws_init_swScale_mmx(c);
2938 ff_sws_init_swScale_altivec(c);