2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/*
 * RGB -> YUV conversion coefficients in Q15 fixed point (RGB2YUV_SHIFT = 15).
 * The leading letter selects the source channel (R/G/B), the trailing letter
 * the destination component (Y/U/V).  Luma weights are scaled by 219/255
 * (limited-range Y) and chroma weights by 224/255 (limited-range Cb/Cr).
 * The 0.299/0.587/0.114 weights match ITU-R BT.601 — confirm against spec.
 */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither matrices used when reducing bit depth on output.  The name
 * suffix (_4, _8, _16, _32, _73, _220, _128) indicates the value range of the
 * matrix entries; tables are 8-byte aligned for SIMD loads.
 * NOTE(review): several closing "};" lines are missing from this extract. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* NOTE(review): the four tables below all carry the name dither_8x8_220.  In
 * upstream FFmpeg these are alternative versions (gamma 1.0 / 1.5 / 2.0 / 2.5)
 * selected by preprocessor conditionals that are not visible in this extract —
 * confirm against the original source before editing. */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Constant 64 vector, exported (no static) for use by SIMD code elsewhere. */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* Stack of eight 8x8 ordered-dither matrices of increasing amplitude
 * (max values 1, 3, 7, 15, 31, 63, 63, 127).  The first index appears to
 * select the number of bits being dithered away — TODO confirm against the
 * code that indexes this table; the inner "{" / "}," group separators are
 * missing from this extract. */
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
/* All-64 vector (same payload as ff_sws_pb_64 but file-local, unaligned). */
273 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Scale factors applied when dithering; exact indexing semantics are not
 * visible in this extract — presumably [src_depth][dst_depth]; TODO confirm
 * against the code that reads dither_scale. */
275 const uint16_t dither_scale[15][16]={
276 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
277 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
278 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
279 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
280 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
281 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
282 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
283 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
284 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
285 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
286 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
287 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
288 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
289 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
290 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
/*
 * Vertical filtering to planar YUV with more than 8 bits per component.
 * lumFilter/chrFilter hold the vertical FIR taps; output_bits selects the
 * destination depth and big_endian the byte order of each uint16_t sample.
 * When output_bits == 16 (dword != 0) the source rows are read as int32_t,
 * otherwise the int32_t** arguments are reinterpreted as int16_t**.
 * dest[0..3] = Y, U, V and (when CONFIG_SWSCALE_ALPHA) A planes.
 */
293 static av_always_inline void
294 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
295 int lumFilterSize, const int16_t *chrFilter,
296 const int32_t **chrUSrc, const int32_t **chrVSrc,
297 int chrFilterSize, const int32_t **alpSrc,
298 uint16_t *dest[4], int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int dword= output_bits == 16;
304 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
305 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
/* Shift that brings the Q-scaled accumulator back down to output_bits. */
306 int shift = 11 + 4*dword + 16 - output_bits - 1;
/* Writes one sample big- or little-endian, clipped to the output depth. */
308 #define output_pixel(pos, val) \
310 if (output_bits == 16) { \
311 AV_WB16(pos, av_clip_uint16(val >> shift)); \
313 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
316 if (output_bits == 16) { \
317 AV_WL16(pos, av_clip_uint16(val >> shift)); \
319 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
322 for (i = 0; i < dstW; i++) {
/* Rounding bias: half of the final right shift. */
323 int val = 1 << (26-output_bits + 4*dword - 1);
326 for (j = 0; j < lumFilterSize; j++)
327 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
329 output_pixel(&yDest[i], val);
333 for (i = 0; i < chrDstW; i++) {
334 int u = 1 << (26-output_bits + 4*dword - 1);
335 int v = 1 << (26-output_bits + 4*dword - 1);
338 for (j = 0; j < chrFilterSize; j++) {
339 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
340 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
343 output_pixel(&uDest[i], u);
344 output_pixel(&vDest[i], v);
/* Alpha plane uses the luma filter, as alpha is sampled like luma. */
348 if (CONFIG_SWSCALE_ALPHA && aDest) {
349 for (i = 0; i < dstW; i++) {
350 int val = 1 << (26-output_bits + 4*dword - 1);
353 for (j = 0; j < lumFilterSize; j++)
354 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
356 output_pixel(&aDest[i], val);
/*
 * yuv2NBPS(bits, BE_LE, is_be) instantiates a yuv2yuvX<bits><BE_LE>_c entry
 * point matching the generic int16_t** vertical-scaler signature; it casts
 * the source arrays to int32_t** and forwards to yuv2yuvX16_c_template with
 * the requested depth/endianness baked in.
 */
362 #define yuv2NBPS(bits, BE_LE, is_be) \
363 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
364 const int16_t **_lumSrc, int lumFilterSize, \
365 const int16_t *chrFilter, const int16_t **_chrUSrc, \
366 const int16_t **_chrVSrc, \
367 int chrFilterSize, const int16_t **_alpSrc, \
368 uint8_t *_dest[4], int dstW, int chrDstW) \
370 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
371 **chrUSrc = (const int32_t **) _chrUSrc, \
372 **chrVSrc = (const int32_t **) _chrVSrc, \
373 **alpSrc = (const int32_t **) _alpSrc; \
374 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
375 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
376 alpSrc, (uint16_t **) _dest, \
377 dstW, chrDstW, is_be, bits); \
/*
 * Vertical filtering to 8-bit planar YUV(A) with dithering.
 * Each output sample accumulates lumSrc/chrSrc rows weighted by the FIR
 * coefficients; the per-pixel dither value is pre-shifted left by 12 and
 * the final >> 19 returns the accumulator to 8 bits before clipping.
 */
386 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
387 const int16_t **lumSrc, int lumFilterSize,
388 const int16_t *chrFilter, const int16_t **chrUSrc,
389 const int16_t **chrVSrc,
390 int chrFilterSize, const int16_t **alpSrc,
391 uint8_t *dest[4], int dstW, int chrDstW)
393 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
394 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
396 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
398 //FIXME Optimize (just quickly written not optimized..)
399 for (i=0; i<dstW; i++) {
400 int val = lumDither[i & 7] << 12;
402 for (j=0; j<lumFilterSize; j++)
403 val += lumSrc[j][i] * lumFilter[j];
405 yDest[i]= av_clip_uint8(val>>19);
409 for (i=0; i<chrDstW; i++) {
410 int u = chrDither[i & 7] << 12;
/* V uses a dither phase offset by 3 so U and V noise are decorrelated. */
411 int v = chrDither[(i + 3) & 7] << 12;
413 for (j=0; j<chrFilterSize; j++) {
414 u += chrUSrc[j][i] * chrFilter[j];
415 v += chrVSrc[j][i] * chrFilter[j];
418 uDest[i]= av_clip_uint8(u>>19);
419 vDest[i]= av_clip_uint8(v>>19);
/* Alpha is filtered with the luma coefficients. */
422 if (CONFIG_SWSCALE_ALPHA && aDest)
423 for (i=0; i<dstW; i++) {
424 int val = lumDither[i & 7] << 12;
426 for (j=0; j<lumFilterSize; j++)
427 val += alpSrc[j][i] * lumFilter[j];
429 aDest[i]= av_clip_uint8(val>>19);
/*
 * Unscaled (single-source-row) vertical path to 8-bit planar YUV(A):
 * add the per-pixel dither value and shift the 15-bit intermediate
 * down by 7 to get an 8-bit sample, then clip.
 */
433 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
434 const int16_t *chrUSrc, const int16_t *chrVSrc,
435 const int16_t *alpSrc,
436 uint8_t *dest[4], int dstW, int chrDstW)
438 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
439 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
441 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
443 for (i=0; i<dstW; i++) {
444 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
445 yDest[i]= av_clip_uint8(val);
449 for (i=0; i<chrDstW; i++) {
450 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
/* V dither phase offset by 3, matching yuv2yuvX_c. */
451 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
452 uDest[i]= av_clip_uint8(u);
453 vDest[i]= av_clip_uint8(v);
456 if (CONFIG_SWSCALE_ALPHA && aDest)
457 for (i=0; i<dstW; i++) {
458 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
459 aDest[i]= av_clip_uint8(val);
/*
 * Vertical filtering to semi-planar output: full-resolution Y plane plus one
 * interleaved chroma plane.  For PIX_FMT_NV12 chroma is stored U,V; the other
 * branch writes V,U (presumably NV21 — the else line is not visible in this
 * extract, confirm against the original source).
 */
463 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
464 const int16_t **lumSrc, int lumFilterSize,
465 const int16_t *chrFilter, const int16_t **chrUSrc,
466 const int16_t **chrVSrc, int chrFilterSize,
467 const int16_t **alpSrc, uint8_t *dest[4],
468 int dstW, int chrDstW)
470 uint8_t *yDest = dest[0], *uDest = dest[1];
471 enum PixelFormat dstFormat = c->dstFormat;
472 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
474 //FIXME Optimize (just quickly written not optimized..)
476 for (i=0; i<dstW; i++) {
477 int val = lumDither[i & 7] << 12;
479 for (j=0; j<lumFilterSize; j++)
480 val += lumSrc[j][i] * lumFilter[j];
482 yDest[i]= av_clip_uint8(val>>19);
488 if (dstFormat == PIX_FMT_NV12)
489 for (i=0; i<chrDstW; i++) {
490 int u = chrDither[i & 7] << 12;
491 int v = chrDither[(i + 3) & 7] << 12;
493 for (j=0; j<chrFilterSize; j++) {
494 u += chrUSrc[j][i] * chrFilter[j];
495 v += chrVSrc[j][i] * chrFilter[j];
/* NV12: U in even bytes, V in odd bytes. */
498 uDest[2*i]= av_clip_uint8(u>>19);
499 uDest[2*i+1]= av_clip_uint8(v>>19);
502 for (i=0; i<chrDstW; i++) {
503 int u = chrDither[i & 7] << 12;
504 int v = chrDither[(i + 3) & 7] << 12;
506 for (j=0; j<chrFilterSize; j++) {
507 u += chrUSrc[j][i] * chrFilter[j];
508 v += chrVSrc[j][i] * chrFilter[j];
/* Swapped order: V in even bytes, U in odd bytes. */
511 uDest[2*i]= av_clip_uint8(v>>19);
512 uDest[2*i+1]= av_clip_uint8(u>>19);
/*
 * output_pixel: writes one 16-bit grayscale sample, big-endian for
 * PIX_FMT_GRAY16BE (the little-endian branch is missing from this extract).
 *
 * yuv2gray16_X_c_template: vertical filtering to 16-bit grayscale; chroma
 * and alpha inputs are accepted to match the common signature but only luma
 * is used.  Processes two pixels per iteration.  The (Y1|Y2) & 0x10000 test
 * detects 16-bit overflow and triggers clipping only when needed.
 */
516 #define output_pixel(pos, val) \
517 if (target == PIX_FMT_GRAY16BE) { \
523 static av_always_inline void
524 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
525 const int32_t **lumSrc, int lumFilterSize,
526 const int16_t *chrFilter, const int32_t **chrUSrc,
527 const int32_t **chrVSrc, int chrFilterSize,
528 const int32_t **alpSrc, uint16_t *dest, int dstW,
529 int y, enum PixelFormat target)
533 for (i = 0; i < (dstW >> 1); i++) {
538 for (j = 0; j < lumFilterSize; j++) {
539 Y1 += lumSrc[j][i * 2] * lumFilter[j];
540 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
544 if ((Y1 | Y2) & 0x10000) {
545 Y1 = av_clip_uint16(Y1);
546 Y2 = av_clip_uint16(Y2);
548 output_pixel(&dest[i * 2 + 0], Y1);
549 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * Two-row bilinear blend to 16-bit grayscale: buf[0] and buf[1] are mixed
 * with weights (4095 - yalpha) : yalpha in Q12, then shifted down by 15.
 * Chroma/alpha parameters exist only to match the common signature.
 */
553 static av_always_inline void
554 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
555 const int32_t *ubuf[2], const int32_t *vbuf[2],
556 const int32_t *abuf[2], uint16_t *dest, int dstW,
557 int yalpha, int uvalpha, int y,
558 enum PixelFormat target)
560 int yalpha1 = 4095 - yalpha;
562 const int32_t *buf0 = buf[0], *buf1 = buf[1];
564 for (i = 0; i < (dstW >> 1); i++) {
565 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
566 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
568 output_pixel(&dest[i * 2 + 0], Y1);
569 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * Single-row path to 16-bit grayscale: round (+4) and shift the
 * intermediate down by 3 to reach the 16-bit range.
 */
573 static av_always_inline void
574 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
575 const int32_t *ubuf[2], const int32_t *vbuf[2],
576 const int32_t *abuf0, uint16_t *dest, int dstW,
577 int uvalpha, int y, enum PixelFormat target)
581 for (i = 0; i < (dstW >> 1); i++) {
582 int Y1 = (buf0[i * 2 ]+4)>>3;
583 int Y2 = (buf0[i * 2 + 1]+4)>>3;
585 output_pixel(&dest[i * 2 + 0], Y1);
586 output_pixel(&dest[i * 2 + 1], Y2);
/*
 * YUV2PACKED16WRAPPER(name, base, ext, fmt) instantiates the three standard
 * output entry points for a 16-bit-deep packed format:
 *   <name><ext>_X_c — full vertical filter,
 *   <name><ext>_2_c — two-row bilinear blend,
 *   <name><ext>_1_c — single row.
 * Each wrapper casts the generic int16_t** scaler arguments to the
 * int32_t** / uint16_t* types the <name><base>_*_c_template functions take
 * and forwards with the pixel format 'fmt' as a compile-time constant.
 * Instantiated below for PIX_FMT_GRAY16LE / PIX_FMT_GRAY16BE.
 */
592 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
593 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
594 const int16_t **_lumSrc, int lumFilterSize, \
595 const int16_t *chrFilter, const int16_t **_chrUSrc, \
596 const int16_t **_chrVSrc, int chrFilterSize, \
597 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
600 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
601 **chrUSrc = (const int32_t **) _chrUSrc, \
602 **chrVSrc = (const int32_t **) _chrVSrc, \
603 **alpSrc = (const int32_t **) _alpSrc; \
604 uint16_t *dest = (uint16_t *) _dest; \
605 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
606 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
607 alpSrc, dest, dstW, y, fmt); \
610 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
611 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
612 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
613 int yalpha, int uvalpha, int y) \
615 const int32_t **buf = (const int32_t **) _buf, \
616 **ubuf = (const int32_t **) _ubuf, \
617 **vbuf = (const int32_t **) _vbuf, \
618 **abuf = (const int32_t **) _abuf; \
619 uint16_t *dest = (uint16_t *) _dest; \
620 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
621 dest, dstW, yalpha, uvalpha, y, fmt); \
624 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
625 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
626 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
627 int uvalpha, int y) \
629 const int32_t *buf0 = (const int32_t *) _buf0, \
630 **ubuf = (const int32_t **) _ubuf, \
631 **vbuf = (const int32_t **) _vbuf, \
632 *abuf0 = (const int32_t *) _abuf0; \
633 uint16_t *dest = (uint16_t *) _dest; \
634 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
635 dstW, uvalpha, y, fmt); \
638 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
639 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/*
 * output_pixel: emits one packed byte of 8 monochrome pixels; the
 * PIX_FMT_MONOBLACK branch is shown, the inverse branch is missing from
 * this extract.
 *
 * yuv2mono_X_c_template: full vertical filter to 1-bit-per-pixel output.
 * d128 is the dither_8x8_220 row for this output line; g points into the
 * precomputed lookup built from c->table_gU/table_gV at neutral chroma
 * (128,128).  Two luma samples are filtered per iteration, clipped lazily
 * (only when the 0x100 overflow bit is set), and shifted into 'acc' via
 * acc += acc + bit, packing pixels MSB-first, 8 per output byte.
 */
641 #define output_pixel(pos, acc) \
642 if (target == PIX_FMT_MONOBLACK) { \
648 static av_always_inline void
649 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
650 const int16_t **lumSrc, int lumFilterSize,
651 const int16_t *chrFilter, const int16_t **chrUSrc,
652 const int16_t **chrVSrc, int chrFilterSize,
653 const int16_t **alpSrc, uint8_t *dest, int dstW,
654 int y, enum PixelFormat target)
656 const uint8_t * const d128=dither_8x8_220[y&7];
657 uint8_t *g = c->table_gU[128] + c->table_gV[128];
661 for (i = 0; i < dstW - 1; i += 2) {
666 for (j = 0; j < lumFilterSize; j++) {
667 Y1 += lumSrc[j][i] * lumFilter[j];
668 Y2 += lumSrc[j][i+1] * lumFilter[j];
672 if ((Y1 | Y2) & 0x100) {
673 Y1 = av_clip_uint8(Y1);
674 Y2 = av_clip_uint8(Y2);
676 acc += acc + g[Y1 + d128[(i + 0) & 7]];
677 acc += acc + g[Y2 + d128[(i + 1) & 7]];
679 output_pixel(*dest++, acc);
/*
 * Two-row bilinear blend to 1-bit-per-pixel output: each of 8 pixels is the
 * Q12 mix of buf0/buf1 shifted down by 19, dithered via d128, looked up in
 * the mono table g, and shifted into one packed byte per iteration.
 */
684 static av_always_inline void
685 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
686 const int16_t *ubuf[2], const int16_t *vbuf[2],
687 const int16_t *abuf[2], uint8_t *dest, int dstW,
688 int yalpha, int uvalpha, int y,
689 enum PixelFormat target)
691 const int16_t *buf0 = buf[0], *buf1 = buf[1];
692 const uint8_t * const d128 = dither_8x8_220[y & 7];
693 uint8_t *g = c->table_gU[128] + c->table_gV[128];
694 int yalpha1 = 4095 - yalpha;
697 for (i = 0; i < dstW - 7; i += 8) {
698 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
699 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
700 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
701 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
702 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
703 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
704 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
705 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
706 output_pixel(*dest++, acc);
/*
 * Single-row path to 1-bit-per-pixel output: shift each 15-bit luma sample
 * down by 7, dither, look up in the mono table g, and pack 8 pixels per byte.
 */
710 static av_always_inline void
711 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
712 const int16_t *ubuf[2], const int16_t *vbuf[2],
713 const int16_t *abuf0, uint8_t *dest, int dstW,
714 int uvalpha, int y, enum PixelFormat target)
716 const uint8_t * const d128 = dither_8x8_220[y & 7];
717 uint8_t *g = c->table_gU[128] + c->table_gV[128];
720 for (i = 0; i < dstW - 7; i += 8) {
721 int acc = g[(buf0[i ] >> 7) + d128[0]];
722 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
723 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
724 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
725 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
726 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
727 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
728 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
729 output_pixel(*dest++, acc);
/*
 * YUV2PACKEDWRAPPER(name, base, ext, fmt): 8-bit counterpart of
 * YUV2PACKED16WRAPPER — instantiates the _X (full filter), _2 (two-row
 * blend) and _1 (single row) entry points that forward directly to the
 * <name><base>_*_c_template functions with pixel format 'fmt' baked in;
 * no pointer re-casting is needed at this depth.
 * Instantiated below for PIX_FMT_MONOWHITE / PIX_FMT_MONOBLACK.
 */
735 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
736 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
737 const int16_t **lumSrc, int lumFilterSize, \
738 const int16_t *chrFilter, const int16_t **chrUSrc, \
739 const int16_t **chrVSrc, int chrFilterSize, \
740 const int16_t **alpSrc, uint8_t *dest, int dstW, \
743 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
744 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
745 alpSrc, dest, dstW, y, fmt); \
748 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
749 const int16_t *ubuf[2], const int16_t *vbuf[2], \
750 const int16_t *abuf[2], uint8_t *dest, int dstW, \
751 int yalpha, int uvalpha, int y) \
753 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
754 dest, dstW, yalpha, uvalpha, y, fmt); \
757 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
758 const int16_t *ubuf[2], const int16_t *vbuf[2], \
759 const int16_t *abuf0, uint8_t *dest, int dstW, \
760 int uvalpha, int y) \
762 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
763 abuf0, dest, dstW, uvalpha, \
767 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
768 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/*
 * output_pixels: stores one macropixel of packed 4:2:2.  YUYV order
 * (Y1 U Y2 V) for PIX_FMT_YUYV422; the other branch writes luma at odd
 * offsets (UYVY order U Y1 V Y2 — the U/V store lines are missing from
 * this extract).
 *
 * yuv2422_X_c_template: full vertical filter to packed 4:2:2.  Two luma
 * samples share one U/V pair per iteration; overflow past 8 bits is
 * detected with the combined 0x100 test and clipped lazily.
 */
770 #define output_pixels(pos, Y1, U, Y2, V) \
771 if (target == PIX_FMT_YUYV422) { \
772 dest[pos + 0] = Y1; \
774 dest[pos + 2] = Y2; \
778 dest[pos + 1] = Y1; \
780 dest[pos + 3] = Y2; \
783 static av_always_inline void
784 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
785 const int16_t **lumSrc, int lumFilterSize,
786 const int16_t *chrFilter, const int16_t **chrUSrc,
787 const int16_t **chrVSrc, int chrFilterSize,
788 const int16_t **alpSrc, uint8_t *dest, int dstW,
789 int y, enum PixelFormat target)
793 for (i = 0; i < (dstW >> 1); i++) {
800 for (j = 0; j < lumFilterSize; j++) {
801 Y1 += lumSrc[j][i * 2] * lumFilter[j];
802 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
804 for (j = 0; j < chrFilterSize; j++) {
805 U += chrUSrc[j][i] * chrFilter[j];
806 V += chrVSrc[j][i] * chrFilter[j];
812 if ((Y1 | Y2 | U | V) & 0x100) {
813 Y1 = av_clip_uint8(Y1);
814 Y2 = av_clip_uint8(Y2);
815 U = av_clip_uint8(U);
816 V = av_clip_uint8(V);
818 output_pixels(4*i, Y1, U, Y2, V);
/*
 * Two-row bilinear blend to packed 4:2:2: luma and chroma are each mixed
 * with Q12 weights (4095 - alpha) : alpha and shifted down by 19 to 8 bits,
 * then stored as one 4-byte macropixel per two output pixels.
 */
822 static av_always_inline void
823 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
824 const int16_t *ubuf[2], const int16_t *vbuf[2],
825 const int16_t *abuf[2], uint8_t *dest, int dstW,
826 int yalpha, int uvalpha, int y,
827 enum PixelFormat target)
829 const int16_t *buf0 = buf[0], *buf1 = buf[1],
830 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
831 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
832 int yalpha1 = 4095 - yalpha;
833 int uvalpha1 = 4095 - uvalpha;
836 for (i = 0; i < (dstW >> 1); i++) {
837 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
838 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
839 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
840 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
842 output_pixels(i * 4, Y1, U, Y2, V);
/*
 * Single-row path to packed 4:2:2.  Luma always comes from buf0 (>> 7 to
 * 8 bits).  When uvalpha < 2048 (closer to the second chroma row) only
 * ubuf1/vbuf1 are used; otherwise the two chroma rows are averaged (>> 8
 * combines the sum and the 7-bit descale).
 */
846 static av_always_inline void
847 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
848 const int16_t *ubuf[2], const int16_t *vbuf[2],
849 const int16_t *abuf0, uint8_t *dest, int dstW,
850 int uvalpha, int y, enum PixelFormat target)
852 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
853 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
856 if (uvalpha < 2048) {
857 for (i = 0; i < (dstW >> 1); i++) {
858 int Y1 = buf0[i * 2] >> 7;
859 int Y2 = buf0[i * 2 + 1] >> 7;
860 int U = ubuf1[i] >> 7;
861 int V = vbuf1[i] >> 7;
863 output_pixels(i * 4, Y1, U, Y2, V);
866 for (i = 0; i < (dstW >> 1); i++) {
867 int Y1 = buf0[i * 2] >> 7;
868 int Y2 = buf0[i * 2 + 1] >> 7;
869 int U = (ubuf0[i] + ubuf1[i]) >> 8;
870 int V = (vbuf0[i] + vbuf1[i]) >> 8;
872 output_pixels(i * 4, Y1, U, Y2, V);
/* Instantiate the packed-4:2:2 writers for both byte orders. */
879 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
880 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/*
 * R_B / B_R: swap the red and blue stores so the same template serves both
 * RGB48 and BGR48.  output_pixel writes one 16-bit component honoring the
 * target's endianness (isBE; the store lines are missing from this extract).
 *
 * yuv2rgb48_X_c_template: full vertical filter to 48-bit RGB/BGR.  Chroma
 * accumulators are pre-biased by -128 in the appropriate fixed-point
 * position; the filtered Y is offset and scaled by the context's yuv2rgb
 * coefficients, chroma is mixed in via the v2r/v2g/u2g/u2b coefficients,
 * and each component is clipped to 30 bits then shifted down by 14 to
 * 16 bits (see the inline bit-depth accounting comments).
 */
882 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
883 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
884 #define output_pixel(pos, val) \
885 if (isBE(target)) { \
891 static av_always_inline void
892 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
893 const int32_t **lumSrc, int lumFilterSize,
894 const int16_t *chrFilter, const int32_t **chrUSrc,
895 const int32_t **chrVSrc, int chrFilterSize,
896 const int32_t **alpSrc, uint16_t *dest, int dstW,
897 int y, enum PixelFormat target)
901 for (i = 0; i < (dstW >> 1); i++) {
905 int U = -128 << 23; // 19
909 for (j = 0; j < lumFilterSize; j++) {
910 Y1 += lumSrc[j][i * 2] * lumFilter[j];
911 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
913 for (j = 0; j < chrFilterSize; j++) {
914 U += chrUSrc[j][i] * chrFilter[j];
915 V += chrVSrc[j][i] * chrFilter[j];
918 // 8bit: 12+15=27; 16-bit: 12+19=31
924 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
925 Y1 -= c->yuv2rgb_y_offset;
926 Y2 -= c->yuv2rgb_y_offset;
927 Y1 *= c->yuv2rgb_y_coeff;
928 Y2 *= c->yuv2rgb_y_coeff;
931 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
933 R = V * c->yuv2rgb_v2r_coeff;
934 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
935 B = U * c->yuv2rgb_u2b_coeff;
937 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
938 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
939 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
940 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
941 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
942 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
943 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Two-row bilinear blend to 48-bit RGB/BGR: Y and chroma are each mixed
 * with Q12 weights and shifted down by 14; chroma gets the -128 bias folded
 * into the blend.  Conversion and store follow the same yuv2rgb coefficient
 * path as yuv2rgb48_X_c_template.
 */
948 static av_always_inline void
949 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
950 const int32_t *ubuf[2], const int32_t *vbuf[2],
951 const int32_t *abuf[2], uint16_t *dest, int dstW,
952 int yalpha, int uvalpha, int y,
953 enum PixelFormat target)
955 const int32_t *buf0 = buf[0], *buf1 = buf[1],
956 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
957 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
958 int yalpha1 = 4095 - yalpha;
959 int uvalpha1 = 4095 - uvalpha;
962 for (i = 0; i < (dstW >> 1); i++) {
963 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
964 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
965 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
966 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
969 Y1 -= c->yuv2rgb_y_offset;
970 Y2 -= c->yuv2rgb_y_offset;
971 Y1 *= c->yuv2rgb_y_coeff;
972 Y2 *= c->yuv2rgb_y_coeff;
976 R = V * c->yuv2rgb_v2r_coeff;
977 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
978 B = U * c->yuv2rgb_u2b_coeff;
980 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
981 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
982 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
983 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
984 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
985 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Single-row path to 48-bit RGB/BGR.  Luma always comes from buf0 (>> 2).
 * When uvalpha < 2048 only the first chroma row is used (-128 bias at Q11,
 * >> 2); otherwise the two chroma rows are averaged (-128 bias at Q12,
 * >> 3 combines sum and descale).  Conversion and store then match
 * yuv2rgb48_X_c_template.
 */
990 static av_always_inline void
991 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
992 const int32_t *ubuf[2], const int32_t *vbuf[2],
993 const int32_t *abuf0, uint16_t *dest, int dstW,
994 int uvalpha, int y, enum PixelFormat target)
996 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
997 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1000 if (uvalpha < 2048) {
1001 for (i = 0; i < (dstW >> 1); i++) {
1002 int Y1 = (buf0[i * 2] ) >> 2;
1003 int Y2 = (buf0[i * 2 + 1]) >> 2;
1004 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1005 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1008 Y1 -= c->yuv2rgb_y_offset;
1009 Y2 -= c->yuv2rgb_y_offset;
1010 Y1 *= c->yuv2rgb_y_coeff;
1011 Y2 *= c->yuv2rgb_y_coeff;
1015 R = V * c->yuv2rgb_v2r_coeff;
1016 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1017 B = U * c->yuv2rgb_u2b_coeff;
1019 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1020 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1021 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1022 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1023 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1024 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1028 for (i = 0; i < (dstW >> 1); i++) {
1029 int Y1 = (buf0[i * 2] ) >> 2;
1030 int Y2 = (buf0[i * 2 + 1]) >> 2;
1031 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
1032 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
1035 Y1 -= c->yuv2rgb_y_offset;
1036 Y2 -= c->yuv2rgb_y_offset;
1037 Y1 *= c->yuv2rgb_y_coeff;
1038 Y2 *= c->yuv2rgb_y_coeff;
1042 R = V * c->yuv2rgb_v2r_coeff;
1043 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1044 B = U * c->yuv2rgb_u2b_coeff;
1046 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1047 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1048 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1049 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1050 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1051 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate the 48-bit writers for RGB/BGR in both endiannesses. */
1061 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1062 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1063 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1064 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/**
 * Write two horizontally adjacent output pixels (Y1/Y2 share one U/V
 * chroma sample) into a packed RGB/BGR destination.
 * _r/_g/_b point into precomputed per-component lookup tables (callers
 * pass c->table_rV[V], c->table_gU[U] + c->table_gV[V], c->table_bU[U]),
 * indexed by luma plus, for the dithered low-depth targets, a per-position
 * dither offset.  `target' is a compile-time constant at every call site,
 * so after inlining only one branch of the chain survives.
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              int U, int V, int A1, int A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
    /* 32-bit targets: each table entry already holds its component shifted
     * into final position, so the pixel is composed with three adds. */
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

        /* alpha byte position: the *32_1 layouts keep alpha in the low
         * byte, the plain 32-bit layouts in the high byte */
        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
        int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    /* 24-bit targets: three separate bytes per pixel, channel order picked
     * by the r_b/b_r helper macros below. */
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)

        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] = g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] = g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
    /* 16/15/12-bit targets: ordered dither added to the table index; the
     * dither offsets alternate per pixel column and per scanline. */
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            /* 5-6-5: green has one more bit, so a finer dither table */
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_4[ y & 1     ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_4[ y & 1     ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_8[ y & 1     ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_8[ y & 1     ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
            /* 4:4:4 uses the coarser 4x4 table */
            dr1 = dither_4x4_16[ y & 3     ][0];
            dg1 = dither_4x4_16[ y & 3     ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3     ][1];
            dg2 = dither_4x4_16[ y & 3     ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
            const uint8_t * const d64  = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];

        /* 4-bit packed: two pixels share one output byte (low/high nibble) */
        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/**
 * Vertically scale (multi-tap filter) luma/chroma/alpha into 8-bit samples,
 * then emit packed RGB two pixels at a time via yuv2rgb_write().
 * Accumulators are filtered in fixed point; values outside 0..255 are
 * clipped before being used as table indices.
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
    for (i = 0; i < (dstW >> 1); i++) {
        int av_unused A1, A2;
        const void *r, *g, *b;

        /* vertical filtering: accumulate filter taps over the source lines */
        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        /* cheap "any value out of 8-bit range?" test before clipping */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        /* same filtering for the alpha plane, when present */
        for (j = 0; j < lumFilterSize; j++) {
            A1 += alpSrc[j][i * 2    ] * lumFilter[j];
            A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
        if ((A1 | A2) & 0x100) {
            A1 = av_clip_uint8(A1);
            A2 = av_clip_uint8(A2);

        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
        g = (c->table_gU[U] + c->table_gV[V]);

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/**
 * Bilinear (two-line) vertical interpolation variant: blend each pair of
 * input lines with 12-bit weights (yalpha/uvalpha out of 4096), shift the
 * 19-bit product down to 8 bits, then emit via yuv2rgb_write().
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    /* complementary weights for the first line */
    int  yalpha1 = 4095 -  yalpha;
    int uvalpha1 = 4095 - uvalpha;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;

        const void *r =  c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b =  c->table_bU[U];

        A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
        A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
1277 static av_always_inline void
1278 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1279 const int16_t *ubuf[2], const int16_t *vbuf[2],
1280 const int16_t *abuf0, uint8_t *dest, int dstW,
1281 int uvalpha, int y, enum PixelFormat target,
1284 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1285 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1288 if (uvalpha < 2048) {
1289 for (i = 0; i < (dstW >> 1); i++) {
1290 int Y1 = buf0[i * 2] >> 7;
1291 int Y2 = buf0[i * 2 + 1] >> 7;
1292 int U = ubuf1[i] >> 7;
1293 int V = vbuf1[i] >> 7;
1295 const void *r = c->table_rV[V],
1296 *g = (c->table_gU[U] + c->table_gV[V]),
1297 *b = c->table_bU[U];
1300 A1 = abuf0[i * 2 ] >> 7;
1301 A2 = abuf0[i * 2 + 1] >> 7;
1304 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1305 r, g, b, y, target, hasAlpha);
1308 for (i = 0; i < (dstW >> 1); i++) {
1309 int Y1 = buf0[i * 2] >> 7;
1310 int Y2 = buf0[i * 2 + 1] >> 7;
1311 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1312 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1314 const void *r = c->table_rV[V],
1315 *g = (c->table_gU[U] + c->table_gV[V]),
1316 *b = c->table_bU[U];
1319 A1 = abuf0[i * 2 ] >> 7;
1320 A2 = abuf0[i * 2 + 1] >> 7;
1323 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1324 r, g, b, y, target, hasAlpha);
/* Wrapper generators: stamp out the externally referenced _X_c / _2_c /
 * _1_c entry points around the av_always_inline templates above, baking
 * the target pixel format and alpha handling in as compile-time constants
 * so the templates specialize fully at compile time. */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiations.  The 32-bit RGB variants come in three flavours: the
 * run-time-checked ones (alpha iff an alpha plane buffer exists), the
 * forced-alpha a32* ones, and the no-alpha x32* ones. */
YUV2RGBWRAPPER(yuv2rgb,, 32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPER(yuv2rgb,, 32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1,   1);
YUV2RGBWRAPPER(yuv2rgb,, a32,   PIX_FMT_RGB32,     1);
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1,   0);
YUV2RGBWRAPPER(yuv2rgb,, x32,   PIX_FMT_RGB32,     0);
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
YUV2RGBWRAPPER(yuv2rgb,, 16,    PIX_FMT_RGB565,    0);
YUV2RGBWRAPPER(yuv2rgb,, 15,    PIX_FMT_RGB555,    0);
YUV2RGBWRAPPER(yuv2rgb,, 12,    PIX_FMT_RGB444,    0);
YUV2RGBWRAPPER(yuv2rgb,, 8,     PIX_FMT_RGB8,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4,     PIX_FMT_RGB4,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4b,    PIX_FMT_RGB4_BYTE, 0);
/**
 * Full-chroma-resolution RGB output: one independent U/V sample per output
 * pixel (no 2x horizontal chroma subsampling at the output side).
 * RGB is computed directly from the filtered YUV with the context's
 * fixed-point coefficients rather than via lookup tables; results are kept
 * with 30-bit headroom and clipped before narrowing.  `step' is the
 * per-pixel byte stride (3 for 24-bit, 4 for 32-bit targets).
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;

    for (i = 0; i < dstW; i++) {
        /* rounding bias plus the -128 chroma offset, pre-scaled */
        int U = (1<<9)-(128 << 19);
        int V = (1<<9)-(128 << 19);

        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        for (j = 0; j < lumFilterSize; j++) {
            A += alpSrc[j][i] * lumFilter[j];
        A = av_clip_uint8(A);

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        R = Y + V*c->yuv2rgb_v2r_coeff;
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
        B = Y +                          U*c->yuv2rgb_u2b_coeff;
        /* clip only when some component left the 30-bit range */
        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);

        /* alpha-first layouts (ARGB/ABGR) */
        dest[0] = hasAlpha ? A : 255;
        dest[3] = hasAlpha ? A : 255;
        /* alpha-last layouts (RGBA/BGRA) */
        dest[0] = hasAlpha ? A : 255;
        dest[3] = hasAlpha ? A : 255;
/* Full-chroma-resolution instantiations: checked-alpha, forced-alpha, and
 * no-alpha 32-bit variants plus the 24-bit ones. */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
/* Fill a width x height rectangle of a plane with a constant byte value,
 * starting at row y of the given stride. */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
/* Read one 16-bit component with the source format's byte order. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* BGR48 stores components in the opposite order, so map the generically
 * named first/last locals (r_b / b_r) onto the right channel per format. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* 48-bit RGB/BGR -> 16-bit luma, fixed-point BT.601-style weighting with
 * rounding (coefficients RY/GY/BY at RGB2YUV_SHIFT precision). */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g   = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);

        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* 48-bit RGB/BGR -> 16-bit chroma at full horizontal resolution.
 * 0x10001<<(SHIFT-1) biases to the unsigned chroma midpoint with rounding. */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g   = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);

        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same as above but at half horizontal resolution: each chroma sample is
 * computed from the rounded average of two adjacent input pixels. */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g   = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;

        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the uint8_t*-typed ToY/ToUV/ToUV_half entry points for one
 * 48-bit format: thin casting shims around the templates above. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* One set of readers per 48-bit channel order / endianness combination. */
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Load one source pixel: 32-bit formats read a native-order (aligned)
 * dword, 15/16-bit formats read a word with the format's endianness. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA ||  \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic 15/16/32-bit RGB -> luma: the mask/shift parameters (all
 * compile-time constants at the call sites generated below) describe where
 * each component sits in the loaded pixel; rsh/gsh/bsh re-align the
 * coefficients so all formats share one fixed-point precision S. */
static av_always_inline void
rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
              rnd = (32<<((S)-1)) + (1<<(S-7));

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Generic 15/16/32-bit RGB -> chroma at full horizontal resolution; same
 * mask/shift parametrization as rgb16_32ToY_c_template above, with the
 * 256<<(S-1) term biasing to the unsigned chroma midpoint. */
static av_always_inline void
rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = (256<<((S)-1)) + (1<<(S-7));

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* Half-horizontal-resolution variant: sums two adjacent pixels before the
 * weighting (note rnd is doubled and the final shift is one bigger).
 * The component masks are widened by one bit so the per-channel sums of
 * two pixels fit; g is accumulated via maskgx = ~(maskr|maskb). */
static av_always_inline void
rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);

    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        /* 565-style formats: green's carry can spill into the neighbour
         * fields, handled differently from 555/32-bit layouts */
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
        dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Generate the ToY/ToUV/ToUV_half readers for one 15/16/32-bit RGB layout
 * by instantiating the templates above with constant masks and shifts. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One reader set per supported 32/16/15-bit layout; the mask columns give
 * each component's position, the last column the fixed-point precision. */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* ABGR -> alpha plane: alpha is byte 0 of each 4-byte pixel; scale the
 * 8-bit value into the 14-bit intermediate range (<< 6). */
static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[4 * n] << 6;
}
/* RGBA -> alpha plane: alpha is byte 3 of each 4-byte pixel; scale the
 * 8-bit value into the 14-bit intermediate range (<< 6). */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *a = src + 3;
    int n;

    for (n = 0; n < width; n++, a += 4)
        dst[n] = *a << 6;
}
/* PAL8 -> alpha plane: alpha is the top byte of the 32-bit palette entry
 * selected by each source index, scaled from 8 bits to 14 (<< 6). */
static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int n;

    for (n = 0; n < width; n++) {
        uint32_t entry = pal[src[n]];

        dst[n] = (entry >> 24) << 6;
    }
}
/* PAL8 -> luma plane: luma is the low byte of the 32-bit palette entry
 * selected by each source index, scaled from 8 bits to 14 (<< 6).
 *
 * Fix: `width' was declared `long' while every sibling reader and the
 * lumToYV12 function-pointer type it is called through (see hyscale():
 * void (*)(uint8_t *, const uint8_t *, int, uint32_t *)) use `int' —
 * calling through an incompatibly typed pointer is undefined behavior
 * (and a real mismatch on LP64 targets).  Changed to `int'. */
static void palToY_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d = src[i];

        dst[i]= (pal[d] & 0xFF)<<6;
    }
}
/* PAL8 -> planar chroma: look up each palette index and extract the U
 * (bits 8-15) and V (bits 16-23) bytes, scaled to the 14-bit intermediate.
 * Both source pointers must alias the same line (asserted below).
 * NOTE(review): dstU is declared uint16_t* while dstV is int16_t*; the
 * siblings palToY_c/palToA_c use int16_t* — looks like a typo, confirm. */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= (uint8_t)(p>> 8)<<6;
        dstV[i]= (uint8_t)(p>>16)<<6;
/* 1 bpp packed, MSB first, 0 = white: each source byte is inverted so a
 * set bit means "bright", then every bit expands to 0 or 16383 in the
 * 14-bit luma intermediate. */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++) {
        int bits = ~src[px >> 3];

        dst[px] = ((bits >> (7 - (px & 7))) & 1) * 16383;
    }
}
/* 1 bpp packed, MSB first, 1 = white: every bit expands directly to 0 or
 * 16383 in the 14-bit luma intermediate. */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++)
        dst[px] = ((src[px >> 3] >> (7 - (px & 7))) & 1) * 16383;
}
1797 //FIXME yuy2* can read up to 7 samples too much
/* YUY2 (YUYV) -> luma: the Y samples occupy the even bytes. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* YUY2 (YUYV) -> planar chroma: bytes are Y0 U Y1 V, one U/V pair per two
 * luma samples.  Both source pointers must alias the same interleaved
 * line (asserted). */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *px = src1 + 4 * n;

        dstU[n] = px[1];
        dstV[n] = px[3];
    }
    assert(src1 == src2);
}
/* Byte-swap a line of 16-bit luma samples (foreign-endian input). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i=0; i<width; i++) {
        dst[i] = av_bswap16(src[i]);
/* Byte-swap a line of 16-bit chroma samples from the two planar inputs. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    for (i=0; i<width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY -> luma: the Y samples occupy the odd bytes. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++)
/* UYVY -> planar chroma: bytes are U Y0 V Y1.  Both source pointers must
 * alias the same interleaved line (asserted). */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *p = src1;
    int n;

    for (n = 0; n < width; n++, p += 4) {
        dstU[n] = p[0];
        dstV[n] = p[2];
    }
    assert(src1 == src2);
}
1862 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1863 const uint8_t *src, int width)
1866 for (i = 0; i < width; i++) {
1867 dst1[i] = src[2*i+0];
1868 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is stored U,V -> dstU gets the even bytes. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is stored V,U -> destinations are swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
/* Read one 16-bit sample with the source format's byte order. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> luma: fixed-point weighted sum with rounding; the result is
 * left at 6 bits above 8-bit range (>> (RGB2YUV_SHIFT-6)) for the
 * higher-precision intermediate. */
static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* BGR24 -> chroma at full horizontal resolution; 256<<(SHIFT-1) biases to
 * the unsigned chroma midpoint.  Both inputs must alias (asserted). */
static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
    assert(src1 == src2);
/* BGR24 -> chroma at half horizontal resolution: sums adjacent pixel
 * pairs, hence the doubled bias (256<<SHIFT) and one-smaller shift. */
static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
    assert(src1 == src2);
/* RGB24 -> luma; same math as bgr24ToY_c with the channel order reversed. */
static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* RGB24 -> chroma at full horizontal resolution; mirror of bgr24ToUV_c. */
static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* RGB24 -> chroma at half horizontal resolution: sums adjacent pixel
 * pairs; mirror of bgr24ToUV_half_c. */
static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Horizontal scaling for >8-bit-depth input: 16-bit source samples,
 * 32-bit destination accumulators.  The output shift `sh' is derived from
 * the source bit depth so the result lands in 19-bit range; it is then
 * clamped to (1<<19)-1. */
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                       const int16_t *filter,
                       const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    int sh = (bits <= 7) ? 11 : (bits - 4);

    /* RGB/PAL8 sources below 16-bit depth were pre-expanded; adjust sh */
    if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)

    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
2000 // bilinear / bicubic scaling
/* Generic horizontal scaling for 8-bit input: accumulate filterSize taps
 * per output sample and narrow to the 15-bit intermediate with clamping. */
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];

        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Horizontal scaling for native-endian 16-bit input with caller-supplied
 * output shift; result clamped to the 15-bit intermediate. */
static inline void hScale16N_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                               const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];

        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
/* Same as hScale16N_c but for opposite-endian input: each sample is
 * byte-swapped before filtering. */
static inline void hScale16NX_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                                const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];

        for (j=0; j<filterSize; j++) {
            val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
2049 //FIXME all scalers more complex than bilinear could do half of this transform
/* In-place MPEG-range -> full-range (JPEG) chroma expansion in the 15-bit
 * intermediate; input is clamped first so the fixed-point multiply cannot
 * overflow. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* In-place full-range (JPEG) -> MPEG-range chroma compression in the
 * 15-bit intermediate: scale by 1799/2048 (~= 224/255) and re-center. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; // 1469
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11; // 1469
    }
}
/* In-place MPEG-range -> full-range (JPEG) luma expansion in the 15-bit
 * intermediate; input clamped so the multiply cannot overflow. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* In-place full-range (JPEG) -> MPEG-range luma compression in the 15-bit
 * intermediate: scale by 14071/16384 (~= 219/255) and add the black-level
 * offset. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
}
/* 19-bit (int32 sample) variant of chrRangeToJpeg_c: same coefficients,
 * constants scaled by 16 to match the wider intermediate. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit (int32 sample) variant of chrRangeFromJpeg_c. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/* 19-bit (int32 sample) variant of lumRangeToJpeg_c; the coefficient and
 * offset are pre-divided by 4 to keep the products in 32-bit range. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/* 19-bit (int32 sample) variant of lumRangeFromJpeg_c; coefficient and
 * offset divided by 4 to avoid 32-bit overflow. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
/* Fast bilinear horizontal luma scaling: 16.16 fixed-point source position
 * xpos advances by xInc per output sample; each output blends the two
 * neighbouring input samples with a 7-bit weight.  The trailing loop
 * re-fills output samples whose source position ran past the last valid
 * input pair. */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
        dst[i] = src[srcW-1]*128;
/* Widen 8-bit samples to 16 bits by byte duplication (0xAB -> 0xABAB),
 * matching the value-range expansion x * 65535/255.  Iterates backwards
 * ("Rv") so an in-place widening, with dst overlapping src, never
 * overwrites input bytes that are still to be read. */
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    uint8_t *out = (uint8_t *) _dst;
    int n;

    for (n = len - 1; n >= 0; n--) {
        uint8_t v = src[n];

        out[2 * n + 1] = v;
        out[2 * n]     = v;
    }
}
/* Narrow 19-bit int32 intermediates to the 15-bit int16 form (>> 4).
 * Iterates forwards ("Fw"), which is safe for the in-place use in
 * hyscale() where dst aliases src: each 2-byte write lands at or before
 * the 4-byte sample just read. */
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    int n = 0;

    while (n < len) {
        dst[n] = src[n] >> 4;
        n++;
    }
}
2146 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha) line to the
 * intermediate buffer.  Pipeline: optional input-format conversion into
 * formatConvBuffer -> optional 8->16 bit promotion -> horizontal scaling
 * (16-bit path, generic filter, or fast bilinear) -> optional range
 * conversion -> optional 19->15 bit narrowing for low-depth outputs. */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;

    /* convert the input line to planar 8/16-bit first, when needed */
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;

    /* promote low-depth planar input to 16 bits for the 16-bit pipeline */
    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16 && !isAnyRGB(c->srcFormat)) {
        c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
        src = formatConvBuffer;

        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
        c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
    } else if (!c->hyscale_fast) {
        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);

        convertRange(dst, dstWidth);

    /* low-depth destination with the 16-bit pipeline: drop back to 15 bit */
    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 15 && c->scalingBpp == 16) {
        c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/* Fast bilinear horizontal chroma scaler; same 16.16 fixed-point walk as
 * hyscale_fast_c but operating on both chroma planes at once.
 * xalpha^127 equals 127-xalpha for xalpha in [0,127], so each output is
 * the 7-bit weighted blend of two neighbouring source samples.
 * NOTE(review): line-sampled chunk -- 'xpos+=xInc;' and closing braces
 * are not visible here. */
2184 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2185 int dstWidth, const uint8_t *src1,
2186 const uint8_t *src2, int srcW, int xInc)
2189 unsigned int xpos=0;
2190 for (i=0;i<dstWidth;i++) {
2191 register unsigned int xx=xpos>>16;
2192 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2193 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2194 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* Rewrite trailing pixels whose interpolation would read past srcW-1. */
2197 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2198 dst1[i] = src1[srcW-1]*128;
2199 dst2[i] = src2[srcW-1]*128;
/* Horizontally scale one pair of chroma lines (U into dst1, V into dst2).
 * Mirrors hyscale(): optional chrToYV12 conversion into formatConvBuffer
 * (+ an aligned second buffer for the V plane), optional 8->16 widening,
 * then hScale16 / generic hScale / fast bilinear, then chroma range
 * conversion and 19->15 narrowing.
 * NOTE(review): line-sampled chunk -- several 'if' headers, 'src2='
 * assignments and braces are not visible; confirm against upstream. */
2203 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2204 const uint8_t *src1, const uint8_t *src2,
2205 int srcW, int xInc, const int16_t *hChrFilter,
2206 const int16_t *hChrFilterPos, int hChrFilterSize,
2207 uint8_t *formatConvBuffer, uint32_t *pal)
/* Second (V) scratch buffer, 16-byte aligned past the U buffer. */
2210 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2211 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2212 src1= formatConvBuffer;
2216 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16 && !isAnyRGB(c->srcFormat)) {
2217 uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2+78, 16));
2218 c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2219 c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2220 src1 = formatConvBuffer;
/* 16-bit path: shift normalizes input depth (RGB/PAL8 use 13). */
2225 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2226 c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2227 c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2228 } else if (!c->hcscale_fast) {
2229 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2230 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2231 } else { // fast bilinear upscale / crap downscale
2232 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2235 if (c->chrConvertRange)
2236 c->chrConvertRange(dst1, dst2, dstWidth);
/* Narrow 19-bit intermediates when the destination is < 16 bits deep. */
2238 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 15 && c->scalingBpp == 16) {
2239 c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2240 c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/* Select the C output functions for the destination format: planar writers
 * (yuv2yuv1 for the unscaled 1-tap case, yuv2yuvX for the general vertical
 * filter) and packed writers (1-tap, 2-tap bilinear, and general X-tap).
 * The first switch handles SWS_FULL_CHR_H_INT (full-chroma RGB output),
 * the second the normal packed path.
 * NOTE(review): this chunk is line-sampled -- many 'case PIX_FMT_*:'
 * labels, 'break;' statements, '#if CONFIG_SMALL' guards and braces are
 * missing from this view; do not infer fall-through behaviour from what
 * is visible here. */
2244 static av_always_inline void
2245 find_c_packed_planar_out_funcs(SwsContext *c,
2246 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2247 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2248 yuv2packedX_fn *yuv2packedX)
2250 enum PixelFormat dstFormat = c->dstFormat;
/* Planar writer selection, keyed on output bit depth / layout. */
2252 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2253 *yuv2yuvX = yuv2nv12X_c;
2254 } else if (is16BPS(dstFormat)) {
2255 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2256 } else if (is9_OR_10BPS(dstFormat)) {
2257 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2258 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2260 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2263 *yuv2yuv1 = yuv2yuv1_c;
2264 *yuv2yuvX = yuv2yuvX_c;
/* Full-chroma horizontal interpolation: only X-tap packed writers exist. */
2266 if(c->flags & SWS_FULL_CHR_H_INT) {
2267 switch (dstFormat) {
2270 *yuv2packedX = yuv2rgba32_full_X_c;
2272 #if CONFIG_SWSCALE_ALPHA
2274 *yuv2packedX = yuv2rgba32_full_X_c;
2276 #endif /* CONFIG_SWSCALE_ALPHA */
2278 *yuv2packedX = yuv2rgbx32_full_X_c;
2280 #endif /* !CONFIG_SMALL */
2284 *yuv2packedX = yuv2argb32_full_X_c;
2286 #if CONFIG_SWSCALE_ALPHA
2288 *yuv2packedX = yuv2argb32_full_X_c;
2290 #endif /* CONFIG_SWSCALE_ALPHA */
2292 *yuv2packedX = yuv2xrgb32_full_X_c;
2294 #endif /* !CONFIG_SMALL */
2298 *yuv2packedX = yuv2bgra32_full_X_c;
2300 #if CONFIG_SWSCALE_ALPHA
2302 *yuv2packedX = yuv2bgra32_full_X_c;
2304 #endif /* CONFIG_SWSCALE_ALPHA */
2306 *yuv2packedX = yuv2bgrx32_full_X_c;
2308 #endif /* !CONFIG_SMALL */
2312 *yuv2packedX = yuv2abgr32_full_X_c;
2314 #if CONFIG_SWSCALE_ALPHA
2316 *yuv2packedX = yuv2abgr32_full_X_c;
2318 #endif /* CONFIG_SWSCALE_ALPHA */
2320 *yuv2packedX = yuv2xbgr32_full_X_c;
2322 #endif /* !CONFIG_SMALL */
2325 *yuv2packedX = yuv2rgb24_full_X_c;
2328 *yuv2packedX = yuv2bgr24_full_X_c;
/* Normal packed path: 1-tap, 2-tap and X-tap writers per format. */
2335 switch (dstFormat) {
2336 case PIX_FMT_GRAY16BE:
2337 *yuv2packed1 = yuv2gray16BE_1_c;
2338 *yuv2packed2 = yuv2gray16BE_2_c;
2339 *yuv2packedX = yuv2gray16BE_X_c;
2341 case PIX_FMT_GRAY16LE:
2342 *yuv2packed1 = yuv2gray16LE_1_c;
2343 *yuv2packed2 = yuv2gray16LE_2_c;
2344 *yuv2packedX = yuv2gray16LE_X_c;
2346 case PIX_FMT_MONOWHITE:
2347 *yuv2packed1 = yuv2monowhite_1_c;
2348 *yuv2packed2 = yuv2monowhite_2_c;
2349 *yuv2packedX = yuv2monowhite_X_c;
2351 case PIX_FMT_MONOBLACK:
2352 *yuv2packed1 = yuv2monoblack_1_c;
2353 *yuv2packed2 = yuv2monoblack_2_c;
2354 *yuv2packedX = yuv2monoblack_X_c;
2356 case PIX_FMT_YUYV422:
2357 *yuv2packed1 = yuv2yuyv422_1_c;
2358 *yuv2packed2 = yuv2yuyv422_2_c;
2359 *yuv2packedX = yuv2yuyv422_X_c;
2361 case PIX_FMT_UYVY422:
2362 *yuv2packed1 = yuv2uyvy422_1_c;
2363 *yuv2packed2 = yuv2uyvy422_2_c;
2364 *yuv2packedX = yuv2uyvy422_X_c;
2366 case PIX_FMT_RGB48LE:
2367 *yuv2packed1 = yuv2rgb48le_1_c;
2368 *yuv2packed2 = yuv2rgb48le_2_c;
2369 *yuv2packedX = yuv2rgb48le_X_c;
2371 case PIX_FMT_RGB48BE:
2372 *yuv2packed1 = yuv2rgb48be_1_c;
2373 *yuv2packed2 = yuv2rgb48be_2_c;
2374 *yuv2packedX = yuv2rgb48be_X_c;
2376 case PIX_FMT_BGR48LE:
2377 *yuv2packed1 = yuv2bgr48le_1_c;
2378 *yuv2packed2 = yuv2bgr48le_2_c;
2379 *yuv2packedX = yuv2bgr48le_X_c;
2381 case PIX_FMT_BGR48BE:
2382 *yuv2packed1 = yuv2bgr48be_1_c;
2383 *yuv2packed2 = yuv2bgr48be_2_c;
2384 *yuv2packedX = yuv2bgr48be_X_c;
2389 *yuv2packed1 = yuv2rgb32_1_c;
2390 *yuv2packed2 = yuv2rgb32_2_c;
2391 *yuv2packedX = yuv2rgb32_X_c;
2393 #if CONFIG_SWSCALE_ALPHA
2395 *yuv2packed1 = yuv2rgba32_1_c;
2396 *yuv2packed2 = yuv2rgba32_2_c;
2397 *yuv2packedX = yuv2rgba32_X_c;
2399 #endif /* CONFIG_SWSCALE_ALPHA */
2401 *yuv2packed1 = yuv2rgbx32_1_c;
2402 *yuv2packed2 = yuv2rgbx32_2_c;
2403 *yuv2packedX = yuv2rgbx32_X_c;
2405 #endif /* !CONFIG_SMALL */
2407 case PIX_FMT_RGB32_1:
2408 case PIX_FMT_BGR32_1:
2410 *yuv2packed1 = yuv2rgb32_1_1_c;
2411 *yuv2packed2 = yuv2rgb32_1_2_c;
2412 *yuv2packedX = yuv2rgb32_1_X_c;
2414 #if CONFIG_SWSCALE_ALPHA
2416 *yuv2packed1 = yuv2rgba32_1_1_c;
2417 *yuv2packed2 = yuv2rgba32_1_2_c;
2418 *yuv2packedX = yuv2rgba32_1_X_c;
2420 #endif /* CONFIG_SWSCALE_ALPHA */
2422 *yuv2packed1 = yuv2rgbx32_1_1_c;
2423 *yuv2packed2 = yuv2rgbx32_1_2_c;
2424 *yuv2packedX = yuv2rgbx32_1_X_c;
2426 #endif /* !CONFIG_SMALL */
2429 *yuv2packed1 = yuv2rgb24_1_c;
2430 *yuv2packed2 = yuv2rgb24_2_c;
2431 *yuv2packedX = yuv2rgb24_X_c;
2434 *yuv2packed1 = yuv2bgr24_1_c;
2435 *yuv2packed2 = yuv2bgr24_2_c;
2436 *yuv2packedX = yuv2bgr24_X_c;
2438 case PIX_FMT_RGB565LE:
2439 case PIX_FMT_RGB565BE:
2440 case PIX_FMT_BGR565LE:
2441 case PIX_FMT_BGR565BE:
2442 *yuv2packed1 = yuv2rgb16_1_c;
2443 *yuv2packed2 = yuv2rgb16_2_c;
2444 *yuv2packedX = yuv2rgb16_X_c;
2446 case PIX_FMT_RGB555LE:
2447 case PIX_FMT_RGB555BE:
2448 case PIX_FMT_BGR555LE:
2449 case PIX_FMT_BGR555BE:
2450 *yuv2packed1 = yuv2rgb15_1_c;
2451 *yuv2packed2 = yuv2rgb15_2_c;
2452 *yuv2packedX = yuv2rgb15_X_c;
2454 case PIX_FMT_RGB444LE:
2455 case PIX_FMT_RGB444BE:
2456 case PIX_FMT_BGR444LE:
2457 case PIX_FMT_BGR444BE:
2458 *yuv2packed1 = yuv2rgb12_1_c;
2459 *yuv2packed2 = yuv2rgb12_2_c;
2460 *yuv2packedX = yuv2rgb12_X_c;
2464 *yuv2packed1 = yuv2rgb8_1_c;
2465 *yuv2packed2 = yuv2rgb8_2_c;
2466 *yuv2packedX = yuv2rgb8_X_c;
2470 *yuv2packed1 = yuv2rgb4_1_c;
2471 *yuv2packed2 = yuv2rgb4_2_c;
2472 *yuv2packedX = yuv2rgb4_X_c;
2474 case PIX_FMT_RGB4_BYTE:
2475 case PIX_FMT_BGR4_BYTE:
2476 *yuv2packed1 = yuv2rgb4b_1_c;
2477 *yuv2packed2 = yuv2rgb4b_2_c;
2478 *yuv2packedX = yuv2rgb4b_X_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 to enable the verbose ring-buffer
 * logging used throughout swScale() below; compiles away when 0. */
2484 #define DEBUG_SWSCALE_BUFFERS 0
2485 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling loop: for each destination line, horizontally scale the
 * source lines it needs into the luma/chroma ring buffers, then run the
 * vertical filter through the planar or packed output function selected
 * by find_c_packed_planar_out_funcs(). Returns the number of output
 * lines written (dstY - lastDstY).
 * NOTE(review): this chunk is line-sampled -- declarations such as
 * 'int dstY', 'int lastDstY', 'int enough_lines', several assignments,
 * braces and index-increment statements are missing from this view;
 * the structural comments below are to be confirmed against upstream. */
2487 static int swScale(SwsContext *c, const uint8_t* src[],
2488 int srcStride[], int srcSliceY,
2489 int srcSliceH, uint8_t* dst[], int dstStride[])
2491 /* load a few things into local vars to make the code more readable? and faster */
2492 const int srcW= c->srcW;
2493 const int dstW= c->dstW;
2494 const int dstH= c->dstH;
2495 const int chrDstW= c->chrDstW;
2496 const int chrSrcW= c->chrSrcW;
2497 const int lumXInc= c->lumXInc;
2498 const int chrXInc= c->chrXInc;
2499 const enum PixelFormat dstFormat= c->dstFormat;
2500 const int flags= c->flags;
2501 int16_t *vLumFilterPos= c->vLumFilterPos;
2502 int16_t *vChrFilterPos= c->vChrFilterPos;
2503 int16_t *hLumFilterPos= c->hLumFilterPos;
2504 int16_t *hChrFilterPos= c->hChrFilterPos;
2505 int16_t *vLumFilter= c->vLumFilter;
2506 int16_t *vChrFilter= c->vChrFilter;
2507 int16_t *hLumFilter= c->hLumFilter;
2508 int16_t *hChrFilter= c->hChrFilter;
2509 int32_t *lumMmxFilter= c->lumMmxFilter;
2510 int32_t *chrMmxFilter= c->chrMmxFilter;
2511 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2512 const int vLumFilterSize= c->vLumFilterSize;
2513 const int vChrFilterSize= c->vChrFilterSize;
2514 const int hLumFilterSize= c->hLumFilterSize;
2515 const int hChrFilterSize= c->hChrFilterSize;
2516 int16_t **lumPixBuf= c->lumPixBuf;
2517 int16_t **chrUPixBuf= c->chrUPixBuf;
2518 int16_t **chrVPixBuf= c->chrVPixBuf;
2519 int16_t **alpPixBuf= c->alpPixBuf;
2520 const int vLumBufSize= c->vLumBufSize;
2521 const int vChrBufSize= c->vChrBufSize;
2522 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice geometry; the negated shift rounds the height up. */
2523 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2524 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2526 uint32_t *pal=c->pal_yuv;
2528 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2529 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2530 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2531 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2532 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2533 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2535 /* vars which will change and which we need to store back in the context */
2537 int lumBufIndex= c->lumBufIndex;
2538 int chrBufIndex= c->chrBufIndex;
2539 int lastInLumBuf= c->lastInLumBuf;
2540 int lastInChrBuf= c->lastInChrBuf;
2542 if (isPacked(c->srcFormat)) {
2550 srcStride[3]= srcStride[0];
/* vChrDrop skips chroma lines by inflating the chroma strides. */
2552 srcStride[1]<<= c->vChrDrop;
2553 srcStride[2]<<= c->vChrDrop;
2555 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2556 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2557 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2558 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2559 srcSliceY, srcSliceH, dstY, dstH);
2560 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2561 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2563 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2564 static int warnedAlready=0; //FIXME move this into the context perhaps
2565 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2566 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2567 " ->cannot do aligned memory accesses anymore\n");
2572 /* Note the user might start scaling the picture in the middle so this
2573 will not get executed. This is not really intended but works
2574 currently, so people might do it. */
2575 if (srcSliceY ==0) {
2583 if (!should_dither) {
2584 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Per-output-line main loop. */
2588 for (;dstY < dstH; dstY++) {
2589 const int chrDstY= dstY>>c->chrDstVSubSample;
2590 uint8_t *dest[4] = {
2591 dst[0] + dstStride[0] * dstY,
2592 dst[1] + dstStride[1] * chrDstY,
2593 dst[2] + dstStride[2] * chrDstY,
2594 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2597 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2598 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2599 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2600 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2601 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2602 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2605 //handle holes (FAST_BILINEAR & weird filters)
2606 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2607 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2608 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2609 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2611 DEBUG_BUFFERS("dstY: %d\n", dstY);
2612 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2613 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2614 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2615 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2617 // Do we have enough lines in this slice to output the dstY line
2618 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2620 if (!enough_lines) {
2621 lastLumSrcY = srcSliceY + srcSliceH - 1;
2622 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2623 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2624 lastLumSrcY, lastChrSrcY);
2627 //Do horizontal scaling
2628 while(lastInLumBuf < lastLumSrcY) {
2629 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2630 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2632 assert(lumBufIndex < 2*vLumBufSize);
2633 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2634 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2635 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2636 hLumFilter, hLumFilterPos, hLumFilterSize,
2639 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2640 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2641 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2645 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2646 lumBufIndex, lastInLumBuf);
2648 while(lastInChrBuf < lastChrSrcY) {
2649 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2650 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2652 assert(chrBufIndex < 2*vChrBufSize);
2653 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2654 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2655 //FIXME replace parameters through context struct (some at least)
2657 if (c->needs_hcscale)
2658 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2659 chrDstW, src1, src2, chrSrcW, chrXInc,
2660 hChrFilter, hChrFilterPos, hChrFilterSize,
2661 formatConvBuffer, pal);
2663 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2664 chrBufIndex, lastInChrBuf);
2666 //wrap buf index around to stay inside the ring buffer
2667 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2668 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2670 break; //we can't output a dstY line so let's try with the next slice
2673 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2675 if (should_dither) {
2676 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2677 c->lumDither8 = dither_8x8_128[dstY & 7];
2679 if (dstY >= dstH-2) {
2680 // hmm looks like we can't use MMX here without overwriting this array's tail
2681 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2682 &yuv2packed1, &yuv2packed2,
/* Point into the ring buffers at the first input line of the vertical
 * filter window for this output line. */
2687 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2688 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2689 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2690 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2692 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2693 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2694 if ((dstY&chrSkipMask) || isGray(dstFormat))
2695 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2696 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2697 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2698 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2699 dest, dstW, chrDstW);
2700 } else { //General YV12
2701 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2702 lumSrcPtr, vLumFilterSize,
2703 vChrFilter + chrDstY * vChrFilterSize,
2704 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2705 alpSrcPtr, dest, dstW, chrDstW);
2708 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2709 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2710 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2711 int chrAlpha = vChrFilter[2 * dstY + 1];
2712 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2713 alpPixBuf ? *alpSrcPtr : NULL,
2714 dest[0], dstW, chrAlpha, dstY);
2715 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2716 int lumAlpha = vLumFilter[2 * dstY + 1];
2717 int chrAlpha = vChrFilter[2 * dstY + 1];
2719 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2721 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2722 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2723 alpPixBuf ? alpSrcPtr : NULL,
2724 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2725 } else { //general RGB
2726 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2727 lumSrcPtr, vLumFilterSize,
2728 vChrFilter + dstY * vChrFilterSize,
2729 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2730 alpSrcPtr, dest[0], dstW, dstY);
/* YUVA output without an alpha source: fill the alpha plane opaque. */
2736 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2737 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2740 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2741 __asm__ volatile("sfence":::"memory");
2745 /* store changed local vars back in the context */
2747 c->lumBufIndex= lumBufIndex;
2748 c->chrBufIndex= chrBufIndex;
2749 c->lastInLumBuf= lastInLumBuf;
2750 c->lastInChrBuf= lastInChrBuf;
2752 return dstY - lastDstY;
/* One-time initialization of the C function pointers in SwsContext:
 * output writers, per-format input-to-YV12 converters for chroma, luma
 * and alpha (with _half variants when chroma is horizontally
 * subsampled), the horizontal scalers, and the range-conversion hooks.
 * NOTE(review): line-sampled chunk -- several 'switch' headers, 'case'
 * labels, 'if'/'else' lines and braces are missing from this view;
 * confirm the exact dispatch against upstream before modifying. */
2755 static av_cold void sws_init_swScale_c(SwsContext *c)
2757 enum PixelFormat srcFormat = c->srcFormat;
2759 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2760 &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converters (packed/NV/paletted/16-bit swap). */
2763 c->chrToYV12 = NULL;
2765 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2766 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2767 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2768 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2772 case PIX_FMT_BGR4_BYTE:
2773 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10-bit planar: choose hScale16 variant by host vs. sample endianness. */
2774 case PIX_FMT_YUV444P9BE:
2775 case PIX_FMT_YUV420P9BE:
2776 case PIX_FMT_YUV444P10BE:
2777 case PIX_FMT_YUV422P10BE:
2778 case PIX_FMT_YUV420P10BE: c->hScale16= HAVE_BIGENDIAN ? hScale16N_c : hScale16NX_c; break;
2779 case PIX_FMT_YUV444P9LE:
2780 case PIX_FMT_YUV420P9LE:
2781 case PIX_FMT_YUV422P10LE:
2782 case PIX_FMT_YUV420P10LE:
2783 case PIX_FMT_YUV444P10LE: c->hScale16= HAVE_BIGENDIAN ? hScale16NX_c : hScale16N_c; break;
2785 case PIX_FMT_YUV420P16LE:
2786 case PIX_FMT_YUV422P16LE:
2787 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2789 case PIX_FMT_YUV420P16BE:
2790 case PIX_FMT_YUV422P16BE:
2791 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* Horizontally-subsampled chroma: use the *_half converters that
 * average adjacent pixel pairs while converting. */
2794 if (c->chrSrcHSubSample) {
2796 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2797 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2798 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2799 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2800 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2801 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2802 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2803 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2804 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2805 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2806 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2807 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2808 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2809 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2810 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2811 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2812 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2813 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Full-resolution chroma converters. */
2817 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2818 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2819 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2820 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2821 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2822 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2823 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2824 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2825 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2826 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2827 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2828 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2829 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2830 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2831 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2832 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2833 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2834 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha input converters. */
2838 c->lumToYV12 = NULL;
2839 c->alpToYV12 = NULL;
2840 switch (srcFormat) {
2842 case PIX_FMT_YUV420P16LE:
2843 case PIX_FMT_YUV422P16LE:
2844 case PIX_FMT_YUV444P16LE:
2845 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2847 case PIX_FMT_YUV420P16BE:
2848 case PIX_FMT_YUV422P16BE:
2849 case PIX_FMT_YUV444P16BE:
2850 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2852 case PIX_FMT_YUYV422 :
2853 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2854 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2855 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2856 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2857 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2858 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2859 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2860 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2861 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2862 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2863 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2864 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2868 case PIX_FMT_BGR4_BYTE:
2869 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2870 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2871 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2872 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2873 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2874 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2875 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2876 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2877 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2878 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2879 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2882 switch (srcFormat) {
2884 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2886 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2887 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2888 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
2892 if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2893 || c->srcFormat == PIX_FMT_PAL8)
2894 c->hScale16= hScale16N_c;
/* 8-bit scaling path: generic hScale plus optional fast-bilinear and
 * the 8-bit range converters. */
2896 if (c->scalingBpp == 8) {
2897 c->hScale = hScale_c;
2898 if (c->flags & SWS_FAST_BILINEAR) {
2899 c->hyscale_fast = hyscale_fast_c;
2900 c->hcscale_fast = hcscale_fast_c;
2903 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2905 c->lumConvertRange = lumRangeFromJpeg_c;
2906 c->chrConvertRange = chrRangeFromJpeg_c;
2908 c->lumConvertRange = lumRangeToJpeg_c;
2909 c->chrConvertRange = chrRangeToJpeg_c;
/* 16-bit scaling path: byteswap input if needed, install the 16-bit
 * scalers, widen/narrow hooks and the 16-bit range converters. */
2913 if(c->hScale16 == hScale16NX_c && !isAnyRGB(c->srcFormat)){
2914 c->chrToYV12 = bswap16UV_c;
2915 c->lumToYV12 = bswap16Y_c;
2918 c->hScale = hScale16_c;
2919 c->scale19To15Fw = scale19To15Fw_c;
2920 c->scale8To16Rv = scale8To16Rv_c;
2922 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2924 c->lumConvertRange = lumRangeFromJpeg16_c;
2925 c->chrConvertRange = chrRangeFromJpeg16_c;
2927 c->lumConvertRange = lumRangeToJpeg16_c;
2928 c->chrConvertRange = chrRangeToJpeg16_c;
/* Chroma h-scaling can be skipped entirely for grayscale/mono formats. */
2933 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2934 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2935 c->needs_hcscale = 1;
2938 SwsFunc ff_getSwsFunc(SwsContext *c)
2940 sws_init_swScale_c(c);
2943 ff_sws_init_swScale_mmx(c);
2945 ff_sws_init_swScale_altivec(c);