/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24

  tested special converters (most are tested actually, but I did not write it down ...)

  untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/*
 * RGB -> YUV conversion coefficients (ITU-R BT.601), in fixed point scaled
 * by 1 << RGB2YUV_SHIFT.  Luma (Y) terms are scaled to the 219-step video
 * range, chroma (U/V) terms to the 224-step range; negative macros carry
 * the sign of the corresponding matrix entry.
 */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* TODO
  Special versions: fast Y 1:1 scaling (no interpolation in y direction)

  more intelligent misalignment avoidance for the horizontal scaler
  write special vertical cubic upscale version
  optimize C code (YV12 / minmax)
  add support for packed pixel YUV input & output
  add support for Y8 output
  optimize BGR24 & BGR32
  add BGR4 output support
  write special BGR->BGR scaler
*/
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
/* All-64 filter row used as a flat (identity) vertical filter. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};

/* Scale factors applied to the base dither matrices for each combination of
 * source depth (row) and destination depth (column). */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
};
293 static av_always_inline void
294 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
295 int lumFilterSize, const int16_t *chrFilter,
296 const int32_t **chrUSrc, const int32_t **chrVSrc,
297 int chrFilterSize, const int32_t **alpSrc,
298 uint16_t *dest[4], int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int dword= output_bits == 16;
304 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
305 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
306 int shift = 11 + 4*dword + 16 - output_bits - 1;
308 #define output_pixel(pos, val) \
310 AV_WB16(pos, av_clip_uint16(val >> shift)); \
312 AV_WL16(pos, av_clip_uint16(val >> shift)); \
314 for (i = 0; i < dstW; i++) {
315 int val = 1 << (26-output_bits + 4*dword - 1);
318 for (j = 0; j < lumFilterSize; j++)
319 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
321 output_pixel(&yDest[i], val);
325 for (i = 0; i < chrDstW; i++) {
326 int u = 1 << (26-output_bits + 4*dword - 1);
327 int v = 1 << (26-output_bits + 4*dword - 1);
330 for (j = 0; j < chrFilterSize; j++) {
331 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
332 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
335 output_pixel(&uDest[i], u);
336 output_pixel(&vDest[i], v);
340 if (CONFIG_SWSCALE_ALPHA && aDest) {
341 for (i = 0; i < dstW; i++) {
342 int val = 1 << (26-output_bits + 4*dword - 1);
345 for (j = 0; j < lumFilterSize; j++)
346 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
348 output_pixel(&aDest[i], val);
354 static av_always_inline void
355 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
356 int lumFilterSize, const int16_t *chrFilter,
357 const int16_t **chrUSrc, const int16_t **chrVSrc,
358 int chrFilterSize, const int16_t **alpSrc,
359 uint16_t *dest[4], int dstW, int chrDstW,
360 int big_endian, int output_bits)
362 //FIXME Optimize (just quickly written not optimized..)
364 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
365 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
366 int shift = 11 + 16 - output_bits;
368 #define output_pixel(pos, val) \
370 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
372 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
374 for (i = 0; i < dstW; i++) {
375 int val = 1 << (26-output_bits);
378 for (j = 0; j < lumFilterSize; j++)
379 val += lumSrc[j][i] * lumFilter[j];
381 output_pixel(&yDest[i], val);
385 for (i = 0; i < chrDstW; i++) {
386 int u = 1 << (26-output_bits);
387 int v = 1 << (26-output_bits);
390 for (j = 0; j < chrFilterSize; j++) {
391 u += chrUSrc[j][i] * chrFilter[j];
392 v += chrVSrc[j][i] * chrFilter[j];
395 output_pixel(&uDest[i], u);
396 output_pixel(&vDest[i], v);
400 if (CONFIG_SWSCALE_ALPHA && aDest) {
401 for (i = 0; i < dstW; i++) {
402 int val = 1 << (26-output_bits);
405 for (j = 0; j < lumFilterSize; j++)
406 val += alpSrc[j][i] * lumFilter[j];
408 output_pixel(&aDest[i], val);
414 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
415 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
416 const int16_t **_lumSrc, int lumFilterSize, \
417 const int16_t *chrFilter, const int16_t **_chrUSrc, \
418 const int16_t **_chrVSrc, \
419 int chrFilterSize, const int16_t **_alpSrc, \
420 uint8_t *_dest[4], int dstW, int chrDstW) \
422 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
423 **chrUSrc = (const typeX_t **) _chrUSrc, \
424 **chrVSrc = (const typeX_t **) _chrVSrc, \
425 **alpSrc = (const typeX_t **) _alpSrc; \
426 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
427 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
428 alpSrc, (uint16_t **) _dest, \
429 dstW, chrDstW, is_be, bits); \
431 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
432 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
433 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
434 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
435 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
436 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
438 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
439 const int16_t **lumSrc, int lumFilterSize,
440 const int16_t *chrFilter, const int16_t **chrUSrc,
441 const int16_t **chrVSrc,
442 int chrFilterSize, const int16_t **alpSrc,
443 uint8_t *dest[4], int dstW, int chrDstW)
445 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
446 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
448 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
450 //FIXME Optimize (just quickly written not optimized..)
451 for (i=0; i<dstW; i++) {
452 int val = lumDither[i & 7] << 12;
454 for (j=0; j<lumFilterSize; j++)
455 val += lumSrc[j][i] * lumFilter[j];
457 yDest[i]= av_clip_uint8(val>>19);
461 for (i=0; i<chrDstW; i++) {
462 int u = chrDither[i & 7] << 12;
463 int v = chrDither[(i + 3) & 7] << 12;
465 for (j=0; j<chrFilterSize; j++) {
466 u += chrUSrc[j][i] * chrFilter[j];
467 v += chrVSrc[j][i] * chrFilter[j];
470 uDest[i]= av_clip_uint8(u>>19);
471 vDest[i]= av_clip_uint8(v>>19);
474 if (CONFIG_SWSCALE_ALPHA && aDest)
475 for (i=0; i<dstW; i++) {
476 int val = lumDither[i & 7] << 12;
478 for (j=0; j<lumFilterSize; j++)
479 val += alpSrc[j][i] * lumFilter[j];
481 aDest[i]= av_clip_uint8(val>>19);
485 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
486 const int16_t *chrUSrc, const int16_t *chrVSrc,
487 const int16_t *alpSrc,
488 uint8_t *dest[4], int dstW, int chrDstW)
490 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
491 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
493 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
495 for (i=0; i<dstW; i++) {
496 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
497 yDest[i]= av_clip_uint8(val);
501 for (i=0; i<chrDstW; i++) {
502 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
503 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
504 uDest[i]= av_clip_uint8(u);
505 vDest[i]= av_clip_uint8(v);
508 if (CONFIG_SWSCALE_ALPHA && aDest)
509 for (i=0; i<dstW; i++) {
510 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
511 aDest[i]= av_clip_uint8(val);
515 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
516 const int16_t **lumSrc, int lumFilterSize,
517 const int16_t *chrFilter, const int16_t **chrUSrc,
518 const int16_t **chrVSrc, int chrFilterSize,
519 const int16_t **alpSrc, uint8_t *dest[4],
520 int dstW, int chrDstW)
522 uint8_t *yDest = dest[0], *uDest = dest[1];
523 enum PixelFormat dstFormat = c->dstFormat;
524 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
526 //FIXME Optimize (just quickly written not optimized..)
528 for (i=0; i<dstW; i++) {
529 int val = lumDither[i & 7] << 12;
531 for (j=0; j<lumFilterSize; j++)
532 val += lumSrc[j][i] * lumFilter[j];
534 yDest[i]= av_clip_uint8(val>>19);
540 if (dstFormat == PIX_FMT_NV12)
541 for (i=0; i<chrDstW; i++) {
542 int u = chrDither[i & 7] << 12;
543 int v = chrDither[(i + 3) & 7] << 12;
545 for (j=0; j<chrFilterSize; j++) {
546 u += chrUSrc[j][i] * chrFilter[j];
547 v += chrVSrc[j][i] * chrFilter[j];
550 uDest[2*i]= av_clip_uint8(u>>19);
551 uDest[2*i+1]= av_clip_uint8(v>>19);
554 for (i=0; i<chrDstW; i++) {
555 int u = chrDither[i & 7] << 12;
556 int v = chrDither[(i + 3) & 7] << 12;
558 for (j=0; j<chrFilterSize; j++) {
559 u += chrUSrc[j][i] * chrFilter[j];
560 v += chrVSrc[j][i] * chrFilter[j];
563 uDest[2*i]= av_clip_uint8(v>>19);
564 uDest[2*i+1]= av_clip_uint8(u>>19);
568 #define output_pixel(pos, val) \
569 if (target == PIX_FMT_GRAY16BE) { \
575 static av_always_inline void
576 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
577 const int32_t **lumSrc, int lumFilterSize,
578 const int16_t *chrFilter, const int32_t **chrUSrc,
579 const int32_t **chrVSrc, int chrFilterSize,
580 const int32_t **alpSrc, uint16_t *dest, int dstW,
581 int y, enum PixelFormat target)
585 for (i = 0; i < (dstW >> 1); i++) {
590 for (j = 0; j < lumFilterSize; j++) {
591 Y1 += lumSrc[j][i * 2] * lumFilter[j];
592 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
596 if ((Y1 | Y2) & 0x10000) {
597 Y1 = av_clip_uint16(Y1);
598 Y2 = av_clip_uint16(Y2);
600 output_pixel(&dest[i * 2 + 0], Y1);
601 output_pixel(&dest[i * 2 + 1], Y2);
605 static av_always_inline void
606 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
607 const int32_t *ubuf[2], const int32_t *vbuf[2],
608 const int32_t *abuf[2], uint16_t *dest, int dstW,
609 int yalpha, int uvalpha, int y,
610 enum PixelFormat target)
612 int yalpha1 = 4095 - yalpha;
614 const int32_t *buf0 = buf[0], *buf1 = buf[1];
616 for (i = 0; i < (dstW >> 1); i++) {
617 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
618 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
620 output_pixel(&dest[i * 2 + 0], Y1);
621 output_pixel(&dest[i * 2 + 1], Y2);
625 static av_always_inline void
626 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
627 const int32_t *ubuf[2], const int32_t *vbuf[2],
628 const int32_t *abuf0, uint16_t *dest, int dstW,
629 int uvalpha, int y, enum PixelFormat target)
633 for (i = 0; i < (dstW >> 1); i++) {
634 int Y1 = (buf0[i * 2 ]+4)>>3;
635 int Y2 = (buf0[i * 2 + 1]+4)>>3;
637 output_pixel(&dest[i * 2 + 0], Y1);
638 output_pixel(&dest[i * 2 + 1], Y2);
644 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
645 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
646 const int16_t **_lumSrc, int lumFilterSize, \
647 const int16_t *chrFilter, const int16_t **_chrUSrc, \
648 const int16_t **_chrVSrc, int chrFilterSize, \
649 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
652 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
653 **chrUSrc = (const int32_t **) _chrUSrc, \
654 **chrVSrc = (const int32_t **) _chrVSrc, \
655 **alpSrc = (const int32_t **) _alpSrc; \
656 uint16_t *dest = (uint16_t *) _dest; \
657 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
658 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
659 alpSrc, dest, dstW, y, fmt); \
662 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
663 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
664 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
665 int yalpha, int uvalpha, int y) \
667 const int32_t **buf = (const int32_t **) _buf, \
668 **ubuf = (const int32_t **) _ubuf, \
669 **vbuf = (const int32_t **) _vbuf, \
670 **abuf = (const int32_t **) _abuf; \
671 uint16_t *dest = (uint16_t *) _dest; \
672 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
673 dest, dstW, yalpha, uvalpha, y, fmt); \
676 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
677 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
678 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
679 int uvalpha, int y) \
681 const int32_t *buf0 = (const int32_t *) _buf0, \
682 **ubuf = (const int32_t **) _ubuf, \
683 **vbuf = (const int32_t **) _vbuf, \
684 *abuf0 = (const int32_t *) _abuf0; \
685 uint16_t *dest = (uint16_t *) _dest; \
686 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
687 dstW, uvalpha, y, fmt); \
690 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
691 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
693 #define output_pixel(pos, acc) \
694 if (target == PIX_FMT_MONOBLACK) { \
700 static av_always_inline void
701 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
702 const int16_t **lumSrc, int lumFilterSize,
703 const int16_t *chrFilter, const int16_t **chrUSrc,
704 const int16_t **chrVSrc, int chrFilterSize,
705 const int16_t **alpSrc, uint8_t *dest, int dstW,
706 int y, enum PixelFormat target)
708 const uint8_t * const d128=dither_8x8_220[y&7];
709 uint8_t *g = c->table_gU[128] + c->table_gV[128];
713 for (i = 0; i < dstW - 1; i += 2) {
718 for (j = 0; j < lumFilterSize; j++) {
719 Y1 += lumSrc[j][i] * lumFilter[j];
720 Y2 += lumSrc[j][i+1] * lumFilter[j];
724 if ((Y1 | Y2) & 0x100) {
725 Y1 = av_clip_uint8(Y1);
726 Y2 = av_clip_uint8(Y2);
728 acc += acc + g[Y1 + d128[(i + 0) & 7]];
729 acc += acc + g[Y2 + d128[(i + 1) & 7]];
731 output_pixel(*dest++, acc);
736 static av_always_inline void
737 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
738 const int16_t *ubuf[2], const int16_t *vbuf[2],
739 const int16_t *abuf[2], uint8_t *dest, int dstW,
740 int yalpha, int uvalpha, int y,
741 enum PixelFormat target)
743 const int16_t *buf0 = buf[0], *buf1 = buf[1];
744 const uint8_t * const d128 = dither_8x8_220[y & 7];
745 uint8_t *g = c->table_gU[128] + c->table_gV[128];
746 int yalpha1 = 4095 - yalpha;
749 for (i = 0; i < dstW - 7; i += 8) {
750 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
751 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
752 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
753 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
754 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
755 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
756 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
757 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
758 output_pixel(*dest++, acc);
762 static av_always_inline void
763 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const uint8_t * const d128 = dither_8x8_220[y & 7];
769 uint8_t *g = c->table_gU[128] + c->table_gV[128];
772 for (i = 0; i < dstW - 7; i += 8) {
773 int acc = g[(buf0[i ] >> 7) + d128[0]];
774 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
775 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
776 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
777 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
778 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
779 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
780 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
781 output_pixel(*dest++, acc);
787 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
788 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
789 const int16_t **lumSrc, int lumFilterSize, \
790 const int16_t *chrFilter, const int16_t **chrUSrc, \
791 const int16_t **chrVSrc, int chrFilterSize, \
792 const int16_t **alpSrc, uint8_t *dest, int dstW, \
795 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
796 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
797 alpSrc, dest, dstW, y, fmt); \
800 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
801 const int16_t *ubuf[2], const int16_t *vbuf[2], \
802 const int16_t *abuf[2], uint8_t *dest, int dstW, \
803 int yalpha, int uvalpha, int y) \
805 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
806 dest, dstW, yalpha, uvalpha, y, fmt); \
809 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
810 const int16_t *ubuf[2], const int16_t *vbuf[2], \
811 const int16_t *abuf0, uint8_t *dest, int dstW, \
812 int uvalpha, int y) \
814 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
815 abuf0, dest, dstW, uvalpha, \
819 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
820 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
822 #define output_pixels(pos, Y1, U, Y2, V) \
823 if (target == PIX_FMT_YUYV422) { \
824 dest[pos + 0] = Y1; \
826 dest[pos + 2] = Y2; \
830 dest[pos + 1] = Y1; \
832 dest[pos + 3] = Y2; \
835 static av_always_inline void
836 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
837 const int16_t **lumSrc, int lumFilterSize,
838 const int16_t *chrFilter, const int16_t **chrUSrc,
839 const int16_t **chrVSrc, int chrFilterSize,
840 const int16_t **alpSrc, uint8_t *dest, int dstW,
841 int y, enum PixelFormat target)
845 for (i = 0; i < (dstW >> 1); i++) {
852 for (j = 0; j < lumFilterSize; j++) {
853 Y1 += lumSrc[j][i * 2] * lumFilter[j];
854 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
856 for (j = 0; j < chrFilterSize; j++) {
857 U += chrUSrc[j][i] * chrFilter[j];
858 V += chrVSrc[j][i] * chrFilter[j];
864 if ((Y1 | Y2 | U | V) & 0x100) {
865 Y1 = av_clip_uint8(Y1);
866 Y2 = av_clip_uint8(Y2);
867 U = av_clip_uint8(U);
868 V = av_clip_uint8(V);
870 output_pixels(4*i, Y1, U, Y2, V);
874 static av_always_inline void
875 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
876 const int16_t *ubuf[2], const int16_t *vbuf[2],
877 const int16_t *abuf[2], uint8_t *dest, int dstW,
878 int yalpha, int uvalpha, int y,
879 enum PixelFormat target)
881 const int16_t *buf0 = buf[0], *buf1 = buf[1],
882 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
883 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
884 int yalpha1 = 4095 - yalpha;
885 int uvalpha1 = 4095 - uvalpha;
888 for (i = 0; i < (dstW >> 1); i++) {
889 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
890 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
891 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
892 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
894 output_pixels(i * 4, Y1, U, Y2, V);
898 static av_always_inline void
899 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
900 const int16_t *ubuf[2], const int16_t *vbuf[2],
901 const int16_t *abuf0, uint8_t *dest, int dstW,
902 int uvalpha, int y, enum PixelFormat target)
904 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
905 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
908 if (uvalpha < 2048) {
909 for (i = 0; i < (dstW >> 1); i++) {
910 int Y1 = buf0[i * 2] >> 7;
911 int Y2 = buf0[i * 2 + 1] >> 7;
912 int U = ubuf1[i] >> 7;
913 int V = vbuf1[i] >> 7;
915 output_pixels(i * 4, Y1, U, Y2, V);
918 for (i = 0; i < (dstW >> 1); i++) {
919 int Y1 = buf0[i * 2] >> 7;
920 int Y2 = buf0[i * 2 + 1] >> 7;
921 int U = (ubuf0[i] + ubuf1[i]) >> 8;
922 int V = (vbuf0[i] + vbuf1[i]) >> 8;
924 output_pixels(i * 4, Y1, U, Y2, V);
931 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
932 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
934 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
935 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
936 #define output_pixel(pos, val) \
937 if (isBE(target)) { \
943 static av_always_inline void
944 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
945 const int32_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int32_t **chrUSrc,
947 const int32_t **chrVSrc, int chrFilterSize,
948 const int32_t **alpSrc, uint16_t *dest, int dstW,
949 int y, enum PixelFormat target)
953 for (i = 0; i < (dstW >> 1); i++) {
957 int U = -128 << 23; // 19
961 for (j = 0; j < lumFilterSize; j++) {
962 Y1 += lumSrc[j][i * 2] * lumFilter[j];
963 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
965 for (j = 0; j < chrFilterSize; j++) {
966 U += chrUSrc[j][i] * chrFilter[j];
967 V += chrVSrc[j][i] * chrFilter[j];
970 // 8bit: 12+15=27; 16-bit: 12+19=31
976 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
977 Y1 -= c->yuv2rgb_y_offset;
978 Y2 -= c->yuv2rgb_y_offset;
979 Y1 *= c->yuv2rgb_y_coeff;
980 Y2 *= c->yuv2rgb_y_coeff;
983 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
985 R = V * c->yuv2rgb_v2r_coeff;
986 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
987 B = U * c->yuv2rgb_u2b_coeff;
989 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
990 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
991 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
992 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
993 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
994 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
995 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1000 static av_always_inline void
1001 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
1002 const int32_t *ubuf[2], const int32_t *vbuf[2],
1003 const int32_t *abuf[2], uint16_t *dest, int dstW,
1004 int yalpha, int uvalpha, int y,
1005 enum PixelFormat target)
1007 const int32_t *buf0 = buf[0], *buf1 = buf[1],
1008 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1009 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1010 int yalpha1 = 4095 - yalpha;
1011 int uvalpha1 = 4095 - uvalpha;
1014 for (i = 0; i < (dstW >> 1); i++) {
1015 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
1016 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
1017 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
1018 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
1021 Y1 -= c->yuv2rgb_y_offset;
1022 Y2 -= c->yuv2rgb_y_offset;
1023 Y1 *= c->yuv2rgb_y_coeff;
1024 Y2 *= c->yuv2rgb_y_coeff;
1028 R = V * c->yuv2rgb_v2r_coeff;
1029 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1030 B = U * c->yuv2rgb_u2b_coeff;
1032 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1033 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1034 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1035 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1036 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1037 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/**
 * Write one line of packed 48-bit RGB/BGR (16 bits per component, via
 * output_pixel) from a single 32-bit intermediate luma line buf0, i.e.
 * the special case with no vertical luma interpolation.
 *
 * uvalpha < 2048 means the chroma of the current line (ubuf0/vbuf0) is
 * used as-is; otherwise the two adjacent chroma lines are averaged.
 * The (-128 << 11) / (-128 << 12) terms re-center chroma around zero.
 */
static av_always_inline void
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
                       const int32_t *ubuf[2], const int32_t *vbuf[2],
                       const int32_t *abuf0, uint16_t *dest, int dstW,
                       int uvalpha, int y, enum PixelFormat target)
    const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];

    if (uvalpha < 2048) {
        /* no vertical chroma interpolation: nearest chroma line only */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
            int V  = (vbuf0[i] + (-128 << 11)) >> 2;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            /* clip to 30 bits, then drop the fixed-point fraction */
            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2( G  + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2( G  + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
        /* chroma is averaged from the two adjacent lines (>> 3: /2 plus
         * the same scaling as the branch above) */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2( G  + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2( G  + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate the 16-bit-per-component packed RGB output functions
 * (X/2/1 variants) for the four 48-bit layouts. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/**
 * Store two horizontally adjacent RGB pixels (indices i*2 and i*2+1 of the
 * output line) for any packed RGB target, using the per-component lookup
 * tables _r/_g/_b (table_rV/table_gU+gV/table_bU from SwsContext).
 *
 * The tables are pre-shifted so that a simple sum r[Y]+g[Y]+b[Y] yields the
 * packed pixel value; dithering offsets are added to the table index for
 * the low-depth targets.
 *
 * @param Y1,Y2 8-bit luma of the two pixels (table indices)
 * @param U,V   8-bit chroma shared by both pixels
 * @param A1,A2 8-bit alpha, used only when hasAlpha is set
 * @param y     output line number, selects the dither pattern row
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              int U, int V, int A1, int A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        /* 32-bit targets: one uint32 per pixel, alpha merged in at byte 0
         * or byte 3 depending on the *_1 layouts */
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
        int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        /* 24-bit targets: three bytes per pixel, component order swapped
         * between RGB24 and BGR24 via the r_b/b_r macros below */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)

        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] =   g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] =   g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        /* 16-bit targets: table index is biased by a per-position dither
         * offset chosen from the 2x2 (565/555) or 4x4 (444) matrices */
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_4[ y & 1     ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_4[ y & 1     ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_8[ y & 1     ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_8[ y & 1     ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
            dr1 = dither_4x4_16[ y & 3     ][0];
            dg1 = dither_4x4_16[ y & 3     ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3     ][1];
            dg2 = dither_4x4_16[ y & 3     ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        /* lowest-depth targets use the 8x8 ordered-dither matrices; for
         * RGB4/BGR4 two pixels are packed into one byte (nibbles) */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
            const uint8_t * const d64  = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];

        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                    ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/**
 * Full vertical-filter path for 8-bit packed RGB output: accumulate the
 * luma/chroma (and optionally alpha) filter taps, clip the results to
 * 8 bits, look up the per-component tables and hand the two pixels to
 * yuv2rgb_write().
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
    for (i = 0; i < (dstW >> 1); i++) {
        int av_unused A1, A2;
        const void *r, *g, *b;

        /* vertical filtering: two luma samples per iteration */
        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        /* only clip when a value actually left the 8-bit range */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        for (j = 0; j < lumFilterSize; j++) {
            A1 += alpSrc[j][i * 2    ] * lumFilter[j];
            A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
        if ((A1 | A2) & 0x100) {
            A1 = av_clip_uint8(A1);
            A2 = av_clip_uint8(A2);

        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
        g = (c->table_gU[U] + c->table_gV[V]);

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
/**
 * Two-input-line (bilinear) path for 8-bit packed RGB output: blend the
 * two source lines with the 12-bit weights yalpha/uvalpha (4096 total),
 * then write pairs of pixels via yuv2rgb_write().
 */
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    /* complementary weights: line0 * yalpha1 + line1 * yalpha */
    int  yalpha1 = 4095 -  yalpha;
    int uvalpha1 = 4095 - uvalpha;

    for (i = 0; i < (dstW >> 1); i++) {
        /* >> 19: 15-bit samples * 12-bit weight -> 8-bit result */
        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;

        const void *r =  c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b =  c->table_bU[U];

        A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
        A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;

        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
1329 static av_always_inline void
1330 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1331 const int16_t *ubuf[2], const int16_t *vbuf[2],
1332 const int16_t *abuf0, uint8_t *dest, int dstW,
1333 int uvalpha, int y, enum PixelFormat target,
1336 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1337 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1340 if (uvalpha < 2048) {
1341 for (i = 0; i < (dstW >> 1); i++) {
1342 int Y1 = buf0[i * 2] >> 7;
1343 int Y2 = buf0[i * 2 + 1] >> 7;
1344 int U = ubuf1[i] >> 7;
1345 int V = vbuf1[i] >> 7;
1347 const void *r = c->table_rV[V],
1348 *g = (c->table_gU[U] + c->table_gV[V]),
1349 *b = c->table_bU[U];
1352 A1 = abuf0[i * 2 ] >> 7;
1353 A2 = abuf0[i * 2 + 1] >> 7;
1356 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1357 r, g, b, y, target, hasAlpha);
1360 for (i = 0; i < (dstW >> 1); i++) {
1361 int Y1 = buf0[i * 2] >> 7;
1362 int Y2 = buf0[i * 2 + 1] >> 7;
1363 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1364 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1366 const void *r = c->table_rV[V],
1367 *g = (c->table_gU[U] + c->table_gV[V]),
1368 *b = c->table_bU[U];
1371 A1 = abuf0[i * 2 ] >> 7;
1372 A2 = abuf0[i * 2 + 1] >> 7;
1375 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1376 r, g, b, y, target, hasAlpha);
/* Generate the public X (full vertical filter) output function for one
 * pixel format by delegating to the shared template. */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \

/* Generate all three variants (X, 2-line bilinear, 1-line) for a format. */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \

static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
/* 32-bit RGB variants: alpha support decided at runtime by c->alpPixBuf
 * (first two), plus forced-alpha (a32*) and no-alpha (x32*) versions. */
YUV2RGBWRAPPER(yuv2rgb,, 32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPER(yuv2rgb,, 32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1,   1);
YUV2RGBWRAPPER(yuv2rgb,, a32,   PIX_FMT_RGB32,     1);
YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1,   0);
YUV2RGBWRAPPER(yuv2rgb,, x32,   PIX_FMT_RGB32,     0);
/* lower-depth targets never carry alpha */
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
YUV2RGBWRAPPER(yuv2rgb,, 16,    PIX_FMT_RGB565,    0);
YUV2RGBWRAPPER(yuv2rgb,, 15,    PIX_FMT_RGB555,    0);
YUV2RGBWRAPPER(yuv2rgb,, 12,    PIX_FMT_RGB444,    0);
YUV2RGBWRAPPER(yuv2rgb,, 8,     PIX_FMT_RGB8,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4,     PIX_FMT_RGB4,      0);
YUV2RGBWRAPPER(yuv2rgb,, 4b,    PIX_FMT_RGB4_BYTE, 0);
/**
 * Full-chroma (per-pixel chroma, no 2:1 subsampling of the output loop)
 * RGB writer: computes R/G/B per pixel with the fixed-point coefficients
 * from SwsContext instead of lookup tables. step is 3 for 24-bit and 4
 * for 32-bit targets.
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;

    for (i = 0; i < dstW; i++) {
        /* rounding bias plus chroma re-centering around 128 */
        int U = (1<<9)-(128 << 19);
        int V = (1<<9)-(128 << 19);

        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        for (j = 0; j < lumFilterSize; j++) {
            A += alpSrc[j][i] * lumFilter[j];
        A = av_clip_uint8(A);

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        R = Y + V*c->yuv2rgb_v2r_coeff;
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
        B = Y +                          U*c->yuv2rgb_u2b_coeff;
        /* clip only when a component left the 30-bit range */
        if ((R | G | B) & 0xC0000000) {
            R = av_clip_uintp2(R, 30);
            G = av_clip_uintp2(G, 30);
            B = av_clip_uintp2(B, 30);

        /* alpha goes to byte 0 (ARGB/ABGR) or byte 3 (RGBA/BGRA) */
        dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
            dest[0] = hasAlpha ? A : 255;
            dest[3] = hasAlpha ? A : 255;
/* Full-chroma X-variant writers for the 32-bit layouts (runtime alpha,
 * forced alpha, no alpha) and the two 24-bit layouts. */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
/* Fill a rectangular region of a plane with a constant byte value,
 * one memset per row. */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
/* Read one 16-bit component with the endianness of the source format. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* For BGR48 sources the first/third components are swapped relative to
 * RGB48, so map the r/b names onto the raw r_b/b_r reads accordingly. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Convert one line of 48-bit RGB/BGR (16 bits per component) to 16-bit luma
 * using the RY/GY/BY fixed-point coefficients, with rounding. */
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g   = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);

        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert one line of 48-bit RGB/BGR to 16-bit U/V planes without chroma
 * subsampling (one chroma sample per pixel). */
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g   = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);

        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same as rgb48ToUV_c_template but with 2:1 horizontal chroma subsampling:
 * each chroma sample averages two adjacent pixels (rounded). */
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g   = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;

        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the uint8_t*-typed entry points (ToY / ToUV / ToUV_half) for one
 * 48-bit format by casting to uint16_t and calling the shared templates. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
                                            int width, uint32_t *unused) \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \

static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \

static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* Instantiate the 48-bit input converters for all four format variants. */
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);

/* Read one packed pixel: 32-bit native read for the 4-byte-per-pixel
 * formats, endian-aware 16-bit read for the 15/16-bit formats. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed-RGB (15/16/32 bit) to 15-bit luma converter. The caller
 * supplies per-format shift/mask parameters so one template serves every
 * layout; S is the total fixed-point scale of the coefficients. */
static av_always_inline void
rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
              rnd = (32<<((S)-1)) + (1<<(S-7));

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Generic packed-RGB to 15-bit U/V converter, one chroma sample per pixel;
 * parameterized the same way as rgb16_32ToY_c_template. */
static av_always_inline void
rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = (256<<((S)-1)) + (1<<(S-7));

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* Generic packed-RGB to 15-bit U/V converter with 2:1 horizontal chroma
 * subsampling. Two adjacent pixels are summed while still packed: the
 * green mask (maskgx) is removed first so red+blue can be added without
 * carrying into each other, and the masks are widened by one bit to cover
 * the sums. */
static av_always_inline void
rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);

    /* widen masks: the per-component sums need one extra bit */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g = (g & maskg) >> shg;
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
        dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Generate the three entry points (ToY / ToUV / ToUV_half) for one packed
 * RGB layout by fixing the template's shift/mask parameters. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
                          int width, uint32_t *unused) \
    rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
                           shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \

static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
    rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
                            shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \

static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
    rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
                                 shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Instantiate converters for every packed 32/16/15-bit layout; the mask
 * and shift columns describe where each component sits in the pixel. */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/* Planar GBR (one byte per component, separate g/b/r planes) to 15-bit
 * luma (8-bit value scaled by 64, with rounding). */
static void gbr24pToY_c(uint16_t *dst, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
                        int width, uint32_t *unused)
    for (i = 0; i < width; i++) {
        unsigned int g = gsrc[i];
        unsigned int b = bsrc[i];
        unsigned int r = rsrc[i];

        dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar GBR to 15-bit U/V, one chroma sample per pixel. */
static void gbr24pToUV_c(uint16_t *dstU, uint16_t *dstV,
                         const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
                         int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int g = gsrc[i];
        unsigned int b = bsrc[i];
        unsigned int r = rsrc[i];

        dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
        dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar GBR to 15-bit U/V with 2:1 horizontal chroma subsampling: each
 * chroma sample is computed from the sum of two adjacent pixels. */
static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
                              const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
                              int width, enum PixelFormat origin)
    for (i = 0; i < width; i++) {
        unsigned int g = gsrc[2*i] + gsrc[2*i+1];
        unsigned int b = bsrc[2*i] + bsrc[2*i+1];
        unsigned int r = rsrc[2*i] + rsrc[2*i+1];

        dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
        dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/* Extract the alpha channel of packed ABGR (byte 0 of each 4-byte pixel)
 * into a 15-bit-range plane (8-bit value << 6). */
static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = src[4 * n] << 6;
}
/* Extract the alpha channel of packed RGBA (byte 3 of each 4-byte pixel)
 * into a 15-bit-range plane (8-bit value << 6). */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = src[4 * n + 3] << 6;
}
/* PAL8 to alpha plane: look the pixel up in the 32-bit palette and take
 * bits 24-31 (alpha), scaled to 15-bit range. */
static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
    for (i=0; i<width; i++) {
        dst[i]= (pal[d] >> 24)<<6;
/* PAL8 to luma plane: low byte of the palette entry, scaled to 15-bit
 * range. NOTE(review): width is `long` here while the sibling converters
 * use `int` — looks like an inconsistency; confirm against the function
 * pointer type it is assigned to. */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
    for (i=0; i<width; i++) {
        dst[i]= (pal[d] & 0xFF)<<6;
/* PAL8 to chroma planes: bytes 1 and 2 of the palette entry, scaled to
 * 15-bit range. Both source pointers must alias the same line.
 * NOTE(review): dstU is uint16_t* while dstV is int16_t* — the stored
 * values fit either way, but this looks inconsistent; confirm. */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= (uint8_t)(p>> 8)<<6;
        dstV[i]= (uint8_t)(p>>16)<<6;
/* 1 bpp "white is 1" bitmap to 15-bit luma: each bit expands to 0 or
 * 16383; the tail loop handles a width that is not a multiple of 8. */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* 1 bpp "black is 1" bitmap to 15-bit luma; same expansion as
 * monowhite2Y_c (the polarity handling is in the elided byte read). */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
    for (i=0; i<width/8; i++) {
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
//FIXME yuy2* can read up to 7 samples too much

/* YUY2 (Y0 U Y1 V) to luma plane: copy every second byte. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
    for (i=0; i<width; i++)
/* Deinterleave the chroma of a YUY2 (Y0 U Y1 V) line into separate U and
 * V planes; one U/V pair per two luma samples. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *px = src1 + 4 * n;
        dstU[n] = px[1];
        dstV[n] = px[3];
    }
    /* both chroma source pointers must reference the same line */
    assert(src1 == src2);
}
/* Byte-swap one line of 16-bit luma samples (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    const uint16_t *in  = (const uint16_t *) _src;
    uint16_t       *out = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        out[n] = av_bswap16(in[n]);
}
/* Byte-swap one line each of 16-bit U and V samples (endianness
 * conversion); the two planes come from independent source pointers. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU = (uint16_t *) _dstU;
    uint16_t *outV = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        outU[n] = av_bswap16(inU[n]);
        outV[n] = av_bswap16(inV[n]);
    }
}
/* This is almost identical to the previous, end exists only because
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY (U Y0 V Y1) to luma plane: copy every second byte, offset by one. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
    for (i=0; i<width; i++)
/* Deinterleave the chroma of a UYVY (U Y0 V Y1) line into separate U and
 * V planes; one U/V pair per two luma samples. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        const uint8_t *px = src1 + 4 * n;
        dstU[n] = px[0];
        dstV[n] = px[2];
    }
    /* both chroma source pointers must reference the same line */
    assert(src1 == src2);
}
1960 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1961 const uint8_t *src, int width)
1964 for (i = 0; i < width; i++) {
1965 dst1[i] = src[2*i+0];
1966 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — U goes to the first plane. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — destination planes are swapped. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
    nvXXtoUV_c(dstV, dstU, src1, width);
/* Endian-aware 16-bit component read (redefined for the following group). */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* Packed BGR24 to 15-bit luma with rounding. */
static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                       int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed BGR24 to 15-bit U/V, one chroma sample per pixel; both source
 * pointers must alias the same line. */
static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
    assert(src1 == src2);
/* Packed BGR24 to 15-bit U/V with 2:1 horizontal chroma subsampling:
 * each chroma sample is computed from the sum of two adjacent pixels. */
static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int b= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int r= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);

    assert(src1 == src2);
/* Packed RGB24 to 15-bit luma with rounding (RGB byte order). */
static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
    for (i=0; i<width; i++) {
        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed RGB24 to 15-bit U/V, one chroma sample per pixel. */
static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[3*i + 0];
        int g= src1[3*i + 1];
        int b= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* Packed RGB24 to 15-bit U/V with 2:1 horizontal chroma subsampling. */
static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                             const uint8_t *src2, int width, uint32_t *unused)
    for (i=0; i<width; i++) {
        int r= src1[6*i + 0] + src1[6*i + 3];
        int g= src1[6*i + 1] + src1[6*i + 4];
        int b= src1[6*i + 2] + src1[6*i + 5];

        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Horizontal scaler for >8-bit input producing 19-bit intermediates:
 * apply the 14-bit FIR filter at each output position and clamp. */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    /* RGB/PAL8 sources below 16 bits use a different effective depth */
    if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Horizontal scaler for >8-bit input producing 15-bit intermediates. */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    const uint16_t *src = (const uint16_t *) _src;
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    /* RGB/PAL8 sources use a fixed shift of 13 */
    sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;

    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2122 // bilinear / bicubic scaling
2123 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2124 const int16_t *filter, const int16_t *filterPos,
2128 for (i=0; i<dstW; i++) {
2130 int srcPos= filterPos[i];
2132 for (j=0; j<filterSize; j++) {
2133 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2135 //filter += hFilterSize;
2136 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
2141 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2142 const int16_t *filter, const int16_t *filterPos,
2146 int32_t *dst = (int32_t *) _dst;
2147 for (i=0; i<dstW; i++) {
2149 int srcPos= filterPos[i];
2151 for (j=0; j<filterSize; j++) {
2152 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2154 //filter += hFilterSize;
2155 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2160 //FIXME all pal and rgb srcFormats could do this conversion as well
2161 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain; the input is clamped first so the widened result
 * cannot overflow. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/* Compress full-range (JPEG) chroma to limited-range (MPEG) in the 15-bit
 * intermediate domain. The affine map keeps the chroma center fixed
 * (16384 -> 16384) and pulls 0 up to 1992. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG) in the 15-bit
 * intermediate domain; input is clamped before widening. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/* Compress full-range (JPEG) luma to limited-range (MPEG) in the 15-bit
 * intermediate domain: 0 maps to 2048 (16 << 7). */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/* 19-bit variant of chrRangeToJpeg_c: limited- to full-range chroma.
 * The buffers actually hold int32_t samples (constants are the 15-bit
 * ones scaled by 16). */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;

    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
    }
}
/* 19-bit variant of chrRangeFromJpeg_c: full- to limited-range chroma.
 * The buffers actually hold int32_t samples. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;

    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/* 19-bit variant of lumRangeToJpeg_c: limited- to full-range luma.
 * The buffer actually holds int32_t samples. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;

    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
}
/* 19-bit variant of lumRangeFromJpeg_c: full- to limited-range luma.
 * The buffer actually holds int32_t samples; 0 maps to 32775 (~16 << 11). */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;

    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
2226 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2227 const uint8_t *src, int srcW, int xInc)
2230 unsigned int xpos=0;
2231 for (i=0;i<dstWidth;i++) {
2232 register unsigned int xx=xpos>>16;
2233 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2234 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2237 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2238 dst[i] = src[srcW-1]*128;
2241 // *** horizontal scale Y line to temp buffer
2242 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2243 const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
2245 const int16_t *hLumFilter,
2246 const int16_t *hLumFilterPos, int hLumFilterSize,
2247 uint8_t *formatConvBuffer,
2248 uint32_t *pal, int isAlpha)
2250 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2251 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2254 toYV12(formatConvBuffer, src, src2, src3, srcW, pal);
2255 src= formatConvBuffer;
2258 if (!c->hyscale_fast) {
2259 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2260 } else { // fast bilinear upscale / crap downscale
2261 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2265 convertRange(dst, dstWidth);
2268 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2269 int dstWidth, const uint8_t *src1,
2270 const uint8_t *src2, int srcW, int xInc)
2273 unsigned int xpos=0;
2274 for (i=0;i<dstWidth;i++) {
2275 register unsigned int xx=xpos>>16;
2276 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2277 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2278 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2281 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2282 dst1[i] = src1[srcW-1]*128;
2283 dst2[i] = src2[srcW-1]*128;
2287 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2288 const uint8_t *src0, const uint8_t *src1, const uint8_t *src2,
2289 int srcW, int xInc, const int16_t *hChrFilter,
2290 const int16_t *hChrFilterPos, int hChrFilterSize,
2291 uint8_t *formatConvBuffer, uint32_t *pal)
2294 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2295 c->chrToYV12(formatConvBuffer, buf2, src0, src1, src2, srcW, pal);
2296 src1= formatConvBuffer;
2300 if (!c->hcscale_fast) {
2301 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2302 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2303 } else { // fast bilinear upscale / crap downscale
2304 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2307 if (c->chrConvertRange)
2308 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Picks the C output functions for the destination format: the planar
 * writers (*yuv2yuv1 / *yuv2yuvX, chosen by destination bit depth and
 * endianness) and the packed writers (*yuv2packed1/2/X). When
 * SWS_FULL_CHR_H_INT is set, full-horizontal-chroma packed writers are
 * selected instead for the RGB/BGR 24/32-bit formats.
 *
 * NOTE(review): extraction artifact — every line below carries a stray
 * original line number, and some lines (case labels, braces, several
 * CONFIG_SMALL preprocessor lines, break statements) are missing from
 * this dump. Code left byte-identical; only comments added.
 */
2311 static av_always_inline void
2312 find_c_packed_planar_out_funcs(SwsContext *c,
2313 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2314 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2315 yuv2packedX_fn *yuv2packedX)
2317 enum PixelFormat dstFormat = c->dstFormat;
/* --- planar writers: selected by destination depth/endianness --- */
2319 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2320 *yuv2yuvX = yuv2nv12X_c;
2321 } else if (is16BPS(dstFormat)) {
2322 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2323 } else if (is9_OR_10BPS(dstFormat)) {
2324 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2325 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2327 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2330 *yuv2yuv1 = yuv2yuv1_c;
2331 *yuv2yuvX = yuv2yuvX_c;
/* --- packed writers, full-horizontal-chroma variants --- */
2333 if(c->flags & SWS_FULL_CHR_H_INT) {
2334 switch (dstFormat) {
2337 *yuv2packedX = yuv2rgba32_full_X_c;
2339 #if CONFIG_SWSCALE_ALPHA
2341 *yuv2packedX = yuv2rgba32_full_X_c;
2343 #endif /* CONFIG_SWSCALE_ALPHA */
2345 *yuv2packedX = yuv2rgbx32_full_X_c;
2347 #endif /* !CONFIG_SMALL */
2351 *yuv2packedX = yuv2argb32_full_X_c;
2353 #if CONFIG_SWSCALE_ALPHA
2355 *yuv2packedX = yuv2argb32_full_X_c;
2357 #endif /* CONFIG_SWSCALE_ALPHA */
2359 *yuv2packedX = yuv2xrgb32_full_X_c;
2361 #endif /* !CONFIG_SMALL */
2365 *yuv2packedX = yuv2bgra32_full_X_c;
2367 #if CONFIG_SWSCALE_ALPHA
2369 *yuv2packedX = yuv2bgra32_full_X_c;
2371 #endif /* CONFIG_SWSCALE_ALPHA */
2373 *yuv2packedX = yuv2bgrx32_full_X_c;
2375 #endif /* !CONFIG_SMALL */
2379 *yuv2packedX = yuv2abgr32_full_X_c;
2381 #if CONFIG_SWSCALE_ALPHA
2383 *yuv2packedX = yuv2abgr32_full_X_c;
2385 #endif /* CONFIG_SWSCALE_ALPHA */
2387 *yuv2packedX = yuv2xbgr32_full_X_c;
2389 #endif /* !CONFIG_SMALL */
2392 *yuv2packedX = yuv2rgb24_full_X_c;
2395 *yuv2packedX = yuv2bgr24_full_X_c;
/* --- packed writers, normal (subsampled-chroma) variants --- */
2402 switch (dstFormat) {
2403 case PIX_FMT_GRAY16BE:
2404 *yuv2packed1 = yuv2gray16BE_1_c;
2405 *yuv2packed2 = yuv2gray16BE_2_c;
2406 *yuv2packedX = yuv2gray16BE_X_c;
2408 case PIX_FMT_GRAY16LE:
2409 *yuv2packed1 = yuv2gray16LE_1_c;
2410 *yuv2packed2 = yuv2gray16LE_2_c;
2411 *yuv2packedX = yuv2gray16LE_X_c;
2413 case PIX_FMT_MONOWHITE:
2414 *yuv2packed1 = yuv2monowhite_1_c;
2415 *yuv2packed2 = yuv2monowhite_2_c;
2416 *yuv2packedX = yuv2monowhite_X_c;
2418 case PIX_FMT_MONOBLACK:
2419 *yuv2packed1 = yuv2monoblack_1_c;
2420 *yuv2packed2 = yuv2monoblack_2_c;
2421 *yuv2packedX = yuv2monoblack_X_c;
2423 case PIX_FMT_YUYV422:
2424 *yuv2packed1 = yuv2yuyv422_1_c;
2425 *yuv2packed2 = yuv2yuyv422_2_c;
2426 *yuv2packedX = yuv2yuyv422_X_c;
2428 case PIX_FMT_UYVY422:
2429 *yuv2packed1 = yuv2uyvy422_1_c;
2430 *yuv2packed2 = yuv2uyvy422_2_c;
2431 *yuv2packedX = yuv2uyvy422_X_c;
2433 case PIX_FMT_RGB48LE:
2434 *yuv2packed1 = yuv2rgb48le_1_c;
2435 *yuv2packed2 = yuv2rgb48le_2_c;
2436 *yuv2packedX = yuv2rgb48le_X_c;
2438 case PIX_FMT_RGB48BE:
2439 *yuv2packed1 = yuv2rgb48be_1_c;
2440 *yuv2packed2 = yuv2rgb48be_2_c;
2441 *yuv2packedX = yuv2rgb48be_X_c;
2443 case PIX_FMT_BGR48LE:
2444 *yuv2packed1 = yuv2bgr48le_1_c;
2445 *yuv2packed2 = yuv2bgr48le_2_c;
2446 *yuv2packedX = yuv2bgr48le_X_c;
2448 case PIX_FMT_BGR48BE:
2449 *yuv2packed1 = yuv2bgr48be_1_c;
2450 *yuv2packed2 = yuv2bgr48be_2_c;
2451 *yuv2packedX = yuv2bgr48be_X_c;
2456 *yuv2packed1 = yuv2rgb32_1_c;
2457 *yuv2packed2 = yuv2rgb32_2_c;
2458 *yuv2packedX = yuv2rgb32_X_c;
2460 #if CONFIG_SWSCALE_ALPHA
2462 *yuv2packed1 = yuv2rgba32_1_c;
2463 *yuv2packed2 = yuv2rgba32_2_c;
2464 *yuv2packedX = yuv2rgba32_X_c;
2466 #endif /* CONFIG_SWSCALE_ALPHA */
2468 *yuv2packed1 = yuv2rgbx32_1_c;
2469 *yuv2packed2 = yuv2rgbx32_2_c;
2470 *yuv2packedX = yuv2rgbx32_X_c;
2472 #endif /* !CONFIG_SMALL */
2474 case PIX_FMT_RGB32_1:
2475 case PIX_FMT_BGR32_1:
2477 *yuv2packed1 = yuv2rgb32_1_1_c;
2478 *yuv2packed2 = yuv2rgb32_1_2_c;
2479 *yuv2packedX = yuv2rgb32_1_X_c;
2481 #if CONFIG_SWSCALE_ALPHA
2483 *yuv2packed1 = yuv2rgba32_1_1_c;
2484 *yuv2packed2 = yuv2rgba32_1_2_c;
2485 *yuv2packedX = yuv2rgba32_1_X_c;
2487 #endif /* CONFIG_SWSCALE_ALPHA */
2489 *yuv2packed1 = yuv2rgbx32_1_1_c;
2490 *yuv2packed2 = yuv2rgbx32_1_2_c;
2491 *yuv2packedX = yuv2rgbx32_1_X_c;
2493 #endif /* !CONFIG_SMALL */
2496 *yuv2packed1 = yuv2rgb24_1_c;
2497 *yuv2packed2 = yuv2rgb24_2_c;
2498 *yuv2packedX = yuv2rgb24_X_c;
2501 *yuv2packed1 = yuv2bgr24_1_c;
2502 *yuv2packed2 = yuv2bgr24_2_c;
2503 *yuv2packedX = yuv2bgr24_X_c;
2505 case PIX_FMT_RGB565LE:
2506 case PIX_FMT_RGB565BE:
2507 case PIX_FMT_BGR565LE:
2508 case PIX_FMT_BGR565BE:
2509 *yuv2packed1 = yuv2rgb16_1_c;
2510 *yuv2packed2 = yuv2rgb16_2_c;
2511 *yuv2packedX = yuv2rgb16_X_c;
2513 case PIX_FMT_RGB555LE:
2514 case PIX_FMT_RGB555BE:
2515 case PIX_FMT_BGR555LE:
2516 case PIX_FMT_BGR555BE:
2517 *yuv2packed1 = yuv2rgb15_1_c;
2518 *yuv2packed2 = yuv2rgb15_2_c;
2519 *yuv2packedX = yuv2rgb15_X_c;
2521 case PIX_FMT_RGB444LE:
2522 case PIX_FMT_RGB444BE:
2523 case PIX_FMT_BGR444LE:
2524 case PIX_FMT_BGR444BE:
2525 *yuv2packed1 = yuv2rgb12_1_c;
2526 *yuv2packed2 = yuv2rgb12_2_c;
2527 *yuv2packedX = yuv2rgb12_X_c;
2531 *yuv2packed1 = yuv2rgb8_1_c;
2532 *yuv2packed2 = yuv2rgb8_2_c;
2533 *yuv2packedX = yuv2rgb8_X_c;
2537 *yuv2packed1 = yuv2rgb4_1_c;
2538 *yuv2packed2 = yuv2rgb4_2_c;
2539 *yuv2packedX = yuv2rgb4_X_c;
2541 case PIX_FMT_RGB4_BYTE:
2542 case PIX_FMT_BGR4_BYTE:
2543 *yuv2packed1 = yuv2rgb4b_1_c;
2544 *yuv2packed2 = yuv2rgb4b_2_c;
2545 *yuv2packedX = yuv2rgb4b_X_c;
/* Compile-time trace switch for the ring-buffer bookkeeping in swScale();
 * DEBUG_BUFFERS() compiles to a dead branch unless DEBUG_SWSCALE_BUFFERS
 * is changed to 1 (it expects a SwsContext `c` in scope for av_log). */
2551 #define DEBUG_SWSCALE_BUFFERS 0
2552 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Core scaling entry point for one input slice. Horizontally scales the
 * needed source lines into the luma/chroma/alpha ring buffers
 * (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf), then vertically filters
 * those buffers and writes each destination line through the yuv2*
 * output functions. Returns the number of destination lines produced
 * (dstY - lastDstY); ring-buffer state is stored back into the context
 * so the next slice can continue where this one stopped.
 *
 * NOTE(review): extraction artifact — stray original line numbers prefix
 * each line and several lines are missing from this dump (opening braces,
 * declarations such as dstY/lastDstY/i, parts of the packed-input setup,
 * loop-closing braces). Code left byte-identical; only comments added.
 */
2554 static int swScale(SwsContext *c, const uint8_t* src[],
2555 int srcStride[], int srcSliceY,
2556 int srcSliceH, uint8_t* dst[], int dstStride[])
2558 /* load a few things into local vars to make the code more readable? and faster */
2559 const int srcW= c->srcW;
2560 const int dstW= c->dstW;
2561 const int dstH= c->dstH;
2562 const int chrDstW= c->chrDstW;
2563 const int chrSrcW= c->chrSrcW;
2564 const int lumXInc= c->lumXInc;
2565 const int chrXInc= c->chrXInc;
2566 const enum PixelFormat dstFormat= c->dstFormat;
2567 const int flags= c->flags;
2568 int16_t *vLumFilterPos= c->vLumFilterPos;
2569 int16_t *vChrFilterPos= c->vChrFilterPos;
2570 int16_t *hLumFilterPos= c->hLumFilterPos;
2571 int16_t *hChrFilterPos= c->hChrFilterPos;
2572 int16_t *vLumFilter= c->vLumFilter;
2573 int16_t *vChrFilter= c->vChrFilter;
2574 int16_t *hLumFilter= c->hLumFilter;
2575 int16_t *hChrFilter= c->hChrFilter;
2576 int32_t *lumMmxFilter= c->lumMmxFilter;
2577 int32_t *chrMmxFilter= c->chrMmxFilter;
2578 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2579 const int vLumFilterSize= c->vLumFilterSize;
2580 const int vChrFilterSize= c->vChrFilterSize;
2581 const int hLumFilterSize= c->hLumFilterSize;
2582 const int hChrFilterSize= c->hChrFilterSize;
2583 int16_t **lumPixBuf= c->lumPixBuf;
2584 int16_t **chrUPixBuf= c->chrUPixBuf;
2585 int16_t **chrVPixBuf= c->chrVPixBuf;
2586 int16_t **alpPixBuf= c->alpPixBuf;
2587 const int vLumBufSize= c->vLumBufSize;
2588 const int vChrBufSize= c->vChrBufSize;
2589 uint8_t *formatConvBuffer= c->formatConvBuffer;
2590 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
/* round the chroma slice height up for odd srcSliceH */
2591 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2593 uint32_t *pal=c->pal_yuv;
2595 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2596 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2597 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2598 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2599 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2600 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2602 /* vars which will change and which we need to store back in the context */
2604 int lumBufIndex= c->lumBufIndex;
2605 int chrBufIndex= c->chrBufIndex;
2606 int lastInLumBuf= c->lastInLumBuf;
2607 int lastInChrBuf= c->lastInChrBuf;
/* packed input: all planes alias plane 0; planar: apply vChrDrop to chroma strides */
2609 if (isPacked(c->srcFormat)) {
2617 srcStride[3]= srcStride[0];
2619 srcStride[1]<<= c->vChrDrop;
2620 srcStride[2]<<= c->vChrDrop;
2622 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2623 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2624 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2625 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2626 srcSliceY, srcSliceH, dstY, dstH);
2627 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2628 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn once if output strides defeat aligned stores */
2630 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2631 static int warnedAlready=0; //FIXME move this into the context perhaps
2632 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2633 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2634 " ->cannot do aligned memory accesses anymore\n");
2639 /* Note the user might start scaling the picture in the middle so this
2640 will not get executed. This is not really intended but works
2641 currently, so people might do it. */
2642 if (srcSliceY ==0) {
2650 if (!should_dither) {
2651 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* main loop: emit one destination line per iteration */
2655 for (;dstY < dstH; dstY++) {
2656 const int chrDstY= dstY>>c->chrDstVSubSample;
2657 uint8_t *dest[4] = {
2658 dst[0] + dstStride[0] * dstY,
2659 dst[1] + dstStride[1] * chrDstY,
2660 dst[2] + dstStride[2] * chrDstY,
2661 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2664 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2665 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2666 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2667 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2668 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2669 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2672 //handle holes (FAST_BILINEAR & weird filters)
2673 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2674 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2675 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2676 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2678 DEBUG_BUFFERS("dstY: %d\n", dstY);
2679 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2680 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2681 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2682 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2684 // Do we have enough lines in this slice to output the dstY line
2685 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2687 if (!enough_lines) {
/* not enough input yet — just buffer what this slice provides */
2688 lastLumSrcY = srcSliceY + srcSliceH - 1;
2689 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2690 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2691 lastLumSrcY, lastChrSrcY);
2694 //Do horizontal scaling
2695 while(lastInLumBuf < lastLumSrcY) {
2696 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2697 const uint8_t *src2= src[1]+(lastInLumBuf + 1 - srcSliceY)*srcStride[1];
2698 const uint8_t *src3= src[2]+(lastInLumBuf + 1 - srcSliceY)*srcStride[2];
2699 const uint8_t *src4= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2701 assert(lumBufIndex < 2*vLumBufSize);
2702 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2703 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2704 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, src2, src3, srcW, lumXInc,
2705 hLumFilter, hLumFilterPos, hLumFilterSize,
2708 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2709 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src4, NULL, NULL, srcW,
2710 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2714 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2715 lumBufIndex, lastInLumBuf);
2717 while(lastInChrBuf < lastChrSrcY) {
2718 const uint8_t *src0= src[0]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[0];
2719 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2720 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2722 assert(chrBufIndex < 2*vChrBufSize);
2723 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2724 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2725 //FIXME replace parameters through context struct (some at least)
2727 if (c->needs_hcscale)
2728 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2729 chrDstW, src0, src1, src2, chrSrcW, chrXInc,
2730 hChrFilter, hChrFilterPos, hChrFilterSize,
2731 formatConvBuffer, pal);
2733 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2734 chrBufIndex, lastInChrBuf);
2736 //wrap buf index around to stay inside the ring buffer
2737 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2738 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2740 break; //we can't output a dstY line so let's try with the next slice
2743 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2745 if (should_dither) {
2746 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2747 c->lumDither8 = dither_8x8_128[dstY & 7];
2749 if (dstY >= dstH-2) {
2750 // hmm looks like we can't use MMX here without overwriting this array's tail
2751 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2752 &yuv2packed1, &yuv2packed2,
/* point into the ring buffers at the first line the vertical filter needs */
2757 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2758 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2759 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2760 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2762 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2763 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2764 if ((dstY&chrSkipMask) || isGray(dstFormat))
2765 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2766 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2767 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2768 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2769 dest, dstW, chrDstW);
2770 } else { //General YV12
2771 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2772 lumSrcPtr, vLumFilterSize,
2773 vChrFilter + chrDstY * vChrFilterSize,
2774 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2775 alpSrcPtr, dest, dstW, chrDstW);
2778 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2779 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2780 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2781 int chrAlpha = vChrFilter[2 * dstY + 1];
2782 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2783 alpPixBuf ? *alpSrcPtr : NULL,
2784 dest[0], dstW, chrAlpha, dstY);
2785 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2786 int lumAlpha = vLumFilter[2 * dstY + 1];
2787 int chrAlpha = vChrFilter[2 * dstY + 1];
2789 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2791 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2792 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2793 alpPixBuf ? alpSrcPtr : NULL,
2794 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2795 } else { //general RGB
2796 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2797 lumSrcPtr, vLumFilterSize,
2798 vChrFilter + dstY * vChrFilterSize,
2799 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2800 alpSrcPtr, dest[0], dstW, dstY);
/* requested alpha plane but source had none: fill it with opaque */
2806 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2807 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2810 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2811 __asm__ volatile("sfence":::"memory");
2815 /* store changed local vars back in the context */
2817 c->lumBufIndex= lumBufIndex;
2818 c->chrBufIndex= chrBufIndex;
2819 c->lastInLumBuf= lastInLumBuf;
2820 c->lastInChrBuf= lastInChrBuf;
2822 return dstY - lastDstY;
/*
 * One-time setup of the C code paths in the SwsContext: output writers
 * (via find_c_packed_planar_out_funcs), input unpackers
 * (chrToYV12/lumToYV12/alpToYV12 keyed on srcFormat, with *_half_c chroma
 * variants when the source is horizontally subsampled), horizontal
 * scalers (by srcBpc/dstBpc, plus the fast-bilinear pair when
 * SWS_FAST_BILINEAR is set), range converters (when srcRange != dstRange
 * and the destination is not RGB), and the needs_hcscale flag.
 *
 * NOTE(review): extraction artifact — stray original line numbers prefix
 * each line and some lines are missing (switch headers, several case
 * labels such as PIX_FMT_PAL8, HAVE_BIGENDIAN conditionals, braces).
 * Code left byte-identical; only comments added.
 */
2825 static av_cold void sws_init_swScale_c(SwsContext *c)
2827 enum PixelFormat srcFormat = c->srcFormat;
2829 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2830 &c->yuv2packed1, &c->yuv2packed2,
/* --- chroma input unpacker --- */
2833 c->chrToYV12 = NULL;
2835 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2836 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2837 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2838 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2842 case PIX_FMT_BGR4_BYTE:
2843 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* byte-swap >8-bit planar inputs whose endianness differs from native */
2845 case PIX_FMT_YUV444P9LE:
2846 case PIX_FMT_YUV420P9LE:
2847 case PIX_FMT_YUV422P10LE:
2848 case PIX_FMT_YUV420P10LE:
2849 case PIX_FMT_YUV444P10LE:
2850 case PIX_FMT_YUV420P16LE:
2851 case PIX_FMT_YUV422P16LE:
2852 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2854 case PIX_FMT_YUV444P9BE:
2855 case PIX_FMT_YUV420P9BE:
2856 case PIX_FMT_YUV444P10BE:
2857 case PIX_FMT_YUV422P10BE:
2858 case PIX_FMT_YUV420P10BE:
2859 case PIX_FMT_YUV420P16BE:
2860 case PIX_FMT_YUV422P16BE:
2861 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* subsampled chroma: use the *_half_c variants that average pixel pairs */
2864 if (c->chrSrcHSubSample) {
2866 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2867 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2868 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2869 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2870 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2871 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2872 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2873 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2874 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2875 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2876 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2877 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2878 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2879 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2880 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2881 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2882 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2883 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2884 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
/* unsubsampled chroma: one UV sample per pixel */
2888 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2889 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2890 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2891 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2892 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2893 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2894 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2895 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2896 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2897 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2898 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2899 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2900 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2901 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2902 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2903 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2904 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2905 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2906 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_c; break;
/* --- luma and alpha input unpackers --- */
2910 c->lumToYV12 = NULL;
2911 c->alpToYV12 = NULL;
2912 switch (srcFormat) {
2914 case PIX_FMT_YUV444P9LE:
2915 case PIX_FMT_YUV420P9LE:
2916 case PIX_FMT_YUV422P10LE:
2917 case PIX_FMT_YUV420P10LE:
2918 case PIX_FMT_YUV444P10LE:
2919 case PIX_FMT_YUV420P16LE:
2920 case PIX_FMT_YUV422P16LE:
2921 case PIX_FMT_YUV444P16LE:
2922 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2924 case PIX_FMT_YUV444P9BE:
2925 case PIX_FMT_YUV420P9BE:
2926 case PIX_FMT_YUV444P10BE:
2927 case PIX_FMT_YUV422P10BE:
2928 case PIX_FMT_YUV420P10BE:
2929 case PIX_FMT_YUV420P16BE:
2930 case PIX_FMT_YUV422P16BE:
2931 case PIX_FMT_YUV444P16BE:
2932 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2934 case PIX_FMT_YUYV422 :
2935 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2936 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2937 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2938 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2939 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2940 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2941 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2942 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2943 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2944 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2945 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2946 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2950 case PIX_FMT_BGR4_BYTE:
2951 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2952 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2953 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2954 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2955 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2956 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2957 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2958 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2959 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2960 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2961 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2962 case PIX_FMT_GBR24P : c->lumToYV12 = gbr24pToY_c ; break;
2965 switch (srcFormat) {
2967 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2969 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2970 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2971 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
/* --- horizontal scalers, by source/destination bit depth --- */
2976 if (c->srcBpc == 8) {
2977 if (c->dstBpc <= 10) {
2978 c->hyScale = c->hcScale = hScale8To15_c;
2979 if (c->flags & SWS_FAST_BILINEAR) {
2980 c->hyscale_fast = hyscale_fast_c;
2981 c->hcscale_fast = hcscale_fast_c;
2984 c->hyScale = c->hcScale = hScale8To19_c;
2987 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* --- range conversion (only for non-RGB output when ranges differ) --- */
2990 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2991 if (c->dstBpc <= 10) {
2993 c->lumConvertRange = lumRangeFromJpeg_c;
2994 c->chrConvertRange = chrRangeFromJpeg_c;
2996 c->lumConvertRange = lumRangeToJpeg_c;
2997 c->chrConvertRange = chrRangeToJpeg_c;
3001 c->lumConvertRange = lumRangeFromJpeg16_c;
3002 c->chrConvertRange = chrRangeFromJpeg16_c;
3004 c->lumConvertRange = lumRangeToJpeg16_c;
3005 c->chrConvertRange = chrRangeToJpeg16_c;
/* grayscale and mono formats carry no chroma, so skip hcscale entirely */
3010 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
3011 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
3012 c->needs_hcscale = 1;
3015 SwsFunc ff_getSwsFunc(SwsContext *c)
3017 sws_init_swScale_c(c);
3020 ff_sws_init_swScale_mmx(c);
3022 ff_sws_init_swScale_altivec(c);