2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/* Fixed-point RGB -> YUV conversion coefficients, scaled by 2^RGB2YUV_SHIFT.
 * The weights 0.299/0.587/0.114 and 0.500/0.419/0.081/0.331/0.169 match the
 * BT.601 matrix; luma rows are range-compressed by 219/255, chroma rows by
 * 224/255 (studio-swing levels). Negative coefficients are expressed as
 * -(int)(...) so the rounding (+0.5) is applied to the magnitude. */
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither threshold matrices used when reducing bit depth on output
 * (e.g. to RGB/BGR 1/4/8/15/16 bpp). The suffix is the number of output
 * levels the matrix targets.
 * NOTE(review): this copy appears to have lost lines — the closing "};" of
 * each table and the #if/#elif preprocessor guards that select among the
 * alternative dither_8x8_220 variants are missing; as shown, the four
 * identically-named dither_8x8_220 tables below would be duplicate
 * definitions. Verify against the original file. */
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* dither_8x8_220 variant without gamma correction */
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
/* Constant 64 vector, 8-byte aligned — presumably used as an SIMD rounding
 * bias; confirm against the assembly users. */
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* dithers[n]: family of 8x8 ordered-dither matrices, one per number of
 * truncated bits (index 0 = 1 bit dropped ... index 7 = 8 bits dropped).
 * Each matrix tiles horizontally with period 8.
 * NOTE(review): the per-matrix "{" / "}," separator lines seem to have been
 * lost in this copy — verify the initializer nesting against the original. */
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
/* Flat (no-op) dither row: constant mid-step bias of 64. */
273 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* dither_scale[depth-1][bits]: scale factors used by the dithering code;
 * exact semantics depend on the (not shown) users of this table. */
275 const uint16_t dither_scale[15][16]={
276 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
277 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
278 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
279 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
280 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
281 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
282 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
283 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
284 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
285 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
286 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
287 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
288 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
289 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
290 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
/* Vertical-filter output stage for >8-bit planar YUV (9/10/16 bit).
 * Accumulates lumFilterSize / chrFilterSize taps per output sample, shifts
 * down and stores clipped 16-bit samples in big- or little-endian order.
 * `dword` is set for 16-bit output, where the sources are int32_t planes;
 * otherwise the source pointers are reinterpreted as int16_t planes.
 * NOTE(review): this copy is missing lines — braces, the declarations of
 * i/j, the big_endian branch of the output_pixel macro, its #undef, and
 * likely NULL guards on uDest/vDest; verify against the original. */
293 static av_always_inline void
294 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
295 int lumFilterSize, const int16_t *chrFilter,
296 const int32_t **chrUSrc, const int32_t **chrVSrc,
297 int chrFilterSize, const int32_t **alpSrc,
298 uint16_t *dest[4], int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int dword= output_bits == 16;
304 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
305 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
306 int shift = 11 + 4*dword + 16 - output_bits - 1;
308 #define output_pixel(pos, val) \
310 AV_WB16(pos, av_clip_uint16(val >> shift)); \
312 AV_WL16(pos, av_clip_uint16(val >> shift)); \
314 for (i = 0; i < dstW; i++) {
315 int val = 1 << (26-output_bits + 4*dword - 1);
318 for (j = 0; j < lumFilterSize; j++)
319 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
321 output_pixel(&yDest[i], val);
/* chroma planes: same accumulation at the chroma width */
325 for (i = 0; i < chrDstW; i++) {
326 int u = 1 << (26-output_bits + 4*dword - 1);
327 int v = 1 << (26-output_bits + 4*dword - 1);
330 for (j = 0; j < chrFilterSize; j++) {
331 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
332 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
335 output_pixel(&uDest[i], u);
336 output_pixel(&vDest[i], v);
/* optional alpha plane, filtered with the luma coefficients */
340 if (CONFIG_SWSCALE_ALPHA && aDest) {
341 for (i = 0; i < dstW; i++) {
342 int val = 1 << (26-output_bits + 4*dword - 1);
345 for (j = 0; j < lumFilterSize; j++)
346 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
348 output_pixel(&aDest[i], val);
/* Vertical-filter output stage for 9/10-bit planar YUV from int16_t
 * intermediates: accumulate taps, shift down and store 16-bit samples
 * clipped to `output_bits` (av_clip_uintp2) in the requested endianness.
 * NOTE(review): braces, i/j declarations, the big_endian branch of
 * output_pixel and its #undef are missing from this copy. */
354 static av_always_inline void
355 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
356 int lumFilterSize, const int16_t *chrFilter,
357 const int16_t **chrUSrc, const int16_t **chrVSrc,
358 int chrFilterSize, const int16_t **alpSrc,
359 uint16_t *dest[4], int dstW, int chrDstW,
360 int big_endian, int output_bits)
362 //FIXME Optimize (just quickly written not optimized..)
364 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
365 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
366 int shift = 11 + 16 - output_bits - 1;
368 #define output_pixel(pos, val) \
370 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
372 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
374 for (i = 0; i < dstW; i++) {
375 int val = 1 << (26-output_bits - 1);
378 for (j = 0; j < lumFilterSize; j++)
379 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
381 output_pixel(&yDest[i], val);
/* chroma planes */
385 for (i = 0; i < chrDstW; i++) {
386 int u = 1 << (26-output_bits - 1);
387 int v = 1 << (26-output_bits - 1);
390 for (j = 0; j < chrFilterSize; j++) {
391 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
392 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
395 output_pixel(&uDest[i], u);
396 output_pixel(&vDest[i], v);
/* optional alpha plane, filtered with the luma coefficients */
400 if (CONFIG_SWSCALE_ALPHA && aDest) {
401 for (i = 0; i < dstW; i++) {
402 int val = 1 << (26-output_bits - 1);
405 for (j = 0; j < lumFilterSize; j++)
406 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
408 output_pixel(&aDest[i], val);
/* Generates the concrete yuv2yuvX{9,10,16}{BE,LE}_c entry points: each one
 * casts the generic int16_t** source pointers to the template's element
 * type (int16_t or int32_t) and forwards to the shared template with the
 * endianness and bit depth baked in. */
414 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
415 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
416 const int16_t **_lumSrc, int lumFilterSize, \
417 const int16_t *chrFilter, const int16_t **_chrUSrc, \
418 const int16_t **_chrVSrc, \
419 int chrFilterSize, const int16_t **_alpSrc, \
420 uint8_t *_dest[4], int dstW, int chrDstW) \
422 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
423 **chrUSrc = (const typeX_t **) _chrUSrc, \
424 **chrVSrc = (const typeX_t **) _chrVSrc, \
425 **alpSrc = (const typeX_t **) _alpSrc; \
426 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
427 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
428 alpSrc, (uint16_t **) _dest, \
429 dstW, chrDstW, is_be, bits); \
431 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
432 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
433 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
434 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
435 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
436 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
/* Vertical-filter output stage for 8-bit planar YUV(A): accumulates the
 * filter taps into a 19.12-style fixed-point sum seeded with the per-column
 * dither value (<< 12), then stores av_clip_uint8(sum >> 19) per sample.
 * The V dither is read at offset (i + 3) to decorrelate it from U.
 * NOTE(review): braces, i/j declarations and likely uDest NULL checks are
 * missing from this copy. */
438 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
439 const int16_t **lumSrc, int lumFilterSize,
440 const int16_t *chrFilter, const int16_t **chrUSrc,
441 const int16_t **chrVSrc,
442 int chrFilterSize, const int16_t **alpSrc,
443 uint8_t *dest[4], int dstW, int chrDstW)
445 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
446 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
448 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
450 //FIXME Optimize (just quickly written not optimized..)
451 for (i=0; i<dstW; i++) {
452 int val = lumDither[i & 7] << 12;
454 for (j=0; j<lumFilterSize; j++)
455 val += lumSrc[j][i] * lumFilter[j];
457 yDest[i]= av_clip_uint8(val>>19);
/* chroma planes at chroma width */
461 for (i=0; i<chrDstW; i++) {
462 int u = chrDither[i & 7] << 12;
463 int v = chrDither[(i + 3) & 7] << 12;
465 for (j=0; j<chrFilterSize; j++) {
466 u += chrUSrc[j][i] * chrFilter[j];
467 v += chrVSrc[j][i] * chrFilter[j];
470 uDest[i]= av_clip_uint8(u>>19);
471 vDest[i]= av_clip_uint8(v>>19);
/* optional alpha plane, filtered with the luma coefficients */
474 if (CONFIG_SWSCALE_ALPHA && aDest)
475 for (i=0; i<dstW; i++) {
476 int val = lumDither[i & 7] << 12;
478 for (j=0; j<lumFilterSize; j++)
479 val += alpSrc[j][i] * lumFilter[j];
481 aDest[i]= av_clip_uint8(val>>19);
/* One-tap (unfiltered) 8-bit planar output: each 15-bit intermediate
 * sample gets the per-column dither added, is shifted down by 7 and
 * clipped to 8 bits. Used when no vertical filtering is needed.
 * NOTE(review): braces, the i declaration and likely uDest/aDest NULL
 * checks are missing from this copy. */
485 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
486 const int16_t *chrUSrc, const int16_t *chrVSrc,
487 const int16_t *alpSrc,
488 uint8_t *dest[4], int dstW, int chrDstW)
490 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
491 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
493 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
495 for (i=0; i<dstW; i++) {
496 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
497 yDest[i]= av_clip_uint8(val);
/* chroma planes; V dither offset by 3 to decorrelate from U */
501 for (i=0; i<chrDstW; i++) {
502 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
503 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
504 uDest[i]= av_clip_uint8(u);
505 vDest[i]= av_clip_uint8(v);
/* optional alpha plane */
508 if (CONFIG_SWSCALE_ALPHA && aDest)
509 for (i=0; i<dstW; i++) {
510 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
511 aDest[i]= av_clip_uint8(val);
/* Vertical-filter output stage for NV12/NV21: planar 8-bit luma plus one
 * interleaved chroma plane. For NV12 the order is U,V per pair; the other
 * branch (presumably NV21 — its condition line is missing from this copy)
 * swaps to V,U. Accumulation/dither/shift matches yuv2yuvX_c.
 * NOTE(review): braces, i/j declarations, the uDest NULL check and the
 * "else" for the NV21 path are missing from this copy. */
515 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
516 const int16_t **lumSrc, int lumFilterSize,
517 const int16_t *chrFilter, const int16_t **chrUSrc,
518 const int16_t **chrVSrc, int chrFilterSize,
519 const int16_t **alpSrc, uint8_t *dest[4],
520 int dstW, int chrDstW)
522 uint8_t *yDest = dest[0], *uDest = dest[1];
523 enum PixelFormat dstFormat = c->dstFormat;
524 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
526 //FIXME Optimize (just quickly written not optimized..)
528 for (i=0; i<dstW; i++) {
529 int val = lumDither[i & 7] << 12;
531 for (j=0; j<lumFilterSize; j++)
532 val += lumSrc[j][i] * lumFilter[j];
534 yDest[i]= av_clip_uint8(val>>19);
/* NV12: interleave as U,V */
540 if (dstFormat == PIX_FMT_NV12)
541 for (i=0; i<chrDstW; i++) {
542 int u = chrDither[i & 7] << 12;
543 int v = chrDither[(i + 3) & 7] << 12;
545 for (j=0; j<chrFilterSize; j++) {
546 u += chrUSrc[j][i] * chrFilter[j];
547 v += chrVSrc[j][i] * chrFilter[j];
550 uDest[2*i]= av_clip_uint8(u>>19);
551 uDest[2*i+1]= av_clip_uint8(v>>19);
/* other branch: interleave as V,U */
554 for (i=0; i<chrDstW; i++) {
555 int u = chrDither[i & 7] << 12;
556 int v = chrDither[(i + 3) & 7] << 12;
558 for (j=0; j<chrFilterSize; j++) {
559 u += chrUSrc[j][i] * chrFilter[j];
560 v += chrVSrc[j][i] * chrFilter[j];
563 uDest[2*i]= av_clip_uint8(v>>19);
564 uDest[2*i+1]= av_clip_uint8(u>>19);
/* output_pixel: store one 16-bit gray sample big- or little-endian
 * depending on the target format.
 * NOTE(review): the AV_WB16/AV_WL16 store lines and the closing of this
 * macro are missing from this copy. */
568 #define output_pixel(pos, val) \
569 if (target == PIX_FMT_GRAY16BE) { \
/* Vertical-filter output for GRAY16: processes luma in pairs, clipping to
 * 16 bits only when an overflow bit is set. Chroma/alpha inputs are
 * accepted but unused (gray output).
 * NOTE(review): braces, i/j declarations and the Y1/Y2 initializers
 * (including their shifts) are missing from this copy. */
575 static av_always_inline void
576 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
577 const int32_t **lumSrc, int lumFilterSize,
578 const int16_t *chrFilter, const int32_t **chrUSrc,
579 const int32_t **chrVSrc, int chrFilterSize,
580 const int32_t **alpSrc, uint16_t *dest, int dstW,
581 int y, enum PixelFormat target)
585 for (i = 0; i < (dstW >> 1); i++) {
590 for (j = 0; j < lumFilterSize; j++) {
591 Y1 += lumSrc[j][i * 2] * lumFilter[j];
592 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* clip only when either sample overflowed 16 bits */
596 if ((Y1 | Y2) & 0x10000) {
597 Y1 = av_clip_uint16(Y1);
598 Y2 = av_clip_uint16(Y2);
600 output_pixel(&dest[i * 2 + 0], Y1);
601 output_pixel(&dest[i * 2 + 1], Y2);
/* Two-line bilinear GRAY16 output: blends buf[0]/buf[1] with weights
 * (4095 - yalpha)/yalpha and shifts down by 15. Chroma/alpha unused.
 * NOTE(review): braces and the i declaration are missing from this copy. */
605 static av_always_inline void
606 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
607 const int32_t *ubuf[2], const int32_t *vbuf[2],
608 const int32_t *abuf[2], uint16_t *dest, int dstW,
609 int yalpha, int uvalpha, int y,
610 enum PixelFormat target)
612 int yalpha1 = 4095 - yalpha;
614 const int32_t *buf0 = buf[0], *buf1 = buf[1];
616 for (i = 0; i < (dstW >> 1); i++) {
617 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
618 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
620 output_pixel(&dest[i * 2 + 0], Y1);
621 output_pixel(&dest[i * 2 + 1], Y2);
/* Single-line GRAY16 output: rounds (+4) and shifts the intermediates
 * down by 3. Chroma/alpha unused.
 * NOTE(review): braces and the i declaration are missing from this copy. */
625 static av_always_inline void
626 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
627 const int32_t *ubuf[2], const int32_t *vbuf[2],
628 const int32_t *abuf0, uint16_t *dest, int dstW,
629 int uvalpha, int y, enum PixelFormat target)
633 for (i = 0; i < (dstW >> 1); i++) {
634 int Y1 = (buf0[i * 2 ]+4)>>3;
635 int Y2 = (buf0[i * 2 + 1]+4)>>3;
637 output_pixel(&dest[i * 2 + 0], Y1);
638 output_pixel(&dest[i * 2 + 1], Y2);
/* Generates the _X (full vertical filter), _2 (two-line bilinear) and _1
 * (single-line) entry points for a >8-bit packed output format: each
 * wrapper reinterprets the generic int16_t pointers as int32_t planes and
 * the byte destination as uint16_t, then forwards to the corresponding
 * template with the pixel format baked in.
 * NOTE(review): an "#undef output_pixel" presumably preceded this macro in
 * the original; it is missing from this copy. */
644 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
645 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
646 const int16_t **_lumSrc, int lumFilterSize, \
647 const int16_t *chrFilter, const int16_t **_chrUSrc, \
648 const int16_t **_chrVSrc, int chrFilterSize, \
649 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
652 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
653 **chrUSrc = (const int32_t **) _chrUSrc, \
654 **chrVSrc = (const int32_t **) _chrVSrc, \
655 **alpSrc = (const int32_t **) _alpSrc; \
656 uint16_t *dest = (uint16_t *) _dest; \
657 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
658 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
659 alpSrc, dest, dstW, y, fmt); \
662 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
663 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
664 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
665 int yalpha, int uvalpha, int y) \
667 const int32_t **buf = (const int32_t **) _buf, \
668 **ubuf = (const int32_t **) _ubuf, \
669 **vbuf = (const int32_t **) _vbuf, \
670 **abuf = (const int32_t **) _abuf; \
671 uint16_t *dest = (uint16_t *) _dest; \
672 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
673 dest, dstW, yalpha, uvalpha, y, fmt); \
676 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
677 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
678 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
679 int uvalpha, int y) \
681 const int32_t *buf0 = (const int32_t *) _buf0, \
682 **ubuf = (const int32_t **) _ubuf, \
683 **vbuf = (const int32_t **) _vbuf, \
684 *abuf0 = (const int32_t *) _abuf0; \
685 uint16_t *dest = (uint16_t *) _dest; \
686 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
687 dstW, uvalpha, y, fmt); \
690 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
691 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
/* output_pixel: write one byte of accumulated 1-bpp pixels; MONOBLACK and
 * MONOWHITE differ in bit polarity.
 * NOTE(review): the store lines and the else branch of this macro are
 * missing from this copy. */
693 #define output_pixel(pos, acc) \
694 if (target == PIX_FMT_MONOBLACK) { \
/* Vertical-filter output for 1-bpp monochrome: filters luma two pixels at
 * a time, dithers via the gamma-corrected dither_8x8_220 row for this
 * output line, maps through the gray table g, and packs bits MSB-first
 * into `acc` (acc += acc + bit).
 * NOTE(review): braces, i/j declarations, the Y1/Y2 initializers and the
 * acc handling between pairs are missing from this copy. */
700 static av_always_inline void
701 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
702 const int16_t **lumSrc, int lumFilterSize,
703 const int16_t *chrFilter, const int16_t **chrUSrc,
704 const int16_t **chrVSrc, int chrFilterSize,
705 const int16_t **alpSrc, uint8_t *dest, int dstW,
706 int y, enum PixelFormat target)
708 const uint8_t * const d128=dither_8x8_220[y&7];
709 uint8_t *g = c->table_gU[128] + c->table_gV[128];
713 for (i = 0; i < dstW - 1; i += 2) {
718 for (j = 0; j < lumFilterSize; j++) {
719 Y1 += lumSrc[j][i] * lumFilter[j];
720 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* clip only on overflow past 8 bits */
724 if ((Y1 | Y2) & 0x100) {
725 Y1 = av_clip_uint8(Y1);
726 Y2 = av_clip_uint8(Y2);
728 acc += acc + g[Y1 + d128[(i + 0) & 7]];
729 acc += acc + g[Y2 + d128[(i + 1) & 7]];
731 output_pixel(*dest++, acc);
/* Two-line bilinear monochrome output: blends buf0/buf1 per pixel, maps
 * through the gray table with per-column dither, and packs 8 pixels into
 * one output byte per iteration.
 * NOTE(review): braces and the i declaration are missing from this copy. */
736 static av_always_inline void
737 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
738 const int16_t *ubuf[2], const int16_t *vbuf[2],
739 const int16_t *abuf[2], uint8_t *dest, int dstW,
740 int yalpha, int uvalpha, int y,
741 enum PixelFormat target)
743 const int16_t *buf0 = buf[0], *buf1 = buf[1];
744 const uint8_t * const d128 = dither_8x8_220[y & 7];
745 uint8_t *g = c->table_gU[128] + c->table_gV[128];
746 int yalpha1 = 4095 - yalpha;
749 for (i = 0; i < dstW - 7; i += 8) {
750 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
751 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
752 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
753 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
754 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
755 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
756 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
757 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
758 output_pixel(*dest++, acc);
/* Single-line monochrome output: shifts each 15-bit luma intermediate down
 * by 7, dithers and maps through the gray table, packing 8 pixels per byte.
 * NOTE(review): braces and the i declaration are missing from this copy. */
762 static av_always_inline void
763 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const uint8_t * const d128 = dither_8x8_220[y & 7];
769 uint8_t *g = c->table_gU[128] + c->table_gV[128];
772 for (i = 0; i < dstW - 7; i += 8) {
773 int acc = g[(buf0[i ] >> 7) + d128[0]];
774 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
775 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
776 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
777 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
778 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
779 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
780 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
781 output_pixel(*dest++, acc);
/* Generates the _X / _2 / _1 entry points for an 8-bit packed output
 * format by forwarding the generic int16_t intermediates straight to the
 * corresponding template with the pixel format baked in (no pointer
 * reinterpretation needed, unlike YUV2PACKED16WRAPPER). */
787 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
788 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
789 const int16_t **lumSrc, int lumFilterSize, \
790 const int16_t *chrFilter, const int16_t **chrUSrc, \
791 const int16_t **chrVSrc, int chrFilterSize, \
792 const int16_t **alpSrc, uint8_t *dest, int dstW, \
795 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
796 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
797 alpSrc, dest, dstW, y, fmt); \
800 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
801 const int16_t *ubuf[2], const int16_t *vbuf[2], \
802 const int16_t *abuf[2], uint8_t *dest, int dstW, \
803 int yalpha, int uvalpha, int y) \
805 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
806 dest, dstW, yalpha, uvalpha, y, fmt); \
809 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
810 const int16_t *ubuf[2], const int16_t *vbuf[2], \
811 const int16_t *abuf0, uint8_t *dest, int dstW, \
812 int uvalpha, int y) \
814 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
815 abuf0, dest, dstW, uvalpha, \
819 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
820 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* output_pixels: store one macropixel; YUYV order is Y1 U Y2 V, the other
 * branch (UYVY) shifts everything by one byte.
 * NOTE(review): the U/V store lines and the else line of this macro are
 * missing from this copy. */
822 #define output_pixels(pos, Y1, U, Y2, V) \
823 if (target == PIX_FMT_YUYV422) { \
824 dest[pos + 0] = Y1; \
826 dest[pos + 2] = Y2; \
830 dest[pos + 1] = Y1; \
832 dest[pos + 3] = Y2; \
/* Vertical-filter output for packed 4:2:2 (YUYV/UYVY): filters two luma
 * and one chroma pair per macropixel, clipping to 8 bits only on overflow.
 * NOTE(review): braces, i/j declarations and the Y1/Y2/U/V initializers
 * are missing from this copy. */
835 static av_always_inline void
836 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
837 const int16_t **lumSrc, int lumFilterSize,
838 const int16_t *chrFilter, const int16_t **chrUSrc,
839 const int16_t **chrVSrc, int chrFilterSize,
840 const int16_t **alpSrc, uint8_t *dest, int dstW,
841 int y, enum PixelFormat target)
845 for (i = 0; i < (dstW >> 1); i++) {
852 for (j = 0; j < lumFilterSize; j++) {
853 Y1 += lumSrc[j][i * 2] * lumFilter[j];
854 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
856 for (j = 0; j < chrFilterSize; j++) {
857 U += chrUSrc[j][i] * chrFilter[j];
858 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when any component overflowed 8 bits */
864 if ((Y1 | Y2 | U | V) & 0x100) {
865 Y1 = av_clip_uint8(Y1);
866 Y2 = av_clip_uint8(Y2);
867 U = av_clip_uint8(U);
868 V = av_clip_uint8(V);
870 output_pixels(4*i, Y1, U, Y2, V);
/* Two-line bilinear 4:2:2 output: blends the two luma and chroma lines
 * with 12-bit weights and shifts down by 19 to 8-bit samples.
 * NOTE(review): braces and the i declaration are missing from this copy. */
874 static av_always_inline void
875 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
876 const int16_t *ubuf[2], const int16_t *vbuf[2],
877 const int16_t *abuf[2], uint8_t *dest, int dstW,
878 int yalpha, int uvalpha, int y,
879 enum PixelFormat target)
881 const int16_t *buf0 = buf[0], *buf1 = buf[1],
882 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
883 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
884 int yalpha1 = 4095 - yalpha;
885 int uvalpha1 = 4095 - uvalpha;
888 for (i = 0; i < (dstW >> 1); i++) {
889 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
890 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
891 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
892 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
894 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-line 4:2:2 output. When uvalpha < 2048 the chroma comes from a
 * single line (ubuf1/vbuf1 here); otherwise the two chroma lines are
 * averaged ( >> 8 instead of >> 7).
 * NOTE(review): braces, the i declaration and the "else" introducing the
 * averaging path are missing from this copy; also verify whether the
 * single-line path should read ubuf0/vbuf0 rather than ubuf1/vbuf1. */
898 static av_always_inline void
899 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
900 const int16_t *ubuf[2], const int16_t *vbuf[2],
901 const int16_t *abuf0, uint8_t *dest, int dstW,
902 int uvalpha, int y, enum PixelFormat target)
904 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
905 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
908 if (uvalpha < 2048) {
909 for (i = 0; i < (dstW >> 1); i++) {
910 int Y1 = buf0[i * 2] >> 7;
911 int Y2 = buf0[i * 2 + 1] >> 7;
912 int U = ubuf1[i] >> 7;
913 int V = vbuf1[i] >> 7;
915 output_pixels(i * 4, Y1, U, Y2, V);
918 for (i = 0; i < (dstW >> 1); i++) {
919 int Y1 = buf0[i * 2] >> 7;
920 int Y2 = buf0[i * 2 + 1] >> 7;
921 int U = (ubuf0[i] + ubuf1[i]) >> 8;
922 int V = (vbuf0[i] + vbuf1[i]) >> 8;
924 output_pixels(i * 4, Y1, U, Y2, V);
931 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
932 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* R_B / B_R swap the red and blue channels between RGB48 and BGR48 so one
 * template serves both; output_pixel stores a 16-bit component in the
 * target's endianness.
 * NOTE(review): the AV_WB16/AV_WL16 store lines of output_pixel are
 * missing from this copy. */
934 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
935 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
936 #define output_pixel(pos, val) \
937 if (isBE(target)) { \
/* Vertical-filter output for 48-bit RGB/BGR from int32_t intermediates:
 * filters two luma and one chroma sample per macropixel, converts YUV ->
 * RGB with the context's fixed-point coefficients and writes six 16-bit
 * components, clipped to 30 bits before the final >> 14.
 * NOTE(review): braces, i/j declarations, the Y1/Y2/V initializers, the
 * R/G/B declarations and the post-filter normalization shifts are missing
 * from this copy. */
943 static av_always_inline void
944 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
945 const int32_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int32_t **chrUSrc,
947 const int32_t **chrVSrc, int chrFilterSize,
948 const int32_t **alpSrc, uint16_t *dest, int dstW,
949 int y, enum PixelFormat target)
953 for (i = 0; i < (dstW >> 1); i++) {
957 int U = -128 << 23; // 19
961 for (j = 0; j < lumFilterSize; j++) {
962 Y1 += lumSrc[j][i * 2] * lumFilter[j];
963 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
965 for (j = 0; j < chrFilterSize; j++) {
966 U += chrUSrc[j][i] * chrFilter[j];
967 V += chrVSrc[j][i] * chrFilter[j];
970 // 8bit: 12+15=27; 16-bit: 12+19=31
976 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
977 Y1 -= c->yuv2rgb_y_offset;
978 Y2 -= c->yuv2rgb_y_offset;
979 Y1 *= c->yuv2rgb_y_coeff;
980 Y2 *= c->yuv2rgb_y_coeff;
983 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
985 R = V * c->yuv2rgb_v2r_coeff;
986 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
987 B = U * c->yuv2rgb_u2b_coeff;
989 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
990 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
991 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
992 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
993 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
994 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
995 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-line bilinear 48-bit RGB/BGR output: blends the input lines, removes
 * the chroma bias (-128 << 23 before >> 14), applies the context's YUV->RGB
 * coefficients and stores six clipped 16-bit components per macropixel.
 * NOTE(review): braces, the i declaration, the R/G/B declarations and the
 * dest advance are missing from this copy. */
1000 static av_always_inline void
1001 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
1002 const int32_t *ubuf[2], const int32_t *vbuf[2],
1003 const int32_t *abuf[2], uint16_t *dest, int dstW,
1004 int yalpha, int uvalpha, int y,
1005 enum PixelFormat target)
1007 const int32_t *buf0 = buf[0], *buf1 = buf[1],
1008 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1009 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1010 int yalpha1 = 4095 - yalpha;
1011 int uvalpha1 = 4095 - uvalpha;
1014 for (i = 0; i < (dstW >> 1); i++) {
1015 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
1016 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
1017 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
1018 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
1021 Y1 -= c->yuv2rgb_y_offset;
1022 Y2 -= c->yuv2rgb_y_offset;
1023 Y1 *= c->yuv2rgb_y_coeff;
1024 Y2 *= c->yuv2rgb_y_coeff;
1028 R = V * c->yuv2rgb_v2r_coeff;
1029 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1030 B = U * c->yuv2rgb_u2b_coeff;
1032 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1033 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1034 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1035 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1036 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1037 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/*
 * Convert one line of 16-bit-per-component YUV to 48-bit RGB, unscaled
 * (single luma input line).  uvalpha < 2048 means chroma comes from line 0
 * only; otherwise the two chroma lines are averaged.
 * NOTE(review): this listing is elided — braces, "int i;", the R/G/B
 * declarations and the dest advance are missing between the numbered lines.
 */
1042 static av_always_inline void
1043 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
1044 const int32_t *ubuf[2], const int32_t *vbuf[2],
1045 const int32_t *abuf0, uint16_t *dest, int dstW,
1046 int uvalpha, int y, enum PixelFormat target)
1048 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1049 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1052 if (uvalpha < 2048) {
1053 for (i = 0; i < (dstW >> 1); i++) {
1054 int Y1 = (buf0[i * 2] ) >> 2;
1055 int Y2 = (buf0[i * 2 + 1]) >> 2;
/* (-128 << 11) recenters chroma around zero before the coefficient multiply */
1056 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1057 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1060 Y1 -= c->yuv2rgb_y_offset;
1061 Y2 -= c->yuv2rgb_y_offset;
1062 Y1 *= c->yuv2rgb_y_coeff;
1063 Y2 *= c->yuv2rgb_y_coeff;
1067 R = V * c->yuv2rgb_v2r_coeff;
1068 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1069 B = U * c->yuv2rgb_u2b_coeff;
/* clip to 30 bits then drop the 14 fractional bits of the fixed-point math */
1071 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1072 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1073 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1074 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1075 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1076 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* else branch: average the two chroma input lines (extra >> 1 via ">> 3") */
1080 for (i = 0; i < (dstW >> 1); i++) {
1081 int Y1 = (buf0[i * 2] ) >> 2;
1082 int Y2 = (buf0[i * 2 + 1]) >> 2;
1083 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
1084 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
1087 Y1 -= c->yuv2rgb_y_offset;
1088 Y2 -= c->yuv2rgb_y_offset;
1089 Y1 *= c->yuv2rgb_y_coeff;
1090 Y2 *= c->yuv2rgb_y_coeff;
1094 R = V * c->yuv2rgb_v2r_coeff;
1095 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1096 B = U * c->yuv2rgb_u2b_coeff;
1098 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1099 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1100 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1101 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1102 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1103 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate the 16-bit-per-component packed-RGB output functions for the
 * four 48-bit formats (RGB/BGR x big/little endian). */
1113 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1114 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1115 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1116 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/*
 * Store one pair of horizontally adjacent pixels (Y1/Y2 share one U/V pair)
 * into the destination in the requested packed RGB format.  r/g/b point at
 * per-component lookup tables prepared by ff_yuv2rgb_c_init_tables(); the
 * table entries already carry the component at its packed bit position, so
 * a simple sum assembles the pixel.  Dithering offsets are added to the
 * table index for the 16/15/12/8/4-bit formats.
 * NOTE(review): elided listing — #if/#else/#endif lines, closing braces and
 * some declarations are missing between the numbered lines.
 */
1118 static av_always_inline void
1119 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1120 int U, int V, int A1, int A2,
1121 const void *_r, const void *_g, const void *_b, int y,
1122 enum PixelFormat target, int hasAlpha)
1124 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1125 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1126 uint32_t *dest = (uint32_t *) _dest;
1127 const uint32_t *r = (const uint32_t *) _r;
1128 const uint32_t *g = (const uint32_t *) _g;
1129 const uint32_t *b = (const uint32_t *) _b;
/* alpha goes in bits 24..31 unless the "_1" (alpha-first) layouts are used */
1132 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1134 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1135 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1138 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1140 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1141 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1143 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1144 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1147 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1148 uint8_t *dest = (uint8_t *) _dest;
1149 const uint8_t *r = (const uint8_t *) _r;
1150 const uint8_t *g = (const uint8_t *) _g;
1151 const uint8_t *b = (const uint8_t *) _b;
/* r_b/b_r swap the outer components so one code path serves RGB24 and BGR24 */
1153 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1154 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1156 dest[i * 6 + 0] = r_b[Y1];
1157 dest[i * 6 + 1] = g[Y1];
1158 dest[i * 6 + 2] = b_r[Y1];
1159 dest[i * 6 + 3] = r_b[Y2];
1160 dest[i * 6 + 4] = g[Y2];
1161 dest[i * 6 + 5] = b_r[Y2];
1164 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1165 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1166 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1167 uint16_t *dest = (uint16_t *) _dest;
1168 const uint16_t *r = (const uint16_t *) _r;
1169 const uint16_t *g = (const uint16_t *) _g;
1170 const uint16_t *b = (const uint16_t *) _b;
1171 int dr1, dg1, db1, dr2, dg2, db2;
/* 2x2 ordered dither for 16/15 bpp, 4x4 for 12 bpp; blue uses the row
 * complement so red/blue dither patterns do not line up */
1173 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1174 dr1 = dither_2x2_8[ y & 1 ][0];
1175 dg1 = dither_2x2_4[ y & 1 ][0];
1176 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1177 dr2 = dither_2x2_8[ y & 1 ][1];
1178 dg2 = dither_2x2_4[ y & 1 ][1];
1179 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1180 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1181 dr1 = dither_2x2_8[ y & 1 ][0];
1182 dg1 = dither_2x2_8[ y & 1 ][1];
1183 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1184 dr2 = dither_2x2_8[ y & 1 ][1];
1185 dg2 = dither_2x2_8[ y & 1 ][0];
1186 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1188 dr1 = dither_4x4_16[ y & 3 ][0];
1189 dg1 = dither_4x4_16[ y & 3 ][1];
1190 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1191 dr2 = dither_4x4_16[ y & 3 ][1];
1192 dg2 = dither_4x4_16[ y & 3 ][0];
1193 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1196 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1197 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1198 } else /* 8/4-bit */ {
1199 uint8_t *dest = (uint8_t *) _dest;
1200 const uint8_t *r = (const uint8_t *) _r;
1201 const uint8_t *g = (const uint8_t *) _g;
1202 const uint8_t *b = (const uint8_t *) _b;
1203 int dr1, dg1, db1, dr2, dg2, db2;
1205 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1206 const uint8_t * const d64 = dither_8x8_73[y & 7];
1207 const uint8_t * const d32 = dither_8x8_32[y & 7];
1208 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1209 db1 = d64[(i * 2 + 0) & 7];
1210 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1211 db2 = d64[(i * 2 + 1) & 7];
1213 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1214 const uint8_t * const d128 = dither_8x8_220[y & 7];
1215 dr1 = db1 = d128[(i * 2 + 0) & 7];
1216 dg1 = d64[(i * 2 + 0) & 7];
1217 dr2 = db2 = d128[(i * 2 + 1) & 7];
1218 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4: both pixels packed into one byte, second pixel in the high nibble */
1221 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1222 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1223 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1225 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1226 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Fully filtered (multi-tap vertical scaling) YUV -> packed RGB output:
 * accumulate lumFilterSize/chrFilterSize taps per sample, clip to 8 bits,
 * then emit a pixel pair via yuv2rgb_write().
 * NOTE(review): elided listing — the Y1/Y2/U/V initializers with their
 * rounding constants and the >> 19 normalization lines are missing here.
 */
1231 static av_always_inline void
1232 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1233 const int16_t **lumSrc, int lumFilterSize,
1234 const int16_t *chrFilter, const int16_t **chrUSrc,
1235 const int16_t **chrVSrc, int chrFilterSize,
1236 const int16_t **alpSrc, uint8_t *dest, int dstW,
1237 int y, enum PixelFormat target, int hasAlpha)
1241 for (i = 0; i < (dstW >> 1); i++) {
1247 int av_unused A1, A2;
1248 const void *r, *g, *b;
1250 for (j = 0; j < lumFilterSize; j++) {
1251 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1252 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1254 for (j = 0; j < chrFilterSize; j++) {
1255 U += chrUSrc[j][i] * chrFilter[j];
1256 V += chrVSrc[j][i] * chrFilter[j];
/* only pay for the four clips when some component actually overflowed */
1262 if ((Y1 | Y2 | U | V) & 0x100) {
1263 Y1 = av_clip_uint8(Y1);
1264 Y2 = av_clip_uint8(Y2);
1265 U = av_clip_uint8(U);
1266 V = av_clip_uint8(V);
1271 for (j = 0; j < lumFilterSize; j++) {
1272 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1273 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1277 if ((A1 | A2) & 0x100) {
1278 A1 = av_clip_uint8(A1);
1279 A2 = av_clip_uint8(A2);
1283 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1285 g = (c->table_gU[U] + c->table_gV[V]);
1288 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1289 r, g, b, y, target, hasAlpha);
/*
 * Two-input-line vertical blend (bilinear) YUV -> packed RGB output:
 * each component is yalpha/uvalpha-weighted between line 0 and line 1,
 * with 4095 acting as unity weight (12-bit blend, >> 19 normalizes the
 * 15-bit samples back to 8 bits).
 */
1293 static av_always_inline void
1294 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1295 const int16_t *ubuf[2], const int16_t *vbuf[2],
1296 const int16_t *abuf[2], uint8_t *dest, int dstW,
1297 int yalpha, int uvalpha, int y,
1298 enum PixelFormat target, int hasAlpha)
1300 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1301 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1302 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1303 *abuf0 = hasAlpha ? abuf[0] : NULL,
1304 *abuf1 = hasAlpha ? abuf[1] : NULL;
1305 int yalpha1 = 4095 - yalpha;
1306 int uvalpha1 = 4095 - uvalpha;
1309 for (i = 0; i < (dstW >> 1); i++) {
1310 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1311 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1312 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1313 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1315 const void *r = c->table_rV[V],
1316 *g = (c->table_gU[U] + c->table_gV[V]),
1317 *b = c->table_bU[U];
1320 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1321 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1324 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1325 r, g, b, y, target, hasAlpha);
1329 static av_always_inline void
1330 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1331 const int16_t *ubuf[2], const int16_t *vbuf[2],
1332 const int16_t *abuf0, uint8_t *dest, int dstW,
1333 int uvalpha, int y, enum PixelFormat target,
1336 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1337 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1340 if (uvalpha < 2048) {
1341 for (i = 0; i < (dstW >> 1); i++) {
1342 int Y1 = buf0[i * 2] >> 7;
1343 int Y2 = buf0[i * 2 + 1] >> 7;
1344 int U = ubuf1[i] >> 7;
1345 int V = vbuf1[i] >> 7;
1347 const void *r = c->table_rV[V],
1348 *g = (c->table_gU[U] + c->table_gV[V]),
1349 *b = c->table_bU[U];
1352 A1 = abuf0[i * 2 ] >> 7;
1353 A2 = abuf0[i * 2 + 1] >> 7;
1356 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1357 r, g, b, y, target, hasAlpha);
1360 for (i = 0; i < (dstW >> 1); i++) {
1361 int Y1 = buf0[i * 2] >> 7;
1362 int Y2 = buf0[i * 2 + 1] >> 7;
1363 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1364 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1366 const void *r = c->table_rV[V],
1367 *g = (c->table_gU[U] + c->table_gV[V]),
1368 *b = c->table_bU[U];
1371 A1 = abuf0[i * 2 ] >> 7;
1372 A2 = abuf0[i * 2 + 1] >> 7;
1375 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1376 r, g, b, y, target, hasAlpha);
/* Wrapper generators: stamp out the _X_c (filtered), _2_c (two-line blend)
 * and _1_c (single-line) entry points for one pixel format by delegating to
 * the av_always_inline templates with a compile-time 'fmt'/'hasAlpha'. */
1381 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1382 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1383 const int16_t **lumSrc, int lumFilterSize, \
1384 const int16_t *chrFilter, const int16_t **chrUSrc, \
1385 const int16_t **chrVSrc, int chrFilterSize, \
1386 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1389 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1390 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1391 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1393 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1394 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1395 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1396 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1397 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1398 int yalpha, int uvalpha, int y) \
1400 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1401 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1404 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1405 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1406 const int16_t *abuf0, uint8_t *dest, int dstW, \
1407 int uvalpha, int y) \
1409 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1410 dstW, uvalpha, y, fmt, hasAlpha); \
/* 32-bit RGB with runtime alpha decision, plus explicit alpha (a32*) and
 * no-alpha (x32*) variants, and the dithered low-depth formats. */
1414 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1415 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1417 #if CONFIG_SWSCALE_ALPHA
1418 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1419 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1421 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1422 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1424 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1425 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1426 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1427 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1428 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1429 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1430 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1431 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * Full-chroma-resolution filtered output: one U/V pair per output pixel
 * (no 2:1 chroma subsampling on the output side), computed per-pixel with
 * the yuv2rgb coefficients instead of lookup tables.
 * NOTE(review): elided listing — the R/G/B byte stores between the two
 * alpha stores and the 'dest += step' advance are missing from this view.
 */
1433 static av_always_inline void
1434 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1435 const int16_t **lumSrc, int lumFilterSize,
1436 const int16_t *chrFilter, const int16_t **chrUSrc,
1437 const int16_t **chrVSrc, int chrFilterSize,
1438 const int16_t **alpSrc, uint8_t *dest,
1439 int dstW, int y, enum PixelFormat target, int hasAlpha)
1442 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1444 for (i = 0; i < dstW; i++) {
/* accumulators pre-biased: rounding term plus -128 chroma recentering */
1447 int U = (1<<9)-(128 << 19);
1448 int V = (1<<9)-(128 << 19);
1452 for (j = 0; j < lumFilterSize; j++) {
1453 Y += lumSrc[j][i] * lumFilter[j];
1455 for (j = 0; j < chrFilterSize; j++) {
1456 U += chrUSrc[j][i] * chrFilter[j];
1457 V += chrVSrc[j][i] * chrFilter[j];
1464 for (j = 0; j < lumFilterSize; j++) {
1465 A += alpSrc[j][i] * lumFilter[j];
1469 A = av_clip_uint8(A);
1471 Y -= c->yuv2rgb_y_offset;
1472 Y *= c->yuv2rgb_y_coeff;
1474 R = Y + V*c->yuv2rgb_v2r_coeff;
1475 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1476 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when some component left the 30-bit range */
1477 if ((R | G | B) & 0xC0000000) {
1478 R = av_clip_uintp2(R, 30);
1479 G = av_clip_uintp2(G, 30);
1480 B = av_clip_uintp2(B, 30);
/* alpha-first layouts (ARGB/ABGR) write A at offset 0 ... */
1485 dest[0] = hasAlpha ? A : 255;
1499 dest[3] = hasAlpha ? A : 255;
/* ... alpha-last layouts (RGBA/BGRA) write it at offset 3 */
1502 dest[0] = hasAlpha ? A : 255;
1516 dest[3] = hasAlpha ? A : 255;
/* Full-chroma output entry points: runtime-alpha, forced-alpha and
 * no-alpha 32-bit variants plus the 24-bit formats. */
1524 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1525 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1526 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1527 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1529 #if CONFIG_SWSCALE_ALPHA
1530 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1531 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1532 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1533 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1535 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1536 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1537 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1538 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1540 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1541 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1543 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1544 int width, int height,
1548 uint8_t *ptr = plane + stride*y;
1549 for (i=0; i<height; i++) {
1550 memset(ptr, val, width);
/* Endian-aware 16-bit load, plus r/b aliases that swap the outer components
 * so the rgb48 readers below also serve the bgr48 formats. */
1555 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1557 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1558 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * 48-bit RGB/BGR -> 16-bit luma, using the fixed-point BT.601 coefficients
 * (RY/GY/BY) with round-to-nearest.
 */
1560 static av_always_inline void
1561 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1562 enum PixelFormat origin)
1565 for (i = 0; i < width; i++) {
1566 unsigned int r_b = input_pixel(&src[i*3+0]);
1567 unsigned int g = input_pixel(&src[i*3+1]);
1568 unsigned int b_r = input_pixel(&src[i*3+2]);
1570 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * 48-bit RGB/BGR -> 16-bit chroma, one U/V sample per input pixel.
 * NOTE(review): src2 is unused in the visible lines — presumably the two
 * source rows are required to be identical (as asserted in the 8-bit
 * readers below); confirm against callers.
 */
1574 static av_always_inline void
1575 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1576 const uint16_t *src1, const uint16_t *src2,
1577 int width, enum PixelFormat origin)
1581 for (i = 0; i < width; i++) {
1582 int r_b = input_pixel(&src1[i*3+0]);
1583 int g = input_pixel(&src1[i*3+1]);
1584 int b_r = input_pixel(&src1[i*3+2]);
1586 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1587 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * 48-bit RGB/BGR -> chroma at half horizontal resolution: each U/V sample
 * is computed from the rounded average of two adjacent input pixels.
 */
1591 static av_always_inline void
1592 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1593 const uint16_t *src1, const uint16_t *src2,
1594 int width, enum PixelFormat origin)
1598 for (i = 0; i < width; i++) {
1599 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1600 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1601 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1603 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1604 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Stamp out the uint8_t*-typed ToY/ToUV/ToUV_half entry points for one
 * 48-bit format; the casts adapt the generic uint8_t* row interface to the
 * uint16_t* templates above. */
1612 #define rgb48funcs(pattern, BE_LE, origin) \
1613 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1614 int width, uint32_t *unused) \
1616 const uint16_t *src = (const uint16_t *) _src; \
1617 uint16_t *dst = (uint16_t *) _dst; \
1618 rgb48ToY_c_template(dst, src, width, origin); \
1621 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1622 const uint8_t *_src1, const uint8_t *_src2, \
1623 int width, uint32_t *unused) \
1625 const uint16_t *src1 = (const uint16_t *) _src1, \
1626 *src2 = (const uint16_t *) _src2; \
1627 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1628 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1631 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1632 const uint8_t *_src1, const uint8_t *_src2, \
1633 int width, uint32_t *unused) \
1635 const uint16_t *src1 = (const uint16_t *) _src1, \
1636 *src2 = (const uint16_t *) _src2; \
1637 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1638 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* Instantiate the 48-bit input readers for all four component orders. */
1641 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1642 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1643 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1644 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Load one packed pixel: native 32-bit load for the 4-byte formats,
 * endian-aware 16-bit load for the 2-byte (16/15/12 bpp) formats. */
1646 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1647 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1648 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic packed-RGB (16..32 bpp) -> 15-bit luma.  The sh*/mask*/sh
 * parameters describe the component layout; rsh/gsh/bsh prescale the
 * coefficients so all layouts share one fixed-point precision S.
 */
1650 static av_always_inline void
1651 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1652 int width, enum PixelFormat origin,
1653 int shr, int shg, int shb, int shp,
1654 int maskr, int maskg, int maskb,
1655 int rsh, int gsh, int bsh, int S)
1657 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1658 rnd = (32<<((S)-1)) + (1<<(S-7));
1661 for (i = 0; i < width; i++) {
1662 int px = input_pixel(i) >> shp;
1663 int b = (px & maskb) >> shb;
1664 int g = (px & maskg) >> shg;
1665 int r = (px & maskr) >> shr;
1667 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/*
 * Generic packed-RGB -> 15-bit chroma, one U/V sample per input pixel;
 * layout parameters as in rgb16_32ToY_c_template.
 */
1671 static av_always_inline void
1672 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1673 const uint8_t *src, int width,
1674 enum PixelFormat origin,
1675 int shr, int shg, int shb, int shp,
1676 int maskr, int maskg, int maskb,
1677 int rsh, int gsh, int bsh, int S)
1679 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1680 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1681 rnd = (256<<((S)-1)) + (1<<(S-7));
1684 for (i = 0; i < width; i++) {
1685 int px = input_pixel(i) >> shp;
1686 int b = (px & maskb) >> shb;
1687 int g = (px & maskg) >> shg;
1688 int r = (px & maskr) >> shr;
1690 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1691 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/*
 * Generic packed-RGB -> chroma at half horizontal resolution.  Adjacent
 * pixel pairs are summed in packed form: green is masked out first
 * (maskgx), so red+blue can be added without cross-component carries,
 * then the masks (widened by one bit) extract the summed components.
 */
1695 static av_always_inline void
1696 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1697 const uint8_t *src, int width,
1698 enum PixelFormat origin,
1699 int shr, int shg, int shb, int shp,
1700 int maskr, int maskg, int maskb,
1701 int rsh, int gsh, int bsh, int S)
1703 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1704 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1705 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
/* widen each mask by one bit: component sums occupy one extra bit */
1708 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1709 for (i = 0; i < width; i++) {
1710 int px0 = input_pixel(2 * i + 0) >> shp;
1711 int px1 = input_pixel(2 * i + 1) >> shp;
1712 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1713 int rb = px0 + px1 - g;
1715 b = (rb & maskb) >> shb;
/* 565-style layouts need green re-extracted after the shift fixup */
1716 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1717 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1720 g = (g & maskg) >> shg;
1722 r = (rb & maskr) >> shr;
/* extra +1 in the shift compensates for summing two pixels */
1724 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1725 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Stamp out ToY/ToUV/ToUV_half readers for one packed-RGB layout by binding
 * the layout's shift/mask/precision parameters at compile time. */
1731 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1732 maskg, maskb, rsh, gsh, bsh, S) \
1733 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1734 int width, uint32_t *unused) \
1736 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1737 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1740 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1741 const uint8_t *src, const uint8_t *dummy, \
1742 int width, uint32_t *unused) \
1744 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1745 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1748 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1749 const uint8_t *src, const uint8_t *dummy, \
1750 int width, uint32_t *unused) \
1752 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1753 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One instantiation per packed-RGB layout; the shift/mask columns encode
 * where each component sits in the loaded pixel word. */
1756 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1757 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1758 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1759 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1760 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1761 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1762 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1763 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1764 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1765 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1766 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1767 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/** Extract the alpha channel of an alpha-first 32-bit pixel row (ABGR/ARGB)
 *  into 14-bit-scaled int16 samples (<< 6). */
static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[n * 4] << 6;
}
/** Extract the alpha channel of an alpha-last 32-bit pixel row (RGBA/BGRA)
 *  into 14-bit-scaled int16 samples (<< 6). */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *a = src + 3;  /* alpha is the 4th byte of each pixel */
    int n;

    for (n = 0; n < width; n++, a += 4)
        dst[n] = *a << 6;
}
/** PAL8 -> alpha: look each index up in the 32-bit palette and take the
 *  top byte (alpha), scaled to the internal 14-bit range (<< 6). */
static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int n;

    for (n = 0; n < width; n++) {
        unsigned idx = src[n];
        dst[n] = (pal[idx] >> 24) << 6;
    }
}
/**
 * PAL8 -> luma: look each index up in the palette and take the low byte
 * (luma), scaled to the internal 14-bit range (<< 6).
 *
 * Fix: 'width' was declared 'long' while every sibling reader (palToA_c,
 * palToUV_c) and the lumToYV12/alpToYV12 function-pointer type used in
 * hyscale() take 'int' — calling through a function pointer with a
 * mismatched parameter type is undefined behavior on LP64 targets.
 */
static void palToY_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;

    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= (pal[d] & 0xFF)<<6;
    }
}
/*
 * PAL8 -> chroma: bytes 1 and 2 of each palette entry hold U and V,
 * scaled to the internal 14-bit range (<< 6).
 * NOTE(review): dstU is uint16_t* but dstV is int16_t* — the values stored
 * (max 255 << 6) fit either, but the asymmetry looks unintentional; confirm
 * against the chrToYV12 function-pointer type.
 */
1805 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1806 const uint8_t *src1, const uint8_t *src2,
1807 int width, uint32_t *pal)
/* both source rows must alias the same palette-index line */
1810 assert(src1 == src2);
1811 for (i=0; i<width; i++) {
1812 int p= pal[src1[i]];
1814 dstU[i]= (uint8_t)(p>> 8)<<6;
1815 dstV[i]= (uint8_t)(p>>16)<<6;
/** Expand 1 bpp white-is-zero mono to 15-bit luma: each bit becomes 0 or
 *  16383, MSB first; the source byte is inverted since white is stored as 0. */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;

    for (i = 0; i < width / 8; i++) {
        int bits = ~src[i];
        for (j = 0; j < 8; j++)
            dst[8 * i + j] = ((bits >> (7 - j)) & 1) * 16383;
    }
    if (width & 7) {
        /* trailing partial byte */
        int bits = ~src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8 * i + j] = ((bits >> (7 - j)) & 1) * 16383;
    }
}
/** Expand 1 bpp black-is-zero mono to 15-bit luma: each bit becomes 0 or
 *  16383, MSB first (no inversion — set bits are white). */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;

    for (i = 0; i < width / 8; i++) {
        int bits = src[i];
        for (j = 0; j < 8; j++)
            dst[8 * i + j] = ((bits >> (7 - j)) & 1) * 16383;
    }
    if (width & 7) {
        /* trailing partial byte */
        int bits = src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8 * i + j] = ((bits >> (7 - j)) & 1) * 16383;
    }
}
//FIXME yuy2* can read up to 7 samples too much

/** YUYV -> luma plane: luma is the even byte of each 2-byte sample pair. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[2 * n];
}
/** YUYV -> chroma planes: bytes 1 and 3 of each 4-byte group are U and V.
 *  The two source rows must be the same line (asserted). */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n + 1];
        dstV[n] = src1[4 * n + 3];
    }
    assert(src1 == src2);
}
/* Byte-swap one 16-bit-per-sample luma line (BE<->LE grayscale input). */
1870 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1873 const uint16_t *src = (const uint16_t *) _src;
1874 uint16_t *dst = (uint16_t *) _dst;
1875 for (i=0; i<width; i++) {
1876 dst[i] = av_bswap16(src[i]);
/* Byte-swap one pair of 16-bit-per-sample chroma lines (BE<->LE input). */
1880 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1881 const uint8_t *_src2, int width, uint32_t *unused)
1884 const uint16_t *src1 = (const uint16_t *) _src1,
1885 *src2 = (const uint16_t *) _src2;
1886 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1887 for (i=0; i<width; i++) {
1888 dstU[i] = av_bswap16(src1[i]);
1889 dstV[i] = av_bswap16(src2[i]);
/* Almost identical to yuy2ToY/UV, and exists only because calling
 * yuy2ToY/UV(dst, src+1, ...) would make every access unaligned. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[2 * n + 1];  /* UYVY: luma is the odd byte */
}
/** UYVY -> chroma planes: bytes 0 and 2 of each 4-byte group are U and V.
 *  The two source rows must be the same line (asserted). */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n];
        dstV[n] = src1[4 * n + 2];
    }
    assert(src1 == src2);
}
1914 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1915 const uint8_t *src, int width)
1918 for (i = 0; i < width; i++) {
1919 dst1[i] = src[2*i+0];
1920 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V,U,V... — U gets the even bytes. */
1924 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1925 const uint8_t *src1, const uint8_t *src2,
1926 int width, uint32_t *unused)
1928 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U,V,U... — destinations swapped vs NV12. */
1931 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1932 const uint8_t *src1, const uint8_t *src2,
1933 int width, uint32_t *unused)
1935 nvXXtoUV_c(dstV, dstU, src1, width);
/* Redefine input_pixel for the 16-bit readers below (endian-aware load). */
1938 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/*
 * BGR24 -> 15-bit luma with round-to-nearest.
 * NOTE(review): elided listing — the b/g/r byte loads (src[3*i+0..2]) are
 * missing between the numbered lines; cf. bgr24ToUV_c below.
 */
1940 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1941 int width, uint32_t *unused)
1944 for (i=0; i<width; i++) {
1949 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* BGR24 -> 15-bit chroma, one U/V per input pixel; the two source rows
 * must alias the same line (asserted). */
1953 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1954 const uint8_t *src2, int width, uint32_t *unused)
1957 for (i=0; i<width; i++) {
1958 int b= src1[3*i + 0];
1959 int g= src1[3*i + 1];
1960 int r= src1[3*i + 2];
1962 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1963 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1965 assert(src1 == src2);
/* BGR24 -> chroma at half horizontal resolution: sums of adjacent pixel
 * pairs feed the coefficients; the shift is one smaller to compensate. */
1968 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1969 const uint8_t *src2, int width, uint32_t *unused)
1972 for (i=0; i<width; i++) {
1973 int b= src1[6*i + 0] + src1[6*i + 3];
1974 int g= src1[6*i + 1] + src1[6*i + 4];
1975 int r= src1[6*i + 2] + src1[6*i + 5];
1977 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1978 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1980 assert(src1 == src2);
/*
 * RGB24 -> 15-bit luma (component order reversed vs bgr24ToY_c).
 * NOTE(review): elided listing — the r/g/b byte loads and the trailing
 * 'uint32_t *unused' parameter are missing from this view.
 */
1983 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
1987 for (i=0; i<width; i++) {
1992 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* RGB24 -> 15-bit chroma, one U/V per input pixel.
 * NOTE(review): the assert(src1 == src2) present in bgr24ToUV_c is not
 * visible here — possibly elided from this listing. */
1996 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1997 const uint8_t *src2, int width, uint32_t *unused)
2001 for (i=0; i<width; i++) {
2002 int r= src1[3*i + 0];
2003 int g= src1[3*i + 1];
2004 int b= src1[3*i + 2];
2006 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
2007 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* RGB24 -> chroma at half horizontal resolution (adjacent-pair sums). */
2011 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
2012 const uint8_t *src2, int width, uint32_t *unused)
2016 for (i=0; i<width; i++) {
2017 int r= src1[6*i + 0] + src1[6*i + 3];
2018 int g= src1[6*i + 1] + src1[6*i + 4];
2019 int b= src1[6*i + 2] + src1[6*i + 5];
2021 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
2022 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/*
 * Horizontal scaling: 16-bit input samples through a 14-bit FIR filter to
 * 19-bit output (used by the high-bit-depth output paths).
 * NOTE(review): elided listing — the 'int sh = bits - 5;'-style computation
 * between lines 2033 and 2036 is missing from this view.
 */
2026 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
2027 const int16_t *filter,
2028 const int16_t *filterPos, int filterSize)
2031 int32_t *dst = (int32_t *) _dst;
2032 const uint16_t *src = (const uint16_t *) _src;
2033 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
/* RGB/PAL8 sources below 16 bits use a fixed shift */
2036 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2039 for (i = 0; i < dstW; i++) {
2041 int srcPos = filterPos[i];
2044 for (j = 0; j < filterSize; j++) {
2045 val += src[srcPos + j] * filter[filterSize * i + j];
2047 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2048 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/*
 * Horizontal scaling: 16-bit input samples through a 14-bit FIR filter to
 * the standard 15-bit internal format.
 */
2052 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2053 const int16_t *filter,
2054 const int16_t *filterPos, int filterSize)
2057 const uint16_t *src = (const uint16_t *) _src;
2058 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
/* RGB/PAL8 sources always use the 13-bit shift */
2061 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2063 for (i = 0; i < dstW; i++) {
2065 int srcPos = filterPos[i];
2068 for (j = 0; j < filterSize; j++) {
2069 val += src[srcPos + j] * filter[filterSize * i + j];
2071 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2072 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2076 // bilinear / bicubic scaling
/*
 * Horizontal scaling: 8-bit input through the FIR filter to 15 bits.
 * The FFMIN clamp guards against filter overshoot (e.g. bicubic lobes).
 */
2077 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2078 const int16_t *filter, const int16_t *filterPos,
2082 for (i=0; i<dstW; i++) {
2084 int srcPos= filterPos[i];
2086 for (j=0; j<filterSize; j++) {
2087 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2089 //filter += hFilterSize;
2090 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/*
 * Horizontal scaling: 8-bit input through the FIR filter to 19 bits
 * (high-bit-depth pipeline variant of hScale8To15_c).
 */
2095 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2096 const int16_t *filter, const int16_t *filterPos,
2100 int32_t *dst = (int32_t *) _dst;
2101 for (i=0; i<dstW; i++) {
2103 int srcPos= filterPos[i];
2105 for (j=0; j<filterSize; j++) {
2106 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2108 //filter += hFilterSize;
2109 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2114 //FIXME all pal and rgb srcFormats could do this conversion as well
2115 //FIXME all scalers more complex than bilinear could do half of this transform
/* Compress full-range (JPEG) chroma to limited (MPEG) range in the 15-bit
 * internal format; the FFMIN guards the fixed-point multiply overflow. */
2116 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2119 for (i = 0; i < width; i++) {
2120 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2121 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/** Expand limited (MPEG) range chroma to full (JPEG) range, in place, in
 *  the 15-bit internal format (inverse of chrRangeToJpeg_c). */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; //1469
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11;
    }
}
/* Compress full-range luma to limited range in the 15-bit internal format;
 * the FFMIN guards the fixed-point multiply overflow. */
2132 static void lumRangeToJpeg_c(int16_t *dst, int width)
2135 for (i = 0; i < width; i++)
2136 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/** Expand limited-range luma to full range, in place, in the 15-bit
 *  internal format (inverse of lumRangeToJpeg_c). */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
}
/* 19-bit (int32 samples) variant of chrRangeToJpeg_c; constants are the
 * 15-bit ones scaled by 16 (<< 4). */
2145 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2148 int32_t *dstU = (int32_t *) _dstU;
2149 int32_t *dstV = (int32_t *) _dstV;
2150 for (i = 0; i < width; i++) {
2151 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2152 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* Compress full-range (JPEG) chroma to limited-range (MPEG), in place;
 * high-bit-depth variant.  The int16_t pointers are a type pun over int32_t
 * sample buffers (values 16x the 15-bit path). */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        const int u = dstU[i];
        const int v = dstV[i];
        dstU[i] = (u * 1799 + (4081085 << 4)) >> 11; //1469
        dstV[i] = (v * 1799 + (4081085 << 4)) >> 11; //1469
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG), in place, for the
 * high-bit-depth pipeline (int32_t samples behind the int16_t pointer).
 *
 * The multiply is performed in unsigned arithmetic: at the clamp value,
 * (30189 << 4) * 4769 exceeds INT32_MAX, so the previous signed multiply was
 * undefined behaviour.  Unsigned wraparound plus the conversion back to int
 * gives the intended two's-complement result. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        const int y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);
        dst[i] = (int)((unsigned)y * 4769u - (39057361u << 2)) >> 12;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG), in place;
 * high-bit-depth variant on int32_t samples behind the int16_t pointer.
 * Constants are the 15-bit path's, rescaled: 14071/4 == 3517 exactly. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    const int coef = 14071 / 4;
    const int bias = (33561947 << 4) / 4;
    int i;

    for (i = 0; i < width; i++)
        dst[i] = (dst[i] * coef + bias) >> 12;
}
2180 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2181 const uint8_t *src, int srcW, int xInc)
2184 unsigned int xpos=0;
2185 for (i=0;i<dstWidth;i++) {
2186 register unsigned int xx=xpos>>16;
2187 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2188 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2191 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2192 dst[i] = src[srcW-1]*128;
// *** horizontal scale Y line to temp buffer
/* Convert one source line to planar Y (or A when isAlpha) if needed, then
 * horizontally scale it into dst, then apply the luma range conversion.
 * NOTE(review): several structural lines of this routine (braces, condition
 * headers) are not visible in this chunk; comments annotate only what is
 * shown. */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    /* isAlpha selects the alpha-plane variants of the conversion callbacks;
     * the alpha plane never gets a range conversion. */
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;

    /* Non-planar/packed input is first converted into formatConvBuffer and
     * scaling then reads from there. */
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;

    /* High-bit-depth path: RGB/PAL sources are treated as 13-bit here
     * (NOTE(review): presumably the depth produced by the RGB->YUV
     * converters — confirm), native YUV keeps its descriptor depth. */
        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
        c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
    } else if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);

    /* convertRange is NULL for the alpha plane and when no MPEG<->JPEG
     * range conversion was configured. */
        convertRange(dst, dstWidth);
2224 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2225 int dstWidth, const uint8_t *src1,
2226 const uint8_t *src2, int srcW, int xInc)
2229 unsigned int xpos=0;
2230 for (i=0;i<dstWidth;i++) {
2231 register unsigned int xx=xpos>>16;
2232 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2233 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2234 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2237 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2238 dst1[i] = src1[srcW-1]*128;
2239 dst2[i] = src2[srcW-1]*128;
/* Convert one pair of chroma source lines to planar U/V if needed, then
 * horizontally scale both into dst1/dst2, then apply the chroma range
 * conversion.  NOTE(review): several structural lines (braces, condition
 * headers, the src2 reassignment) are not visible in this chunk; comments
 * annotate only what is shown. */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    /* buf2 is the V half of formatConvBuffer; the +78 padding and 16-byte
     * alignment presumably guard SIMD over-reads — TODO confirm. */
    uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
    c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
    src1= formatConvBuffer;

    /* High-bit-depth path: same shift selection as hyscale(). */
        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
        c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
        c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
    } else if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);

    /* Optional MPEG<->JPEG chroma range conversion, U and V together. */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions — planar writers (yuv2yuv1/yuv2yuvX) and
 * packed writers (yuv2packed1/2/X) — matching the destination pixel format.
 * Called at init time and re-invoked near the bottom of the frame (see
 * swScale) where the SIMD variants would overwrite buffer tails.
 * NOTE(review): most switch `case` labels, `break`s and braces of this
 * routine are not visible in this chunk; comments annotate only the
 * statements shown. */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    /* --- planar outputs, selected by layout / bit depth --- */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        /* depth_minus1 == 8 means 9-bit formats; otherwise 10-bit. */
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        /* 8-bit planar: the only depth with a dedicated 1-tap writer. */
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2yuvX = yuv2yuvX_c;

    /* --- packed RGB with full-resolution horizontal chroma --- */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;

    /* --- packed outputs, regular (subsampled) horizontal chroma --- */
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            /* 32-bit RGB/BGR: alpha-aware variant only when alpha support is
             * compiled in and the format carries alpha. */
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
                *yuv2packed1 = yuv2rgba32_1_c;
                *yuv2packed2 = yuv2rgba32_2_c;
                *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
                *yuv2packed1 = yuv2rgbx32_1_c;
                *yuv2packed2 = yuv2rgbx32_2_c;
                *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
                *yuv2packed1 = yuv2rgba32_1_1_c;
                *yuv2packed2 = yuv2rgba32_1_2_c;
                *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
                *yuv2packed1 = yuv2rgbx32_1_1_c;
                *yuv2packed2 = yuv2rgbx32_1_2_c;
                *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
/* Set DEBUG_SWSCALE_BUFFERS to 1 to trace ring-buffer state while scaling;
 * DEBUG_BUFFERS() expects a SwsContext named `c` in scope. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main scaling loop: horizontally scale the source slice line by line into
 * the luma/chroma ring buffers, then vertically scale/convert to the
 * destination.  Returns the number of destination lines written.
 * NOTE(review): numerous structural lines (braces, some declarations such as
 * dstY/lastDstY/enough_lines, loop/if headers) are not visible in this
 * chunk; comments annotate only the visible statements. */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    /* chroma slice extent: start rounds down, height rounds up (ceil). */
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    /* Packed input: alias plane 3 onto plane 0 so the alpha path below can
     * index it uniformly. */
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    /* One-shot warning: unaligned destination strides defeat aligned SIMD
     * stores. */
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
        if (!should_dither) {
            c->chrDither8 = c->lumDither8 = ff_sws_pb_64;

    /* Produce destination lines until the slice runs out of input. */
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,

        /* Input line ranges needed by the vertical filters for this output
         * line (the *2 variants look ahead to the end of the chroma group). */
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* Buffer whatever the slice does contain; output resumes on a
             * later slice. */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (should_dither) {
            c->chrDither8 = dither_8x8_128[chrDstY & 7];
            c->lumDither8 = dither_8x8_128[dstY & 7];
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,

            /* Pointers into the ring buffers positioned so that index 0 is
             * the first input line of the vertical filter for this output. */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;

            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat))
                    dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
                             dest, dstW, chrDstW);
                } else { //General YV12
                    yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter + chrDstY * vChrFilterSize,
                             chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, dstW, chrDstW);
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);

    /* Destination wants alpha but the source provided none: fill opaque. */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    /* Flush non-temporal stores issued by the MMX2 output paths. */
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/* One-time C-path initialisation: pick output writers, the per-format
 * input-to-YV12 converters (chroma, luma, alpha), the horizontal scalers,
 * and the MPEG<->JPEG range converters, according to src/dst format, bit
 * depth and flags.  NOTE(review): several `switch`/`if` headers and braces
 * are not visible in this chunk; comments annotate only the visible lines. */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,

    /* --- chroma input converter (packed/RGB/paletted -> planar U,V) --- */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    /* High-bit-depth planar YUV whose endianness differs from the host is
     * byte-swapped on input. */
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;

    /* RGB sources: when horizontal chroma subsampling is active the *_half
     * converters average two source pixels per chroma sample. */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;

    /* --- luma and alpha input converters --- */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUYV422 :
    case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;

    /* --- horizontal scalers, chosen by source/destination bit depth; the
     * fast-bilinear shortcuts only exist for the 8-bit-in path --- */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            c->hyScale = c->hcScale = hScale8To19_c;
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;

    /* --- MPEG<->JPEG range conversion; RGB output handles range itself ---
     * NOTE(review): the elided condition presumably selects direction by
     * c->srcRange — confirm. */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;

    /* Grayscale/mono sources carry no chroma, so skip hcscale entirely. */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2969 SwsFunc ff_getSwsFunc(SwsContext *c)
2971 sws_init_swScale_c(c);
2974 ff_sws_init_swScale_mmx(c);
2976 ff_sws_init_swScale_altivec(c);