2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
// RGB -> YUV conversion coefficients as fixed-point integers with
// RGB2YUV_SHIFT fractional bits. Luma terms (BY/GY/RY) are scaled by
// 219/255 (limited-range luma), chroma terms (..U/..V) by 224/255
// (limited-range chroma); constants match BT.601-style weights
// (0.299/0.587/0.114) — NOTE(review): colorspace identity inferred from the
// weights only, confirm against the callers.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
// 2x2 ordered-dither pattern (values 0..3), replicated across an 8-wide row;
// used when quantizing to very low bit depths.
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 ordered-dither pattern scaled for a coarser quantization step
// (values 0..6), replicated across an 8-wide row.
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 ordered-dither (Bayer-style) matrix with values 0..15, each 4-wide row
// repeated twice to fill 8 columns. Non-static: referenced from other
// translation units.
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 ordered-dither matrix with amplitude 0..31 (32 levels).
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 ordered-dither matrix with amplitude 0..72 (73 levels).
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 ordered-dither matrix with amplitude 0..~217 (220 levels); used by the
// yuv2mono_* output paths below. NOTE(review): this extract shows several
// alternative definitions of dither_8x8_220 further down — in the full source
// they are selected by preprocessor conditionals not visible here; confirm
// which variant is actually compiled.
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
// Alternative dither_8x8_220 table whose values are pre-warped to compensate
// a display gamma of 1.5. NOTE(review): duplicate symbol name — presumably an
// #elif alternative in the full source (guards stripped from this extract).
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
// Alternative dither_8x8_220 table pre-warped for a display gamma of 2.0.
// NOTE(review): duplicate symbol name — presumably an #elif alternative in
// the full source (guards stripped from this extract).
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
// Alternative dither_8x8_220 table pre-warped for a display gamma of 2.5.
// NOTE(review): duplicate symbol name — presumably the #else alternative in
// the full source (guards stripped from this extract).
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
// 8x8 ordered-dither matrix with amplitude 0..126 (step 2, 128 range); the
// same values recur as the last plane of the `dithers` table below.
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
// Eight packed bytes of 64, 8-byte aligned — a SIMD-friendly constant vector
// (ff_ prefix: exported for use by asm/other swscale units).
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
// Family of eight 8x8 ordered-dither matrices of increasing amplitude
// (max values roughly 1, 3, 7, 15, 31, 63, 63, 126). Presumably indexed by
// the number of bits being dithered away — TODO confirm against the code
// that reads `dithers` (not visible in this extract).
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
// Flat "no dither" row: eight bytes of 64 (file-local counterpart of
// ff_sws_pb_64 above).
273 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
// Scaling factors used when re-quantizing between bit depths with dithering.
// Presumably indexed as dither_scale[src_depth-1][dst_depth-1] (values like
// 2^k-1 and related constants suggest depth-conversion multipliers) — TODO
// confirm against the consumer code, which is not visible in this extract.
275 const uint16_t dither_scale[15][16]={
276 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
277 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
278 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
279 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
280 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
281 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
282 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
283 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
284 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
285 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
286 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
287 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
288 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
289 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
290 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
// Vertical-filter output template for >8-bit planar YUV (9/10/16-bit).
// Applies lumFilter/chrFilter across lumFilterSize/chrFilterSize source rows
// and stores 16-bit words into the Y/U/V (and optional alpha) planes of
// dest[4], big- or little-endian per `big_endian` (AV_WB16 vs AV_WL16).
// When output_bits == 16 (`dword`) the sources are read as int32_t, otherwise
// the pointer arrays are reinterpreted as int16_t rows; each product is
// pre-shifted by 1 to keep the accumulator in int range. Values are clipped
// with av_clip_uint16. `output_pixel` is #undef'd/redefined per template in
// the full source. NOTE(review): this extract omits several structural lines
// of the function body (braces, declarations of i/j, parts of the
// output_pixel definition).
293 static av_always_inline void
294 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
295 int lumFilterSize, const int16_t *chrFilter,
296 const int32_t **chrUSrc, const int32_t **chrVSrc,
297 int chrFilterSize, const int32_t **alpSrc,
298 uint16_t *dest[4], int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int dword= output_bits == 16;
304 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
305 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
306 int shift = 11 + 4*dword + 16 - output_bits - 1;
308 #define output_pixel(pos, val) \
310 AV_WB16(pos, av_clip_uint16(val >> shift)); \
312 AV_WL16(pos, av_clip_uint16(val >> shift)); \
314 for (i = 0; i < dstW; i++) {
315 int val = 1 << (26-output_bits + 4*dword - 1);
318 for (j = 0; j < lumFilterSize; j++)
319 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
321 output_pixel(&yDest[i], val);
325 for (i = 0; i < chrDstW; i++) {
326 int u = 1 << (26-output_bits + 4*dword - 1);
327 int v = 1 << (26-output_bits + 4*dword - 1);
330 for (j = 0; j < chrFilterSize; j++) {
331 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
332 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
335 output_pixel(&uDest[i], u);
336 output_pixel(&vDest[i], v);
340 if (CONFIG_SWSCALE_ALPHA && aDest) {
341 for (i = 0; i < dstW; i++) {
342 int val = 1 << (26-output_bits + 4*dword - 1);
345 for (j = 0; j < lumFilterSize; j++)
346 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
348 output_pixel(&aDest[i], val);
// Vertical-filter output template for 9/10-bit planar YUV: like the 16-bit
// template above but with int16_t sources only and clipping to `output_bits`
// via av_clip_uintp2. Endianness is selected by `big_endian` (AV_WB16 vs
// AV_WL16); the optional alpha plane reuses lumFilter. NOTE(review): this
// extract omits structural lines of the body (braces, i/j declarations,
// parts of the output_pixel definition).
354 static av_always_inline void
355 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
356 int lumFilterSize, const int16_t *chrFilter,
357 const int16_t **chrUSrc, const int16_t **chrVSrc,
358 int chrFilterSize, const int16_t **alpSrc,
359 uint16_t *dest[4], int dstW, int chrDstW,
360 int big_endian, int output_bits)
362 //FIXME Optimize (just quickly written not optimized..)
364 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
365 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
366 int shift = 11 + 16 - output_bits - 1;
368 #define output_pixel(pos, val) \
370 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
372 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
374 for (i = 0; i < dstW; i++) {
375 int val = 1 << (26-output_bits - 1);
378 for (j = 0; j < lumFilterSize; j++)
379 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
381 output_pixel(&yDest[i], val);
385 for (i = 0; i < chrDstW; i++) {
386 int u = 1 << (26-output_bits - 1);
387 int v = 1 << (26-output_bits - 1);
390 for (j = 0; j < chrFilterSize; j++) {
391 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
392 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
395 output_pixel(&uDest[i], u);
396 output_pixel(&vDest[i], v);
400 if (CONFIG_SWSCALE_ALPHA && aDest) {
401 for (i = 0; i < dstW; i++) {
402 int val = 1 << (26-output_bits - 1);
405 for (j = 0; j < lumFilterSize; j++)
406 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
408 output_pixel(&aDest[i], val);
// Generator macro: emits a concrete yuv2yuvX<bits><BE|LE>_c entry point that
// casts the generic int16_t** source-row arrays to the template's sample type
// (int16_t for 9/10-bit, int32_t for 16-bit) and forwards to the given
// template with the endianness flag and bit depth baked in. Instantiated
// below for 9/10/16-bit BE and LE variants.
414 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
415 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
416 const int16_t **_lumSrc, int lumFilterSize, \
417 const int16_t *chrFilter, const int16_t **_chrUSrc, \
418 const int16_t **_chrVSrc, \
419 int chrFilterSize, const int16_t **_alpSrc, \
420 uint8_t *_dest[4], int dstW, int chrDstW) \
422 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
423 **chrUSrc = (const typeX_t **) _chrUSrc, \
424 **chrVSrc = (const typeX_t **) _chrVSrc, \
425 **alpSrc = (const typeX_t **) _alpSrc; \
426 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
427 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
428 alpSrc, (uint16_t **) _dest, \
429 dstW, chrDstW, is_be, bits); \
431 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
432 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
433 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
434 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
435 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
436 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
// Vertical-filter output for 8-bit planar YUV (+ optional alpha). Each output
// sample accumulates filter taps over the source rows, seeded with the
// per-column dither value (lumDither8/chrDither8 from the context, shifted
// left 12), then is scaled down by >>19 and clipped to 8 bits. The V dither
// index is offset by 3 to decorrelate U/V dither patterns. NOTE(review):
// this extract omits structural lines (braces, i/j declarations).
438 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
439 const int16_t **lumSrc, int lumFilterSize,
440 const int16_t *chrFilter, const int16_t **chrUSrc,
441 const int16_t **chrVSrc,
442 int chrFilterSize, const int16_t **alpSrc,
443 uint8_t *dest[4], int dstW, int chrDstW)
445 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
446 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
448 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
450 //FIXME Optimize (just quickly written not optimized..)
451 for (i=0; i<dstW; i++) {
452 int val = lumDither[i & 7] << 12;
454 for (j=0; j<lumFilterSize; j++)
455 val += lumSrc[j][i] * lumFilter[j];
457 yDest[i]= av_clip_uint8(val>>19);
461 for (i=0; i<chrDstW; i++) {
462 int u = chrDither[i & 7] << 12;
463 int v = chrDither[(i + 3) & 7] << 12;
465 for (j=0; j<chrFilterSize; j++) {
466 u += chrUSrc[j][i] * chrFilter[j];
467 v += chrVSrc[j][i] * chrFilter[j];
470 uDest[i]= av_clip_uint8(u>>19);
471 vDest[i]= av_clip_uint8(v>>19);
474 if (CONFIG_SWSCALE_ALPHA && aDest)
475 for (i=0; i<dstW; i++) {
476 int val = lumDither[i & 7] << 12;
478 for (j=0; j<lumFilterSize; j++)
479 val += alpSrc[j][i] * lumFilter[j];
481 aDest[i]= av_clip_uint8(val>>19);
// Unfiltered (single source row) 8-bit planar YUV output: adds the per-column
// dither byte and scales the 15-bit intermediate down with >>7, clipping to
// 8 bits. Alpha plane handled the same way when present. NOTE(review): this
// extract omits structural lines (braces, declaration of i).
485 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
486 const int16_t *chrUSrc, const int16_t *chrVSrc,
487 const int16_t *alpSrc,
488 uint8_t *dest[4], int dstW, int chrDstW)
490 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
491 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
493 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
495 for (i=0; i<dstW; i++) {
496 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
497 yDest[i]= av_clip_uint8(val);
501 for (i=0; i<chrDstW; i++) {
502 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
503 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
504 uDest[i]= av_clip_uint8(u);
505 vDest[i]= av_clip_uint8(v);
508 if (CONFIG_SWSCALE_ALPHA && aDest)
509 for (i=0; i<dstW; i++) {
510 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
511 aDest[i]= av_clip_uint8(val);
// Vertical-filter output for semi-planar NV12/NV21: Y plane as in yuv2yuvX_c,
// then interleaved chroma into dest[1]. For PIX_FMT_NV12 the order is U,V per
// pair; the second loop stores V,U — i.e. the NV21 layout (selected when
// dstFormat is not NV12). NOTE(review): this extract omits structural lines
// (braces, i/j declarations, the else introducing the second chroma loop).
515 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
516 const int16_t **lumSrc, int lumFilterSize,
517 const int16_t *chrFilter, const int16_t **chrUSrc,
518 const int16_t **chrVSrc, int chrFilterSize,
519 const int16_t **alpSrc, uint8_t *dest[4],
520 int dstW, int chrDstW)
522 uint8_t *yDest = dest[0], *uDest = dest[1];
523 enum PixelFormat dstFormat = c->dstFormat;
524 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
526 //FIXME Optimize (just quickly written not optimized..)
528 for (i=0; i<dstW; i++) {
529 int val = lumDither[i & 7] << 12;
531 for (j=0; j<lumFilterSize; j++)
532 val += lumSrc[j][i] * lumFilter[j];
534 yDest[i]= av_clip_uint8(val>>19);
540 if (dstFormat == PIX_FMT_NV12)
541 for (i=0; i<chrDstW; i++) {
542 int u = chrDither[i & 7] << 12;
543 int v = chrDither[(i + 3) & 7] << 12;
545 for (j=0; j<chrFilterSize; j++) {
546 u += chrUSrc[j][i] * chrFilter[j];
547 v += chrVSrc[j][i] * chrFilter[j];
550 uDest[2*i]= av_clip_uint8(u>>19);
551 uDest[2*i+1]= av_clip_uint8(v>>19);
554 for (i=0; i<chrDstW; i++) {
555 int u = chrDither[i & 7] << 12;
556 int v = chrDither[(i + 3) & 7] << 12;
558 for (j=0; j<chrFilterSize; j++) {
559 u += chrUSrc[j][i] * chrFilter[j];
560 v += chrVSrc[j][i] * chrFilter[j];
563 uDest[2*i]= av_clip_uint8(v>>19);
564 uDest[2*i+1]= av_clip_uint8(u>>19);
// 16-bit grayscale output, multi-tap vertical filter: two luma samples per
// iteration, clipped to uint16 only when the overflow bit (0x10000) is set,
// then stored via the endian-selecting output_pixel macro (BE branch shown;
// the LE branch is among the lines this extract omits). Chroma/alpha inputs
// are ignored for grayscale.
568 #define output_pixel(pos, val) \
569 if (target == PIX_FMT_GRAY16BE) { \
575 static av_always_inline void
576 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
577 const int32_t **lumSrc, int lumFilterSize,
578 const int16_t *chrFilter, const int32_t **chrUSrc,
579 const int32_t **chrVSrc, int chrFilterSize,
580 const int32_t **alpSrc, uint16_t *dest, int dstW,
581 int y, enum PixelFormat target)
585 for (i = 0; i < (dstW >> 1); i++) {
590 for (j = 0; j < lumFilterSize; j++) {
591 Y1 += lumSrc[j][i * 2] * lumFilter[j];
592 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
596 if ((Y1 | Y2) & 0x10000) {
597 Y1 = av_clip_uint16(Y1);
598 Y2 = av_clip_uint16(Y2);
600 output_pixel(&dest[i * 2 + 0], Y1);
601 output_pixel(&dest[i * 2 + 1], Y2);
// 16-bit grayscale output, two-row bilinear blend: mixes buf[0]/buf[1] with
// weights (4095 - yalpha)/yalpha and scales down by >>15 before the
// endian-aware store. Chroma/alpha buffers are unused for grayscale.
605 static av_always_inline void
606 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
607 const int32_t *ubuf[2], const int32_t *vbuf[2],
608 const int32_t *abuf[2], uint16_t *dest, int dstW,
609 int yalpha, int uvalpha, int y,
610 enum PixelFormat target)
612 int yalpha1 = 4095 - yalpha;
614 const int32_t *buf0 = buf[0], *buf1 = buf[1];
616 for (i = 0; i < (dstW >> 1); i++) {
617 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
618 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
620 output_pixel(&dest[i * 2 + 0], Y1);
621 output_pixel(&dest[i * 2 + 1], Y2);
// 16-bit grayscale output, single-row path: rounds (+4) and scales the
// intermediate down by >>3 before the endian-aware store.
625 static av_always_inline void
626 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
627 const int32_t *ubuf[2], const int32_t *vbuf[2],
628 const int32_t *abuf0, uint16_t *dest, int dstW,
629 int uvalpha, int y, enum PixelFormat target)
633 for (i = 0; i < (dstW >> 1); i++) {
634 int Y1 = (buf0[i * 2 ]+4)>>3;
635 int Y2 = (buf0[i * 2 + 1]+4)>>3;
637 output_pixel(&dest[i * 2 + 0], Y1);
638 output_pixel(&dest[i * 2 + 1], Y2);
// Generator macro for >8-bit packed-output entry points: for a given template
// family it emits the _X_c (multi-tap filter), _2_c (two-row blend) and _1_c
// (single-row) wrappers, each casting the generic int16_t pointers to the
// int32_t intermediates the 16-bit templates expect and baking in the target
// pixel format. Instantiated immediately below for GRAY16 LE/BE (and later
// for RGB48/BGR48).
644 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
645 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
646 const int16_t **_lumSrc, int lumFilterSize, \
647 const int16_t *chrFilter, const int16_t **_chrUSrc, \
648 const int16_t **_chrVSrc, int chrFilterSize, \
649 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
652 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
653 **chrUSrc = (const int32_t **) _chrUSrc, \
654 **chrVSrc = (const int32_t **) _chrVSrc, \
655 **alpSrc = (const int32_t **) _alpSrc; \
656 uint16_t *dest = (uint16_t *) _dest; \
657 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
658 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
659 alpSrc, dest, dstW, y, fmt); \
662 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
663 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
664 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
665 int yalpha, int uvalpha, int y) \
667 const int32_t **buf = (const int32_t **) _buf, \
668 **ubuf = (const int32_t **) _ubuf, \
669 **vbuf = (const int32_t **) _vbuf, \
670 **abuf = (const int32_t **) _abuf; \
671 uint16_t *dest = (uint16_t *) _dest; \
672 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
673 dest, dstW, yalpha, uvalpha, y, fmt); \
676 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
677 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
678 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
679 int uvalpha, int y) \
681 const int32_t *buf0 = (const int32_t *) _buf0, \
682 **ubuf = (const int32_t **) _ubuf, \
683 **vbuf = (const int32_t **) _vbuf, \
684 *abuf0 = (const int32_t *) _abuf0; \
685 uint16_t *dest = (uint16_t *) _dest; \
686 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
687 dstW, uvalpha, y, fmt); \
690 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
691 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// 1-bit monochrome output, multi-tap filter path: two luma samples per
// iteration, conditionally clipped to 8 bits (0x100 overflow test), then
// thresholded through the gamma/level lookup `g` (table_gU/table_gV at the
// neutral chroma index 128) with per-column dither from dither_8x8_220; bits
// are shifted into `acc` and flushed a byte at a time via output_pixel, whose
// MONOBLACK/MONOWHITE polarity branches are partly omitted in this extract.
693 #define output_pixel(pos, acc) \
694 if (target == PIX_FMT_MONOBLACK) { \
700 static av_always_inline void
701 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
702 const int16_t **lumSrc, int lumFilterSize,
703 const int16_t *chrFilter, const int16_t **chrUSrc,
704 const int16_t **chrVSrc, int chrFilterSize,
705 const int16_t **alpSrc, uint8_t *dest, int dstW,
706 int y, enum PixelFormat target)
708 const uint8_t * const d128=dither_8x8_220[y&7];
709 uint8_t *g = c->table_gU[128] + c->table_gV[128];
713 for (i = 0; i < dstW - 1; i += 2) {
718 for (j = 0; j < lumFilterSize; j++) {
719 Y1 += lumSrc[j][i] * lumFilter[j];
720 Y2 += lumSrc[j][i+1] * lumFilter[j];
724 if ((Y1 | Y2) & 0x100) {
725 Y1 = av_clip_uint8(Y1);
726 Y2 = av_clip_uint8(Y2);
728 acc += acc + g[Y1 + d128[(i + 0) & 7]];
729 acc += acc + g[Y2 + d128[(i + 1) & 7]];
731 output_pixel(*dest++, acc);
// 1-bit monochrome output, two-row blend path: blends buf0/buf1 with
// yalpha weights (>>19 to 8 bits), applies the `g` lookup plus dither row
// d128, and packs 8 pixels per output byte.
736 static av_always_inline void
737 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
738 const int16_t *ubuf[2], const int16_t *vbuf[2],
739 const int16_t *abuf[2], uint8_t *dest, int dstW,
740 int yalpha, int uvalpha, int y,
741 enum PixelFormat target)
743 const int16_t *buf0 = buf[0], *buf1 = buf[1];
744 const uint8_t * const d128 = dither_8x8_220[y & 7];
745 uint8_t *g = c->table_gU[128] + c->table_gV[128];
746 int yalpha1 = 4095 - yalpha;
749 for (i = 0; i < dstW - 7; i += 8) {
750 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
751 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
752 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
753 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
754 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
755 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
756 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
757 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
758 output_pixel(*dest++, acc);
// 1-bit monochrome output, single-row path: scales each luma sample with >>7,
// applies the `g` lookup plus dither row d128, and packs 8 pixels per byte.
762 static av_always_inline void
763 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const uint8_t * const d128 = dither_8x8_220[y & 7];
769 uint8_t *g = c->table_gU[128] + c->table_gV[128];
772 for (i = 0; i < dstW - 7; i += 8) {
773 int acc = g[(buf0[i ] >> 7) + d128[0]];
774 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
775 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
776 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
777 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
778 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
779 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
780 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
781 output_pixel(*dest++, acc);
// Generator macro for 8-bit packed-output entry points: emits _X_c, _2_c and
// _1_c wrappers that forward the int16_t intermediates straight to the
// corresponding template with the target pixel format baked in. Instantiated
// here for monochrome white/black (and later for YUYV/UYVY).
787 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
788 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
789 const int16_t **lumSrc, int lumFilterSize, \
790 const int16_t *chrFilter, const int16_t **chrUSrc, \
791 const int16_t **chrVSrc, int chrFilterSize, \
792 const int16_t **alpSrc, uint8_t *dest, int dstW, \
795 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
796 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
797 alpSrc, dest, dstW, y, fmt); \
800 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
801 const int16_t *ubuf[2], const int16_t *vbuf[2], \
802 const int16_t *abuf[2], uint8_t *dest, int dstW, \
803 int yalpha, int uvalpha, int y) \
805 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
806 dest, dstW, yalpha, uvalpha, y, fmt); \
809 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
810 const int16_t *ubuf[2], const int16_t *vbuf[2], \
811 const int16_t *abuf0, uint8_t *dest, int dstW, \
812 int uvalpha, int y) \
814 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
815 abuf0, dest, dstW, uvalpha, \
819 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
820 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// Packed 4:2:2 output, multi-tap filter path. output_pixels stores a
// Y1/U/Y2/V quad in YUYV order for PIX_FMT_YUYV422, or with Y at the odd
// offsets for the UYVY layout (the U/V store lines of the macro are among
// the lines this extract omits). Values are conditionally clipped to 8 bits
// only when the 0x100 overflow bit is set.
822 #define output_pixels(pos, Y1, U, Y2, V) \
823 if (target == PIX_FMT_YUYV422) { \
824 dest[pos + 0] = Y1; \
826 dest[pos + 2] = Y2; \
830 dest[pos + 1] = Y1; \
832 dest[pos + 3] = Y2; \
835 static av_always_inline void
836 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
837 const int16_t **lumSrc, int lumFilterSize,
838 const int16_t *chrFilter, const int16_t **chrUSrc,
839 const int16_t **chrVSrc, int chrFilterSize,
840 const int16_t **alpSrc, uint8_t *dest, int dstW,
841 int y, enum PixelFormat target)
845 for (i = 0; i < (dstW >> 1); i++) {
852 for (j = 0; j < lumFilterSize; j++) {
853 Y1 += lumSrc[j][i * 2] * lumFilter[j];
854 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
856 for (j = 0; j < chrFilterSize; j++) {
857 U += chrUSrc[j][i] * chrFilter[j];
858 V += chrVSrc[j][i] * chrFilter[j];
864 if ((Y1 | Y2 | U | V) & 0x100) {
865 Y1 = av_clip_uint8(Y1);
866 Y2 = av_clip_uint8(Y2);
867 U = av_clip_uint8(U);
868 V = av_clip_uint8(V);
870 output_pixels(4*i, Y1, U, Y2, V);
// Packed 4:2:2 output, two-row blend path: luma blended with yalpha weights
// and chroma with uvalpha weights, both scaled by >>19 to 8 bits, then stored
// as Y1/U/Y2/V quads via output_pixels.
874 static av_always_inline void
875 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
876 const int16_t *ubuf[2], const int16_t *vbuf[2],
877 const int16_t *abuf[2], uint8_t *dest, int dstW,
878 int yalpha, int uvalpha, int y,
879 enum PixelFormat target)
881 const int16_t *buf0 = buf[0], *buf1 = buf[1],
882 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
883 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
884 int yalpha1 = 4095 - yalpha;
885 int uvalpha1 = 4095 - uvalpha;
888 for (i = 0; i < (dstW >> 1); i++) {
889 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
890 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
891 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
892 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
894 output_pixels(i * 4, Y1, U, Y2, V);
// Packed 4:2:2 output, single-row path. When uvalpha < 2048 only the second
// chroma row (ubuf1/vbuf1) is used (>>7); otherwise the two chroma rows are
// averaged (>>8). NOTE(review): the else introducing the second loop is among
// the lines this extract omits. Instantiated below for YUYV and UYVY via
// YUV2PACKEDWRAPPER.
898 static av_always_inline void
899 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
900 const int16_t *ubuf[2], const int16_t *vbuf[2],
901 const int16_t *abuf0, uint8_t *dest, int dstW,
902 int uvalpha, int y, enum PixelFormat target)
904 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
905 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
908 if (uvalpha < 2048) {
909 for (i = 0; i < (dstW >> 1); i++) {
910 int Y1 = buf0[i * 2] >> 7;
911 int Y2 = buf0[i * 2 + 1] >> 7;
912 int U = ubuf1[i] >> 7;
913 int V = vbuf1[i] >> 7;
915 output_pixels(i * 4, Y1, U, Y2, V);
918 for (i = 0; i < (dstW >> 1); i++) {
919 int Y1 = buf0[i * 2] >> 7;
920 int Y2 = buf0[i * 2 + 1] >> 7;
921 int U = (ubuf0[i] + ubuf1[i]) >> 8;
922 int V = (vbuf0[i] + vbuf1[i]) >> 8;
924 output_pixels(i * 4, Y1, U, Y2, V);
931 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
932 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// 48-bit RGB/BGR output, multi-tap filter path. R_B/B_R swap the first/third
// component so the same body serves RGB48 and BGR48; output_pixel stores each
// 16-bit component big- or little-endian per isBE(target) (the store lines of
// the macro are among the lines this extract omits). YUV->RGB uses the
// context's fixed-point coefficients (yuv2rgb_*), clipping each channel to 30
// bits before the final >>14. The comments with bit-width bookkeeping are the
// original author's.
934 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
935 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
936 #define output_pixel(pos, val) \
937 if (isBE(target)) { \
943 static av_always_inline void
944 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
945 const int32_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int32_t **chrUSrc,
947 const int32_t **chrVSrc, int chrFilterSize,
948 const int32_t **alpSrc, uint16_t *dest, int dstW,
949 int y, enum PixelFormat target)
953 for (i = 0; i < (dstW >> 1); i++) {
957 int U = -128 << 23; // 19
961 for (j = 0; j < lumFilterSize; j++) {
962 Y1 += lumSrc[j][i * 2] * lumFilter[j];
963 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
965 for (j = 0; j < chrFilterSize; j++) {
966 U += chrUSrc[j][i] * chrFilter[j];
967 V += chrVSrc[j][i] * chrFilter[j];
970 // 8bit: 12+15=27; 16-bit: 12+19=31
976 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
977 Y1 -= c->yuv2rgb_y_offset;
978 Y2 -= c->yuv2rgb_y_offset;
979 Y1 *= c->yuv2rgb_y_coeff;
980 Y2 *= c->yuv2rgb_y_coeff;
983 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
985 R = V * c->yuv2rgb_v2r_coeff;
986 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
987 B = U * c->yuv2rgb_u2b_coeff;
989 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
990 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
991 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
992 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
993 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
994 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
995 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// 48-bit RGB/BGR output, two-row blend path: yalpha/uvalpha-weighted blends
// (>>14), chroma re-centered by -128<<23 before the shift, then the same
// fixed-point YUV->RGB conversion and endian-aware 16-bit stores as the
// filter path above.
1000 static av_always_inline void
1001 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
1002 const int32_t *ubuf[2], const int32_t *vbuf[2],
1003 const int32_t *abuf[2], uint16_t *dest, int dstW,
1004 int yalpha, int uvalpha, int y,
1005 enum PixelFormat target)
1007 const int32_t *buf0 = buf[0], *buf1 = buf[1],
1008 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1009 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1010 int yalpha1 = 4095 - yalpha;
1011 int uvalpha1 = 4095 - uvalpha;
1014 for (i = 0; i < (dstW >> 1); i++) {
1015 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
1016 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
1017 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
1018 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
1021 Y1 -= c->yuv2rgb_y_offset;
1022 Y2 -= c->yuv2rgb_y_offset;
1023 Y1 *= c->yuv2rgb_y_coeff;
1024 Y2 *= c->yuv2rgb_y_coeff;
1028 R = V * c->yuv2rgb_v2r_coeff;
1029 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1030 B = U * c->yuv2rgb_u2b_coeff;
1032 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1033 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1034 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1035 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1036 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1037 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// 48-bit RGB/BGR output, single-row path. When uvalpha < 2048 only ubuf0/
// vbuf0 are used (re-centered by -128<<11, >>2); otherwise the two chroma
// rows are averaged (-128<<12, >>3). Conversion and stores match the other
// rgb48 paths. NOTE(review): the else between the two loops is among the
// lines this extract omits. The YUV2PACKED16WRAPPER instantiations below emit
// the concrete rgb48/bgr48 BE/LE entry points.
1042 static av_always_inline void
1043 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
1044 const int32_t *ubuf[2], const int32_t *vbuf[2],
1045 const int32_t *abuf0, uint16_t *dest, int dstW,
1046 int uvalpha, int y, enum PixelFormat target)
1048 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1049 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1052 if (uvalpha < 2048) {
1053 for (i = 0; i < (dstW >> 1); i++) {
1054 int Y1 = (buf0[i * 2] ) >> 2;
1055 int Y2 = (buf0[i * 2 + 1]) >> 2;
1056 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1057 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1060 Y1 -= c->yuv2rgb_y_offset;
1061 Y2 -= c->yuv2rgb_y_offset;
1062 Y1 *= c->yuv2rgb_y_coeff;
1063 Y2 *= c->yuv2rgb_y_coeff;
1067 R = V * c->yuv2rgb_v2r_coeff;
1068 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1069 B = U * c->yuv2rgb_u2b_coeff;
1071 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1072 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1073 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1074 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1075 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1076 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1080 for (i = 0; i < (dstW >> 1); i++) {
1081 int Y1 = (buf0[i * 2] ) >> 2;
1082 int Y2 = (buf0[i * 2 + 1]) >> 2;
1083 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
1084 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
1087 Y1 -= c->yuv2rgb_y_offset;
1088 Y2 -= c->yuv2rgb_y_offset;
1089 Y1 *= c->yuv2rgb_y_coeff;
1090 Y2 *= c->yuv2rgb_y_coeff;
1094 R = V * c->yuv2rgb_v2r_coeff;
1095 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1096 B = U * c->yuv2rgb_u2b_coeff;
1098 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1099 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1100 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1101 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1102 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1103 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1113 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1114 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1115 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1116 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/*
 * Store two horizontally adjacent pixels (Y1/Y2 sharing U/V) into the
 * destination in the requested packed RGB target format, using the
 * precomputed per-component lookup tables _r/_g/_b.  Handles 32-bit
 * (with/without alpha), 24-bit, 16/15/12-bit (ordered dither) and
 * 8/4-bit (8x8 ordered dither) targets.
 * NOTE(review): lines are elided in this excerpt (#if/#else/#endif and
 * braces around the alpha branches are not visible).
 */
1118 static av_always_inline void
1119 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1120 int U, int V, int A1, int A2,
1121 const void *_r, const void *_g, const void *_b, int y,
1122 enum PixelFormat target, int hasAlpha)
1124 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1125 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1126 uint32_t *dest = (uint32_t *) _dest;
1127 const uint32_t *r = (const uint32_t *) _r;
1128 const uint32_t *g = (const uint32_t *) _g;
1129 const uint32_t *b = (const uint32_t *) _b;
/* alpha goes in bits 24..31 except for the "_1" layouts (alpha first) */
/* NOTE(review): A1 << 24 on a signed int can overflow for A1 > 127 — verify
 * whether an unsigned cast is intended here */
1132 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1134 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1135 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1138 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1140 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1141 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1143 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1144 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1147 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1148 uint8_t *dest = (uint8_t *) _dest;
1149 const uint8_t *r = (const uint8_t *) _r;
1150 const uint8_t *g = (const uint8_t *) _g;
1151 const uint8_t *b = (const uint8_t *) _b;
/* r_b/b_r swap the outer channels between RGB24 and BGR24 */
1153 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1154 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1156 dest[i * 6 + 0] = r_b[Y1];
1157 dest[i * 6 + 1] = g[Y1];
1158 dest[i * 6 + 2] = b_r[Y1];
1159 dest[i * 6 + 3] = r_b[Y2];
1160 dest[i * 6 + 4] = g[Y2];
1161 dest[i * 6 + 5] = b_r[Y2];
1164 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1165 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1166 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1167 uint16_t *dest = (uint16_t *) _dest;
1168 const uint16_t *r = (const uint16_t *) _r;
1169 const uint16_t *g = (const uint16_t *) _g;
1170 const uint16_t *b = (const uint16_t *) _b;
/* per-channel ordered-dither offsets; tables are indexed by output row y */
1171 int dr1, dg1, db1, dr2, dg2, db2;
1173 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1174 dr1 = dither_2x2_8[ y & 1 ][0];
1175 dg1 = dither_2x2_4[ y & 1 ][0];
1176 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1177 dr2 = dither_2x2_8[ y & 1 ][1];
1178 dg2 = dither_2x2_4[ y & 1 ][1];
1179 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1180 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1181 dr1 = dither_2x2_8[ y & 1 ][0];
1182 dg1 = dither_2x2_8[ y & 1 ][1];
1183 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1184 dr2 = dither_2x2_8[ y & 1 ][1];
1185 dg2 = dither_2x2_8[ y & 1 ][0];
1186 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1188 dr1 = dither_4x4_16[ y & 3 ][0];
1189 dg1 = dither_4x4_16[ y & 3 ][1];
1190 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1191 dr2 = dither_4x4_16[ y & 3 ][1];
1192 dg2 = dither_4x4_16[ y & 3 ][0];
1193 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1196 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1197 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1198 } else /* 8/4-bit */ {
1199 uint8_t *dest = (uint8_t *) _dest;
1200 const uint8_t *r = (const uint8_t *) _r;
1201 const uint8_t *g = (const uint8_t *) _g;
1202 const uint8_t *b = (const uint8_t *) _b;
1203 int dr1, dg1, db1, dr2, dg2, db2;
1205 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1206 const uint8_t * const d64 = dither_8x8_73[y & 7];
1207 const uint8_t * const d32 = dither_8x8_32[y & 7];
1208 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1209 db1 = d64[(i * 2 + 0) & 7];
1210 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1211 db2 = d64[(i * 2 + 1) & 7];
1213 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1214 const uint8_t * const d128 = dither_8x8_220[y & 7];
1215 dr1 = db1 = d128[(i * 2 + 0) & 7];
1216 dg1 = d64[(i * 2 + 0) & 7];
1217 dr2 = db2 = d128[(i * 2 + 1) & 7];
1218 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 packs two 4-bit pixels into one byte */
1221 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1222 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1223 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1225 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1226 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Multi-tap ("_X") vertical scaling to packed RGB: for each pair of output
 * pixels, accumulate the luma/chroma filter taps, clip to 8 bits only when
 * an accumulator overflowed (the 0x100 test), then write via yuv2rgb_write
 * using the context's YUV->RGB lookup tables.
 * NOTE(review): elided lines here include the accumulator initializers and
 * the hasAlpha braces — not visible in this excerpt.
 */
1231 static av_always_inline void
1232 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1233 const int16_t **lumSrc, int lumFilterSize,
1234 const int16_t *chrFilter, const int16_t **chrUSrc,
1235 const int16_t **chrVSrc, int chrFilterSize,
1236 const int16_t **alpSrc, uint8_t *dest, int dstW,
1237 int y, enum PixelFormat target, int hasAlpha)
1241 for (i = 0; i < (dstW >> 1); i++) {
1247 int av_unused A1, A2;
1248 const void *r, *g, *b;
1250 for (j = 0; j < lumFilterSize; j++) {
1251 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1252 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1254 for (j = 0; j < chrFilterSize; j++) {
1255 U += chrUSrc[j][i] * chrFilter[j];
1256 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when some component left the 0..255 range */
1262 if ((Y1 | Y2 | U | V) & 0x100) {
1263 Y1 = av_clip_uint8(Y1);
1264 Y2 = av_clip_uint8(Y2);
1265 U = av_clip_uint8(U);
1266 V = av_clip_uint8(V);
1271 for (j = 0; j < lumFilterSize; j++) {
1272 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1273 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1277 if ((A1 | A2) & 0x100) {
1278 A1 = av_clip_uint8(A1);
1279 A2 = av_clip_uint8(A2);
1283 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1285 g = (c->table_gU[U] + c->table_gV[V]);
1288 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1289 r, g, b, y, target, hasAlpha);
/*
 * Two-line ("_2") vertical blend to packed RGB: linearly interpolates
 * between two source lines with 12-bit weights (yalpha/uvalpha in 0..4095),
 * then writes pixel pairs through yuv2rgb_write.  The >> 19 collapses the
 * 15-bit samples * 12-bit weight back to 8 bits.
 */
1293 static av_always_inline void
1294 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1295 const int16_t *ubuf[2], const int16_t *vbuf[2],
1296 const int16_t *abuf[2], uint8_t *dest, int dstW,
1297 int yalpha, int uvalpha, int y,
1298 enum PixelFormat target, int hasAlpha)
1300 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1301 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1302 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1303 *abuf0 = hasAlpha ? abuf[0] : NULL,
1304 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary blend weights */
1305 int yalpha1 = 4095 - yalpha;
1306 int uvalpha1 = 4095 - uvalpha;
1309 for (i = 0; i < (dstW >> 1); i++) {
1310 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1311 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1312 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1313 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1315 const void *r = c->table_rV[V],
1316 *g = (c->table_gU[U] + c->table_gV[V]),
1317 *b = c->table_bU[U];
1320 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1321 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1324 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1325 r, g, b, y, target, hasAlpha);
/*
 * Single-line ("_1") conversion to packed RGB: no vertical blending of
 * luma; chroma either taken directly (uvalpha < 2048) or averaged between
 * the two chroma lines.  Samples are 15-bit, hence the >> 7 to 8 bits.
 */
1329 static av_always_inline void
1330 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1331 const int16_t *ubuf[2], const int16_t *vbuf[2],
1332 const int16_t *abuf0, uint8_t *dest, int dstW,
1333 int uvalpha, int y, enum PixelFormat target,
1336 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1337 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1340 if (uvalpha < 2048) {
1341 for (i = 0; i < (dstW >> 1); i++) {
1342 int Y1 = buf0[i * 2] >> 7;
1343 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): this non-interpolated branch reads ubuf1/vbuf1 while the
 * corresponding 16-bit variant (and later FFmpeg) uses ubuf0/vbuf0 here —
 * verify against upstream; flagged only, not changed. */
1344 int U = ubuf1[i] >> 7;
1345 int V = vbuf1[i] >> 7;
1347 const void *r = c->table_rV[V],
1348 *g = (c->table_gU[U] + c->table_gV[V]),
1349 *b = c->table_bU[U];
1352 A1 = abuf0[i * 2 ] >> 7;
1353 A2 = abuf0[i * 2 + 1] >> 7;
1356 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1357 r, g, b, y, target, hasAlpha);
/* chroma averaged between the two lines */
1360 for (i = 0; i < (dstW >> 1); i++) {
1361 int Y1 = buf0[i * 2] >> 7;
1362 int Y2 = buf0[i * 2 + 1] >> 7;
1363 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1364 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1366 const void *r = c->table_rV[V],
1367 *g = (c->table_gU[U] + c->table_gV[V]),
1368 *b = c->table_bU[U];
1371 A1 = abuf0[i * 2 ] >> 7;
1372 A2 = abuf0[i * 2 + 1] >> 7;
1375 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1376 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX emits the "_X_c" (multi-tap) entry
 * point for a given target format; YUV2RGBWRAPPER additionally emits the
 * "_2_c" (two-line blend) and "_1_c" (single-line) entry points.  The
 * instantiations below cover RGB32 variants (with runtime/static alpha
 * selection under CONFIG_SWSCALE_ALPHA), 24/16/15/12/8/4-bit targets.
 * No comments are inserted inside the macro bodies: the backslash
 * continuations must stay contiguous.
 */
1381 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1382 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1383 const int16_t **lumSrc, int lumFilterSize, \
1384 const int16_t *chrFilter, const int16_t **chrUSrc, \
1385 const int16_t **chrVSrc, int chrFilterSize, \
1386 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1389 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1390 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1391 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1393 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1394 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1395 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1396 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1397 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1398 int yalpha, int uvalpha, int y) \
1400 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1401 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1404 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1405 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1406 const int16_t *abuf0, uint8_t *dest, int dstW, \
1407 int uvalpha, int y) \
1409 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1410 dstW, uvalpha, y, fmt, hasAlpha); \
1414 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1415 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1417 #if CONFIG_SWSCALE_ALPHA
1418 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1419 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1421 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1422 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1424 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1425 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1426 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1427 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1428 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1429 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1430 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1431 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * "Full chroma" multi-tap path: one chroma sample per output pixel (no
 * 2:1 horizontal chroma subsampling), computed per-pixel from the YUV->RGB
 * coefficients rather than via lookup tables.  Components are clipped to
 * 30 bits only when the 0xC0000000 overflow test fires.
 * NOTE(review): the per-format dest[] store sequences between lines 1485
 * and 1516 are largely elided in this excerpt.
 */
1433 static av_always_inline void
1434 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1435 const int16_t **lumSrc, int lumFilterSize,
1436 const int16_t *chrFilter, const int16_t **chrUSrc,
1437 const int16_t **chrVSrc, int chrFilterSize,
1438 const int16_t **alpSrc, uint8_t *dest,
1439 int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 3 bytes/pixel for 24-bit targets, 4 otherwise */
1442 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1444 for (i = 0; i < dstW; i++) {
/* rounding bias plus -128 chroma offset, in 19-bit fixed point */
1447 int U = (1<<9)-(128 << 19);
1448 int V = (1<<9)-(128 << 19);
1452 for (j = 0; j < lumFilterSize; j++) {
1453 Y += lumSrc[j][i] * lumFilter[j];
1455 for (j = 0; j < chrFilterSize; j++) {
1456 U += chrUSrc[j][i] * chrFilter[j];
1457 V += chrVSrc[j][i] * chrFilter[j];
1464 for (j = 0; j < lumFilterSize; j++) {
1465 A += alpSrc[j][i] * lumFilter[j];
1469 A = av_clip_uint8(A);
1471 Y -= c->yuv2rgb_y_offset;
1472 Y *= c->yuv2rgb_y_coeff;
1474 R = Y + V*c->yuv2rgb_v2r_coeff;
1475 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1476 B = Y + U*c->yuv2rgb_u2b_coeff;
1477 if ((R | G | B) & 0xC0000000) {
1478 R = av_clip_uintp2(R, 30);
1479 G = av_clip_uintp2(G, 30);
1480 B = av_clip_uintp2(B, 30);
/* alpha-first vs alpha-last layouts; opaque (255) when no alpha plane */
1485 dest[0] = hasAlpha ? A : 255;
1499 dest[3] = hasAlpha ? A : 255;
1502 dest[0] = hasAlpha ? A : 255;
1516 dest[3] = hasAlpha ? A : 255;
/* full-chroma wrapper instantiations (alpha variants under CONFIG_SWSCALE_ALPHA) */
1524 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1525 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1526 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1527 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1529 #if CONFIG_SWSCALE_ALPHA
1530 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1531 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1532 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1533 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1535 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1536 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1537 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1538 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1540 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1541 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/*
 * Fill `height` rows of a plane (starting at row y) with the constant
 * byte value, one memset of `width` bytes per row, stepping by stride.
 */
1543 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1544 int width, int height,
1548 uint8_t *ptr = plane + stride*y;
1549 for (i=0; i<height; i++) {
1550 memset(ptr, val, width);
/* Endianness-aware 16-bit component load for the rgb48/bgr48 readers. */
1555 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* r/b resolve to the correct component depending on RGB vs BGR order. */
1557 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1558 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * 48-bit RGB/BGR -> 16-bit luma, one sample per input pixel,
 * using the RY/GY/BY fixed-point coefficients with rounding.
 */
1560 static av_always_inline void
1561 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1562 enum PixelFormat origin)
1565 for (i = 0; i < width; i++) {
1566 unsigned int r_b = input_pixel(&src[i*3+0]);
1567 unsigned int g = input_pixel(&src[i*3+1]);
1568 unsigned int b_r = input_pixel(&src[i*3+2]);
1570 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * 48-bit RGB/BGR -> 16-bit chroma, one U/V sample per input pixel
 * (no horizontal decimation; src2 is unused here).
 */
1574 static av_always_inline void
1575 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1576 const uint16_t *src1, const uint16_t *src2,
1577 int width, enum PixelFormat origin)
1581 for (i = 0; i < width; i++) {
1582 int r_b = input_pixel(&src1[i*3+0]);
1583 int g = input_pixel(&src1[i*3+1]);
1584 int b_r = input_pixel(&src1[i*3+2]);
1586 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1587 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Same as above but with 2:1 horizontal decimation: each output chroma
 * sample averages two adjacent input pixels (rounded).
 */
1591 static av_always_inline void
1592 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1593 const uint16_t *src1, const uint16_t *src2,
1594 int width, enum PixelFormat origin)
1598 for (i = 0; i < width; i++) {
1599 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1600 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1601 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1603 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1604 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generates the public ToY/ToUV/ToUV_half entry points for a 48-bit
 * format: each casts the byte pointers to uint16_t* and forwards to the
 * templates above with the compile-time `origin` format.  Instantiated
 * for rgb48/bgr48 in both endiannesses.  (No comments inside the macro
 * body — backslash continuations must stay contiguous.)
 */
1612 #define rgb48funcs(pattern, BE_LE, origin) \
1613 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1614 int width, uint32_t *unused) \
1616 const uint16_t *src = (const uint16_t *) _src; \
1617 uint16_t *dst = (uint16_t *) _dst; \
1618 rgb48ToY_c_template(dst, src, width, origin); \
1621 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1622 const uint8_t *_src1, const uint8_t *_src2, \
1623 int width, uint32_t *unused) \
1625 const uint16_t *src1 = (const uint16_t *) _src1, \
1626 *src2 = (const uint16_t *) _src2; \
1627 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1628 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1631 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1632 const uint8_t *_src1, const uint8_t *_src2, \
1633 int width, uint32_t *unused) \
1635 const uint16_t *src1 = (const uint16_t *) _src1, \
1636 *src2 = (const uint16_t *) _src2; \
1637 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1638 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1641 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1642 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1643 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1644 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Loads one packed pixel: 32-bit aligned read for 32-bit formats,
 * endianness-aware 16-bit read otherwise. */
1646 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1647 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1648 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic 15/16/32-bit packed RGB -> luma.  The shr/shg/shb/shp shifts and
 * mask* parameters describe the channel layout; rsh/gsh/bsh pre-scale the
 * coefficients and S is the total fixed-point shift.
 */
1650 static av_always_inline void
1651 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1652 int width, enum PixelFormat origin,
1653 int shr, int shg, int shb, int shp,
1654 int maskr, int maskg, int maskb,
1655 int rsh, int gsh, int bsh, int S)
1657 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1658 rnd = (32<<((S)-1)) + (1<<(S-7));
1661 for (i = 0; i < width; i++) {
1662 int px = input_pixel(i) >> shp;
1663 int b = (px & maskb) >> shb;
1664 int g = (px & maskg) >> shg;
1665 int r = (px & maskr) >> shr;
1667 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/*
 * Generic packed RGB -> chroma, one U/V sample per input pixel.
 */
1671 static av_always_inline void
1672 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1673 const uint8_t *src, int width,
1674 enum PixelFormat origin,
1675 int shr, int shg, int shb, int shp,
1676 int maskr, int maskg, int maskb,
1677 int rsh, int gsh, int bsh, int S)
1679 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1680 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1681 rnd = (256<<((S)-1)) + (1<<(S-7));
1684 for (i = 0; i < width; i++) {
1685 int px = input_pixel(i) >> shp;
1686 int b = (px & maskb) >> shb;
1687 int g = (px & maskg) >> shg;
1688 int r = (px & maskr) >> shr;
1690 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1691 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/*
 * Generic packed RGB -> chroma with 2:1 horizontal decimation: adds two
 * adjacent pixels before the dot product.  maskgx isolates what is NOT
 * red/blue so the green sum survives the paired addition; the widened
 * masks (<< 1) account for component sums occupying one extra bit.
 */
1695 static av_always_inline void
1696 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1697 const uint8_t *src, int width,
1698 enum PixelFormat origin,
1699 int shr, int shg, int shb, int shp,
1700 int maskr, int maskg, int maskb,
1701 int rsh, int gsh, int bsh, int S)
1703 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1704 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1705 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
1708 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1709 for (i = 0; i < width; i++) {
1710 int px0 = input_pixel(2 * i + 0) >> shp;
1711 int px1 = input_pixel(2 * i + 1) >> shp;
1712 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1713 int rb = px0 + px1 - g;
1715 b = (rb & maskb) >> shb;
1716 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1717 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1720 g = (g & maskg) >> shg;
1722 r = (rb & maskr) >> shr;
/* unsigned rnd keeps the addition from overflowing signed int */
1724 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1725 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/*
 * Generates the public ToY/ToUV/ToUV_half readers for one packed-RGB
 * layout by baking the shift/mask parameters into calls to the templates
 * above.  Instantiated below for the 32-bit and 15/16-bit RGB/BGR layouts
 * in both endiannesses.  (No comments inside the macro body — backslash
 * continuations must stay contiguous.)
 */
1731 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1732 maskg, maskb, rsh, gsh, bsh, S) \
1733 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1734 int width, uint32_t *unused) \
1736 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
1737 shr, shg, shb, shp, \
1738 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1741 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1742 const uint8_t *src, const uint8_t *dummy, \
1743 int width, uint32_t *unused) \
1745 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1746 shr, shg, shb, shp, \
1747 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1750 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1751 const uint8_t *src, const uint8_t *dummy, \
1752 int width, uint32_t *unused) \
1754 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1755 shr, shg, shb, shp, \
1756 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1759 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1760 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1761 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1762 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1763 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1764 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1765 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1766 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1767 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1768 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1769 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1770 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha channel from ABGR/ARGB (alpha is byte 0), scaled to
 * the 15-bit internal range via << 6. */
1772 static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1775 for (i=0; i<width; i++) {
1776 dst[i]= src[4*i]<<6;
/* Extract the alpha channel from RGBA/BGRA (alpha is byte 3). */
1780 static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1783 for (i=0; i<width; i++) {
1784 dst[i]= src[4*i+3]<<6;
/* PAL8: alpha from the palette entry's top byte. */
1788 static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
1791 for (i=0; i<width; i++) {
1794 dst[i]= (pal[d] >> 24)<<6;
/* PAL8: luma from the palette entry's low byte.
 * NOTE(review): `long width` is inconsistent with the `int width` used by
 * every sibling reader — verify against the lumToYV12 prototype. */
1798 static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
1801 for (i=0; i<width; i++) {
1804 dst[i]= (pal[d] & 0xFF)<<6;
/* PAL8: chroma from palette bytes 1 (U) and 2 (V).
 * NOTE(review): dstU is uint16_t* while dstV is int16_t* — the mismatch
 * looks accidental; both should presumably be int16_t*. */
1808 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1809 const uint8_t *src1, const uint8_t *src2,
1810 int width, uint32_t *pal)
1813 assert(src1 == src2);
1814 for (i=0; i<width; i++) {
1815 int p= pal[src1[i]];
1817 dstU[i]= (uint8_t)(p>> 8)<<6;
1818 dstV[i]= (uint8_t)(p>>16)<<6;
/* 1 bpp monochrome -> luma: expand each bit to 0 or 16383 (15-bit max),
 * 8 pixels per source byte, plus a tail loop for width % 8.
 * monowhite: bit set = white (the elided lines invert the byte first). */
1822 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1825 for (i=0; i<width/8; i++) {
1828 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1832 for(j=0; j<(width&7); j++)
1833 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Same expansion, monoblack polarity: bit set = white, no inversion. */
1837 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
1840 for (i=0; i<width/8; i++) {
1843 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1847 for(j=0; j<(width&7); j++)
1848 dst[8*i+j]= ((d>>(7-j))&1)*16383;
//FIXME yuy2* can read up to 7 samples too much
/* YUY2 (Y0 U Y1 V): copy the luma bytes (even offsets). */
1854 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1858 for (i=0; i<width; i++)
/* YUY2: U at byte 1, V at byte 3 of each 4-byte pair. */
1862 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1863 const uint8_t *src2, int width, uint32_t *unused)
1866 for (i=0; i<width; i++) {
1867 dstU[i]= src1[4*i + 1];
1868 dstV[i]= src1[4*i + 3];
/* packed-format readers always pass the same line twice */
1870 assert(src1 == src2);
/* Byte-swap one 16-bit luma line (endianness conversion). */
1873 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1876 const uint16_t *src = (const uint16_t *) _src;
1877 uint16_t *dst = (uint16_t *) _dst;
1878 for (i=0; i<width; i++) {
1879 dst[i] = av_bswap16(src[i]);
/* Byte-swap one 16-bit chroma line pair. */
1883 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1884 const uint8_t *_src2, int width, uint32_t *unused)
1887 const uint16_t *src1 = (const uint16_t *) _src1,
1888 *src2 = (const uint16_t *) _src2;
1889 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1890 for (i=0; i<width; i++) {
1891 dstU[i] = av_bswap16(src1[i]);
1892 dstV[i] = av_bswap16(src2[i]);
/* This is almost identical to the previous, end exists only because
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1898 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1902 for (i=0; i<width; i++)
/* UYVY (U Y0 V Y1): U at byte 0, V at byte 2 of each 4-byte pair. */
1906 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1907 const uint8_t *src2, int width, uint32_t *unused)
1910 for (i=0; i<width; i++) {
1911 dstU[i]= src1[4*i + 0];
1912 dstV[i]= src1[4*i + 2];
1914 assert(src1 == src2);
/* De-interleave a semi-planar chroma line into two planes. */
1917 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1918 const uint8_t *src, int width)
1921 for (i = 0; i < width; i++) {
1922 dst1[i] = src[2*i+0];
1923 dst2[i] = src[2*i+1];
/* NV12: interleaved order is U,V. */
1927 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1928 const uint8_t *src1, const uint8_t *src2,
1929 int width, uint32_t *unused)
1931 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved order is V,U — note the swapped destinations. */
1934 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1935 const uint8_t *src1, const uint8_t *src2,
1936 int width, uint32_t *unused)
1938 nvXXtoUV_c(dstV, dstU, src1, width);
/* (redefinition for the 24-bit readers below; unused 16-bit helper) */
1941 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> luma with rounding, output scaled to the 15-bit range. */
1943 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1944 int width, uint32_t *unused)
1947 for (i=0; i<width; i++) {
1952 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* BGR24 -> chroma, one sample per pixel. */
1956 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1957 const uint8_t *src2, int width, uint32_t *unused)
1960 for (i=0; i<width; i++) {
1961 int b= src1[3*i + 0];
1962 int g= src1[3*i + 1];
1963 int r= src1[3*i + 2];
1965 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1966 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1968 assert(src1 == src2);
/* BGR24 -> chroma with 2:1 decimation: sums adjacent pixel pairs, hence
 * the doubled rounding constant and the extra -5 output shift. */
1971 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1972 const uint8_t *src2, int width, uint32_t *unused)
1975 for (i=0; i<width; i++) {
1976 int b= src1[6*i + 0] + src1[6*i + 3];
1977 int g= src1[6*i + 1] + src1[6*i + 4];
1978 int r= src1[6*i + 2] + src1[6*i + 5];
1980 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1981 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1983 assert(src1 == src2);
/* RGB24 variants: identical math, component order reversed. */
1986 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
1990 for (i=0; i<width; i++) {
1995 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1999 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
2000 const uint8_t *src2, int width, uint32_t *unused)
2004 for (i=0; i<width; i++) {
2005 int r= src1[3*i + 0];
2006 int g= src1[3*i + 1];
2007 int b= src1[3*i + 2];
2009 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
2010 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
2014 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
2015 const uint8_t *src2, int width, uint32_t *unused)
2019 for (i=0; i<width; i++) {
2020 int r= src1[6*i + 0] + src1[6*i + 3];
2021 int g= src1[6*i + 1] + src1[6*i + 4];
2022 int b= src1[6*i + 2] + src1[6*i + 5];
2024 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
2025 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/*
 * Horizontal scalers: convolve each output sample with a 14-bit FIR
 * filter at position filterPos[i].  Variants differ in input depth
 * (8/16 bit) and output depth (15/19 bit); output is saturated at the
 * destination range maximum.
 */
2029 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
2030 const int16_t *filter,
2031 const int16_t *filterPos, int filterSize)
2034 int32_t *dst = (int32_t *) _dst;
2035 const uint16_t *src = (const uint16_t *) _src;
2036 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
/* sub-16-bit RGB/PAL inputs use a fixed shift (elided branch adjusts sh) */
2039 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2042 for (i = 0; i < dstW; i++) {
2044 int srcPos = filterPos[i];
2047 for (j = 0; j < filterSize; j++) {
2048 val += src[srcPos + j] * filter[filterSize * i + j];
2050 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2051 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
2055 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2056 const int16_t *filter,
2057 const int16_t *filterPos, int filterSize)
2060 const uint16_t *src = (const uint16_t *) _src;
2061 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2064 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2066 for (i = 0; i < dstW; i++) {
2068 int srcPos = filterPos[i];
2071 for (j = 0; j < filterSize; j++) {
2072 val += src[srcPos + j] * filter[filterSize * i + j];
2074 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2075 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2079 // bilinear / bicubic scaling
2080 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2081 const int16_t *filter, const int16_t *filterPos,
2085 for (i=0; i<dstW; i++) {
2087 int srcPos= filterPos[i];
2089 for (j=0; j<filterSize; j++) {
2090 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2092 //filter += hFilterSize;
2093 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
2098 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2099 const int16_t *filter, const int16_t *filterPos,
2103 int32_t *dst = (int32_t *) _dst;
2104 for (i=0; i<dstW; i++) {
2106 int srcPos= filterPos[i];
2108 for (j=0; j<filterSize; j++) {
2109 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2111 //filter += hFilterSize;
2112 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2117 //FIXME all pal and rgb srcFormats could do this convertion as well
2118 //FIXME all scalers more complex than bilinear could do half of this transform
2119 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2122 for (i = 0; i < width; i++) {
2123 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2124 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
2127 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2130 for (i = 0; i < width; i++) {
2131 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2132 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
2135 static void lumRangeToJpeg_c(int16_t *dst, int width)
2138 for (i = 0; i < width; i++)
2139 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
2141 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2144 for (i = 0; i < width; i++)
2145 dst[i] = (dst[i]*14071 + 33561947)>>14;
/*
 * 19-bit variant of chrRangeToJpeg_c: convert chroma from limited to full
 * range in place (buffer declared int16_t* but holds int32_t samples).
 *
 * Fix: the old expression FFMIN(dstU[i],30775<<4)*4663 reaches
 * 492400*4663 = 2296061200 > INT32_MAX for valid bright chroma, which is
 * signed-overflow UB. The product is now computed in 64 bits; the final
 * result (max 524272) still fits int32_t, so values are unchanged on
 * compilers where wrapping happened to give the right answer.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int64_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int64_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);
        dstU[i] = (int32_t)((u * 4663 - (9289992LL << 4)) >> 12); //-264
        dstV[i] = (int32_t)((v * 4663 - (9289992LL << 4)) >> 12); //-264
    }
}
/*
 * 19-bit variant of chrRangeFromJpeg_c: convert chroma from full to limited
 * range in place (buffer declared int16_t* but holds int32_t samples).
 * Max intermediate is ~1.0e9, safely inside int32 range.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        u[i] = (u[i] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[i] = (v[i] * 1799 + (4081085 << 4)) >> 11; // 1469
    }
}
/*
 * 19-bit variant of lumRangeToJpeg_c: convert luma from limited to full
 * range in place (buffer declared int16_t* but holds int32_t samples).
 *
 * Fix: FFMIN(dst[i],30189<<4)*4769 reaches 483024*4769 = 2303541456 >
 * INT32_MAX, i.e. signed-overflow UB, for valid bright luma. The multiply
 * is now done in 64 bits; the final result (max 524246) fits int32_t, so
 * values match what wrapping compilers produced.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int64_t y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);
        dst[i] = (int32_t)((y * 4769 - (39057361LL << 2)) >> 12);
    }
}
2175 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2178 int32_t *dst = (int32_t *) _dst;
2179 for (i = 0; i < width; i++)
2180 dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
2183 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2184 const uint8_t *src, int srcW, int xInc)
2187 unsigned int xpos=0;
2188 for (i=0;i<dstWidth;i++) {
2189 register unsigned int xx=xpos>>16;
2190 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2191 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2194 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2195 dst[i] = src[srcW-1]*128;
2198 // *** horizontal scale Y line to temp buffer
// Horizontally scale one luma (isAlpha==0) or alpha (isAlpha==1) input line
// into the 16-bit intermediate buffer, converting the source to planar Y
// first when the format needs it, then optionally applying range conversion.
// NOTE(review): several original source lines (the conditionals guarding the
// conversion/scale calls and some braces) are absent from this excerpt;
// only comments were added here.
2199 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2200 const uint8_t *src, int srcW, int xInc,
2201 const int16_t *hLumFilter,
2202 const int16_t *hLumFilterPos, int hLumFilterSize,
2203 uint8_t *formatConvBuffer,
2204 uint32_t *pal, int isAlpha)
// pick the per-format "to planar" converter; alpha has its own table and
// never gets luma range conversion
2206 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2207 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
// convert into the scratch buffer and scale from there instead of src
2210 toYV12(formatConvBuffer, src, srcW, pal);
2211 src= formatConvBuffer;
// >8-bit path: RGB/PAL8 sources use a fixed 13-bit shift, native high-bit
// YUV uses the format's own depth
2215 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2216 c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
2217 } else if (!c->hyscale_fast) {
// general FIR scaling
2218 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2219 } else { // fast bilinear upscale / crap downscale
2220 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
// limited<->full range conversion on the scaled line (luma only)
2224 convertRange(dst, dstWidth);
2227 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2228 int dstWidth, const uint8_t *src1,
2229 const uint8_t *src2, int srcW, int xInc)
2232 unsigned int xpos=0;
2233 for (i=0;i<dstWidth;i++) {
2234 register unsigned int xx=xpos>>16;
2235 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2236 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2237 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2240 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2241 dst1[i] = src1[srcW-1]*128;
2242 dst2[i] = src2[srcW-1]*128;
// Horizontally scale one pair of chroma lines (U and V) into the 16-bit
// intermediate buffers, converting the source format first when needed and
// applying chroma range conversion at the end.
// NOTE(review): several original source lines (guarding conditionals and
// braces) are absent from this excerpt; only comments were added here.
2246 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2247 const uint8_t *src1, const uint8_t *src2,
2248 int srcW, int xInc, const int16_t *hChrFilter,
2249 const int16_t *hChrFilterPos, int hChrFilterSize,
2250 uint8_t *formatConvBuffer, uint32_t *pal)
// the scratch buffer is split in two: U goes at the start, V at buf2
2253 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2254 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2255 src1= formatConvBuffer;
// >8-bit path: same shift selection as the luma side
2260 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2261 c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2262 c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2263 } else if (!c->hcscale_fast) {
// general FIR scaling, once per plane
2264 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2265 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2266 } else { // fast bilinear upscale / crap downscale
2267 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
// limited<->full range conversion on both scaled chroma lines
2270 if (c->chrConvertRange)
2271 c->chrConvertRange(dst1, dst2, dstWidth);
// Select the C output functions (vertical scale + pack) for the current
// destination format: planar writers (yuv2yuv1/yuv2yuvX) and packed writers
// (yuv2packed1 for 1-tap, yuv2packed2 for 2-tap bilinear, yuv2packedX for
// the general N-tap case).
// NOTE(review): many original lines (case labels, breaks, braces and the
// CONFIG_SMALL/#else branches) are absent from this excerpt; only comments
// were added here.
2274 static av_always_inline void
2275 find_c_packed_planar_out_funcs(SwsContext *c,
2276 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2277 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2278 yuv2packedX_fn *yuv2packedX)
2280 enum PixelFormat dstFormat = c->dstFormat;
// --- planar writers, chosen by destination bit depth / layout ---
2282 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2283 *yuv2yuvX = yuv2nv12X_c;
2284 } else if (is16BPS(dstFormat)) {
2285 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2286 } else if (is9_OR_10BPS(dstFormat)) {
2287 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2288 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2290 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2293 *yuv2yuv1 = yuv2yuv1_c;
2294 *yuv2yuvX = yuv2yuvX_c;
// --- packed writers: full-chroma-interpolation variants ---
2296 if(c->flags & SWS_FULL_CHR_H_INT) {
2297 switch (dstFormat) {
2300 *yuv2packedX = yuv2rgba32_full_X_c;
2302 #if CONFIG_SWSCALE_ALPHA
2304 *yuv2packedX = yuv2rgba32_full_X_c;
2306 #endif /* CONFIG_SWSCALE_ALPHA */
2308 *yuv2packedX = yuv2rgbx32_full_X_c;
2310 #endif /* !CONFIG_SMALL */
2314 *yuv2packedX = yuv2argb32_full_X_c;
2316 #if CONFIG_SWSCALE_ALPHA
2318 *yuv2packedX = yuv2argb32_full_X_c;
2320 #endif /* CONFIG_SWSCALE_ALPHA */
2322 *yuv2packedX = yuv2xrgb32_full_X_c;
2324 #endif /* !CONFIG_SMALL */
2328 *yuv2packedX = yuv2bgra32_full_X_c;
2330 #if CONFIG_SWSCALE_ALPHA
2332 *yuv2packedX = yuv2bgra32_full_X_c;
2334 #endif /* CONFIG_SWSCALE_ALPHA */
2336 *yuv2packedX = yuv2bgrx32_full_X_c;
2338 #endif /* !CONFIG_SMALL */
2342 *yuv2packedX = yuv2abgr32_full_X_c;
2344 #if CONFIG_SWSCALE_ALPHA
2346 *yuv2packedX = yuv2abgr32_full_X_c;
2348 #endif /* CONFIG_SWSCALE_ALPHA */
2350 *yuv2packedX = yuv2xbgr32_full_X_c;
2352 #endif /* !CONFIG_SMALL */
2355 *yuv2packedX = yuv2rgb24_full_X_c;
2358 *yuv2packedX = yuv2bgr24_full_X_c;
// --- packed writers: normal (subsampled-chroma) variants ---
2365 switch (dstFormat) {
2366 case PIX_FMT_GRAY16BE:
2367 *yuv2packed1 = yuv2gray16BE_1_c;
2368 *yuv2packed2 = yuv2gray16BE_2_c;
2369 *yuv2packedX = yuv2gray16BE_X_c;
2371 case PIX_FMT_GRAY16LE:
2372 *yuv2packed1 = yuv2gray16LE_1_c;
2373 *yuv2packed2 = yuv2gray16LE_2_c;
2374 *yuv2packedX = yuv2gray16LE_X_c;
2376 case PIX_FMT_MONOWHITE:
2377 *yuv2packed1 = yuv2monowhite_1_c;
2378 *yuv2packed2 = yuv2monowhite_2_c;
2379 *yuv2packedX = yuv2monowhite_X_c;
2381 case PIX_FMT_MONOBLACK:
2382 *yuv2packed1 = yuv2monoblack_1_c;
2383 *yuv2packed2 = yuv2monoblack_2_c;
2384 *yuv2packedX = yuv2monoblack_X_c;
2386 case PIX_FMT_YUYV422:
2387 *yuv2packed1 = yuv2yuyv422_1_c;
2388 *yuv2packed2 = yuv2yuyv422_2_c;
2389 *yuv2packedX = yuv2yuyv422_X_c;
2391 case PIX_FMT_UYVY422:
2392 *yuv2packed1 = yuv2uyvy422_1_c;
2393 *yuv2packed2 = yuv2uyvy422_2_c;
2394 *yuv2packedX = yuv2uyvy422_X_c;
2396 case PIX_FMT_RGB48LE:
2397 *yuv2packed1 = yuv2rgb48le_1_c;
2398 *yuv2packed2 = yuv2rgb48le_2_c;
2399 *yuv2packedX = yuv2rgb48le_X_c;
2401 case PIX_FMT_RGB48BE:
2402 *yuv2packed1 = yuv2rgb48be_1_c;
2403 *yuv2packed2 = yuv2rgb48be_2_c;
2404 *yuv2packedX = yuv2rgb48be_X_c;
2406 case PIX_FMT_BGR48LE:
2407 *yuv2packed1 = yuv2bgr48le_1_c;
2408 *yuv2packed2 = yuv2bgr48le_2_c;
2409 *yuv2packedX = yuv2bgr48le_X_c;
2411 case PIX_FMT_BGR48BE:
2412 *yuv2packed1 = yuv2bgr48be_1_c;
2413 *yuv2packed2 = yuv2bgr48be_2_c;
2414 *yuv2packedX = yuv2bgr48be_X_c;
2419 *yuv2packed1 = yuv2rgb32_1_c;
2420 *yuv2packed2 = yuv2rgb32_2_c;
2421 *yuv2packedX = yuv2rgb32_X_c;
2423 #if CONFIG_SWSCALE_ALPHA
2425 *yuv2packed1 = yuv2rgba32_1_c;
2426 *yuv2packed2 = yuv2rgba32_2_c;
2427 *yuv2packedX = yuv2rgba32_X_c;
2429 #endif /* CONFIG_SWSCALE_ALPHA */
2431 *yuv2packed1 = yuv2rgbx32_1_c;
2432 *yuv2packed2 = yuv2rgbx32_2_c;
2433 *yuv2packedX = yuv2rgbx32_X_c;
2435 #endif /* !CONFIG_SMALL */
2437 case PIX_FMT_RGB32_1:
2438 case PIX_FMT_BGR32_1:
2440 *yuv2packed1 = yuv2rgb32_1_1_c;
2441 *yuv2packed2 = yuv2rgb32_1_2_c;
2442 *yuv2packedX = yuv2rgb32_1_X_c;
2444 #if CONFIG_SWSCALE_ALPHA
2446 *yuv2packed1 = yuv2rgba32_1_1_c;
2447 *yuv2packed2 = yuv2rgba32_1_2_c;
2448 *yuv2packedX = yuv2rgba32_1_X_c;
2450 #endif /* CONFIG_SWSCALE_ALPHA */
2452 *yuv2packed1 = yuv2rgbx32_1_1_c;
2453 *yuv2packed2 = yuv2rgbx32_1_2_c;
2454 *yuv2packedX = yuv2rgbx32_1_X_c;
2456 #endif /* !CONFIG_SMALL */
2459 *yuv2packed1 = yuv2rgb24_1_c;
2460 *yuv2packed2 = yuv2rgb24_2_c;
2461 *yuv2packedX = yuv2rgb24_X_c;
2464 *yuv2packed1 = yuv2bgr24_1_c;
2465 *yuv2packed2 = yuv2bgr24_2_c;
2466 *yuv2packedX = yuv2bgr24_X_c;
2468 case PIX_FMT_RGB565LE:
2469 case PIX_FMT_RGB565BE:
2470 case PIX_FMT_BGR565LE:
2471 case PIX_FMT_BGR565BE:
2472 *yuv2packed1 = yuv2rgb16_1_c;
2473 *yuv2packed2 = yuv2rgb16_2_c;
2474 *yuv2packedX = yuv2rgb16_X_c;
2476 case PIX_FMT_RGB555LE:
2477 case PIX_FMT_RGB555BE:
2478 case PIX_FMT_BGR555LE:
2479 case PIX_FMT_BGR555BE:
2480 *yuv2packed1 = yuv2rgb15_1_c;
2481 *yuv2packed2 = yuv2rgb15_2_c;
2482 *yuv2packedX = yuv2rgb15_X_c;
2484 case PIX_FMT_RGB444LE:
2485 case PIX_FMT_RGB444BE:
2486 case PIX_FMT_BGR444LE:
2487 case PIX_FMT_BGR444BE:
2488 *yuv2packed1 = yuv2rgb12_1_c;
2489 *yuv2packed2 = yuv2rgb12_2_c;
2490 *yuv2packedX = yuv2rgb12_X_c;
2494 *yuv2packed1 = yuv2rgb8_1_c;
2495 *yuv2packed2 = yuv2rgb8_2_c;
2496 *yuv2packedX = yuv2rgb8_X_c;
2500 *yuv2packed1 = yuv2rgb4_1_c;
2501 *yuv2packed2 = yuv2rgb4_2_c;
2502 *yuv2packedX = yuv2rgb4_X_c;
2504 case PIX_FMT_RGB4_BYTE:
2505 case PIX_FMT_BGR4_BYTE:
2506 *yuv2packed1 = yuv2rgb4b_1_c;
2507 *yuv2packed2 = yuv2rgb4b_2_c;
2508 *yuv2packedX = yuv2rgb4b_X_c;
2514 #define DEBUG_SWSCALE_BUFFERS 0
2515 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
// Main scaling loop: consumes the source slice [srcSliceY, srcSliceY+srcSliceH),
// horizontally scales needed lines into ring buffers, then vertically scales
// and packs each output line. Returns the number of destination lines written.
// NOTE(review): a number of original lines (some braces, declarations such as
// dstY/lastDstY/enough_lines, and a few conditionals) are absent from this
// excerpt; only comments were added here.
2517 static int swScale(SwsContext *c, const uint8_t* src[],
2518 int srcStride[], int srcSliceY,
2519 int srcSliceH, uint8_t* dst[], int dstStride[])
2521 /* load a few things into local vars to make the code more readable? and faster */
2522 const int srcW= c->srcW;
2523 const int dstW= c->dstW;
2524 const int dstH= c->dstH;
2525 const int chrDstW= c->chrDstW;
2526 const int chrSrcW= c->chrSrcW;
2527 const int lumXInc= c->lumXInc;
2528 const int chrXInc= c->chrXInc;
2529 const enum PixelFormat dstFormat= c->dstFormat;
2530 const int flags= c->flags;
2531 int16_t *vLumFilterPos= c->vLumFilterPos;
2532 int16_t *vChrFilterPos= c->vChrFilterPos;
2533 int16_t *hLumFilterPos= c->hLumFilterPos;
2534 int16_t *hChrFilterPos= c->hChrFilterPos;
2535 int16_t *vLumFilter= c->vLumFilter;
2536 int16_t *vChrFilter= c->vChrFilter;
2537 int16_t *hLumFilter= c->hLumFilter;
2538 int16_t *hChrFilter= c->hChrFilter;
2539 int32_t *lumMmxFilter= c->lumMmxFilter;
2540 int32_t *chrMmxFilter= c->chrMmxFilter;
2541 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2542 const int vLumFilterSize= c->vLumFilterSize;
2543 const int vChrFilterSize= c->vChrFilterSize;
2544 const int hLumFilterSize= c->hLumFilterSize;
2545 const int hChrFilterSize= c->hChrFilterSize;
// ring buffers of horizontally-scaled lines awaiting vertical filtering
2546 int16_t **lumPixBuf= c->lumPixBuf;
2547 int16_t **chrUPixBuf= c->chrUPixBuf;
2548 int16_t **chrVPixBuf= c->chrVPixBuf;
2549 int16_t **alpPixBuf= c->alpPixBuf;
2550 const int vLumBufSize= c->vLumBufSize;
2551 const int vChrBufSize= c->vChrBufSize;
2552 uint8_t *formatConvBuffer= c->formatConvBuffer;
// chroma slice geometry after vertical subsampling (round up the height)
2553 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2554 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2556 uint32_t *pal=c->pal_yuv;
2558 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2559 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2560 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2561 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2562 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2563 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2565 /* vars which will change and which we need to store back in the context */
2567 int lumBufIndex= c->lumBufIndex;
2568 int chrBufIndex= c->chrBufIndex;
2569 int lastInLumBuf= c->lastInLumBuf;
2570 int lastInChrBuf= c->lastInChrBuf;
2572 if (isPacked(c->srcFormat)) {
2580 srcStride[3]= srcStride[0];
// vChrDrop skips chroma lines by doubling the stride
2582 srcStride[1]<<= c->vChrDrop;
2583 srcStride[2]<<= c->vChrDrop;
2585 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2586 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2587 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2588 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2589 srcSliceY, srcSliceH, dstY, dstH);
2590 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2591 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2593 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2594 static int warnedAlready=0; //FIXME move this into the context perhaps
2595 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2596 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2597 " ->cannot do aligned memory accesses anymore\n");
2602 /* Note the user might start scaling the picture in the middle so this
2603 will not get executed. This is not really intended but works
2604 currently, so people might do it. */
2605 if (srcSliceY ==0) {
2613 if (!should_dither) {
2614 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
// --- per-output-line loop ---
2618 for (;dstY < dstH; dstY++) {
2619 const int chrDstY= dstY>>c->chrDstVSubSample;
2620 uint8_t *dest[4] = {
2621 dst[0] + dstStride[0] * dstY,
2622 dst[1] + dstStride[1] * chrDstY,
2623 dst[2] + dstStride[2] * chrDstY,
2624 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2627 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2628 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2629 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2630 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2631 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2632 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2635 //handle holes (FAST_BILINEAR & weird filters)
2636 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2637 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2638 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2639 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2641 DEBUG_BUFFERS("dstY: %d\n", dstY);
2642 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2643 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2644 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2645 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2647 // Do we have enough lines in this slice to output the dstY line
2648 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2650 if (!enough_lines) {
// not enough input yet: just buffer everything this slice provides
2651 lastLumSrcY = srcSliceY + srcSliceH - 1;
2652 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2653 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2654 lastLumSrcY, lastChrSrcY);
2657 //Do horizontal scaling
2658 while(lastInLumBuf < lastLumSrcY) {
2659 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2660 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2662 assert(lumBufIndex < 2*vLumBufSize);
2663 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2664 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2665 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2666 hLumFilter, hLumFilterPos, hLumFilterSize,
2669 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2670 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2671 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2675 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2676 lumBufIndex, lastInLumBuf);
2678 while(lastInChrBuf < lastChrSrcY) {
2679 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2680 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2682 assert(chrBufIndex < 2*vChrBufSize);
2683 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2684 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2685 //FIXME replace parameters through context struct (some at least)
2687 if (c->needs_hcscale)
2688 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2689 chrDstW, src1, src2, chrSrcW, chrXInc,
2690 hChrFilter, hChrFilterPos, hChrFilterSize,
2691 formatConvBuffer, pal);
2693 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2694 chrBufIndex, lastInChrBuf);
2696 //wrap buf index around to stay inside the ring buffer
2697 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2698 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2700 break; //we can't output a dstY line so let's try with the next slice
2703 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2705 if (should_dither) {
2706 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2707 c->lumDither8 = dither_8x8_128[dstY & 7];
2709 if (dstY >= dstH-2) {
2710 // hmm looks like we can't use MMX here without overwriting this array's tail
2711 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2712 &yuv2packed1, &yuv2packed2,
// pointers into the ring buffers at the first line each vertical filter needs
2717 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2718 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2719 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2720 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2722 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2723 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2724 if ((dstY&chrSkipMask) || isGray(dstFormat))
2725 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2726 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2727 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2728 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2729 dest, dstW, chrDstW);
2730 } else { //General YV12
2731 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2732 lumSrcPtr, vLumFilterSize,
2733 vChrFilter + chrDstY * vChrFilterSize,
2734 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2735 alpSrcPtr, dest, dstW, chrDstW);
2738 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2739 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2740 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2741 int chrAlpha = vChrFilter[2 * dstY + 1];
2742 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2743 alpPixBuf ? *alpSrcPtr : NULL,
2744 dest[0], dstW, chrAlpha, dstY);
2745 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2746 int lumAlpha = vLumFilter[2 * dstY + 1];
2747 int chrAlpha = vChrFilter[2 * dstY + 1];
2749 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2751 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2752 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2753 alpPixBuf ? alpSrcPtr : NULL,
2754 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2755 } else { //general RGB
2756 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2757 lumSrcPtr, vLumFilterSize,
2758 vChrFilter + dstY * vChrFilterSize,
2759 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2760 alpSrcPtr, dest[0], dstW, dstY);
// destination wants an alpha plane but the source has none: fill opaque
2766 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2767 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2770 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2771 __asm__ volatile("sfence":::"memory")
2775 /* store changed local vars back in the context */
2777 c->lumBufIndex= lumBufIndex;
2778 c->chrBufIndex= chrBufIndex;
2779 c->lastInLumBuf= lastInLumBuf;
2780 c->lastInChrBuf= lastInChrBuf;
2782 return dstY - lastDstY;
// One-time C-path initialization: selects per-format input converters
// (chrToYV12/lumToYV12/alpToYV12), horizontal scalers, and range-conversion
// functions, based on source format, bit depths and flags.
// NOTE(review): many original lines (switch headers, case labels, braces,
// #if/#else directives) are absent from this excerpt; only comments added.
2785 static av_cold void sws_init_swScale_c(SwsContext *c)
2787 enum PixelFormat srcFormat = c->srcFormat;
2789 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2790 &c->yuv2packed1, &c->yuv2packed2,
// --- chroma input converters ---
2793 c->chrToYV12 = NULL;
2795 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2796 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2797 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2798 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2802 case PIX_FMT_BGR4_BYTE:
2803 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
// high-bit-depth planar formats whose endianness differs from native:
// byteswap the chroma planes
2805 case PIX_FMT_YUV444P9LE:
2806 case PIX_FMT_YUV420P9LE:
2807 case PIX_FMT_YUV422P10LE:
2808 case PIX_FMT_YUV420P10LE:
2809 case PIX_FMT_YUV444P10LE:
2810 case PIX_FMT_YUV420P16LE:
2811 case PIX_FMT_YUV422P16LE:
2812 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2814 case PIX_FMT_YUV444P9BE:
2815 case PIX_FMT_YUV420P9BE:
2816 case PIX_FMT_YUV444P10BE:
2817 case PIX_FMT_YUV422P10BE:
2818 case PIX_FMT_YUV420P10BE:
2819 case PIX_FMT_YUV420P16BE:
2820 case PIX_FMT_YUV422P16BE:
2821 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
// when chroma is horizontally subsampled, use the _half converters that
// average pairs of source pixels during the RGB->UV conversion
2824 if (c->chrSrcHSubSample) {
2826 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2827 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2828 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2829 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2830 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2831 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2832 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2833 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2834 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2835 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2836 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2837 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2838 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2839 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2840 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2841 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2842 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2843 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
// unsubsampled chroma: 1:1 converters
2847 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2848 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2849 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2850 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2851 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2852 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2853 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2854 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2855 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2856 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2857 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2858 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2859 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2860 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2861 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2862 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2863 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2864 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
// --- luma / alpha input converters ---
2868 c->lumToYV12 = NULL;
2869 c->alpToYV12 = NULL;
2870 switch (srcFormat) {
2872 case PIX_FMT_YUV444P9LE:
2873 case PIX_FMT_YUV420P9LE:
2874 case PIX_FMT_YUV422P10LE:
2875 case PIX_FMT_YUV420P10LE:
2876 case PIX_FMT_YUV444P10LE:
2877 case PIX_FMT_YUV420P16LE:
2878 case PIX_FMT_YUV422P16LE:
2879 case PIX_FMT_YUV444P16LE:
2880 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2882 case PIX_FMT_YUV444P9BE:
2883 case PIX_FMT_YUV420P9BE:
2884 case PIX_FMT_YUV444P10BE:
2885 case PIX_FMT_YUV422P10BE:
2886 case PIX_FMT_YUV420P10BE:
2887 case PIX_FMT_YUV420P16BE:
2888 case PIX_FMT_YUV422P16BE:
2889 case PIX_FMT_YUV444P16BE:
2890 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2892 case PIX_FMT_YUYV422 :
2893 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2894 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2895 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2896 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2897 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2898 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2899 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2900 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2901 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2902 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2903 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2904 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2908 case PIX_FMT_BGR4_BYTE:
2909 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2910 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2911 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2912 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2913 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2914 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2915 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2916 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2917 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2918 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2919 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2922 switch (srcFormat) {
2924 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2926 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2927 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2928 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
// --- horizontal scaler selection by src/dst bit depth ---
2933 if (c->srcBpc == 8) {
2934 if (c->dstBpc <= 10) {
2935 c->hyScale = c->hcScale = hScale8To15_c;
2936 if (c->flags & SWS_FAST_BILINEAR) {
2937 c->hyscale_fast = hyscale_fast_c;
2938 c->hcscale_fast = hcscale_fast_c;
2941 c->hyScale = c->hcScale = hScale8To19_c;
2944 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
// --- MPEG<->JPEG range conversion (planar YUV destinations only) ---
2947 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2948 if (c->dstBpc <= 10) {
2950 c->lumConvertRange = lumRangeFromJpeg_c;
2951 c->chrConvertRange = chrRangeFromJpeg_c;
2953 c->lumConvertRange = lumRangeToJpeg_c;
2954 c->chrConvertRange = chrRangeToJpeg_c;
2958 c->lumConvertRange = lumRangeFromJpeg16_c;
2959 c->chrConvertRange = chrRangeFromJpeg16_c;
2961 c->lumConvertRange = lumRangeToJpeg16_c;
2962 c->chrConvertRange = chrRangeToJpeg16_c;
// gray / mono sources carry no chroma, so horizontal chroma scaling is skipped
2967 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2968 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2969 c->needs_hcscale = 1;
2972 SwsFunc ff_getSwsFunc(SwsContext *c)
2974 sws_init_swScale_c(c);
2977 ff_sws_init_swScale_mmx(c);
2979 ff_sws_init_swScale_altivec(c);