/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24

  tested special converters (most are tested actually, but I did not write it down ...)

  untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients (BT.601), scaled by 1 << RGB2YUV_SHIFT.
 * Luma coefficients use the 219-step limited luma range, chroma coefficients
 * the 224-step chroma range; negative terms carry their sign explicitly. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
  TODO:
  Special versions: fast Y 1:1 scaling (no interpolation in y direction)

  more intelligent misalignment avoidance for the horizontal scaler
  write special vertical cubic upscale version
  optimize C code (YV12 / minmax)
  add support for packed pixel YUV input & output
  add support for Y8 output
  optimize BGR24 & BGR32
  add BGR4 output support
  write special BGR->BGR scaler
*/
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
#if 0
/* Alternative dither_8x8_220 that tries to correct a gamma of 1.5.
 * Disabled: the linear table above is the active definition; leaving this
 * one unguarded would redefine the same symbol. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#endif
#if 0
/* Alternative dither_8x8_220 that tries to correct a gamma of 2.0.
 * Disabled: the linear table above is the active definition; leaving this
 * one unguarded would redefine the same symbol. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#endif
#if 0
/* Alternative dither_8x8_220 that tries to correct a gamma of 2.5.
 * Disabled: the linear table above is the active definition; leaving this
 * one unguarded would redefine the same symbol. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
/* Flat (no-op) dither row: constant 64 in every byte. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Scale factors used when dithering from [row+1] input bits down to
 * [col+1] output bits. */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
};
/* Store one 16-bit sample, biased and clipped, honouring the caller's
 * `big_endian` flag and `shift` local. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
300 static av_always_inline void
301 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
302 int big_endian, int output_bits)
306 av_assert0(output_bits == 16);
308 for (i = 0; i < dstW; i++) {
309 int val = src[i] + (1 << (shift - 1));
310 output_pixel(&dest[i], val, 0, uint);
314 static av_always_inline void
315 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
316 const int32_t **src, uint16_t *dest, int dstW,
317 int big_endian, int output_bits)
321 av_assert0(output_bits == 16);
323 for (i = 0; i < dstW; i++) {
324 int val = 1 << (shift - 1);
327 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
328 * filters (or anything with negative coeffs, the range can be slightly
329 * wider in both directions. To account for this overflow, we subtract
330 * a constant so it always fits in the signed range (assuming a
331 * reasonable filterSize), and re-add that at the end. */
333 for (j = 0; j < filterSize; j++)
334 val += src[j][i] * filter[j];
336 output_pixel(&dest[i], val, 0x8000, int);
#undef output_pixel

/* Store one 9/10-bit sample clipped to `output_bits`, honouring the
 * caller's `big_endian` flag and `shift` local. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
349 static av_always_inline void
350 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
351 int big_endian, int output_bits)
354 int shift = 15 - output_bits;
356 for (i = 0; i < dstW; i++) {
357 int val = src[i] + (1 << (shift - 1));
358 output_pixel(&dest[i], val);
362 static av_always_inline void
363 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
364 const int16_t **src, uint16_t *dest, int dstW,
365 int big_endian, int output_bits)
368 int shift = 11 + 16 - output_bits;
370 for (i = 0; i < dstW; i++) {
371 int val = 1 << (shift - 1);
374 for (j = 0; j < filterSize; j++)
375 val += src[j][i] * filter[j];
377 output_pixel(&dest[i], val);
/* Instantiate the plane1/planeX entry points for one bit depth and
 * endianness by delegating to the matching _c_template above. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
399 yuv2NBPS( 9, BE, 1, 10, int16_t);
400 yuv2NBPS( 9, LE, 0, 10, int16_t);
401 yuv2NBPS(10, BE, 1, 10, int16_t);
402 yuv2NBPS(10, LE, 0, 10, int16_t);
403 yuv2NBPS(16, BE, 1, 16, int32_t);
404 yuv2NBPS(16, LE, 0, 16, int32_t);
/* Filtered vertical output of one 8-bit plane with ordered dithering. */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12; /* dither in 12.x fixed point */
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
/* Unfiltered vertical output of one 8-bit plane with ordered dithering. */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
431 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
432 const int16_t **chrUSrc, const int16_t **chrVSrc,
433 uint8_t *dest, int chrDstW)
435 enum PixelFormat dstFormat = c->dstFormat;
436 const uint8_t *chrDither = c->chrDither8;
439 if (dstFormat == PIX_FMT_NV12)
440 for (i=0; i<chrDstW; i++) {
441 int u = chrDither[i & 7] << 12;
442 int v = chrDither[(i + 3) & 7] << 12;
444 for (j=0; j<chrFilterSize; j++) {
445 u += chrUSrc[j][i] * chrFilter[j];
446 v += chrVSrc[j][i] * chrFilter[j];
449 dest[2*i]= av_clip_uint8(u>>19);
450 dest[2*i+1]= av_clip_uint8(v>>19);
453 for (i=0; i<chrDstW; i++) {
454 int u = chrDither[i & 7] << 12;
455 int v = chrDither[(i + 3) & 7] << 12;
457 for (j=0; j<chrFilterSize; j++) {
458 u += chrUSrc[j][i] * chrFilter[j];
459 v += chrVSrc[j][i] * chrFilter[j];
462 dest[2*i]= av_clip_uint8(v>>19);
463 dest[2*i+1]= av_clip_uint8(u>>19);
#undef output_pixel

/* Store one gray16 sample with the endianness selected by `target`. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
474 static av_always_inline void
475 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
476 const int32_t **lumSrc, int lumFilterSize,
477 const int16_t *chrFilter, const int32_t **chrUSrc,
478 const int32_t **chrVSrc, int chrFilterSize,
479 const int32_t **alpSrc, uint16_t *dest, int dstW,
480 int y, enum PixelFormat target)
484 for (i = 0; i < (dstW >> 1); i++) {
489 for (j = 0; j < lumFilterSize; j++) {
490 Y1 += lumSrc[j][i * 2] * lumFilter[j];
491 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
495 if ((Y1 | Y2) & 0x10000) {
496 Y1 = av_clip_uint16(Y1);
497 Y2 = av_clip_uint16(Y2);
499 output_pixel(&dest[i * 2 + 0], Y1);
500 output_pixel(&dest[i * 2 + 1], Y2);
504 static av_always_inline void
505 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
506 const int32_t *ubuf[2], const int32_t *vbuf[2],
507 const int32_t *abuf[2], uint16_t *dest, int dstW,
508 int yalpha, int uvalpha, int y,
509 enum PixelFormat target)
511 int yalpha1 = 4095 - yalpha;
513 const int32_t *buf0 = buf[0], *buf1 = buf[1];
515 for (i = 0; i < (dstW >> 1); i++) {
516 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
517 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
519 output_pixel(&dest[i * 2 + 0], Y1);
520 output_pixel(&dest[i * 2 + 1], Y2);
524 static av_always_inline void
525 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
526 const int32_t *ubuf[2], const int32_t *vbuf[2],
527 const int32_t *abuf0, uint16_t *dest, int dstW,
528 int uvalpha, int y, enum PixelFormat target)
532 for (i = 0; i < (dstW >> 1); i++) {
533 int Y1 = (buf0[i * 2 ]+4)>>3;
534 int Y2 = (buf0[i * 2 + 1]+4)>>3;
536 output_pixel(&dest[i * 2 + 0], Y1);
537 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the X/2/1 entry points for a 16-bit-per-component packed output
 * format by casting the 16-bit plane pointers to the 32-bit intermediates
 * the templates expect and forwarding to the matching template. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t  *buf0  = (const int32_t *)  _buf0, \
                  **ubuf  = (const int32_t **) _ubuf, \
                  **vbuf  = (const int32_t **) _vbuf, \
                   *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}
589 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
590 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
#undef output_pixel

/* Store one packed 8-pixel monochrome byte; MONOWHITE is the inverse
 * polarity of MONOBLACK. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
599 static av_always_inline void
600 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
601 const int16_t **lumSrc, int lumFilterSize,
602 const int16_t *chrFilter, const int16_t **chrUSrc,
603 const int16_t **chrVSrc, int chrFilterSize,
604 const int16_t **alpSrc, uint8_t *dest, int dstW,
605 int y, enum PixelFormat target)
607 const uint8_t * const d128=dither_8x8_220[y&7];
608 uint8_t *g = c->table_gU[128] + c->table_gV[128];
612 for (i = 0; i < dstW - 1; i += 2) {
617 for (j = 0; j < lumFilterSize; j++) {
618 Y1 += lumSrc[j][i] * lumFilter[j];
619 Y2 += lumSrc[j][i+1] * lumFilter[j];
623 if ((Y1 | Y2) & 0x100) {
624 Y1 = av_clip_uint8(Y1);
625 Y2 = av_clip_uint8(Y2);
627 acc += acc + g[Y1 + d128[(i + 0) & 7]];
628 acc += acc + g[Y2 + d128[(i + 1) & 7]];
630 output_pixel(*dest++, acc);
635 static av_always_inline void
636 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
637 const int16_t *ubuf[2], const int16_t *vbuf[2],
638 const int16_t *abuf[2], uint8_t *dest, int dstW,
639 int yalpha, int uvalpha, int y,
640 enum PixelFormat target)
642 const int16_t *buf0 = buf[0], *buf1 = buf[1];
643 const uint8_t * const d128 = dither_8x8_220[y & 7];
644 uint8_t *g = c->table_gU[128] + c->table_gV[128];
645 int yalpha1 = 4095 - yalpha;
648 for (i = 0; i < dstW - 7; i += 8) {
649 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
650 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
651 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
652 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
653 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
654 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
655 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
656 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
657 output_pixel(*dest++, acc);
661 static av_always_inline void
662 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
663 const int16_t *ubuf[2], const int16_t *vbuf[2],
664 const int16_t *abuf0, uint8_t *dest, int dstW,
665 int uvalpha, int y, enum PixelFormat target)
667 const uint8_t * const d128 = dither_8x8_220[y & 7];
668 uint8_t *g = c->table_gU[128] + c->table_gV[128];
671 for (i = 0; i < dstW - 7; i += 8) {
672 int acc = g[(buf0[i ] >> 7) + d128[0]];
673 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
674 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
675 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
676 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
677 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
678 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
679 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
680 output_pixel(*dest++, acc);
/* Generate the X/2/1 entry points for an 8-bit packed output format by
 * forwarding straight to the matching template with `fmt` baked in. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}
718 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
719 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
/* Store one macropixel (2 luma + shared chroma) in YUYV or UYVY order. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U; \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V; \
    } else { \
        dest[pos + 0] = U; \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V; \
        dest[pos + 3] = Y2; \
    }
734 static av_always_inline void
735 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
736 const int16_t **lumSrc, int lumFilterSize,
737 const int16_t *chrFilter, const int16_t **chrUSrc,
738 const int16_t **chrVSrc, int chrFilterSize,
739 const int16_t **alpSrc, uint8_t *dest, int dstW,
740 int y, enum PixelFormat target)
744 for (i = 0; i < (dstW >> 1); i++) {
751 for (j = 0; j < lumFilterSize; j++) {
752 Y1 += lumSrc[j][i * 2] * lumFilter[j];
753 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
755 for (j = 0; j < chrFilterSize; j++) {
756 U += chrUSrc[j][i] * chrFilter[j];
757 V += chrVSrc[j][i] * chrFilter[j];
763 if ((Y1 | Y2 | U | V) & 0x100) {
764 Y1 = av_clip_uint8(Y1);
765 Y2 = av_clip_uint8(Y2);
766 U = av_clip_uint8(U);
767 V = av_clip_uint8(V);
769 output_pixels(4*i, Y1, U, Y2, V);
773 static av_always_inline void
774 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
775 const int16_t *ubuf[2], const int16_t *vbuf[2],
776 const int16_t *abuf[2], uint8_t *dest, int dstW,
777 int yalpha, int uvalpha, int y,
778 enum PixelFormat target)
780 const int16_t *buf0 = buf[0], *buf1 = buf[1],
781 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
782 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
783 int yalpha1 = 4095 - yalpha;
784 int uvalpha1 = 4095 - uvalpha;
787 for (i = 0; i < (dstW >> 1); i++) {
788 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
789 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
790 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
791 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
793 output_pixels(i * 4, Y1, U, Y2, V);
797 static av_always_inline void
798 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
799 const int16_t *ubuf[2], const int16_t *vbuf[2],
800 const int16_t *abuf0, uint8_t *dest, int dstW,
801 int uvalpha, int y, enum PixelFormat target)
803 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
804 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
807 if (uvalpha < 2048) {
808 for (i = 0; i < (dstW >> 1); i++) {
809 int Y1 = buf0[i * 2] >> 7;
810 int Y2 = buf0[i * 2 + 1] >> 7;
811 int U = ubuf1[i] >> 7;
812 int V = vbuf1[i] >> 7;
814 output_pixels(i * 4, Y1, U, Y2, V);
817 for (i = 0; i < (dstW >> 1); i++) {
818 int Y1 = buf0[i * 2] >> 7;
819 int Y2 = buf0[i * 2 + 1] >> 7;
820 int U = (ubuf0[i] + ubuf1[i]) >> 8;
821 int V = (vbuf0[i] + vbuf1[i]) >> 8;
823 output_pixels(i * 4, Y1, U, Y2, V);
830 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
831 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
/* Component-order helpers for 48-bit RGB vs BGR, and a 16-bit store with
 * endianness selected from `target`. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)

#undef output_pixel
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
842 static av_always_inline void
843 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
844 const int32_t **lumSrc, int lumFilterSize,
845 const int16_t *chrFilter, const int32_t **chrUSrc,
846 const int32_t **chrVSrc, int chrFilterSize,
847 const int32_t **alpSrc, uint16_t *dest, int dstW,
848 int y, enum PixelFormat target)
852 for (i = 0; i < (dstW >> 1); i++) {
856 int U = -128 << 23; // 19
860 for (j = 0; j < lumFilterSize; j++) {
861 Y1 += lumSrc[j][i * 2] * lumFilter[j];
862 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
864 for (j = 0; j < chrFilterSize; j++) {
865 U += chrUSrc[j][i] * chrFilter[j];
866 V += chrVSrc[j][i] * chrFilter[j];
869 // 8bit: 12+15=27; 16-bit: 12+19=31
875 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
876 Y1 -= c->yuv2rgb_y_offset;
877 Y2 -= c->yuv2rgb_y_offset;
878 Y1 *= c->yuv2rgb_y_coeff;
879 Y2 *= c->yuv2rgb_y_coeff;
882 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
884 R = V * c->yuv2rgb_v2r_coeff;
885 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
886 B = U * c->yuv2rgb_u2b_coeff;
888 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
889 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
890 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
891 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
892 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
893 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
894 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
899 static av_always_inline void
900 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
901 const int32_t *ubuf[2], const int32_t *vbuf[2],
902 const int32_t *abuf[2], uint16_t *dest, int dstW,
903 int yalpha, int uvalpha, int y,
904 enum PixelFormat target)
906 const int32_t *buf0 = buf[0], *buf1 = buf[1],
907 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
908 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
909 int yalpha1 = 4095 - yalpha;
910 int uvalpha1 = 4095 - uvalpha;
913 for (i = 0; i < (dstW >> 1); i++) {
914 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
915 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
916 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
917 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
920 Y1 -= c->yuv2rgb_y_offset;
921 Y2 -= c->yuv2rgb_y_offset;
922 Y1 *= c->yuv2rgb_y_coeff;
923 Y2 *= c->yuv2rgb_y_coeff;
927 R = V * c->yuv2rgb_v2r_coeff;
928 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
929 B = U * c->yuv2rgb_u2b_coeff;
931 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
932 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
933 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
934 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
935 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
936 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
941 static av_always_inline void
942 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
943 const int32_t *ubuf[2], const int32_t *vbuf[2],
944 const int32_t *abuf0, uint16_t *dest, int dstW,
945 int uvalpha, int y, enum PixelFormat target)
947 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
948 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
951 if (uvalpha < 2048) {
952 for (i = 0; i < (dstW >> 1); i++) {
953 int Y1 = (buf0[i * 2] ) >> 2;
954 int Y2 = (buf0[i * 2 + 1]) >> 2;
955 int U = (ubuf0[i] + (-128 << 11)) >> 2;
956 int V = (vbuf0[i] + (-128 << 11)) >> 2;
959 Y1 -= c->yuv2rgb_y_offset;
960 Y2 -= c->yuv2rgb_y_offset;
961 Y1 *= c->yuv2rgb_y_coeff;
962 Y2 *= c->yuv2rgb_y_coeff;
966 R = V * c->yuv2rgb_v2r_coeff;
967 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
968 B = U * c->yuv2rgb_u2b_coeff;
970 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
971 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
972 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
973 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
974 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
975 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
979 for (i = 0; i < (dstW >> 1); i++) {
980 int Y1 = (buf0[i * 2] ) >> 2;
981 int Y2 = (buf0[i * 2 + 1]) >> 2;
982 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
983 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
986 Y1 -= c->yuv2rgb_y_offset;
987 Y2 -= c->yuv2rgb_y_offset;
988 Y1 *= c->yuv2rgb_y_coeff;
989 Y2 *= c->yuv2rgb_y_coeff;
993 R = V * c->yuv2rgb_v2r_coeff;
994 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
995 B = U * c->yuv2rgb_u2b_coeff;
997 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
998 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
999 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1000 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1001 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1002 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
1012 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1013 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1014 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1015 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Write two horizontally adjacent output pixels (indices i*2 and i*2+1) to
 * `dest` for one of the packed RGB target formats, using the precomputed
 * per-component lookup tables `_r`, `_g`, `_b` (indexed by luma, optionally
 * offset by a dither value).  `target` selects the layout at compile time
 * (this function is always inlined into a format-specific wrapper, so the
 * format branches fold away).  A1/A2 are alpha values used only when
 * hasAlpha is set.  NOTE(review): extraction artifact — several original
 * lines (braces, #undef's, an #if/#else around the alpha paths) are elided
 * in this copy. */
1017 static av_always_inline void
1018 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1019 int U, int V, int A1, int A2,
1020 const void *_r, const void *_g, const void *_b, int y,
1021 enum PixelFormat target, int hasAlpha)
/* 32-bit-per-pixel targets: tables are pre-shifted uint32 contributions,
 * so one add per component composes the pixel; alpha is OR-ed in at bit 0
 * or 24 depending on whether the format stores alpha first. */
1023 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1024 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1025 uint32_t *dest = (uint32_t *) _dest;
1026 const uint32_t *r = (const uint32_t *) _r;
1027 const uint32_t *g = (const uint32_t *) _g;
1028 const uint32_t *b = (const uint32_t *) _b;
1031 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1033 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1034 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
/* NOTE(review): the following lines 1037-1043 are an alternative alpha /
 * no-alpha pair of code paths; the #if/#else preprocessor lines selecting
 * between them were lost in extraction — confirm against upstream. */
1037 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1039 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1040 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1042 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1043 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24-bit packed targets: write 6 bytes per pixel pair; r_b/b_r swap the
 * red and blue table lookups for BGR vs RGB ordering. */
1046 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1047 uint8_t *dest = (uint8_t *) _dest;
1048 const uint8_t *r = (const uint8_t *) _r;
1049 const uint8_t *g = (const uint8_t *) _g;
1050 const uint8_t *b = (const uint8_t *) _b;
1052 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1053 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1055 dest[i * 6 + 0] = r_b[Y1];
1056 dest[i * 6 + 1] = g[Y1];
1057 dest[i * 6 + 2] = b_r[Y1];
1058 dest[i * 6 + 3] = r_b[Y2];
1059 dest[i * 6 + 4] = g[Y2];
1060 dest[i * 6 + 5] = b_r[Y2];
/* 16/15/12-bit targets: per-pixel ordered dither offsets are added to the
 * luma index before the table lookup; 2x2 matrices for 565/555, 4x4 for
 * 444, with the blue channel using the row-inverted matrix. */
1063 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1064 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1065 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1066 uint16_t *dest = (uint16_t *) _dest;
1067 const uint16_t *r = (const uint16_t *) _r;
1068 const uint16_t *g = (const uint16_t *) _g;
1069 const uint16_t *b = (const uint16_t *) _b;
1070 int dr1, dg1, db1, dr2, dg2, db2;
1072 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1073 dr1 = dither_2x2_8[ y & 1 ][0];
1074 dg1 = dither_2x2_4[ y & 1 ][0];
1075 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1076 dr2 = dither_2x2_8[ y & 1 ][1];
1077 dg2 = dither_2x2_4[ y & 1 ][1];
1078 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1079 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1080 dr1 = dither_2x2_8[ y & 1 ][0];
1081 dg1 = dither_2x2_8[ y & 1 ][1];
1082 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1083 dr2 = dither_2x2_8[ y & 1 ][1];
1084 dg2 = dither_2x2_8[ y & 1 ][0];
1085 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1087 dr1 = dither_4x4_16[ y & 3 ][0];
1088 dg1 = dither_4x4_16[ y & 3 ][1];
1089 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1090 dr2 = dither_4x4_16[ y & 3 ][1];
1091 dg2 = dither_4x4_16[ y & 3 ][0];
1092 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1095 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1096 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* 8/4-bit targets: larger dither matrices indexed by output x position;
 * RGB4/BGR4 packs two 4-bit pixels into one byte. */
1097 } else /* 8/4-bit */ {
1098 uint8_t *dest = (uint8_t *) _dest;
1099 const uint8_t *r = (const uint8_t *) _r;
1100 const uint8_t *g = (const uint8_t *) _g;
1101 const uint8_t *b = (const uint8_t *) _b;
1102 int dr1, dg1, db1, dr2, dg2, db2;
1104 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1105 const uint8_t * const d64 = dither_8x8_73[y & 7];
1106 const uint8_t * const d32 = dither_8x8_32[y & 7];
1107 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1108 db1 = d64[(i * 2 + 0) & 7];
1109 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1110 db2 = d64[(i * 2 + 1) & 7];
1112 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1113 const uint8_t * const d128 = dither_8x8_220[y & 7];
1114 dr1 = db1 = d128[(i * 2 + 0) & 7];
1115 dg1 = d64[(i * 2 + 0) & 7];
1116 dr2 = db2 = d128[(i * 2 + 1) & 7];
1117 dg2 = d64[(i * 2 + 1) & 7];
1120 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1121 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1122 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1124 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1125 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Multi-tap vertical scaling + YUV->RGB output: for each output pixel pair,
 * apply the lum/chr FIR filters across the input line buffers, clip to
 * 8 bits, then look up the RGB tables and emit via yuv2rgb_write().
 * NOTE(review): extraction artifact — the accumulator initializations
 * (Y1/Y2/U/V/A1/A2 start values) and several closing braces are elided
 * from this copy. */
1130 static av_always_inline void
1131 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1132 const int16_t **lumSrc, int lumFilterSize,
1133 const int16_t *chrFilter, const int16_t **chrUSrc,
1134 const int16_t **chrVSrc, int chrFilterSize,
1135 const int16_t **alpSrc, uint8_t *dest, int dstW,
1136 int y, enum PixelFormat target, int hasAlpha)
1140 for (i = 0; i < (dstW >> 1); i++) {
1146 int av_unused A1, A2;
1147 const void *r, *g, *b;
/* Accumulate the vertical filter taps for two luma and one chroma sample. */
1149 for (j = 0; j < lumFilterSize; j++) {
1150 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1151 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1153 for (j = 0; j < chrFilterSize; j++) {
1154 U += chrUSrc[j][i] * chrFilter[j];
1155 V += chrVSrc[j][i] * chrFilter[j];
/* Cheap overflow test: only clip when some value left the 0..255 range. */
1161 if ((Y1 | Y2 | U | V) & 0x100) {
1162 Y1 = av_clip_uint8(Y1);
1163 Y2 = av_clip_uint8(Y2);
1164 U = av_clip_uint8(U);
1165 V = av_clip_uint8(V);
/* Same filtering/clipping for the alpha plane when present. */
1170 for (j = 0; j < lumFilterSize; j++) {
1171 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1172 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1176 if ((A1 | A2) & 0x100) {
1177 A1 = av_clip_uint8(A1);
1178 A2 = av_clip_uint8(A2);
1182 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1184 g = (c->table_gU[U] + c->table_gV[V]);
1187 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1188 r, g, b, y, target, hasAlpha);
/* Two-tap (bilinear) vertical scaling + YUV->RGB output: blend two input
 * lines with weights yalpha/uvalpha (12-bit, 0..4095), shift back down to
 * 8 bits (>>19 of 15-bit samples * 12-bit weights), then emit a pixel pair
 * through yuv2rgb_write(). */
1193 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1194 const int16_t *ubuf[2], const int16_t *vbuf[2],
1195 const int16_t *abuf[2], uint8_t *dest, int dstW,
1196 int yalpha, int uvalpha, int y,
1197 enum PixelFormat target, int hasAlpha)
1199 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1200 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1201 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1202 *abuf0 = hasAlpha ? abuf[0] : NULL,
1203 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* Complementary weights so buf0*w0 + buf1*w1 interpolates between lines. */
1204 int yalpha1 = 4095 - yalpha;
1205 int uvalpha1 = 4095 - uvalpha;
1208 for (i = 0; i < (dstW >> 1); i++) {
1209 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1210 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1211 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1212 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1214 const void *r = c->table_rV[V],
1215 *g = (c->table_gU[U] + c->table_gV[V]),
1216 *b = c->table_bU[U];
1219 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1220 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1223 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1224 r, g, b, y, target, hasAlpha);
1228 static av_always_inline void
1229 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1230 const int16_t *ubuf[2], const int16_t *vbuf[2],
1231 const int16_t *abuf0, uint8_t *dest, int dstW,
1232 int uvalpha, int y, enum PixelFormat target,
1235 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1236 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1239 if (uvalpha < 2048) {
1240 for (i = 0; i < (dstW >> 1); i++) {
1241 int Y1 = buf0[i * 2] >> 7;
1242 int Y2 = buf0[i * 2 + 1] >> 7;
1243 int U = ubuf1[i] >> 7;
1244 int V = vbuf1[i] >> 7;
1246 const void *r = c->table_rV[V],
1247 *g = (c->table_gU[U] + c->table_gV[V]),
1248 *b = c->table_bU[U];
1251 A1 = abuf0[i * 2 ] >> 7;
1252 A2 = abuf0[i * 2 + 1] >> 7;
1255 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1256 r, g, b, y, target, hasAlpha);
1259 for (i = 0; i < (dstW >> 1); i++) {
1260 int Y1 = buf0[i * 2] >> 7;
1261 int Y2 = buf0[i * 2 + 1] >> 7;
1262 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1263 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1265 const void *r = c->table_rV[V],
1266 *g = (c->table_gU[U] + c->table_gV[V]),
1267 *b = c->table_bU[U];
1270 A1 = abuf0[i * 2 ] >> 7;
1271 A2 = abuf0[i * 2 + 1] >> 7;
1274 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1275 r, g, b, y, target, hasAlpha);
/* Wrapper generators: YUV2RGBWRAPPERX emits the _X_c (multi-tap) entry
 * point for a given name/format/hasAlpha combination; YUV2RGBWRAPPER also
 * emits the _2_c (two-tap) and _1_c (one-tap) entry points, all forwarding
 * to the corresponding *_c_template with the format baked in so the
 * per-format branches constant-fold. */
1280 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1281 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1282 const int16_t **lumSrc, int lumFilterSize, \
1283 const int16_t *chrFilter, const int16_t **chrUSrc, \
1284 const int16_t **chrVSrc, int chrFilterSize, \
1285 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1288 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1289 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1290 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1292 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1293 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1294 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1295 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1296 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1297 int yalpha, int uvalpha, int y) \
1299 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1300 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1303 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1304 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1305 const int16_t *abuf0, uint8_t *dest, int dstW, \
1306 int uvalpha, int y) \
1308 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1309 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiations: the first two pick alpha support at runtime from
 * c->alpPixBuf; the a32/x32 pairs are compile-time alpha / no-alpha
 * variants (gated by CONFIG_SWSCALE_ALPHA); the rest are fixed non-alpha
 * packed formats. */
1313 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1314 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1316 #if CONFIG_SWSCALE_ALPHA
1317 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1318 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1320 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1321 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1323 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1324 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1325 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1326 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1327 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1328 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1329 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1330 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-chroma-resolution YUV->RGB: one chroma sample per output pixel (no
 * 2x horizontal chroma subsampling), converted arithmetically with the
 * c->yuv2rgb_* coefficients instead of lookup tables.  `step` is the
 * output bytes per pixel (3 for 24-bit, 4 for 32-bit formats).
 * NOTE(review): extraction artifact — accumulator initializations for Y/A,
 * the R/G/B byte-store lines between the dest[0]/dest[3] alpha stores, and
 * the dest pointer advance are elided from this copy. */
1332 static av_always_inline void
1333 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1334 const int16_t **lumSrc, int lumFilterSize,
1335 const int16_t *chrFilter, const int16_t **chrUSrc,
1336 const int16_t **chrVSrc, int chrFilterSize,
1337 const int16_t **alpSrc, uint8_t *dest,
1338 int dstW, int y, enum PixelFormat target, int hasAlpha)
1341 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1343 for (i = 0; i < dstW; i++) {
/* U/V start biased by -128 (chroma midpoint) pre-scaled into the filter
 * fixed-point domain, plus a rounding term. */
1346 int U = (1<<9)-(128 << 19);
1347 int V = (1<<9)-(128 << 19);
1351 for (j = 0; j < lumFilterSize; j++) {
1352 Y += lumSrc[j][i] * lumFilter[j];
1354 for (j = 0; j < chrFilterSize; j++) {
1355 U += chrUSrc[j][i] * chrFilter[j];
1356 V += chrVSrc[j][i] * chrFilter[j];
1363 for (j = 0; j < lumFilterSize; j++) {
1364 A += alpSrc[j][i] * lumFilter[j];
1368 A = av_clip_uint8(A);
/* Colorspace conversion in fixed point using the context coefficients. */
1370 Y -= c->yuv2rgb_y_offset;
1371 Y *= c->yuv2rgb_y_coeff;
1373 R = Y + V*c->yuv2rgb_v2r_coeff;
1374 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1375 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Clip only when a component overflowed the 30-bit working range. */
1376 if ((R | G | B) & 0xC0000000) {
1377 R = av_clip_uintp2(R, 30);
1378 G = av_clip_uintp2(G, 30);
1379 B = av_clip_uintp2(B, 30);
/* Alpha (or opaque 255) goes in byte 0 or byte 3 depending on layout. */
1384 dest[0] = hasAlpha ? A : 255;
1398 dest[3] = hasAlpha ? A : 255;
1401 dest[0] = hasAlpha ? A : 255;
1415 dest[3] = hasAlpha ? A : 255;
/* Full-chroma wrapper instantiations: runtime-alpha variants first, then
 * compile-time alpha (a*) / no-alpha (x*) pairs under CONFIG_SWSCALE_ALPHA,
 * then the 24-bit formats. */
1423 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1424 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1425 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1426 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1428 #if CONFIG_SWSCALE_ALPHA
1429 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1430 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1431 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1432 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1434 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1435 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1436 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1437 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1439 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1440 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/* Fill `height` rows of `width` bytes of `plane` with a constant value,
 * starting at row y (parameter elided in this copy) and advancing by
 * `stride` per row (the ptr += stride line is also elided). */
1442 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1443 int width, int height,
1447 uint8_t *ptr = plane + stride*y;
1448 for (i=0; i<height; i++) {
1449 memset(ptr, val, width);
/* Helpers for the rgb48 templates below: input_pixel reads one 16-bit
 * component honoring the format's endianness; r/b map the in-memory
 * component order (b_r/r_b locals) to semantic red/blue for BGR48. */
1454 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1456 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1457 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Convert a row of packed 48-bit RGB/BGR (16 bits per component) to 16-bit
 * luma using the RY/GY/BY fixed-point coefficients with rounding. */
1459 static av_always_inline void
1460 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1461 enum PixelFormat origin)
1464 for (i = 0; i < width; i++) {
1465 unsigned int r_b = input_pixel(&src[i*3+0]);
1466 unsigned int g = input_pixel(&src[i*3+1]);
1467 unsigned int b_r = input_pixel(&src[i*3+2]);
1469 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert a row of packed 48-bit RGB/BGR to 16-bit U/V chroma (one chroma
 * sample per pixel).  Only src1 is read; src2 is part of the common
 * ToUV signature (elsewhere in this file the two are asserted equal). */
1473 static av_always_inline void
1474 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1475 const uint16_t *src1, const uint16_t *src2,
1476 int width, enum PixelFormat origin)
1480 for (i = 0; i < width; i++) {
1481 int r_b = input_pixel(&src1[i*3+0]);
1482 int g = input_pixel(&src1[i*3+1]);
1483 int b_r = input_pixel(&src1[i*3+2]);
1485 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1486 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same as rgb48ToUV_c_template but horizontally subsampled: each output
 * chroma sample is computed from the rounded average of two adjacent
 * input pixels. */
1490 static av_always_inline void
1491 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1492 const uint16_t *src1, const uint16_t *src2,
1493 int width, enum PixelFormat origin)
1497 for (i = 0; i < width; i++) {
1498 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1499 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1500 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1502 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1503 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the public uint8_t* entry points (ToY, ToUV, ToUV_half) for one
 * 48-bit format: each casts the byte pointers to uint16_t* and forwards to
 * the corresponding rgb48*_c_template with the origin format baked in. */
1511 #define rgb48funcs(pattern, BE_LE, origin) \
1512 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
1513 int width, uint32_t *unused) \
1515 const uint16_t *src = (const uint16_t *) _src; \
1516 uint16_t *dst = (uint16_t *) _dst; \
1517 rgb48ToY_c_template(dst, src, width, origin); \
1520 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1521 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1522 int width, uint32_t *unused) \
1524 const uint16_t *src1 = (const uint16_t *) _src1, \
1525 *src2 = (const uint16_t *) _src2; \
1526 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1527 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1530 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1531 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1532 int width, uint32_t *unused) \
1534 const uint16_t *src1 = (const uint16_t *) _src1, \
1535 *src2 = (const uint16_t *) _src2; \
1536 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1537 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* Instantiate input converters for all four 48-bit formats. */
1540 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1541 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1542 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1543 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Pixel fetch for the 16/32-bit packed-RGB templates: 32-bit formats read
 * a native-aligned 32-bit word; 16-bit formats read a 16-bit word honoring
 * the format's endianness. */
1545 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1546 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1547 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1549 static av_always_inline void
1550 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1551 int width, enum PixelFormat origin,
1552 int shr, int shg, int shb, int shp,
1553 int maskr, int maskg, int maskb,
1554 int rsh, int gsh, int bsh, int S)
1556 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1557 rnd = (32<<((S)-1)) + (1<<(S-7));
1560 for (i = 0; i < width; i++) {
1561 int px = input_pixel(i) >> shp;
1562 int b = (px & maskb) >> shb;
1563 int g = (px & maskg) >> shg;
1564 int r = (px & maskr) >> shr;
1566 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Generic packed-RGB (16/32 bpp) to U/V converter, one chroma sample per
 * pixel; same mask/shift parameterization as rgb16_32ToY_c_template, with
 * the chroma midpoint (256<<(S-1)) folded into the rounding constant. */
1570 static av_always_inline void
1571 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1572 const uint8_t *src, int width,
1573 enum PixelFormat origin,
1574 int shr, int shg, int shb, int shp,
1575 int maskr, int maskg, int maskb,
1576 int rsh, int gsh, int bsh, int S)
1578 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1579 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1580 rnd = (256<<((S)-1)) + (1<<(S-7));
1583 for (i = 0; i < width; i++) {
1584 int px = input_pixel(i) >> shp;
1585 int b = (px & maskb) >> shb;
1586 int g = (px & maskg) >> shg;
1587 int r = (px & maskr) >> shr;
1589 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1590 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
1594 static av_always_inline void
1595 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1596 const uint8_t *src, int width,
1597 enum PixelFormat origin,
1598 int shr, int shg, int shb, int shp,
1599 int maskr, int maskg, int maskb,
1600 int rsh, int gsh, int bsh, int S)
1602 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1603 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1604 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
1607 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1608 for (i = 0; i < width; i++) {
1609 int px0 = input_pixel(2 * i + 0) >> shp;
1610 int px1 = input_pixel(2 * i + 1) >> shp;
1611 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1612 int rb = px0 + px1 - g;
1614 b = (rb & maskb) >> shb;
1615 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1616 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1619 g = (g & maskg) >> shg;
1621 r = (rb & maskr) >> shr;
1623 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1624 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Generate the three input-converter entry points (ToY, ToUV, ToUV_half)
 * for one packed 16/32-bit RGB layout, forwarding to the templates above
 * with all mask/shift parameters baked in as compile-time constants. */
1630 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1631 maskg, maskb, rsh, gsh, bsh, S) \
1632 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
1633 int width, uint32_t *unused) \
1635 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
1636 shr, shg, shb, shp, \
1637 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1640 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1641 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1642 int width, uint32_t *unused) \
1644 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1645 shr, shg, shb, shp, \
1646 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1649 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1650 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1651 int width, uint32_t *unused) \
1653 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1654 shr, shg, shb, shp, \
1655 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Layout tables for every supported 16/32-bit packed-RGB input.  555
 * formats use S = RGB2YUV_SHIFT+7 (5-bit components, one less fractional
 * bit than 565/8888's +8). */
1658 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1659 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1660 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1661 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1662 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1663 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1664 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1665 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1666 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1667 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1668 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1669 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Planar GBR (8-bit, separate g/b/r planes) to luma.
 * NOTE(review): dst is uint16_t* here while most ToY converters in this
 * file take int16_t* — confirm against the function-pointer type this is
 * assigned to. */
1671 static void gbr24pToY_c(uint16_t *dst, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1672 int width, uint32_t *unused)
1675 for (i = 0; i < width; i++) {
1676 unsigned int g = gsrc[i];
1677 unsigned int b = bsrc[i];
1678 unsigned int r = rsrc[i];
1680 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar GBR to U/V, one chroma sample per pixel; the 0x4001 rounding term
 * folds in the chroma midpoint bias. */
1684 static void gbr24pToUV_c(uint16_t *dstU, uint16_t *dstV,
1685 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1686 int width, enum PixelFormat origin)
1689 for (i = 0; i < width; i++) {
1690 unsigned int g = gsrc[i];
1691 unsigned int b = bsrc[i];
1692 unsigned int r = rsrc[i];
1694 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
1695 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar GBR to U/V with 2x horizontal chroma subsampling: each component
 * is the SUM of two adjacent pixels (hence the extra >>1 via the larger
 * rounding/shift constants). */
1699 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
1700 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1701 int width, enum PixelFormat origin)
1704 for (i = 0; i < width; i++) {
1705 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
1706 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
1707 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
1709 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
1710 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/* Extract the alpha channel from ABGR/ARGB-style input (alpha is byte 0 of
 * each 4-byte pixel), widened to the 14-bit intermediate range (<<6). */
1714 static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1717 for (i=0; i<width; i++) {
1718 dst[i]= src[4*i]<<6;
/* Extract the alpha channel from RGBA/BGRA-style input (alpha is byte 3 of
 * each 4-byte pixel), widened to the 14-bit intermediate range (<<6). */
1722 static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1725 for (i=0; i<width; i++) {
1726 dst[i]= src[4*i+3]<<6;
/* Extract alpha from PAL8 input: look up each index in the 32-bit palette
 * and take the top byte (the `int d = src[i]` line is elided in this copy). */
1730 static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
1733 for (i=0; i<width; i++) {
1736 dst[i]= (pal[d] >> 24)<<6;
/* Extract luma from PAL8 input via the palette's low byte.
 * NOTE(review): `long width` is inconsistent with the `int width` used by
 * every sibling converter — verify against the function-pointer type. */
1740 static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
1743 for (i=0; i<width; i++) {
1746 dst[i]= (pal[d] & 0xFF)<<6;
/* Extract U (palette byte 1) and V (palette byte 2) from PAL8 input.
 * NOTE(review): dstU is declared uint16_t* but dstV int16_t* — this
 * mismatch looks like a typo relative to the sibling converters; confirm
 * and unify with the expected function-pointer signature. */
1750 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1751 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1752 int width, uint32_t *pal)
1755 assert(src1 == src2);
1756 for (i=0; i<width; i++) {
1757 int p= pal[src1[i]];
1759 dstU[i]= (uint8_t)(p>> 8)<<6;
1760 dstV[i]= (uint8_t)(p>>16)<<6;
/* Expand 1-bit-per-pixel "monowhite" input to 15-bit luma (each bit maps
 * to 0 or 16383).  NOTE(review): the line loading/inverting the packed
 * byte into `d` is elided in this copy — for monowhite (0 = white) it is
 * expected to invert the source byte; confirm against upstream. */
1764 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1767 for (i=0; i<width/8; i++) {
1770 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Tail handling for widths not divisible by 8. */
1774 for(j=0; j<(width&7); j++)
1775 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Expand 1-bit-per-pixel "monoblack" input to 15-bit luma (each bit maps
 * to 0 or 16383; the line loading the packed byte into `d` is elided in
 * this copy). */
1779 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1782 for (i=0; i<width/8; i++) {
1785 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Tail handling for widths not divisible by 8. */
1789 for(j=0; j<(width&7); j++)
1790 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1794 //FIXME yuy2* can read up to 7 samples too much
/* Extract luma from YUY2 (Y at even byte positions; the dst[i]=src[2*i]
 * body line is elided in this copy). */
1796 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1800 for (i=0; i<width; i++)
/* Extract U/V from YUY2 (U at byte 1, V at byte 3 of each 4-byte pair).
 * Only src1 is read; the assert documents that both lines are the same. */
1804 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1805 const uint8_t *src2, int width, uint32_t *unused)
1808 for (i=0; i<width; i++) {
1809 dstU[i]= src1[4*i + 1];
1810 dstV[i]= src1[4*i + 3];
1812 assert(src1 == src2);
/* Byte-swap a row of 16-bit luma samples (endianness conversion for
 * 16-bit-per-component inputs). */
1815 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1818 const uint16_t *src = (const uint16_t *) _src;
1819 uint16_t *dst = (uint16_t *) _dst;
1820 for (i=0; i<width; i++) {
1821 dst[i] = av_bswap16(src[i]);
/* Byte-swap a row of 16-bit U and V samples (endianness conversion). */
1825 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
1826 const uint8_t *_src2, int width, uint32_t *unused)
1829 const uint16_t *src1 = (const uint16_t *) _src1,
1830 *src2 = (const uint16_t *) _src2;
1831 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1832 for (i=0; i<width; i++) {
1833 dstU[i] = av_bswap16(src1[i]);
1834 dstV[i] = av_bswap16(src2[i]);
1838 /* This is almost identical to the previous, end exists only because
1839 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma from UYVY (Y at odd byte positions; the dst[i]=src[2*i+1]
 * body line is elided in this copy). */
1840 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1844 for (i=0; i<width; i++)
/* Extract U/V from UYVY (U at byte 0, V at byte 2 of each 4-byte pair).
 * Only src1 is read; the assert documents that both lines are the same. */
1848 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1849 const uint8_t *src2, int width, uint32_t *unused)
1852 for (i=0; i<width; i++) {
1853 dstU[i]= src1[4*i + 0];
1854 dstV[i]= src1[4*i + 2];
1856 assert(src1 == src2);
/* De-interleave a semi-planar chroma row: even bytes go to dst1, odd bytes
 * to dst2 (shared by the NV12/NV21 wrappers below, which swap dst order). */
1859 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1860 const uint8_t *src, int width)
1863 for (i = 0; i < width; i++) {
1864 dst1[i] = src[2*i+0];
1865 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V,U,V... — U to dstU, V to dstV. */
1869 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1870 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1871 int width, uint32_t *unused)
1873 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U,V,U... — destinations swapped vs NV12. */
1876 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1877 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1878 int width, uint32_t *unused)
1880 nvXXtoUV_c(dstV, dstU, src1, width);
1883 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed BGR24 to luma (the b/g/r byte loads from src[3*i+0..2] are
 * elided in this copy; cf. bgr24ToUV_c below for the component order). */
1885 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
1886 int width, uint32_t *unused)
1889 for (i=0; i<width; i++) {
1894 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed BGR24 to U/V, one chroma sample per pixel; the 256<<(SHIFT-1)
 * term is the chroma midpoint bias, and the extra 6 fractional bits are
 * kept in the output (>>SHIFT-6). */
1898 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1899 const uint8_t *src2, int width, uint32_t *unused)
1902 for (i=0; i<width; i++) {
1903 int b= src1[3*i + 0];
1904 int g= src1[3*i + 1];
1905 int r= src1[3*i + 2];
1907 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1908 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1910 assert(src1 == src2);
/* Packed BGR24 to U/V with 2x horizontal subsampling: components are the
 * SUM of two adjacent pixels, compensated by the one-larger final shift. */
1913 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1914 const uint8_t *src2, int width, uint32_t *unused)
1917 for (i=0; i<width; i++) {
1918 int b= src1[6*i + 0] + src1[6*i + 3];
1919 int g= src1[6*i + 1] + src1[6*i + 4];
1920 int r= src1[6*i + 2] + src1[6*i + 5];
1922 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1923 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1925 assert(src1 == src2);
/* Packed RGB24 to luma (the r/g/b byte loads from src[3*i+0..2] are
 * elided in this copy; cf. rgb24ToUV_c below for the component order). */
1928 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1932 for (i=0; i<width; i++) {
1937 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed RGB24 to U/V, one chroma sample per pixel (mirror of bgr24ToUV_c
 * with r and b swapped in memory order). */
1941 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1942 const uint8_t *src2, int width, uint32_t *unused)
1946 for (i=0; i<width; i++) {
1947 int r= src1[3*i + 0];
1948 int g= src1[3*i + 1];
1949 int b= src1[3*i + 2];
1951 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1952 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* Packed RGB24 to U/V with 2x horizontal subsampling (mirror of
 * bgr24ToUV_half_c with r and b swapped in memory order). */
1956 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1957 const uint8_t *src2, int width, uint32_t *unused)
1961 for (i=0; i<width; i++) {
1962 int r= src1[6*i + 0] + src1[6*i + 3];
1963 int g= src1[6*i + 1] + src1[6*i + 4];
1964 int b= src1[6*i + 2] + src1[6*i + 5];
1966 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1967 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Horizontal FIR scaling of a 16-bit input row into the 19-bit (int32)
 * intermediate format.  The shift `sh` is derived from the source bit
 * depth; RGB/PAL8 sources with <16-bit depth use a fixed shift (the
 * assignment line pairing the if at line 1981 is partially elided). */
1971 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1972 const int16_t *filter,
1973 const int16_t *filterPos, int filterSize)
1976 int32_t *dst = (int32_t *) _dst;
1977 const uint16_t *src = (const uint16_t *) _src;
1978 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1981 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
1984 for (i = 0; i < dstW; i++) {
1986 int srcPos = filterPos[i];
1989 for (j = 0; j < filterSize; j++) {
1990 val += src[srcPos + j] * filter[filterSize * i + j];
1992 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1993 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Horizontal FIR scaling of a 16-bit input row into the 15-bit (int16)
 * intermediate format; RGB/PAL8 sources use a fixed shift of 13, others a
 * shift derived from the source bit depth. */
1997 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1998 const int16_t *filter,
1999 const int16_t *filterPos, int filterSize)
2002 const uint16_t *src = (const uint16_t *) _src;
2003 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2006 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2008 for (i = 0; i < dstW; i++) {
2010 int srcPos = filterPos[i];
2013 for (j = 0; j < filterSize; j++) {
2014 val += src[srcPos + j] * filter[filterSize * i + j];
2016 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2017 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2021 // bilinear / bicubic scaling
/* Horizontal FIR scaling of an 8-bit input row into the 15-bit (int16)
 * intermediate format: 8-bit samples * 14-bit filter = 22 bits, >>7 gives
 * 15 bits, clamped because bicubic filters can overshoot. */
2022 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2023 const int16_t *filter, const int16_t *filterPos,
2027 for (i=0; i<dstW; i++) {
2029 int srcPos= filterPos[i];
2031 for (j=0; j<filterSize; j++) {
2032 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2034 //filter += hFilterSize;
2035 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Horizontal FIR scaling of an 8-bit input row into the 19-bit (int32)
 * intermediate format (>>3 instead of >>7 to keep four more bits). */
2040 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2041 const int16_t *filter, const int16_t *filterPos,
2045 int32_t *dst = (int32_t *) _dst;
2046 for (i=0; i<dstW; i++) {
2048 int srcPos= filterPos[i];
2050 for (j=0; j<filterSize; j++) {
2051 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2053 //filter += hFilterSize;
2054 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2059 //FIXME all pal and rgb srcFormats could do this convertion as well
2060 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand 15-bit chroma from limited (MPEG) to full (JPEG) range in place. */
2061 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2064 for (i = 0; i < width; i++) {
2065 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2066 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress 15-bit chroma from full (JPEG) to limited (MPEG) range in place. */
2069 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2072 for (i = 0; i < width; i++) {
2073 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2074 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand 15-bit luma from limited (MPEG) to full (JPEG) range in place. */
2077 static void lumRangeToJpeg_c(int16_t *dst, int width)
2080 for (i = 0; i < width; i++)
2081 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress 15-bit luma from full (JPEG) to limited (MPEG) range in place. */
2083 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2086 for (i = 0; i < width; i++)
2087 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 19-bit (int32 buffer) variant of chrRangeToJpeg_c: constants scaled by
 * 16 to match the extra 4 fractional bits. */
2090 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2093 int32_t *dstU = (int32_t *) _dstU;
2094 int32_t *dstV = (int32_t *) _dstV;
2095 for (i = 0; i < width; i++) {
2096 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2097 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit (int32 buffer) variant of chrRangeFromJpeg_c. */
2100 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2103 int32_t *dstU = (int32_t *) _dstU;
2104 int32_t *dstV = (int32_t *) _dstV;
2105 for (i = 0; i < width; i++) {
2106 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2107 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/* 19-bit (int32 buffer) variant of lumRangeToJpeg_c (coefficient and
 * shift rescaled: 19077>>2 = 4769, >>14 -> >>12). */
2110 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2113 int32_t *dst = (int32_t *) _dst;
2114 for (i = 0; i < width; i++)
2115 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/* 19-bit (int32 buffer) variant of lumRangeFromJpeg_c. */
2117 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2120 int32_t *dst = (int32_t *) _dst;
2121 for (i = 0; i < width; i++)
2122 dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
/* Fast (low-quality) bilinear horizontal luma scaler: walks the source
 * line with a 16.16 fixed-point position and linearly interpolates
 * between neighboring pixels, producing 15-bit output (8-bit input << 7).
 *
 * Fix: restore the `xpos += xInc;` step at the end of the main loop —
 * without it the fixed-point source position never advances and every
 * output pixel would be computed from src[0]. */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    for (i = 0; i < dstWidth; i++) {
        unsigned int xx     = xpos >> 16;           // integer source index
        unsigned int xalpha = (xpos & 0xFFFF) >> 9; // 7-bit blend weight
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos += xInc; // advance 16.16 fixed-point source position
    }
    /* Outputs that would interpolate past the last source pixel get the
     * last pixel replicated instead. */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--)
        dst[i] = src[srcW - 1] * 128;
}
2140 // *** horizontal scale Y line to temp buffer
2141 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2142 const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
2144 const int16_t *hLumFilter,
2145 const int16_t *hLumFilterPos, int hLumFilterSize,
2146 uint8_t *formatConvBuffer,
2147 uint32_t *pal, int isAlpha)
2149 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2150 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2153 toYV12(formatConvBuffer, src, src2, src3, srcW, pal);
2154 src= formatConvBuffer;
2157 if (!c->hyscale_fast) {
2158 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2159 } else { // fast bilinear upscale / crap downscale
2160 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2164 convertRange(dst, dstWidth);
/* Fast (low-quality) bilinear horizontal chroma scaler: scales the U and
 * V lines in lockstep using a 16.16 fixed-point source position and a
 * 7-bit blend weight (xalpha ^ 127 == 127 - xalpha for xalpha < 128).
 *
 * Fix: restore the `xpos += xInc;` step at the end of the main loop —
 * without it the fixed-point source position never advances and every
 * output pixel would be computed from the first source pixel. */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    for (i = 0; i < dstWidth; i++) {
        unsigned int xx     = xpos >> 16;           // integer source index
        unsigned int xalpha = (xpos & 0xFFFF) >> 9; // 7-bit blend weight
        dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
        dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
        xpos += xInc; // advance 16.16 fixed-point source position
    }
    /* Outputs that would interpolate past the last source pixel get the
     * last pixel replicated instead. */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) {
        dst1[i] = src1[srcW - 1] * 128;
        dst2[i] = src2[srcW - 1] * 128;
    }
}
2186 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2187 const uint8_t *src0, const uint8_t *src1, const uint8_t *src2,
2188 int srcW, int xInc, const int16_t *hChrFilter,
2189 const int16_t *hChrFilterPos, int hChrFilterSize,
2190 uint8_t *formatConvBuffer, uint32_t *pal)
2193 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2194 c->chrToYV12(formatConvBuffer, buf2, src0, src1, src2, srcW, pal);
2195 src1= formatConvBuffer;
2199 if (!c->hcscale_fast) {
2200 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2201 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2202 } else { // fast bilinear upscale / crap downscale
2203 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2206 if (c->chrConvertRange)
2207 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the C output ("write-back") functions for c->dstFormat:
 * the planar writers (yuv2plane1/yuv2planeX) chosen by bit depth and
 * endianness, the NV12/NV21 interleaved-chroma writer, and the packed
 * writers used with 1-tap, 2-tap and N-tap vertical filters.
 * NOTE(review): several case labels / braces of this dispatch are elided
 * in this excerpt; only the visible code is reproduced here.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;

    /* planar writers: depth and endianness pick the variant */
    if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        /* depth_minus1 == 8 means 9 bits per component, otherwise 10 */
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
        /* 8 bits per component */
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;

    /* packed writers, full horizontal chroma interpolation variants */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;
    /* packed writers, regular (non-full-chroma) variants */
    switch (dstFormat) {
    case PIX_FMT_GRAY16BE:
        *yuv2packed1 = yuv2gray16BE_1_c;
        *yuv2packed2 = yuv2gray16BE_2_c;
        *yuv2packedX = yuv2gray16BE_X_c;
    case PIX_FMT_GRAY16LE:
        *yuv2packed1 = yuv2gray16LE_1_c;
        *yuv2packed2 = yuv2gray16LE_2_c;
        *yuv2packedX = yuv2gray16LE_X_c;
    case PIX_FMT_MONOWHITE:
        *yuv2packed1 = yuv2monowhite_1_c;
        *yuv2packed2 = yuv2monowhite_2_c;
        *yuv2packedX = yuv2monowhite_X_c;
    case PIX_FMT_MONOBLACK:
        *yuv2packed1 = yuv2monoblack_1_c;
        *yuv2packed2 = yuv2monoblack_2_c;
        *yuv2packedX = yuv2monoblack_X_c;
    case PIX_FMT_YUYV422:
        *yuv2packed1 = yuv2yuyv422_1_c;
        *yuv2packed2 = yuv2yuyv422_2_c;
        *yuv2packedX = yuv2yuyv422_X_c;
    case PIX_FMT_UYVY422:
        *yuv2packed1 = yuv2uyvy422_1_c;
        *yuv2packed2 = yuv2uyvy422_2_c;
        *yuv2packedX = yuv2uyvy422_X_c;
    case PIX_FMT_RGB48LE:
        *yuv2packed1 = yuv2rgb48le_1_c;
        *yuv2packed2 = yuv2rgb48le_2_c;
        *yuv2packedX = yuv2rgb48le_X_c;
    case PIX_FMT_RGB48BE:
        *yuv2packed1 = yuv2rgb48be_1_c;
        *yuv2packed2 = yuv2rgb48be_2_c;
        *yuv2packedX = yuv2rgb48be_X_c;
    case PIX_FMT_BGR48LE:
        *yuv2packed1 = yuv2bgr48le_1_c;
        *yuv2packed2 = yuv2bgr48le_2_c;
        *yuv2packedX = yuv2bgr48le_X_c;
    case PIX_FMT_BGR48BE:
        *yuv2packed1 = yuv2bgr48be_1_c;
        *yuv2packed2 = yuv2bgr48be_2_c;
        *yuv2packedX = yuv2bgr48be_X_c;
        *yuv2packed1 = yuv2rgb32_1_c;
        *yuv2packed2 = yuv2rgb32_2_c;
        *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
        *yuv2packed1 = yuv2rgba32_1_c;
        *yuv2packed2 = yuv2rgba32_2_c;
        *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
        *yuv2packed1 = yuv2rgbx32_1_c;
        *yuv2packed2 = yuv2rgbx32_2_c;
        *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
    case PIX_FMT_RGB32_1:
    case PIX_FMT_BGR32_1:
        *yuv2packed1 = yuv2rgb32_1_1_c;
        *yuv2packed2 = yuv2rgb32_1_2_c;
        *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
        *yuv2packed1 = yuv2rgba32_1_1_c;
        *yuv2packed2 = yuv2rgba32_1_2_c;
        *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
        *yuv2packed1 = yuv2rgbx32_1_1_c;
        *yuv2packed2 = yuv2rgbx32_1_2_c;
        *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
        *yuv2packed1 = yuv2rgb24_1_c;
        *yuv2packed2 = yuv2rgb24_2_c;
        *yuv2packedX = yuv2rgb24_X_c;
        *yuv2packed1 = yuv2bgr24_1_c;
        *yuv2packed2 = yuv2bgr24_2_c;
        *yuv2packedX = yuv2bgr24_X_c;
    case PIX_FMT_RGB565LE:
    case PIX_FMT_RGB565BE:
    case PIX_FMT_BGR565LE:
    case PIX_FMT_BGR565BE:
        *yuv2packed1 = yuv2rgb16_1_c;
        *yuv2packed2 = yuv2rgb16_2_c;
        *yuv2packedX = yuv2rgb16_X_c;
    case PIX_FMT_RGB555LE:
    case PIX_FMT_RGB555BE:
    case PIX_FMT_BGR555LE:
    case PIX_FMT_BGR555BE:
        *yuv2packed1 = yuv2rgb15_1_c;
        *yuv2packed2 = yuv2rgb15_2_c;
        *yuv2packedX = yuv2rgb15_X_c;
    case PIX_FMT_RGB444LE:
    case PIX_FMT_RGB444BE:
    case PIX_FMT_BGR444LE:
    case PIX_FMT_BGR444BE:
        *yuv2packed1 = yuv2rgb12_1_c;
        *yuv2packed2 = yuv2rgb12_2_c;
        *yuv2packedX = yuv2rgb12_X_c;
        *yuv2packed1 = yuv2rgb8_1_c;
        *yuv2packed2 = yuv2rgb8_2_c;
        *yuv2packedX = yuv2rgb8_X_c;
        *yuv2packed1 = yuv2rgb4_1_c;
        *yuv2packed2 = yuv2rgb4_2_c;
        *yuv2packedX = yuv2rgb4_X_c;
    case PIX_FMT_RGB4_BYTE:
    case PIX_FMT_BGR4_BYTE:
        *yuv2packed1 = yuv2rgb4b_1_c;
        *yuv2packed2 = yuv2rgb4b_2_c;
        *yuv2packedX = yuv2rgb4b_X_c;
2455 #define DEBUG_SWSCALE_BUFFERS 0
2456 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Core scaling loop for one input slice.
 *
 * Horizontally scales the newly available source lines of this slice
 * into the luma/chroma ring buffers, then vertically scales and writes
 * out every destination line whose complete set of input lines is now
 * buffered.  State that spans slices (ring-buffer indices, last buffered
 * input lines) is reloaded from and stored back into the context.
 * NOTE(review): some braces/statements are elided in this excerpt; the
 * visible code is reproduced unchanged.
 *
 * @return number of destination lines written for this slice
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    /* chroma slice position/height, adjusted for vertical subsampling
       (the height is rounded up, hence the double negation) */
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    /* sources deeper than 8 bits need dithering on output */
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    if (isPacked(c->srcFormat)) {
    srcStride[3]= srcStride[0];
    /* vChrDrop drops chroma lines by widening the effective stride */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {

    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
    /* main per-destination-line loop */
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        int use_mmx_vfilter= c->use_mmx_vfilter;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* not enough input yet: buffer what we have and stop */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[1]+(lastInLumBuf + 1 - srcSliceY)*srcStride[1];
            const uint8_t *src3= src[2]+(lastInLumBuf + 1 - srcSliceY)*srcStride[2];
            const uint8_t *src4= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, src2, src3, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src4, NULL, NULL, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src0= src[0]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[0];
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src0, src1, src2, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (should_dither) {
            c->chrDither8 = dither_8x8_128[chrDstY & 7];
            c->lumDither8 = dither_8x8_128[dstY & 7];
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                           &yuv2packed1, &yuv2packed2, &yuv2packedX);

            /* pointers into the ring buffers for the lines this output
               line's vertical filter needs */
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            int16_t *vLumFilter= c->vLumFilter;
            int16_t *vChrFilter= c->vChrFilter;

            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;

                vLumFilter += dstY * vLumFilterSize;
                vChrFilter += chrDstY * vChrFilterSize;

                av_assert0(use_mmx_vfilter != (
                           yuv2planeX == yuv2planeX_10BE_c
                        || yuv2planeX == yuv2planeX_10LE_c
                        || yuv2planeX == yuv2planeX_9BE_c
                        || yuv2planeX == yuv2planeX_9LE_c
                        || yuv2planeX == yuv2planeX_16BE_c
                        || yuv2planeX == yuv2planeX_16LE_c
                        || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);

                if(use_mmx_vfilter){
                    vLumFilter= c->lumMmxFilter;
                    vChrFilter= c->chrMmxFilter;

                if (vLumFilterSize == 1) {
                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                    yuv2planeX(vLumFilter, vLumFilterSize,
                               lumSrcPtr, dest[0], dstW, c->lumDither8, 0);

                /* chroma is written only on non-skipped (subsampled) lines */
                if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                        yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                    } else if (vChrFilterSize == 1) {
                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                        yuv2planeX(vChrFilter, vChrFilterSize,
                                   chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                        yuv2planeX(vChrFilter, vChrFilterSize,
                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);

                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                    if(use_mmx_vfilter){
                        vLumFilter= c->alpMmxFilter;
                    if (vLumFilterSize == 1) {
                        yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                        yuv2planeX(vLumFilter, vLumFilterSize,
                                   alpSrcPtr, dest[3], dstW, c->lumDither8, 0);

                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);

    /* target has an alpha plane but the source provided none: fill with 255 */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/**
 * Initialize the per-format C function pointers of the context:
 * the output writers (via find_c_packed_planar_out_funcs), the input
 * unpackers (chrToYV12/lumToYV12/alpToYV12), the horizontal scalers
 * (including the fast-bilinear paths), and the limited<->full range
 * conversion helpers.
 * NOTE(review): some switch openers / case labels are elided in this
 * excerpt; the visible code is reproduced unchanged.
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                                   &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,

    /* chroma unpacker: converts source chroma to planar 8-bit */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    /* little-endian high-depth planar: byte-swap to native order */
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV422P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
    /* big-endian high-depth planar: byte-swap to native order */
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV422P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;

    /* RGB input: pick the horizontally averaging ("half") converters when
       the chroma destination is horizontally subsampled */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
        case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
        /* non-subsampled chroma: full-resolution converters */
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
        case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
        case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_c; break;

    /* luma and alpha unpackers */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV422P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV422P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUYV422 :
    case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
    case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    case PIX_FMT_GBR24P : c->lumToYV12 = gbr24pToY_c ; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;

    /* horizontal scalers, chosen by source/destination bit depth */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            c->hyScale = c->hcScale = hScale8To19_c;
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;

    /* limited<->full range converters (only for YUV outputs) */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;

    /* chroma scaling can be skipped only for gray/mono formats */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
2962 SwsFunc ff_getSwsFunc(SwsContext *c)
2964 sws_init_swScale_c(c);
2967 ff_sws_init_swScale_mmx(c);
2969 ff_sws_init_swScale_altivec(c);