2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients with a 15-bit fractional part.
 * Luma (Y) terms are scaled by 219/255 and chroma (U/V) terms by 224/255,
 * i.e. limited ("studio") range; +0.5 rounds to nearest before truncation. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
/* Ordered-dither (Bayer-style) matrices used when reducing bit depth for the
 * low-bpp RGB output paths. The "_N" suffix is the amplitude of the matrix.
 * NOTE(review): table terminators and some lines are elided from this view. */

/* 2x2 dither with amplitude 4 — used for the 6-bit green channel of RGB565. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },

/* 2x2 dither with amplitude 8 — used for the 5-bit channels of RGB555/565. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },

/* 4x4 dither with amplitude 16 — used for 4-bit channels (RGB444). */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },

/* 8x8 dither with amplitude 32 — used for 3-bit channels (RGB8/BGR8). */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },

/* 8x8 dither with amplitude 73 — used for 2-bit channels. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },

/* 8x8 dither with amplitude 220 — used for 1-bit (mono) output.
 * NOTE(review): four definitions of dither_8x8_220 appear below; presumably
 * the alternatives (gamma-corrected variants, see comments) are selected by
 * preprocessor conditionals elided from this view — confirm against the
 * full file before editing. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },

// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },

// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },

// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },

/* 8x8 dither with amplitude 128. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100, 4,124, 28, 98, 2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96, 0,120, 24,102, 6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104, 8,118, 22,110, 14,},
/* Constant vector of 64s, 8-byte aligned — exported for use as a packed-byte
 * operand (e.g. by SIMD code elsewhere in libswscale). */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
/* Family of 8x8 dither matrices indexed by bit depth being removed:
 * dithers[d] has amplitude 2^(d+1) and is applied when dropping d+1 bits.
 * NOTE(review): the separators between the eight sub-tables and the final
 * terminator are elided from this view. */
DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
{  0,  1,  0,  1,  0,  1,  0,  1,},
{  1,  0,  1,  0,  1,  0,  1,  0,},
{  0,  1,  0,  1,  0,  1,  0,  1,},
{  1,  0,  1,  0,  1,  0,  1,  0,},
{  0,  1,  0,  1,  0,  1,  0,  1,},
{  1,  0,  1,  0,  1,  0,  1,  0,},
{  0,  1,  0,  1,  0,  1,  0,  1,},
{  1,  0,  1,  0,  1,  0,  1,  0,},
{  1,  2,  1,  2,  1,  2,  1,  2,},
{  3,  0,  3,  0,  3,  0,  3,  0,},
{  1,  2,  1,  2,  1,  2,  1,  2,},
{  3,  0,  3,  0,  3,  0,  3,  0,},
{  1,  2,  1,  2,  1,  2,  1,  2,},
{  3,  0,  3,  0,  3,  0,  3,  0,},
{  1,  2,  1,  2,  1,  2,  1,  2,},
{  3,  0,  3,  0,  3,  0,  3,  0,},
{  2,  4,  3,  5,  2,  4,  3,  5,},
{  6,  0,  7,  1,  6,  0,  7,  1,},
{  3,  5,  2,  4,  3,  5,  2,  4,},
{  7,  1,  6,  0,  7,  1,  6,  0,},
{  2,  4,  3,  5,  2,  4,  3,  5,},
{  6,  0,  7,  1,  6,  0,  7,  1,},
{  3,  5,  2,  4,  3,  5,  2,  4,},
{  7,  1,  6,  0,  7,  1,  6,  0,},
{  4,  8,  7, 11,  4,  8,  7, 11,},
{ 12,  0, 15,  3, 12,  0, 15,  3,},
{  6, 10,  5,  9,  6, 10,  5,  9,},
{ 14,  2, 13,  1, 14,  2, 13,  1,},
{  4,  8,  7, 11,  4,  8,  7, 11,},
{ 12,  0, 15,  3, 12,  0, 15,  3,},
{  6, 10,  5,  9,  6, 10,  5,  9,},
{ 14,  2, 13,  1, 14,  2, 13,  1,},
{  9, 17, 15, 23,  8, 16, 14, 22,},
{ 25,  1, 31,  7, 24,  0, 30,  6,},
{ 13, 21, 11, 19, 12, 20, 10, 18,},
{ 29,  5, 27,  3, 28,  4, 26,  2,},
{  8, 16, 14, 22,  9, 17, 15, 23,},
{ 24,  0, 30,  6, 25,  1, 31,  7,},
{ 12, 20, 10, 18, 13, 21, 11, 19,},
{ 28,  4, 26,  2, 29,  5, 27,  3,},
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50,  2, 62, 14, 49,  1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54,  6, 57,  9, 53,  5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48,  0, 60, 12, 51,  3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56,  8, 52,  4, 59, 11, 55,  7,},
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50,  2, 62, 14, 49,  1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54,  6, 57,  9, 53,  5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48,  0, 60, 12, 51,  3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56,  8, 52,  4, 59, 11, 55,  7,},
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100, 4,124, 28, 98, 2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96, 0,120, 24,102, 6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104, 8,118, 22,110, 14,},
/* Flat (no-op) dither row: a constant mid-step offset of 64 everywhere. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Scaling factors for error-diffusion/dither amplitude, indexed by
 * [source_depth - 2][target_depth - 1] (exact indexing convention defined
 * by the users of this table elsewhere in libswscale — confirm there).
 * NOTE(review): the closing brace of this table is elided from this view. */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
/* Store one 16-bit sample: clip (signed or unsigned per 'signedness'),
 * add 'bias', and write big- or little-endian via AV_WB16/AV_WL16.
 * NOTE(review): the if(big_endian)/else lines selecting between the two
 * stores are elided from this view. */
#define output_pixel(pos, val, bias, signedness) \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/* Write one unfiltered 16-bit plane from 32-bit intermediates: add a
 * rounding bias of half a step, shift down, and store with the requested
 * endianness via output_pixel(). Only output_bits == 16 is supported.
 * NOTE(review): declarations of i and shift are elided from this view. */
static av_always_inline void
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
    av_assert0(output_bits == 16);

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val, 0, uint);
/* Write one vertically-filtered 16-bit plane: accumulate filterSize taps of
 * 32-bit sources, then clip as signed and re-add 0x8000 in output_pixel()
 * (the accumulator was pre-biased negative to keep the sum in signed range;
 * see the overflow comment below). Only output_bits == 16 is supported.
 * NOTE(review): declarations of i, j and shift are elided from this view. */
static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
    av_assert0(output_bits == 16);

    for (i = 0; i < dstW; i++) {
        int val = 1 << (shift - 1);
        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs, the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end. */
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val, 0x8000, int);
/* Store one 9/10-bit sample into a 16-bit slot: clip to output_bits unsigned
 * bits and write big- or little-endian.
 * NOTE(review): the if(big_endian)/else lines are elided from this view. */
#define output_pixel(pos, val) \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Write one unfiltered 9/10-bit plane from 15-bit intermediates:
 * round (add half a step) and shift by 15 - output_bits.
 * NOTE(review): the declaration of i is elided from this view. */
static av_always_inline void
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
    int shift = 15 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val);
/* Write one vertically-filtered 9/10-bit plane: accumulate filterSize taps
 * (12-bit filter coeffs on 15-bit samples) and shift by 11+16-output_bits.
 * NOTE(review): declarations of i and j are elided from this view. */
static av_always_inline void
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
    int shift = 11 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (shift - 1);
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val);
/* Instantiate per-depth, per-endianness yuv2plane1/yuv2planeX wrappers that
 * cast the generic int16_t interface to the template's actual sample type
 * (int16_t for the 9/10-bit templates, int32_t for the 16-bit ones). */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                                               uint8_t *dest, int dstW, \
                                               const uint8_t *dither, int offset)\
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                                                (uint16_t *) dest, dstW, is_be, bits); \
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                                               const int16_t **src, uint8_t *dest, int dstW, \
                                               const uint8_t *dither, int offset)\
    yuv2planeX_## template_size ## _c_template(filter, \
                                               filterSize, (const typeX_t **) src, \
                                               (uint16_t *) dest, dstW, is_be, bits); \
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
/* Write one vertically-filtered 8-bit plane: seed each pixel with the 8x8
 * dither value (scaled up by 12 bits), accumulate the filter taps, then
 * shift down 19 bits and clip to uint8.
 * NOTE(review): declarations of i and j are elided from this view. */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
/* Write one unfiltered 8-bit plane from 15-bit intermediates: add the dither
 * value, shift down 7 bits, clip to uint8.
 * NOTE(review): the declaration of i is elided from this view. */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
/* Write the interleaved chroma plane for NV12/NV21: vertically filter U and V
 * with dither, then store U,V pairs for NV12 and V,U pairs otherwise (NV21) —
 * note the swapped dest[2*i]/dest[2*i+1] in the second loop.
 * NOTE(review): declarations of i and j, the else keyword, and several
 * closing braces are elided from this view. */
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest, int chrDstW)
    enum PixelFormat dstFormat = c->dstFormat;
    const uint8_t *chrDither = c->chrDither8;

    if (dstFormat == PIX_FMT_NV12)
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];

            dest[2*i]= av_clip_uint8(u>>19);
            dest[2*i+1]= av_clip_uint8(v>>19);
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];

            dest[2*i]= av_clip_uint8(v>>19);
            dest[2*i+1]= av_clip_uint8(u>>19);
/* Store one gray16 sample in BE or LE order depending on 'target'.
 * NOTE(review): the macro body (the two stores and else branch) is elided
 * from this view. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
/* Vertically-filtered 16-bit grayscale output. Y accumulators are pre-biased
 * by -0x40000000 so the filtered sum stays in signed int range; after
 * clipping to int16 the 0x8000 added at store time undoes the bias.
 * Processes two luma pixels per iteration; chroma/alpha inputs are unused.
 * NOTE(review): declarations of i and j and some braces/shifts are elided
 * from this view. */
static av_always_inline void
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum PixelFormat target)
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (1 << 14) - 0x40000000;
        int Y2 = (1 << 14) - 0x40000000;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2] * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];

        Y1 = av_clip_int16(Y1);
        Y2 = av_clip_int16(Y2);
        output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
        output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
/* Two-line blended ("bilinear") 16-bit grayscale output: each pixel is the
 * yalpha-weighted mix of the two buffered lines, shifted down 15 bits.
 * NOTE(review): the declaration of i is elided from this view. */
static av_always_inline void
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum PixelFormat target)
    int yalpha1 = 4095 - yalpha;
    const int32_t *buf0 = buf[0], *buf1 = buf[1];

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
/* Single-line (unscaled vertical) 16-bit grayscale output: round and shift
 * the 19-bit intermediates down 3 bits to 16 bits.
 * NOTE(review): the declaration of i is elided from this view. */
static av_always_inline void
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
                        int uvalpha, int y, enum PixelFormat target)
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2    ]+4)>>3;
        int Y2 = (buf0[i * 2 + 1]+4)>>3;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X filtered, _2 two-line blend,
 * _1 single line) for a 16-bit-per-component packed output template. The
 * public signatures take int16_t pointers; the wrappers cast them to the
 * int32_t intermediates the 16-bit templates actually consume.
 * NOTE(review): the 'int y' parameter line and closing braces are elided
 * from this view. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \

/* Instantiate the gray16 output functions for both endiannesses. */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* Store 8 packed 1-bit pixels ('acc'); for MONOBLACK vs MONOWHITE the byte
 * is stored as-is or inverted.
 * NOTE(review): the macro body (stores and else branch) is elided from this
 * view. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
/* Vertically-filtered 1-bit (mono) output. Luma is filtered two pixels at a
 * time, clipped only when out of range ((Y1|Y2) & 0x100 check), dithered with
 * the 8x8_220 matrix, thresholded through the gray table g, and the bits are
 * shifted into 'acc' (acc += acc + bit) before output_pixel() flushes bytes.
 * NOTE(review): declarations of i, j, acc, the Y1/Y2 init and >>19 shifts,
 * and several braces are elided from this view. */
static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum PixelFormat target)
    const uint8_t * const d128=dither_8x8_220[y&7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];

    for (i = 0; i < dstW - 1; i += 2) {
        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i]   * lumFilter[j];
            Y2 += lumSrc[j][i+1] * lumFilter[j];

        if ((Y1 | Y2) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);

        acc += acc + g[Y1 + d128[(i + 0) & 7]];
        acc += acc + g[Y2 + d128[(i + 1) & 7]];
            output_pixel(*dest++, acc);
/* Two-line blended 1-bit (mono) output: 8 pixels per iteration, each the
 * yalpha-weighted mix of two lines (>>19 to 8 bits), dithered and packed
 * MSB-first into one byte via repeated acc += acc + bit.
 * NOTE(review): the declaration of i and closing braces are elided from
 * this view. */
static av_always_inline void
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target)
    const int16_t *buf0  = buf[0],  *buf1  = buf[1];
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int  yalpha1 = 4095 - yalpha;

    for (i = 0; i < dstW - 7; i += 8) {
        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
        output_pixel(*dest++, acc);
/* Single-line 1-bit (mono) output: 8 pixels per iteration, luma shifted
 * >>7 to 8 bits, dithered with the 8x8_220 matrix, packed MSB-first into a
 * byte and flushed with output_pixel().
 * NOTE(review): the declaration of i and closing braces are elided from
 * this view. */
static av_always_inline void
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf0, uint8_t *dest, int dstW,
                      int uvalpha, int y, enum PixelFormat target)
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];

    for (i = 0; i < dstW - 7; i += 8) {
        int acc =    g[(buf0[i    ] >> 7) + d128[0]];
        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
        acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
        output_pixel(*dest++, acc);
/* Generate the three public entry points (_X filtered, _2 two-line blend,
 * _1 single line) for an 8-bit packed output template; unlike the 16-bit
 * variant above no pointer casting is needed.
 * NOTE(review): the 'int y' parameter line, the fmt argument of the _1 call,
 * and closing braces are elided from this view. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \

/* Instantiate the monochrome output functions. */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Store one macropixel (two luma + shared U/V) in YUYV or UYVY byte order
 * depending on 'target'.
 * NOTE(review): the U/V stores and the else line are elided from this view. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 2] = Y2; \
        dest[pos + 1] = Y1; \
        dest[pos + 3] = Y2; \
/* Vertically-filtered packed 4:2:2 output (YUYV/UYVY): filter two luma and
 * one chroma pair per macropixel, clip to 8 bits only when any component is
 * out of range ((Y1|Y2|U|V) & 0x100), then store via output_pixels().
 * NOTE(review): declarations of i, j and the Y/U/V accumulator inits and
 * >>19 shifts are elided from this view. */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target)
    for (i = 0; i < (dstW >> 1); i++) {
        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];

        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);

        output_pixels(4*i, Y1, U, Y2, V);
/* Two-line blended packed 4:2:2 output: luma and chroma are each the
 * alpha-weighted mix of two buffered lines, shifted >>19 down to 8 bits.
 * NOTE(review): the declaration of i is elided from this view. */
static av_always_inline void
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target)
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int  yalpha1 = 4095 - yalpha;
    int uvalpha1 = 4095 - uvalpha;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;

        output_pixels(i * 4, Y1, U, Y2, V);
/* Single-line packed 4:2:2 output. With uvalpha < 2048 the chroma comes from
 * one line (>>7); otherwise the two chroma lines are averaged (sum >>8).
 * NOTE(review): the declaration of i, the else keyword, and closing braces
 * are elided from this view. */
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target)
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];

    if (uvalpha < 2048) {
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = ubuf1[i]        >> 7;
            int V  = vbuf1[i]        >> 7;

            output_pixels(i * 4, Y1, U, Y2, V);
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 =  buf0[i * 2]          >> 7;
            int Y2 =  buf0[i * 2 + 1]      >> 7;
            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
            int V  = (vbuf0[i] + vbuf1[i]) >> 8;

            output_pixels(i * 4, Y1, U, Y2, V);

/* Instantiate the packed 4:2:2 output functions. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* R_B/B_R select component order so one template serves RGB48 and BGR48.
 * output_pixel stores a 16-bit component honoring the target's endianness.
 * NOTE(review): the macro body of output_pixel (stores and else branch) is
 * elided from this view. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
#define output_pixel(pos, val) \
    if (isBE(target)) { \
/* Vertically-filtered 48-bit RGB/BGR output from 16-bit-depth intermediates.
 * Y accumulators are pre-biased by -0x40000000 and U/V by -128<<23 to keep
 * filtered sums in signed range (see the bit-width comments inline); after
 * the yuv2rgb matrix multiply each component is clipped to 30 bits and
 * shifted >>14 to 16 bits per channel.
 * NOTE(review): declarations of i, j, R, G, B, the V accumulator init, the
 * post-filter shifts and several braces are elided from this view. */
static av_always_inline void
yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
                       const int32_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int32_t **chrUSrc,
                       const int32_t **chrVSrc, int chrFilterSize,
                       const int32_t **alpSrc, uint16_t *dest, int dstW,
                       int y, enum PixelFormat target)
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = -0x40000000;
        int Y2 = -0x40000000;
        int U  = -128 << 23; // 19

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];

        // 8bit: 12+15=27; 16-bit: 12+19=31
        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
        Y1 -= c->yuv2rgb_y_offset;
        Y2 -= c->yuv2rgb_y_offset;
        Y1 *= c->yuv2rgb_y_coeff;
        Y2 *= c->yuv2rgb_y_coeff;
        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B =                            U * c->yuv2rgb_u2b_coeff;

        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-line blended 48-bit RGB/BGR output: mix the two buffered lines
 * (>>14 keeps 17-bit intermediates), apply the yuv2rgb matrix, clip to 30
 * bits and shift >>14 to 16 bits per channel.
 * NOTE(review): declarations of i, R, G, B and several braces are elided
 * from this view. */
static av_always_inline void
yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
                       const int32_t *ubuf[2], const int32_t *vbuf[2],
                       const int32_t *abuf[2], uint16_t *dest, int dstW,
                       int yalpha, int uvalpha, int y,
                       enum PixelFormat target)
    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int  yalpha1 = 4095 - yalpha;
    int uvalpha1 = 4095 - uvalpha;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;

        Y1 -= c->yuv2rgb_y_offset;
        Y2 -= c->yuv2rgb_y_offset;
        Y1 *= c->yuv2rgb_y_coeff;
        Y2 *= c->yuv2rgb_y_coeff;

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B =                            U * c->yuv2rgb_u2b_coeff;

        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Single-line 48-bit RGB/BGR output. With uvalpha < 2048 chroma comes from
 * one line (bias -128<<11, >>2); otherwise the two chroma lines are averaged
 * (bias -128<<12, >>3). Then the same matrix/clip/shift as the other rgb48
 * templates.
 * NOTE(review): declarations of i, R, G, B, the else keyword and several
 * braces are elided from this view. */
static av_always_inline void
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
                       const int32_t *ubuf[2], const int32_t *vbuf[2],
                       const int32_t *abuf0, uint16_t *dest, int dstW,
                       int uvalpha, int y, enum PixelFormat target)
    const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];

    if (uvalpha < 2048) {
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
            int V  = (vbuf0[i] + (-128 << 11)) >> 2;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B =                            U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = (buf0[i * 2]    ) >> 2;
            int Y2 = (buf0[i * 2 + 1]) >> 2;
            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;

            Y1 -= c->yuv2rgb_y_offset;
            Y2 -= c->yuv2rgb_y_offset;
            Y1 *= c->yuv2rgb_y_coeff;
            Y2 *= c->yuv2rgb_y_coeff;

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B =                            U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);

/* Instantiate the 48-bit RGB/BGR output functions for both component
 * orders and endiannesses. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/*
 * Write two horizontally adjacent output pixels (indices i*2 and i*2+1)
 * in the packed RGB/BGR layout selected by 'target'.  _r/_g/_b point into
 * per-component lookup tables indexed by luma (plus a dither offset for
 * the <=16bpp targets); summing the three table entries yields the packed
 * pixel value.  A1/A2 are the per-pixel alpha values when hasAlpha is set.
 */
1017 static av_always_inline void
1018 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
1019 unsigned U, unsigned V, unsigned A1, unsigned A2,
1020 const void *_r, const void *_g, const void *_b, int y,
1021 enum PixelFormat target, int hasAlpha)
/* 32 bpp: table entries are pre-positioned uint32 component values. */
1023 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1024 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1025 uint32_t *dest = (uint32_t *) _dest;
1026 const uint32_t *r = (const uint32_t *) _r;
1027 const uint32_t *g = (const uint32_t *) _g;
1028 const uint32_t *b = (const uint32_t *) _b;
/* Alpha lands in byte 0 for the *32_1 layouts, byte 3 otherwise. */
1031 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1033 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1034 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1037 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1039 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1040 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1042 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1043 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24 bpp: three bytes per pixel; r_b/b_r select component order. */
1046 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1047 uint8_t *dest = (uint8_t *) _dest;
1048 const uint8_t *r = (const uint8_t *) _r;
1049 const uint8_t *g = (const uint8_t *) _g;
1050 const uint8_t *b = (const uint8_t *) _b;
1052 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1053 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1055 dest[i * 6 + 0] = r_b[Y1];
1056 dest[i * 6 + 1] = g[Y1];
1057 dest[i * 6 + 2] = b_r[Y1];
1058 dest[i * 6 + 3] = r_b[Y2];
1059 dest[i * 6 + 4] = g[Y2];
1060 dest[i * 6 + 5] = b_r[Y2];
/* 16/15/12 bpp: ordered dither offsets are added to the table index. */
1063 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1064 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1065 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1066 uint16_t *dest = (uint16_t *) _dest;
1067 const uint16_t *r = (const uint16_t *) _r;
1068 const uint16_t *g = (const uint16_t *) _g;
1069 const uint16_t *b = (const uint16_t *) _b;
1070 int dr1, dg1, db1, dr2, dg2, db2;
1072 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1073 dr1 = dither_2x2_8[ y & 1 ][0];
1074 dg1 = dither_2x2_4[ y & 1 ][0];
1075 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1076 dr2 = dither_2x2_8[ y & 1 ][1];
1077 dg2 = dither_2x2_4[ y & 1 ][1];
1078 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1079 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1080 dr1 = dither_2x2_8[ y & 1 ][0];
1081 dg1 = dither_2x2_8[ y & 1 ][1];
1082 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1083 dr2 = dither_2x2_8[ y & 1 ][1];
1084 dg2 = dither_2x2_8[ y & 1 ][0];
1085 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* Remaining case here is the 444 targets, using a 4x4 dither matrix. */
1087 dr1 = dither_4x4_16[ y & 3 ][0];
1088 dg1 = dither_4x4_16[ y & 3 ][1];
1089 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1090 dr2 = dither_4x4_16[ y & 3 ][1];
1091 dg2 = dither_4x4_16[ y & 3 ][0];
1092 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1095 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1096 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1097 } else /* 8/4-bit */ {
1098 uint8_t *dest = (uint8_t *) _dest;
1099 const uint8_t *r = (const uint8_t *) _r;
1100 const uint8_t *g = (const uint8_t *) _g;
1101 const uint8_t *b = (const uint8_t *) _b;
1102 int dr1, dg1, db1, dr2, dg2, db2;
1104 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1105 const uint8_t * const d64 = dither_8x8_73[y & 7];
1106 const uint8_t * const d32 = dither_8x8_32[y & 7];
1107 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1108 db1 = d64[(i * 2 + 0) & 7];
1109 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1110 db2 = d64[(i * 2 + 1) & 7];
/* 4-bit targets use the larger 8x8 dither matrices. */
1112 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1113 const uint8_t * const d128 = dither_8x8_220[y & 7];
1114 dr1 = db1 = d128[(i * 2 + 0) & 7];
1115 dg1 = d64[(i * 2 + 0) & 7];
1116 dr2 = db2 = d128[(i * 2 + 1) & 7];
1117 dg2 = d64[(i * 2 + 1) & 7];
/* Packed 4-bit: two pixels share one output byte (low/high nibble). */
1120 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1121 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1122 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1124 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1125 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Full vertical-filter path: for each output pixel pair, accumulate the
 * luma/chroma (and optionally alpha) taps, clip to 8 bits when any value
 * left that range, then look up the RGB tables and emit via yuv2rgb_write().
 */
1130 static av_always_inline void
1131 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1132 const int16_t **lumSrc, int lumFilterSize,
1133 const int16_t *chrFilter, const int16_t **chrUSrc,
1134 const int16_t **chrVSrc, int chrFilterSize,
1135 const int16_t **alpSrc, uint8_t *dest, int dstW,
1136 int y, enum PixelFormat target, int hasAlpha)
/* Two luma samples per chroma sample (4:2:2-style packed output). */
1140 for (i = 0; i < (dstW >> 1); i++) {
1146 int av_unused A1, A2;
1147 const void *r, *g, *b;
1149 for (j = 0; j < lumFilterSize; j++) {
1150 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1151 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1153 for (j = 0; j < chrFilterSize; j++) {
1154 U += chrUSrc[j][i] * chrFilter[j];
1155 V += chrVSrc[j][i] * chrFilter[j];
/* Cheap out-of-range test: only clip when some value exceeds 8 bits. */
1161 if ((Y1 | Y2 | U | V) & 0x100) {
1162 Y1 = av_clip_uint8(Y1);
1163 Y2 = av_clip_uint8(Y2);
1164 U = av_clip_uint8(U);
1165 V = av_clip_uint8(V);
1170 for (j = 0; j < lumFilterSize; j++) {
1171 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1172 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1176 if ((A1 | A2) & 0x100) {
1177 A1 = av_clip_uint8(A1);
1178 A2 = av_clip_uint8(A2);
1182 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* Green is the sum of the U- and V-dependent table contributions. */
1184 g = (c->table_gU[U] + c->table_gV[V]);
1187 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1188 r, g, b, y, target, hasAlpha);
/*
 * Two-tap vertical interpolation path: blend two source rows with the
 * 12-bit weights yalpha/uvalpha (and their complements), then write pairs
 * of pixels through the RGB lookup tables.
 */
1192 static av_always_inline void
1193 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1194 const int16_t *ubuf[2], const int16_t *vbuf[2],
1195 const int16_t *abuf[2], uint8_t *dest, int dstW,
1196 int yalpha, int uvalpha, int y,
1197 enum PixelFormat target, int hasAlpha)
1199 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1200 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1201 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1202 *abuf0 = hasAlpha ? abuf[0] : NULL,
1203 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* Complementary weights; yalpha/uvalpha are in [0, 4095]. */
1204 int yalpha1 = 4095 - yalpha;
1205 int uvalpha1 = 4095 - uvalpha;
1208 for (i = 0; i < (dstW >> 1); i++) {
1209 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1210 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1211 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1212 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1214 const void *r = c->table_rV[V],
1215 *g = (c->table_gU[U] + c->table_gV[V]),
1216 *b = c->table_bU[U];
1219 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1220 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1223 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1224 r, g, b, y, target, hasAlpha);
/*
 * Single-source-row path (no vertical scaling).  Luma is taken straight
 * from buf0; chroma either comes unaveraged from ubuf1/vbuf1 (uvalpha <
 * 2048) or as the mean of the two chroma rows.
 * NOTE(review): the unaveraged branch reads ubuf1/vbuf1, not ubuf0/vbuf0 —
 * confirm against the chroma ring-buffer convention of the caller.
 */
1228 static av_always_inline void
1229 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1230 const int16_t *ubuf[2], const int16_t *vbuf[2],
1231 const int16_t *abuf0, uint8_t *dest, int dstW,
1232 int uvalpha, int y, enum PixelFormat target,
1235 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1236 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1239 if (uvalpha < 2048) {
1240 for (i = 0; i < (dstW >> 1); i++) {
1241 int Y1 = buf0[i * 2] >> 7;
1242 int Y2 = buf0[i * 2 + 1] >> 7;
1243 int U = ubuf1[i] >> 7;
1244 int V = vbuf1[i] >> 7;
1246 const void *r = c->table_rV[V],
1247 *g = (c->table_gU[U] + c->table_gV[V]),
1248 *b = c->table_bU[U];
1251 A1 = abuf0[i * 2 ] >> 7;
1252 A2 = abuf0[i * 2 + 1] >> 7;
1255 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1256 r, g, b, y, target, hasAlpha);
/* Averaged-chroma variant: (ubuf0 + ubuf1) / 2, folded into the >> 8. */
1259 for (i = 0; i < (dstW >> 1); i++) {
1260 int Y1 = buf0[i * 2] >> 7;
1261 int Y2 = buf0[i * 2 + 1] >> 7;
1262 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1263 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1265 const void *r = c->table_rV[V],
1266 *g = (c->table_gU[U] + c->table_gV[V]),
1267 *b = c->table_bU[U];
1270 A1 = abuf0[i * 2 ] >> 7;
1271 A2 = abuf0[i * 2 + 1] >> 7;
1274 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1275 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX emits only the full-filter (_X_c)
 * entry point; YUV2RGBWRAPPER additionally emits the two-tap (_2_c) and
 * single-row (_1_c) entry points.  Each simply forwards to the matching
 * template with the pixel format and hasAlpha baked in.
 */
1280 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1281 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1282 const int16_t **lumSrc, int lumFilterSize, \
1283 const int16_t *chrFilter, const int16_t **chrUSrc, \
1284 const int16_t **chrVSrc, int chrFilterSize, \
1285 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1288 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1289 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1290 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1292 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1293 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1294 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1295 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1296 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1297 int yalpha, int uvalpha, int y) \
1299 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1300 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1303 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1304 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1305 const int16_t *abuf0, uint8_t *dest, int dstW, \
1306 int uvalpha, int y) \
1308 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1309 dstW, uvalpha, y, fmt, hasAlpha); \
/* 32-bit variants: alpha usage decided at runtime (c->alpPixBuf), plus
 * dedicated always-alpha (a32*) and never-alpha (x32*) instantiations. */
1313 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1314 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1316 #if CONFIG_SWSCALE_ALPHA
1317 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1318 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1320 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1321 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1323 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1324 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1325 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1326 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1327 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1328 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1329 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1330 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-resolution writer: one chroma sample per output pixel (no
 * 2:1 horizontal chroma subsampling).  Computes R/G/B directly from the
 * per-context conversion coefficients instead of lookup tables, clips to
 * 30 bits, and stores 'step' bytes per pixel (3 for 24bpp, 4 for 32bpp).
 */
1332 static av_always_inline void
1333 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1334 const int16_t **lumSrc, int lumFilterSize,
1335 const int16_t *chrFilter, const int16_t **chrUSrc,
1336 const int16_t **chrVSrc, int chrFilterSize,
1337 const int16_t **alpSrc, uint8_t *dest,
1338 int dstW, int y, enum PixelFormat target, int hasAlpha)
1341 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1343 for (i = 0; i < dstW; i++) {
/* Accumulators seeded with rounding bias minus the 128 chroma offset. */
1346 int U = (1<<9)-(128 << 19);
1347 int V = (1<<9)-(128 << 19);
1351 for (j = 0; j < lumFilterSize; j++) {
1352 Y += lumSrc[j][i] * lumFilter[j];
1354 for (j = 0; j < chrFilterSize; j++) {
1355 U += chrUSrc[j][i] * chrFilter[j];
1356 V += chrVSrc[j][i] * chrFilter[j];
1363 for (j = 0; j < lumFilterSize; j++) {
1364 A += alpSrc[j][i] * lumFilter[j];
1368 A = av_clip_uint8(A);
1370 Y -= c->yuv2rgb_y_offset;
1371 Y *= c->yuv2rgb_y_coeff;
1373 R = Y + V*c->yuv2rgb_v2r_coeff;
1374 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1375 B = Y + U*c->yuv2rgb_u2b_coeff;
/* Clip only when some component escaped the 30-bit range. */
1376 if ((R | G | B) & 0xC0000000) {
1377 R = av_clip_uintp2(R, 30);
1378 G = av_clip_uintp2(G, 30);
1379 B = av_clip_uintp2(B, 30);
/* Alpha byte position depends on the target layout (first or last). */
1384 dest[0] = hasAlpha ? A : 255;
1398 dest[3] = hasAlpha ? A : 255;
1401 dest[0] = hasAlpha ? A : 255;
1415 dest[3] = hasAlpha ? A : 255;
/* Full-chroma 32-bit writers: runtime alpha (c->alpPixBuf), forced-alpha,
 * and no-alpha instantiations, plus the 24-bit packed variants. */
1423 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1424 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1425 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1426 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1428 #if CONFIG_SWSCALE_ALPHA
1429 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1430 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1431 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1432 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1434 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1435 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1436 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1437 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1439 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1440 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/* Fill a width x height rectangle of 'plane' (starting at row y) with the
 * constant byte value; used e.g. to fill missing alpha/chroma planes. */
1442 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1443 int width, int height,
1447 uint8_t *ptr = plane + stride*y;
1448 for (i=0; i<height; i++) {
1449 memset(ptr, val, width);
/* 48-bit RGB/BGR input readers.  input_pixel reads one 16-bit component
 * honoring the source endianness; the r/b macros swap the first and third
 * components for the BGR48 layouts so the same template serves both. */
1454 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1456 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1457 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Convert a row of 16-bit-per-component RGB/BGR to 16-bit luma. */
1459 static av_always_inline void
1460 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1461 enum PixelFormat origin)
1464 for (i = 0; i < width; i++) {
1465 unsigned int r_b = input_pixel(&src[i*3+0]);
1466 unsigned int g = input_pixel(&src[i*3+1]);
1467 unsigned int b_r = input_pixel(&src[i*3+2]);
1469 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert a row to 16-bit U/V at full horizontal chroma resolution. */
1473 static av_always_inline void
1474 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1475 const uint16_t *src1, const uint16_t *src2,
1476 int width, enum PixelFormat origin)
1480 for (i = 0; i < width; i++) {
1481 int r_b = input_pixel(&src1[i*3+0]);
1482 int g = input_pixel(&src1[i*3+1]);
1483 int b_r = input_pixel(&src1[i*3+2]);
1485 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1486 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Same, but averaging each horizontal pair of pixels (2:1 subsampling). */
1490 static av_always_inline void
1491 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1492 const uint16_t *src1, const uint16_t *src2,
1493 int width, enum PixelFormat origin)
1497 for (i = 0; i < width; i++) {
1498 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1499 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1500 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1502 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1503 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Generate the public uint8_t* entry points for each 48-bit format. */
1511 #define rgb48funcs(pattern, BE_LE, origin) \
1512 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
1513 int width, uint32_t *unused) \
1515 const uint16_t *src = (const uint16_t *) _src; \
1516 uint16_t *dst = (uint16_t *) _dst; \
1517 rgb48ToY_c_template(dst, src, width, origin); \
1520 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1521 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1522 int width, uint32_t *unused) \
1524 const uint16_t *src1 = (const uint16_t *) _src1, \
1525 *src2 = (const uint16_t *) _src2; \
1526 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1527 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1530 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1531 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1532 int width, uint32_t *unused) \
1534 const uint16_t *src1 = (const uint16_t *) _src1, \
1535 *src2 = (const uint16_t *) _src2; \
1536 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1537 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1540 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1541 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1542 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1543 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Generic 16/32-bit packed RGB input readers.  A pixel is fetched as a
 * native 32-bit word for the 4-byte formats, otherwise as an endianness-
 * aware 16-bit load; components are then extracted with the per-format
 * mask/shift parameters, and the coefficients are pre-scaled by rsh/gsh/
 * bsh so one template covers all bit depths. */
1545 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1546 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1547 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Luma conversion; S is the total scaling shift for this format. */
1549 static av_always_inline void
1550 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1551 int width, enum PixelFormat origin,
1552 int shr, int shg, int shb, int shp,
1553 int maskr, int maskg, int maskb,
1554 int rsh, int gsh, int bsh, int S)
1556 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1557 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
1560 for (i = 0; i < width; i++) {
1561 int px = input_pixel(i) >> shp;
1562 int b = (px & maskb) >> shb;
1563 int g = (px & maskg) >> shg;
1564 int r = (px & maskr) >> shr;
1566 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Chroma conversion at full horizontal resolution. */
1570 static av_always_inline void
1571 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1572 const uint8_t *src, int width,
1573 enum PixelFormat origin,
1574 int shr, int shg, int shb, int shp,
1575 int maskr, int maskg, int maskb,
1576 int rsh, int gsh, int bsh, int S)
1578 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1579 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1580 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
1583 for (i = 0; i < width; i++) {
1584 int px = input_pixel(i) >> shp;
1585 int b = (px & maskb) >> shb;
1586 int g = (px & maskg) >> shg;
1587 int r = (px & maskr) >> shr;
1589 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1590 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* Chroma conversion averaging horizontal pixel pairs: the two pixels are
 * summed before masking (maskgx masks green as the complement of r|b),
 * so the widened masks below must admit the doubled component range. */
1594 static av_always_inline void
1595 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1596 const uint8_t *src, int width,
1597 enum PixelFormat origin,
1598 int shr, int shg, int shb, int shp,
1599 int maskr, int maskg, int maskb,
1600 int rsh, int gsh, int bsh, int S)
1602 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1603 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1604 maskgx = ~(maskr | maskb);
1605 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
1608 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1609 for (i = 0; i < width; i++) {
1610 int px0 = input_pixel(2 * i + 0) >> shp;
1611 int px1 = input_pixel(2 * i + 1) >> shp;
1612 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1613 int rb = px0 + px1 - g;
1615 b = (rb & maskb) >> shb;
1616 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1617 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1620 g = (g & maskg) >> shg;
1622 r = (rb & maskr) >> shr;
1624 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1625 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Public entry points: one ToY/ToUV/ToUV_half triple per packed format. */
1631 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1632 maskg, maskb, rsh, gsh, bsh, S) \
1633 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
1634 int width, uint32_t *unused) \
1636 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
1637 shr, shg, shb, shp, \
1638 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1641 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1642 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1643 int width, uint32_t *unused) \
1645 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1646 shr, shg, shb, shp, \
1647 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1650 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1651 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1652 int width, uint32_t *unused) \
1654 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1655 shr, shg, shb, shp, \
1656 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1659 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1660 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1661 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1662 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1663 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1664 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1665 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1666 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1667 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1668 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1669 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1670 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/* Planar GBR (one plane per component) to U/V with 2:1 horizontal
 * subsampling: each component is the sum of two neighboring samples,
 * folded into the rounding/shift below. */
1672 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
1673 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1674 int width, enum PixelFormat origin)
1677 for (i = 0; i < width; i++) {
1678 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
1679 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
1680 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
1682 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
1683 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/* Extract alpha from ABGR/ARGB-style layouts (alpha is the first byte of
 * each 4-byte pixel); output is left-shifted to 14-bit precision. */
1687 static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1690 for (i=0; i<width; i++) {
1691 dst[i]= src[4*i]<<6;
/* Extract alpha from RGBA/BGRA-style layouts (alpha is the fourth byte). */
1695 static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1698 for (i=0; i<width; i++) {
1699 dst[i]= src[4*i+3]<<6;
/* PAL8: alpha comes from bits 24-31 of the palette entry. */
1703 static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
1706 for (i=0; i<width; i++) {
1709 dst[i]= (pal[d] >> 24)<<6;
/* PAL8: luma is the low byte of the palette entry.
 * NOTE(review): 'width' is 'long' here but 'int' in every sibling reader —
 * confirm whether this signature mismatch is intended. */
1713 static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
1716 for (i=0; i<width; i++) {
1719 dst[i]= (pal[d] & 0xFF)<<6;
/* PAL8: U/V come from bytes 1 and 2 of the palette entry.
 * NOTE(review): dstU is uint16_t* while dstV is int16_t* — looks
 * inconsistent with the other ToUV readers; verify against callers. */
1723 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1724 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1725 int width, uint32_t *pal)
1728 assert(src1 == src2);
1729 for (i=0; i<width; i++) {
1730 int p= pal[src1[i]];
1732 dstU[i]= (uint8_t)(p>> 8)<<6;
1733 dstV[i]= (uint8_t)(p>>16)<<6;
/* 1bpp white-is-zero: expand each bit to full-scale 14-bit luma (16383),
 * eight pixels per input byte, with a tail loop for width % 8. */
1737 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1740 for (i=0; i<width/8; i++) {
1743 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1747 for(j=0; j<(width&7); j++)
1748 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* 1bpp black-is-zero: same expansion; the polarity difference is handled
 * on lines not visible in this excerpt. */
1752 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1755 for (i=0; i<width/8; i++) {
1758 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1762 for(j=0; j<(width&7); j++)
1763 dst[8*i+j]= ((d>>(7-j))&1)*16383;
//FIXME yuy2* can read up to 7 samples too much
/* YUY2 (Y0 U Y1 V): luma is every even byte. */
1769 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1773 for (i=0; i<width; i++)
/* YUY2 chroma: U at byte 1, V at byte 3 of each 4-byte pair. */
1777 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1778 const uint8_t *src2, int width, uint32_t *unused)
1781 for (i=0; i<width; i++) {
1782 dstU[i]= src1[4*i + 1];
1783 dstV[i]= src1[4*i + 3];
/* Interleaved chroma input implies a single source row. */
1785 assert(src1 == src2);
/* Byte-swap a row of 16-bit luma samples (endianness conversion). */
1788 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1791 const uint16_t *src = (const uint16_t *) _src;
1792 uint16_t *dst = (uint16_t *) _dst;
1793 for (i=0; i<width; i++) {
1794 dst[i] = av_bswap16(src[i]);
/* Byte-swap a row of 16-bit U and V samples. */
1798 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
1799 const uint8_t *_src2, int width, uint32_t *unused)
1802 const uint16_t *src1 = (const uint16_t *) _src1,
1803 *src2 = (const uint16_t *) _src2;
1804 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1805 for (i=0; i<width; i++) {
1806 dstU[i] = av_bswap16(src1[i]);
1807 dstV[i] = av_bswap16(src2[i]);
1811 /* This is almost identical to the previous, and exists only because
1812 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY (U Y0 V Y1): luma is every odd byte. */
1813 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1817 for (i=0; i<width; i++)
/* UYVY chroma: U at byte 0, V at byte 2 of each 4-byte pair. */
1821 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1822 const uint8_t *src2, int width, uint32_t *unused)
1825 for (i=0; i<width; i++) {
1826 dstU[i]= src1[4*i + 0];
1827 dstV[i]= src1[4*i + 2];
1829 assert(src1 == src2);
/* De-interleave a 2-component plane into two separate planes. */
1832 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1833 const uint8_t *src, int width)
1836 for (i = 0; i < width; i++) {
1837 dst1[i] = src[2*i+0];
1838 dst2[i] = src[2*i+1];
/* NV12: interleaved plane is UVUV..., so U first. */
1842 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1843 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1844 int width, uint32_t *unused)
1846 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved plane is VUVU..., so destinations are swapped. */
1849 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1850 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1851 int width, uint32_t *unused)
1853 nvXXtoUV_c(dstV, dstU, src1, width);
/* Re-define input_pixel for the following 24-bit readers (endianness-
 * aware 16-bit load; component access below is per-byte). */
1856 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* BGR24 -> 14-bit luma. */
1858 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
1859 int width, uint32_t *unused)
1862 for (i=0; i<width; i++) {
1867 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* BGR24 -> 14-bit U/V at full horizontal resolution (B at byte 0). */
1871 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1872 const uint8_t *src2, int width, uint32_t *unused)
1875 for (i=0; i<width; i++) {
1876 int b= src1[3*i + 0];
1877 int g= src1[3*i + 1];
1878 int r= src1[3*i + 2];
1880 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1881 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1883 assert(src1 == src2);
/* BGR24 -> U/V, summing each horizontal pixel pair (2:1 subsampling);
 * the doubled range is absorbed by the larger rounding bias and shift. */
1886 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1887 const uint8_t *src2, int width, uint32_t *unused)
1890 for (i=0; i<width; i++) {
1891 int b= src1[6*i + 0] + src1[6*i + 3];
1892 int g= src1[6*i + 1] + src1[6*i + 4];
1893 int r= src1[6*i + 2] + src1[6*i + 5];
1895 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1896 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1898 assert(src1 == src2);
/* RGB24 variants: identical math with R at byte 0 instead of B. */
1901 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1905 for (i=0; i<width; i++) {
1910 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1914 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1915 const uint8_t *src2, int width, uint32_t *unused)
1919 for (i=0; i<width; i++) {
1920 int r= src1[3*i + 0];
1921 int g= src1[3*i + 1];
1922 int b= src1[3*i + 2];
1924 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1925 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1929 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1930 const uint8_t *src2, int width, uint32_t *unused)
1934 for (i=0; i<width; i++) {
1935 int r= src1[6*i + 0] + src1[6*i + 3];
1936 int g= src1[6*i + 1] + src1[6*i + 4];
1937 int b= src1[6*i + 2] + src1[6*i + 5];
1939 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1940 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Planar RGB readers.  Plane order throughout is src[0]=G, src[1]=B,
 * src[2]=R (as shown by the component loads below). */
1944 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
1947 for (i = 0; i < width; i++) {
1952 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* 16-bit little-endian planar RGB -> 16-bit luma. */
1956 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1959 const uint16_t **src = (const uint16_t **) _src;
1960 uint16_t *dst = (uint16_t *) _dst;
1961 for (i = 0; i < width; i++) {
1962 int g = AV_RL16(src[0] + i);
1963 int b = AV_RL16(src[1] + i);
1964 int r = AV_RL16(src[2] + i);
1966 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* 16-bit big-endian planar RGB -> 16-bit luma. */
1970 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1973 const uint16_t **src = (const uint16_t **) _src;
1974 uint16_t *dst = (uint16_t *) _dst;
1975 for (i = 0; i < width; i++) {
1976 int g = AV_RB16(src[0] + i);
1977 int b = AV_RB16(src[1] + i);
1978 int r = AV_RB16(src[2] + i);
1980 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* 8-bit planar RGB -> U/V. */
1984 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
1987 for (i = 0; i < width; i++) {
1992 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
1993 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* 16-bit little-endian planar RGB -> 16-bit U/V. */
1997 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
2000 const uint16_t **src = (const uint16_t **) _src;
2001 uint16_t *dstU = (uint16_t *) _dstU;
2002 uint16_t *dstV = (uint16_t *) _dstV;
2003 for (i = 0; i < width; i++) {
2004 int g = AV_RL16(src[0] + i);
2005 int b = AV_RL16(src[1] + i);
2006 int r = AV_RL16(src[2] + i);
2008 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
2009 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* 16-bit big-endian planar RGB -> 16-bit U/V. */
2013 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
2016 const uint16_t **src = (const uint16_t **) _src;
2017 uint16_t *dstU = (uint16_t *) _dstU;
2018 uint16_t *dstV = (uint16_t *) _dstV;
2019 for (i = 0; i < width; i++) {
2020 int g = AV_RB16(src[0] + i);
2021 int b = AV_RB16(src[1] + i);
2022 int r = AV_RB16(src[2] + i);
2024 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
2025 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Horizontal scaler for 16-bit input producing 19-bit output: for each
 * destination sample, accumulate filterSize taps starting at filterPos[i]
 * and shift down by a source-depth-dependent amount. */
2029 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
2030 const int16_t *filter,
2031 const int16_t *filterPos, int filterSize)
2034 int32_t *dst = (int32_t *) _dst;
2035 const uint16_t *src = (const uint16_t *) _src;
2036 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
/* Shallow RGB/PAL sources use a different effective shift (the branch
 * body is outside this excerpt). */
2039 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2042 for (i = 0; i < dstW; i++) {
2044 int srcPos = filterPos[i];
2047 for (j = 0; j < filterSize; j++) {
2048 val += src[srcPos + j] * filter[filterSize * i + j];
2050 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2051 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Same scaler, 15-bit output variant. */
2055 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2056 const int16_t *filter,
2057 const int16_t *filterPos, int filterSize)
2060 const uint16_t *src = (const uint16_t *) _src;
2061 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2064 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2066 for (i = 0; i < dstW; i++) {
2068 int srcPos = filterPos[i];
2071 for (j = 0; j < filterSize; j++) {
2072 val += src[srcPos + j] * filter[filterSize * i + j];
2074 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2075 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2079 // bilinear / bicubic scaling
/* 8-bit input -> 15-bit output; clamp guards against bicubic overshoot. */
2080 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2081 const int16_t *filter, const int16_t *filterPos,
2085 for (i=0; i<dstW; i++) {
2087 int srcPos= filterPos[i];
2089 for (j=0; j<filterSize; j++) {
2090 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2092 //filter += hFilterSize;
2093 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* 8-bit input -> 19-bit output variant. */
2098 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2099 const int16_t *filter, const int16_t *filterPos,
2103 int32_t *dst = (int32_t *) _dst;
2104 for (i=0; i<dstW; i++) {
2106 int srcPos= filterPos[i];
2108 for (j=0; j<filterSize; j++) {
2109 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2111 //filter += hFilterSize;
2112 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2117 //FIXME all pal and rgb srcFormats could do this convertion as well
2118 //FIXME all scalers more complex than bilinear could do half of this transform
2119 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2122 for (i = 0; i < width; i++) {
2123 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2124 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma to limited-range (MPEG) chroma in place.
 * Samples are 15-bit fixed point. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    const int16_t *end = dstU + width;

    while (u < end) {
        *u = (*u * 1799 + 4081085) >> 11; //1469
        *v = (*v * 1799 + 4081085) >> 11; //1469
        u++;
        v++;
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG) luma in place.
 * Samples are 15-bit fixed point; the clamp avoids 32-bit overflow. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int idx;

    for (idx = 0; idx < width; idx++) {
        int y = dst[idx] > 30189 ? 30189 : dst[idx];
        dst[idx] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG) luma in place.
 * Samples are 15-bit fixed point. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    const int16_t *end = dst + width;

    for (; p < end; p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
2148 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2151 int32_t *dstU = (int32_t *) _dstU;
2152 int32_t *dstV = (int32_t *) _dstV;
2153 for (i = 0; i < width; i++) {
2154 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2155 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit variant of chrRangeFromJpeg_c: the int16_t pointers actually hold
 * int32_t samples. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    const int32_t bias = 4081085 << 4;   // 1469, scaled to 19 bits
    int i;

    for (i = 0; i < width; i++) {
        u[i] = (u[i] * 1799 + bias) >> 11;
        v[i] = (v[i] * 1799 + bias) >> 11;
    }
}
/* 19-bit variant of lumRangeToJpeg_c: the int16_t pointer actually holds
 * int32_t samples. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *out = (int32_t *) _dst;
    const int32_t cap = 30189 << 4;      // overflow clamp, scaled to 19 bits
    int i;

    for (i = 0; i < width; i++) {
        int32_t y = out[i] > cap ? cap : out[i];
        out[i] = (y * 4769 - (39057361 << 2)) >> 12;
    }
}
/* 19-bit variant of lumRangeFromJpeg_c: the int16_t pointer actually holds
 * int32_t samples.  Coefficients are pre-divided by 4 to stay inside 32-bit
 * intermediate range. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *out = (int32_t *) _dst;
    const int32_t scale = 14071 / 4;
    const int32_t bias  = (33561947 << 4) / 4;
    int i;

    for (i = 0; i < width; i++)
        out[i] = (out[i] * scale + bias) >> 12;
}
2183 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2184 const uint8_t *src, int srcW, int xInc)
2187 unsigned int xpos=0;
2188 for (i=0;i<dstWidth;i++) {
2189 register unsigned int xx=xpos>>16;
2190 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2191 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2194 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2195 dst[i] = src[srcW-1]*128;
2198 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (isAlpha == 0) or alpha (isAlpha == 1) line
 * into dst.  Input that is not plain 8-bit luma is first converted into
 * formatConvBuffer by the context's conversion callback, then scaled either
 * with the generic filter path (hyScale) or the fast-bilinear path
 * (hyscale_fast); an optional range conversion is applied to the result. */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src_in[4], int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    /* pick the alpha or luma conversion/range callbacks from the context */
    void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    const uint8_t *src = src_in[isAlpha ? 3 : 0];   // plane 3 is alpha
        /* convert the packed/paletted input line to Y8 before scaling */
        toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
        src= formatConvBuffer;
    } else if (c->readLumPlanar && !isAlpha) {
        /* gather planar RGB into a single luma line */
        c->readLumPlanar(formatConvBuffer, src_in, srcW);
        src = formatConvBuffer;
    if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
        /* MPEG<->JPEG range conversion on the scaled line (luma only) */
        convertRange(dst, dstWidth);
2228 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2229 int dstWidth, const uint8_t *src1,
2230 const uint8_t *src2, int srcW, int xInc)
2233 unsigned int xpos=0;
2234 for (i=0;i<dstWidth;i++) {
2235 register unsigned int xx=xpos>>16;
2236 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2237 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2238 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2241 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2242 dst1[i] = src1[srcW-1]*128;
2243 dst2[i] = src2[srcW-1]*128;
/* Horizontally scale one pair of chroma lines (U into dst1, V into dst2).
 * Input that is not already planar 8-bit chroma is first converted into
 * formatConvBuffer (U) and buf2 (V), then scaled either with the generic
 * filter path (hcScale) or the fast-bilinear path (hcscale_fast); an
 * optional chroma range conversion is applied to the result. */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
        /* V goes into a second, 16-byte-aligned region of the conversion buffer */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
        c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
        src1= formatConvBuffer;
    } else if (c->readChrPlanar) {
        /* gather planar RGB into separate U and V lines */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
        src1= formatConvBuffer;
    if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    /* MPEG<->JPEG chroma range conversion, if configured */
    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions for the destination format: planar writers
 * (yuv2plane1/yuv2planeX/yuv2nv12cX) by bit depth and endianness, and packed
 * writers (yuv2packed1/2/X) by pixel format.  Full-chroma-interpolation
 * (SWS_FULL_CHR_H_INT) RGB paths only provide the X variant.
 * NOTE(review): several case labels and break statements are elided in this
 * excerpt — consult the full file before editing the switches. */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;
    /* planar writers: pick by output bit depth and byte order */
    if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {   // 9-bit
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;
    /* packed writers, full chroma interpolation: X variant only */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;
        /* packed writers, normal chroma: 1-, 2- and X-tap variants */
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_c;
            *yuv2packed2 = yuv2rgba32_2_c;
            *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_c;
            *yuv2packed2 = yuv2rgbx32_2_c;
            *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_1_c;
            *yuv2packed2 = yuv2rgba32_1_2_c;
            *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_1_c;
            *yuv2packed2 = yuv2rgbx32_1_2_c;
            *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
2522 #define DEBUG_SWSCALE_BUFFERS 0
2523 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main scaling entry point: consumes srcSliceH input lines starting at
 * srcSliceY, horizontally scales them into ring buffers, vertically scales
 * and outputs as many destination lines as the buffered input allows.
 * Returns the number of output lines written.
 * NOTE(review): several lines (declarations, braces, some statements) are
 * elided in this excerpt — consult the full file before editing. */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    /* chroma slice bounds: start rounded down, height rounded up */
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    /* dither when the source has more precision than the 8-bit output path */
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;
    if (isPacked(c->srcFormat)) {
    srcStride[3]= srcStride[0];
    /* vertical chroma dropping halves the effective chroma stride */
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;
    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
    if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");
    /* warn once if SSE2-capable but data/strides are unaligned (speed loss) */
    if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
        || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
        || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
        static int warnedAlready=0;
        int cpu_flags = av_get_cpu_flags();
        if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */
    if (srcSliceY ==0) {
    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
    /* main output loop: one destination line per iteration */
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        uint8_t *dest[4] = {
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        int use_mmx_vfilter= c->use_mmx_vfilter;
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);
        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        if (!enough_lines) {
            /* buffer what we have and wait for the next slice */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice
        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
        if (should_dither) {
            c->chrDither8 = dither_8x8_128[chrDstY & 7];
            c->lumDither8 = dither_8x8_128[dstY & 7];
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                           &yuv2packed1, &yuv2packed2, &yuv2packedX);
        /* pointers into the ring buffers for the vertical filter taps */
        const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
        const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
        const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
        int16_t *vLumFilter= c->vLumFilter;
        int16_t *vChrFilter= c->vChrFilter;
        if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
            const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
            vLumFilter += dstY * vLumFilterSize;
            vChrFilter += chrDstY * vChrFilterSize;
            av_assert0(use_mmx_vfilter != (
                       yuv2planeX == yuv2planeX_10BE_c
                    || yuv2planeX == yuv2planeX_10LE_c
                    || yuv2planeX == yuv2planeX_9BE_c
                    || yuv2planeX == yuv2planeX_9LE_c
                    || yuv2planeX == yuv2planeX_16BE_c
                    || yuv2planeX == yuv2planeX_16LE_c
                    || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
            if(use_mmx_vfilter){
                vLumFilter= c->lumMmxFilter;
                vChrFilter= c->chrMmxFilter;
            if (vLumFilterSize == 1) {
                yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                yuv2planeX(vLumFilter, vLumFilterSize,
                           lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
            /* chroma: skip lines removed by vertical subsampling */
            if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                    yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                } else if (vChrFilterSize == 1) {
                    yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                    yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                    yuv2planeX(vChrFilter, vChrFilterSize,
                               chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                    yuv2planeX(vChrFilter, vChrFilterSize,
                               chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                if(use_mmx_vfilter){
                    vLumFilter= c->alpMmxFilter;
                if (vLumFilterSize == 1) {
                    yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                    yuv2planeX(vLumFilter, vLumFilterSize,
                               alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
            assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
            assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
            if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                int chrAlpha = vChrFilter[2 * dstY + 1];
                yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                            alpPixBuf ? *alpSrcPtr : NULL,
                            dest[0], dstW, chrAlpha, dstY);
            } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                int lumAlpha = vLumFilter[2 * dstY + 1];
                int chrAlpha = vChrFilter[2 * dstY + 1];
                lumMmxFilter[3] = vLumFilter[2 * dstY    ] * 0x10001;
                chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                            alpPixBuf ? alpSrcPtr : NULL,
                            dest[0], dstW, lumAlpha, chrAlpha, dstY);
            } else { //general RGB
                yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                            lumSrcPtr, vLumFilterSize,
                            vChrFilter + dstY * vChrFilterSize,
                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                            alpSrcPtr, dest[0], dstW, dstY);
    /* fill the alpha plane with opaque if the source had no alpha */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");
    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;
    return dstY - lastDstY;
/* Initialize the C code paths of the context: output writers, input
 * conversion callbacks (chrToYV12 / lumToYV12 / alpToYV12, planar readers),
 * horizontal scalers by bit depth, and range-conversion callbacks.
 * NOTE(review): several case labels and default branches are elided in this
 * excerpt — consult the full file before editing the switches. */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;
    find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                                   &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
    /* chroma input conversion, full-resolution variants */
    c->chrToYV12 = NULL;
    case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
    case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
    case PIX_FMT_NV12    : c->chrToYV12 = nv12ToUV_c; break;
    case PIX_FMT_NV21    : c->chrToYV12 = nv21ToUV_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
    case PIX_FMT_GBRP9LE:
    case PIX_FMT_GBRP10LE:
    case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
    case PIX_FMT_GBRP9BE:
    case PIX_FMT_GBRP10BE:
    case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
    case PIX_FMT_GBRP:     c->readChrPlanar = planar_rgb_to_uv; break;
    /* byte-swapped high-bit-depth planar YUV */
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV422P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV422P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
    /* horizontally subsampled chroma: use the *_half readers which average
     * two source pixels per chroma sample */
    if (c->chrSrcHSubSample) {
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
        case PIX_FMT_GBR24P  : c->chrToYV12 = gbr24pToUV_half_c; break;
        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c; break;
        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c; break;
        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c; break;
        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c; break;
        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
    /* luma / alpha input conversion */
    c->lumToYV12 = NULL;
    c->alpToYV12 = NULL;
    switch (srcFormat) {
    case PIX_FMT_GBRP9LE:
    case PIX_FMT_GBRP10LE:
    case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
    case PIX_FMT_GBRP9BE:
    case PIX_FMT_GBRP10BE:
    case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
    case PIX_FMT_GBRP:     c->readLumPlanar = planar_rgb_to_y; break;
    case PIX_FMT_YUV444P9LE:
    case PIX_FMT_YUV422P9LE:
    case PIX_FMT_YUV420P9LE:
    case PIX_FMT_YUV422P10LE:
    case PIX_FMT_YUV420P10LE:
    case PIX_FMT_YUV444P10LE:
    case PIX_FMT_YUV420P16LE:
    case PIX_FMT_YUV422P16LE:
    case PIX_FMT_YUV444P16LE:
    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUV444P9BE:
    case PIX_FMT_YUV422P9BE:
    case PIX_FMT_YUV420P9BE:
    case PIX_FMT_YUV444P10BE:
    case PIX_FMT_YUV422P10BE:
    case PIX_FMT_YUV420P10BE:
    case PIX_FMT_YUV420P16BE:
    case PIX_FMT_YUV422P16BE:
    case PIX_FMT_YUV444P16BE:
    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
    case PIX_FMT_YUYV422  :
    case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
    case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c; break;
    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
    case PIX_FMT_BGR4_BYTE:
    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c; break;
    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c; break;
    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
    switch (srcFormat) {
    case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
    case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
    case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
    case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
    /* horizontal scalers: pick by source and destination bit depth */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            c->hyScale = c->hcScale = hScale8To19_c;
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
    /* MPEG<->JPEG range conversion (not for RGB output: yuv2rgb handles it) */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
                c->lumConvertRange = lumRangeFromJpeg_c;
                c->chrConvertRange = chrRangeFromJpeg_c;
                c->lumConvertRange = lumRangeToJpeg_c;
                c->chrConvertRange = chrRangeToJpeg_c;
                c->lumConvertRange = lumRangeFromJpeg16_c;
                c->chrConvertRange = chrRangeFromJpeg16_c;
                c->lumConvertRange = lumRangeToJpeg16_c;
                c->chrConvertRange = chrRangeToJpeg16_c;
    /* grayscale and 1-bit formats have no chroma to scale */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
3058 SwsFunc ff_getSwsFunc(SwsContext *c)
3060 sws_init_swScale_c(c);
3063 ff_sws_init_swScale_mmx(c);
3065 ff_sws_init_swScale_altivec(c);