2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/avassert.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/cpu.h"
33 #include "libavutil/avutil.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/bswap.h"
36 #include "libavutil/pixdesc.h"
/* RGB -> YUV conversion coefficients in RGB2YUV_SHIFT-bit fixed point.
 * The 0.299 / 0.587 / 0.114 luma weights and 0.500 / 0.419 / ... chroma
 * weights, combined with the 219/255 (luma) and 224/255 (chroma) range
 * factors, are the limited-range ITU-R BT.601 coefficients. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
 * Special versions: fast Y 1:1 scaling (no interpolation in y direction)
 *
 * TODO:
 * more intelligent misalignment avoidance for the horizontal scaler
 * write special vertical cubic upscale version
 * optimize C code (YV12 / minmax)
 * add support for packed pixel YUV input & output
 * add support for Y8 output
 * optimize BGR24 & BGR32
 * add BGR4 output support
 * write special BGR->BGR scaler
 */
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 { 1, 3, 1, 3, 1, 3, 1, 3, },
67 { 2, 0, 2, 0, 2, 0, 2, 0, },
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 { 6, 2, 6, 2, 6, 2, 6, 2, },
72 { 0, 4, 0, 4, 0, 4, 0, 4, },
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 { 8, 4, 11, 7, 8, 4, 11, 7, },
77 { 2, 14, 1, 13, 2, 14, 1, 13, },
78 { 10, 6, 9, 5, 10, 6, 9, 5, },
79 { 0, 12, 3, 15, 0, 12, 3, 15, },
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17, 9, 23, 15, 16, 8, 22, 14, },
84 { 5, 29, 3, 27, 4, 28, 2, 26, },
85 { 21, 13, 19, 11, 20, 12, 18, 10, },
86 { 0, 24, 6, 30, 1, 25, 7, 31, },
87 { 16, 8, 22, 14, 17, 9, 23, 15, },
88 { 4, 28, 2, 26, 5, 29, 3, 27, },
89 { 20, 12, 18, 10, 21, 13, 19, 11, },
90 { 1, 25, 7, 31, 0, 24, 6, 30, },
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 { 0, 55, 14, 68, 3, 58, 17, 72, },
95 { 37, 18, 50, 32, 40, 22, 54, 35, },
96 { 9, 64, 5, 59, 13, 67, 8, 63, },
97 { 46, 27, 41, 23, 49, 31, 44, 26, },
98 { 2, 57, 16, 71, 1, 56, 15, 70, },
99 { 39, 21, 52, 34, 38, 19, 51, 33, },
100 { 11, 66, 7, 62, 10, 65, 6, 60, },
101 { 48, 30, 43, 25, 47, 29, 42, 24, },
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117, 62, 158, 103, 113, 58, 155, 100, },
107 { 34, 199, 21, 186, 31, 196, 17, 182, },
108 {144, 89, 131, 76, 141, 86, 127, 72, },
109 { 0, 165, 41, 206, 10, 175, 52, 217, },
110 {110, 55, 151, 96, 120, 65, 162, 107, },
111 { 28, 193, 14, 179, 38, 203, 24, 189, },
112 {138, 83, 124, 69, 148, 93, 134, 79, },
113 { 7, 172, 48, 213, 3, 168, 45, 210, },
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 { 0, 143, 18, 200, 2, 156, 25, 215, },
119 { 78, 28, 125, 64, 89, 36, 138, 74, },
120 { 10, 180, 3, 161, 16, 195, 8, 175, },
121 {109, 51, 93, 38, 121, 60, 105, 47, },
122 { 1, 152, 23, 210, 0, 147, 20, 205, },
123 { 85, 33, 134, 71, 81, 30, 130, 67, },
124 { 14, 190, 6, 171, 12, 185, 5, 166, },
125 {117, 57, 101, 44, 113, 54, 97, 41, },
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 { 0, 124, 8, 193, 0, 140, 12, 213, },
131 { 55, 14, 104, 42, 66, 19, 119, 52, },
132 { 3, 168, 1, 145, 6, 187, 3, 162, },
133 { 86, 31, 70, 21, 99, 39, 82, 28, },
134 { 0, 134, 11, 206, 0, 129, 9, 200, },
135 { 62, 17, 114, 48, 58, 16, 109, 45, },
136 { 5, 181, 2, 157, 4, 175, 1, 151, },
137 { 95, 36, 78, 26, 90, 34, 74, 24, },
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 { 0, 107, 3, 187, 0, 125, 6, 212, },
143 { 39, 7, 86, 28, 49, 11, 102, 36, },
144 { 1, 158, 0, 131, 3, 180, 1, 151, },
145 { 68, 19, 52, 12, 81, 25, 64, 17, },
146 { 0, 119, 5, 203, 0, 113, 4, 195, },
147 { 45, 9, 96, 33, 42, 8, 91, 30, },
148 { 2, 172, 1, 144, 2, 165, 0, 137, },
149 { 77, 23, 60, 15, 72, 21, 56, 14, },
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 { 36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100, 4,124, 28, 98, 2,122, 26,},
155 { 52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 { 32, 64, 56, 88, 38, 70, 62, 94,},
158 { 96, 0,120, 24,102, 6,126, 30,},
159 { 48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight bytes of 64 — exported constant vector; consumers are outside this
 * excerpt (the ff_ prefix marks it as library-internal public). */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
165 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
167 { 0, 1, 0, 1, 0, 1, 0, 1,},
168 { 1, 0, 1, 0, 1, 0, 1, 0,},
169 { 0, 1, 0, 1, 0, 1, 0, 1,},
170 { 1, 0, 1, 0, 1, 0, 1, 0,},
171 { 0, 1, 0, 1, 0, 1, 0, 1,},
172 { 1, 0, 1, 0, 1, 0, 1, 0,},
173 { 0, 1, 0, 1, 0, 1, 0, 1,},
174 { 1, 0, 1, 0, 1, 0, 1, 0,},
176 { 1, 2, 1, 2, 1, 2, 1, 2,},
177 { 3, 0, 3, 0, 3, 0, 3, 0,},
178 { 1, 2, 1, 2, 1, 2, 1, 2,},
179 { 3, 0, 3, 0, 3, 0, 3, 0,},
180 { 1, 2, 1, 2, 1, 2, 1, 2,},
181 { 3, 0, 3, 0, 3, 0, 3, 0,},
182 { 1, 2, 1, 2, 1, 2, 1, 2,},
183 { 3, 0, 3, 0, 3, 0, 3, 0,},
185 { 2, 4, 3, 5, 2, 4, 3, 5,},
186 { 6, 0, 7, 1, 6, 0, 7, 1,},
187 { 3, 5, 2, 4, 3, 5, 2, 4,},
188 { 7, 1, 6, 0, 7, 1, 6, 0,},
189 { 2, 4, 3, 5, 2, 4, 3, 5,},
190 { 6, 0, 7, 1, 6, 0, 7, 1,},
191 { 3, 5, 2, 4, 3, 5, 2, 4,},
192 { 7, 1, 6, 0, 7, 1, 6, 0,},
194 { 4, 8, 7, 11, 4, 8, 7, 11,},
195 { 12, 0, 15, 3, 12, 0, 15, 3,},
196 { 6, 10, 5, 9, 6, 10, 5, 9,},
197 { 14, 2, 13, 1, 14, 2, 13, 1,},
198 { 4, 8, 7, 11, 4, 8, 7, 11,},
199 { 12, 0, 15, 3, 12, 0, 15, 3,},
200 { 6, 10, 5, 9, 6, 10, 5, 9,},
201 { 14, 2, 13, 1, 14, 2, 13, 1,},
203 { 9, 17, 15, 23, 8, 16, 14, 22,},
204 { 25, 1, 31, 7, 24, 0, 30, 6,},
205 { 13, 21, 11, 19, 12, 20, 10, 18,},
206 { 29, 5, 27, 3, 28, 4, 26, 2,},
207 { 8, 16, 14, 22, 9, 17, 15, 23,},
208 { 24, 0, 30, 6, 25, 1, 31, 7,},
209 { 12, 20, 10, 18, 13, 21, 11, 19,},
210 { 28, 4, 26, 2, 29, 5, 27, 3,},
212 { 18, 34, 30, 46, 17, 33, 29, 45,},
213 { 50, 2, 62, 14, 49, 1, 61, 13,},
214 { 26, 42, 22, 38, 25, 41, 21, 37,},
215 { 58, 10, 54, 6, 57, 9, 53, 5,},
216 { 16, 32, 28, 44, 19, 35, 31, 47,},
217 { 48, 0, 60, 12, 51, 3, 63, 15,},
218 { 24, 40, 20, 36, 27, 43, 23, 39,},
219 { 56, 8, 52, 4, 59, 11, 55, 7,},
221 { 18, 34, 30, 46, 17, 33, 29, 45,},
222 { 50, 2, 62, 14, 49, 1, 61, 13,},
223 { 26, 42, 22, 38, 25, 41, 21, 37,},
224 { 58, 10, 54, 6, 57, 9, 53, 5,},
225 { 16, 32, 28, 44, 19, 35, 31, 47,},
226 { 48, 0, 60, 12, 51, 3, 63, 15,},
227 { 24, 40, 20, 36, 27, 43, 23, 39,},
228 { 56, 8, 52, 4, 59, 11, 55, 7,},
230 { 36, 68, 60, 92, 34, 66, 58, 90,},
231 { 100, 4,124, 28, 98, 2,122, 26,},
232 { 52, 84, 44, 76, 50, 82, 42, 74,},
233 { 116, 20,108, 12,114, 18,106, 10,},
234 { 32, 64, 56, 88, 38, 70, 62, 94,},
235 { 96, 0,120, 24,102, 6,126, 30,},
236 { 48, 80, 40, 72, 54, 86, 46, 78,},
237 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight bytes of 64 — same values as ff_sws_pb_64, but file-local and
 * without the alignment declaration. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Precomputed dither scale factors, indexed by bit depth.
 * NOTE(review): the exact semantics of the two indices are not evident from
 * this excerpt — confirm against callers.  Fix: restore missing "};". */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
};
/* Store one clipped, biased 16-bit sample in big- or little-endian order.
 * `shift` and `big_endian` come from the enclosing function's scope.
 * Fix: the endian-selection branch lines were missing from this excerpt. */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }
267 static av_always_inline void
268 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
269 int big_endian, int output_bits)
273 av_assert0(output_bits == 16);
275 for (i = 0; i < dstW; i++) {
276 int val = src[i] + (1 << (shift - 1));
277 output_pixel(&dest[i], val, 0, uint);
281 static av_always_inline void
282 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
283 const int32_t **src, uint16_t *dest, int dstW,
284 int big_endian, int output_bits)
288 av_assert0(output_bits == 16);
290 for (i = 0; i < dstW; i++) {
291 int val = 1 << (shift - 1);
294 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
295 * filters (or anything with negative coeffs, the range can be slightly
296 * wider in both directions. To account for this overflow, we subtract
297 * a constant so it always fits in the signed range (assuming a
298 * reasonable filterSize), and re-add that at the end. */
300 for (j = 0; j < filterSize; j++)
301 val += src[j][i] * filter[j];
303 output_pixel(&dest[i], val, 0x8000, int);
#undef output_pixel

/* Store one 9/10-bit sample: shift down, clip to output_bits, write LE/BE.
 * Fix: the endian-selection branch lines were missing from this excerpt. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
316 static av_always_inline void
317 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
318 int big_endian, int output_bits)
321 int shift = 15 - output_bits;
323 for (i = 0; i < dstW; i++) {
324 int val = src[i] + (1 << (shift - 1));
325 output_pixel(&dest[i], val);
329 static av_always_inline void
330 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
331 const int16_t **src, uint16_t *dest, int dstW,
332 int big_endian, int output_bits)
335 int shift = 11 + 16 - output_bits;
337 for (i = 0; i < dstW; i++) {
338 int val = 1 << (shift - 1);
341 for (j = 0; j < filterSize; j++)
342 val += src[j][i] * filter[j];
344 output_pixel(&dest[i], val);
/* Instantiate yuv2plane1/yuv2planeX wrappers for a given bit depth and
 * endianness, forwarding to the 10- or 16-bit template.
 * Fix: the "{" / "}" continuation lines of the macro were missing. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
/* Instantiate the 9-, 10- and 16-bit planar output functions for both
 * endiannesses; 9/10 bit share the int16_t template, 16 bit uses int32_t. */
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
/* Vertical multi-tap filtering producing one 8-bit plane, with per-pixel
 * ordered dithering.  Fix: restore missing braces and local declarations. */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}
/* Vertical 1-tap output of one 8-bit plane with ordered dithering.
 * Fix: restore missing braces and the loop-index declaration. */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}
398 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
399 const int16_t **chrUSrc, const int16_t **chrVSrc,
400 uint8_t *dest, int chrDstW)
402 enum PixelFormat dstFormat = c->dstFormat;
403 const uint8_t *chrDither = c->chrDither8;
406 if (dstFormat == PIX_FMT_NV12)
407 for (i=0; i<chrDstW; i++) {
408 int u = chrDither[i & 7] << 12;
409 int v = chrDither[(i + 3) & 7] << 12;
411 for (j=0; j<chrFilterSize; j++) {
412 u += chrUSrc[j][i] * chrFilter[j];
413 v += chrVSrc[j][i] * chrFilter[j];
416 dest[2*i]= av_clip_uint8(u>>19);
417 dest[2*i+1]= av_clip_uint8(v>>19);
420 for (i=0; i<chrDstW; i++) {
421 int u = chrDither[i & 7] << 12;
422 int v = chrDither[(i + 3) & 7] << 12;
424 for (j=0; j<chrFilterSize; j++) {
425 u += chrUSrc[j][i] * chrFilter[j];
426 v += chrVSrc[j][i] * chrFilter[j];
429 dest[2*i]= av_clip_uint8(v>>19);
430 dest[2*i+1]= av_clip_uint8(u>>19);
#undef output_pixel

/* Write one 16-bit gray sample in the endianness selected by `target`.
 * Fix: the branch bodies were missing from this excerpt. */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
441 static av_always_inline void
442 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
443 const int32_t **lumSrc, int lumFilterSize,
444 const int16_t *chrFilter, const int32_t **chrUSrc,
445 const int32_t **chrVSrc, int chrFilterSize,
446 const int32_t **alpSrc, uint16_t *dest, int dstW,
447 int y, enum PixelFormat target)
451 for (i = 0; i < (dstW >> 1); i++) {
453 int Y1 = (1 << 14) - 0x40000000;
454 int Y2 = (1 << 14) - 0x40000000;
456 for (j = 0; j < lumFilterSize; j++) {
457 Y1 += lumSrc[j][i * 2] * lumFilter[j];
458 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
462 Y1 = av_clip_int16(Y1);
463 Y2 = av_clip_int16(Y2);
464 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
465 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
469 static av_always_inline void
470 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
471 const int32_t *ubuf[2], const int32_t *vbuf[2],
472 const int32_t *abuf[2], uint16_t *dest, int dstW,
473 int yalpha, int uvalpha, int y,
474 enum PixelFormat target)
476 int yalpha1 = 4095 - yalpha;
478 const int32_t *buf0 = buf[0], *buf1 = buf[1];
480 for (i = 0; i < (dstW >> 1); i++) {
481 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
482 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
484 output_pixel(&dest[i * 2 + 0], Y1);
485 output_pixel(&dest[i * 2 + 1], Y2);
489 static av_always_inline void
490 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
491 const int32_t *ubuf[2], const int32_t *vbuf[2],
492 const int32_t *abuf0, uint16_t *dest, int dstW,
493 int uvalpha, int y, enum PixelFormat target)
497 for (i = 0; i < (dstW >> 1); i++) {
498 int Y1 = (buf0[i * 2 ]+4)>>3;
499 int Y2 = (buf0[i * 2 + 1]+4)>>3;
501 output_pixel(&dest[i * 2 + 0], Y1);
502 output_pixel(&dest[i * 2 + 1], Y2);
/* Generate the three public entry points (_X, _2, _1) for a 16-bit packed
 * output format: cast the generic int16_t pointers to the int32_t/uint16_t
 * types the >8-bit templates use, then forward.  Fix: the "int y)", "{" and
 * "}" continuation lines of the macro were missing from this excerpt. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                          dstW, uvalpha, y, fmt); \
}
/* 16-bit grayscale output functions for both endiannesses. */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
#undef output_pixel

/* Emit 8 accumulated 1-bit pixels; MONOWHITE is the inverted polarity.
 * Fix: the branch bodies were missing from this excerpt. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }
564 static av_always_inline void
565 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
566 const int16_t **lumSrc, int lumFilterSize,
567 const int16_t *chrFilter, const int16_t **chrUSrc,
568 const int16_t **chrVSrc, int chrFilterSize,
569 const int16_t **alpSrc, uint8_t *dest, int dstW,
570 int y, enum PixelFormat target)
572 const uint8_t * const d128=dither_8x8_220[y&7];
573 uint8_t *g = c->table_gU[128] + c->table_gV[128];
577 for (i = 0; i < dstW - 1; i += 2) {
582 for (j = 0; j < lumFilterSize; j++) {
583 Y1 += lumSrc[j][i] * lumFilter[j];
584 Y2 += lumSrc[j][i+1] * lumFilter[j];
588 if ((Y1 | Y2) & 0x100) {
589 Y1 = av_clip_uint8(Y1);
590 Y2 = av_clip_uint8(Y2);
592 acc += acc + g[Y1 + d128[(i + 0) & 7]];
593 acc += acc + g[Y2 + d128[(i + 1) & 7]];
595 output_pixel(*dest++, acc);
600 static av_always_inline void
601 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
602 const int16_t *ubuf[2], const int16_t *vbuf[2],
603 const int16_t *abuf[2], uint8_t *dest, int dstW,
604 int yalpha, int uvalpha, int y,
605 enum PixelFormat target)
607 const int16_t *buf0 = buf[0], *buf1 = buf[1];
608 const uint8_t * const d128 = dither_8x8_220[y & 7];
609 uint8_t *g = c->table_gU[128] + c->table_gV[128];
610 int yalpha1 = 4095 - yalpha;
613 for (i = 0; i < dstW - 7; i += 8) {
614 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
615 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
616 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
617 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
618 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
619 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
620 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
621 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
622 output_pixel(*dest++, acc);
626 static av_always_inline void
627 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
628 const int16_t *ubuf[2], const int16_t *vbuf[2],
629 const int16_t *abuf0, uint8_t *dest, int dstW,
630 int uvalpha, int y, enum PixelFormat target)
632 const uint8_t * const d128 = dither_8x8_220[y & 7];
633 uint8_t *g = c->table_gU[128] + c->table_gV[128];
636 for (i = 0; i < dstW - 7; i += 8) {
637 int acc = g[(buf0[i ] >> 7) + d128[0]];
638 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
639 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
640 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
641 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
642 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
643 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
644 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
645 output_pixel(*dest++, acc);
/* Generate the three public entry points (_X, _2, _1) for an 8-bit packed
 * output format by forwarding to the shared template with a compile-time
 * `fmt`.  Fix: the "int y)", "{" and "}" continuation lines were missing. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}
/* 1-bpp monochrome output, both polarities. */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Store a YUYV or UYVY 2-pixel group (byte order selected by `target`).
 * Fix: the U/V store lines and the "} else {" were missing from this
 * excerpt. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U;  \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V;  \
    } else { \
        dest[pos + 0] = U;  \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V;  \
        dest[pos + 3] = Y2; \
    }
699 static av_always_inline void
700 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
701 const int16_t **lumSrc, int lumFilterSize,
702 const int16_t *chrFilter, const int16_t **chrUSrc,
703 const int16_t **chrVSrc, int chrFilterSize,
704 const int16_t **alpSrc, uint8_t *dest, int dstW,
705 int y, enum PixelFormat target)
709 for (i = 0; i < (dstW >> 1); i++) {
716 for (j = 0; j < lumFilterSize; j++) {
717 Y1 += lumSrc[j][i * 2] * lumFilter[j];
718 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
720 for (j = 0; j < chrFilterSize; j++) {
721 U += chrUSrc[j][i] * chrFilter[j];
722 V += chrVSrc[j][i] * chrFilter[j];
728 if ((Y1 | Y2 | U | V) & 0x100) {
729 Y1 = av_clip_uint8(Y1);
730 Y2 = av_clip_uint8(Y2);
731 U = av_clip_uint8(U);
732 V = av_clip_uint8(V);
734 output_pixels(4*i, Y1, U, Y2, V);
738 static av_always_inline void
739 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
740 const int16_t *ubuf[2], const int16_t *vbuf[2],
741 const int16_t *abuf[2], uint8_t *dest, int dstW,
742 int yalpha, int uvalpha, int y,
743 enum PixelFormat target)
745 const int16_t *buf0 = buf[0], *buf1 = buf[1],
746 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
747 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
748 int yalpha1 = 4095 - yalpha;
749 int uvalpha1 = 4095 - uvalpha;
752 for (i = 0; i < (dstW >> 1); i++) {
753 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
754 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
755 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
756 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
758 output_pixels(i * 4, Y1, U, Y2, V);
762 static av_always_inline void
763 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
769 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
772 if (uvalpha < 2048) {
773 for (i = 0; i < (dstW >> 1); i++) {
774 int Y1 = buf0[i * 2] >> 7;
775 int Y2 = buf0[i * 2 + 1] >> 7;
776 int U = ubuf1[i] >> 7;
777 int V = vbuf1[i] >> 7;
779 output_pixels(i * 4, Y1, U, Y2, V);
782 for (i = 0; i < (dstW >> 1); i++) {
783 int Y1 = buf0[i * 2] >> 7;
784 int Y2 = buf0[i * 2 + 1] >> 7;
785 int U = (ubuf0[i] + ubuf1[i]) >> 8;
786 int V = (vbuf0[i] + vbuf1[i]) >> 8;
788 output_pixels(i * 4, Y1, U, Y2, V);
/* Packed 4:2:2 output in both byte orders. */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* Component order helpers: RGB48 stores R first, BGR48 stores B first. */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)

#undef output_pixel
/* Write one 16-bit RGB component in the endianness of `target`.
 * Fix: the endian-selection branch lines were missing from this excerpt. */
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
807 static av_always_inline void
808 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
809 const int32_t **lumSrc, int lumFilterSize,
810 const int16_t *chrFilter, const int32_t **chrUSrc,
811 const int32_t **chrVSrc, int chrFilterSize,
812 const int32_t **alpSrc, uint16_t *dest, int dstW,
813 int y, enum PixelFormat target)
817 for (i = 0; i < (dstW >> 1); i++) {
819 int Y1 = -0x40000000;
820 int Y2 = -0x40000000;
821 int U = -128 << 23; // 19
825 for (j = 0; j < lumFilterSize; j++) {
826 Y1 += lumSrc[j][i * 2] * lumFilter[j];
827 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
829 for (j = 0; j < chrFilterSize; j++) {
830 U += chrUSrc[j][i] * chrFilter[j];
831 V += chrVSrc[j][i] * chrFilter[j];
834 // 8bit: 12+15=27; 16-bit: 12+19=31
842 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
843 Y1 -= c->yuv2rgb_y_offset;
844 Y2 -= c->yuv2rgb_y_offset;
845 Y1 *= c->yuv2rgb_y_coeff;
846 Y2 *= c->yuv2rgb_y_coeff;
849 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
851 R = V * c->yuv2rgb_v2r_coeff;
852 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
853 B = U * c->yuv2rgb_u2b_coeff;
855 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
856 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
857 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
858 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
859 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
860 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
861 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
866 static av_always_inline void
867 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
868 const int32_t *ubuf[2], const int32_t *vbuf[2],
869 const int32_t *abuf[2], uint16_t *dest, int dstW,
870 int yalpha, int uvalpha, int y,
871 enum PixelFormat target)
873 const int32_t *buf0 = buf[0], *buf1 = buf[1],
874 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
875 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
876 int yalpha1 = 4095 - yalpha;
877 int uvalpha1 = 4095 - uvalpha;
880 for (i = 0; i < (dstW >> 1); i++) {
881 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
882 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
883 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
884 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
887 Y1 -= c->yuv2rgb_y_offset;
888 Y2 -= c->yuv2rgb_y_offset;
889 Y1 *= c->yuv2rgb_y_coeff;
890 Y2 *= c->yuv2rgb_y_coeff;
894 R = V * c->yuv2rgb_v2r_coeff;
895 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
896 B = U * c->yuv2rgb_u2b_coeff;
898 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
899 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
900 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
901 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
902 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
903 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
908 static av_always_inline void
909 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
910 const int32_t *ubuf[2], const int32_t *vbuf[2],
911 const int32_t *abuf0, uint16_t *dest, int dstW,
912 int uvalpha, int y, enum PixelFormat target)
914 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
915 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
918 if (uvalpha < 2048) {
919 for (i = 0; i < (dstW >> 1); i++) {
920 int Y1 = (buf0[i * 2] ) >> 2;
921 int Y2 = (buf0[i * 2 + 1]) >> 2;
922 int U = (ubuf0[i] + (-128 << 11)) >> 2;
923 int V = (vbuf0[i] + (-128 << 11)) >> 2;
926 Y1 -= c->yuv2rgb_y_offset;
927 Y2 -= c->yuv2rgb_y_offset;
928 Y1 *= c->yuv2rgb_y_coeff;
929 Y2 *= c->yuv2rgb_y_coeff;
933 R = V * c->yuv2rgb_v2r_coeff;
934 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
935 B = U * c->yuv2rgb_u2b_coeff;
937 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
938 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
939 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
940 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
941 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
942 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
946 for (i = 0; i < (dstW >> 1); i++) {
947 int Y1 = (buf0[i * 2] ) >> 2;
948 int Y2 = (buf0[i * 2 + 1]) >> 2;
949 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
950 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
953 Y1 -= c->yuv2rgb_y_offset;
954 Y2 -= c->yuv2rgb_y_offset;
955 Y1 *= c->yuv2rgb_y_coeff;
956 Y2 *= c->yuv2rgb_y_coeff;
960 R = V * c->yuv2rgb_v2r_coeff;
961 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
962 B = U * c->yuv2rgb_u2b_coeff;
964 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
965 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
966 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
967 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
968 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
969 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit RGB/BGR output in both endiannesses, from the rgb48 templates. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
984 static av_always_inline void
985 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
986 unsigned U, unsigned V, unsigned A1, unsigned A2,
987 const void *_r, const void *_g, const void *_b, int y,
988 enum PixelFormat target, int hasAlpha)
990 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
991 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
992 uint32_t *dest = (uint32_t *) _dest;
993 const uint32_t *r = (const uint32_t *) _r;
994 const uint32_t *g = (const uint32_t *) _g;
995 const uint32_t *b = (const uint32_t *) _b;
998 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1000 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1001 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1004 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1006 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1007 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1009 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1010 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1013 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1014 uint8_t *dest = (uint8_t *) _dest;
1015 const uint8_t *r = (const uint8_t *) _r;
1016 const uint8_t *g = (const uint8_t *) _g;
1017 const uint8_t *b = (const uint8_t *) _b;
1019 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1020 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1022 dest[i * 6 + 0] = r_b[Y1];
1023 dest[i * 6 + 1] = g[Y1];
1024 dest[i * 6 + 2] = b_r[Y1];
1025 dest[i * 6 + 3] = r_b[Y2];
1026 dest[i * 6 + 4] = g[Y2];
1027 dest[i * 6 + 5] = b_r[Y2];
1030 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1031 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1032 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1033 uint16_t *dest = (uint16_t *) _dest;
1034 const uint16_t *r = (const uint16_t *) _r;
1035 const uint16_t *g = (const uint16_t *) _g;
1036 const uint16_t *b = (const uint16_t *) _b;
1037 int dr1, dg1, db1, dr2, dg2, db2;
1039 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1040 dr1 = dither_2x2_8[ y & 1 ][0];
1041 dg1 = dither_2x2_4[ y & 1 ][0];
1042 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1043 dr2 = dither_2x2_8[ y & 1 ][1];
1044 dg2 = dither_2x2_4[ y & 1 ][1];
1045 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1046 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1047 dr1 = dither_2x2_8[ y & 1 ][0];
1048 dg1 = dither_2x2_8[ y & 1 ][1];
1049 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1050 dr2 = dither_2x2_8[ y & 1 ][1];
1051 dg2 = dither_2x2_8[ y & 1 ][0];
1052 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1054 dr1 = dither_4x4_16[ y & 3 ][0];
1055 dg1 = dither_4x4_16[ y & 3 ][1];
1056 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1057 dr2 = dither_4x4_16[ y & 3 ][1];
1058 dg2 = dither_4x4_16[ y & 3 ][0];
1059 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1062 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1063 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1064 } else /* 8/4-bit */ {
1065 uint8_t *dest = (uint8_t *) _dest;
1066 const uint8_t *r = (const uint8_t *) _r;
1067 const uint8_t *g = (const uint8_t *) _g;
1068 const uint8_t *b = (const uint8_t *) _b;
1069 int dr1, dg1, db1, dr2, dg2, db2;
1071 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1072 const uint8_t * const d64 = dither_8x8_73[y & 7];
1073 const uint8_t * const d32 = dither_8x8_32[y & 7];
1074 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1075 db1 = d64[(i * 2 + 0) & 7];
1076 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1077 db2 = d64[(i * 2 + 1) & 7];
1079 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1080 const uint8_t * const d128 = dither_8x8_220[y & 7];
1081 dr1 = db1 = d128[(i * 2 + 0) & 7];
1082 dg1 = d64[(i * 2 + 0) & 7];
1083 dr2 = db2 = d128[(i * 2 + 1) & 7];
1084 dg2 = d64[(i * 2 + 1) & 7];
1087 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1088 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1089 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1091 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1092 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Generic vertical-filter YUV->RGB output template: each output luma pair
 * (Y1, Y2) and the shared chroma pair (U, V) are accumulated over
 * lumFilterSize / chrFilterSize filter taps, clipped to 8 bits, and then
 * written through yuv2rgb_write() using the context's R/G/B lookup tables.
 * 'target' selects the packed output pixel format; 'hasAlpha' enables the
 * alpha accumulation from alpSrc with the luma filter coefficients.
 */
1097 static av_always_inline void
1098 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1099 const int16_t **lumSrc, int lumFilterSize,
1100 const int16_t *chrFilter, const int16_t **chrUSrc,
1101 const int16_t **chrVSrc, int chrFilterSize,
1102 const int16_t **alpSrc, uint8_t *dest, int dstW,
1103 int y, enum PixelFormat target, int hasAlpha)
1107 for (i = 0; i < (dstW >> 1); i++) {
1113 int av_unused A1, A2;
1114 const void *r, *g, *b;
1116 for (j = 0; j < lumFilterSize; j++) {
1117 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1118 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1120 for (j = 0; j < chrFilterSize; j++) {
1121 U += chrUSrc[j][i] * chrFilter[j];
1122 V += chrVSrc[j][i] * chrFilter[j];
/* cheap overflow test: clip only if some component left the 8-bit range */
1128 if ((Y1 | Y2 | U | V) & 0x100) {
1129 Y1 = av_clip_uint8(Y1);
1130 Y2 = av_clip_uint8(Y2);
1131 U = av_clip_uint8(U);
1132 V = av_clip_uint8(V);
/* alpha is vertically filtered with the same coefficients as luma */
1137 for (j = 0; j < lumFilterSize; j++) {
1138 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1139 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1143 if ((A1 | A2) & 0x100) {
1144 A1 = av_clip_uint8(A1);
1145 A2 = av_clip_uint8(A2);
1149 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1151 g = (c->table_gU[U] + c->table_gV[V]);
1154 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1155 r, g, b, y, target, hasAlpha);
/*
 * Two-line (bilinear) YUV->RGB template: blends buf[0]/buf[1] (and the
 * chroma / alpha line pairs) with 12-bit weights, where 'yalpha'/'uvalpha'
 * weight the second line and (4095 - weight) the first; >>19 rescales the
 * 15-bit samples times the 12-bit weight back to 8 bits.
 */
1159 static av_always_inline void
1160 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1161 const int16_t *ubuf[2], const int16_t *vbuf[2],
1162 const int16_t *abuf[2], uint8_t *dest, int dstW,
1163 int yalpha, int uvalpha, int y,
1164 enum PixelFormat target, int hasAlpha)
1166 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1167 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1168 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1169 *abuf0 = hasAlpha ? abuf[0] : NULL,
1170 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights: yalpha1 + yalpha == 4095 (12-bit unity) */
1171 int yalpha1 = 4095 - yalpha;
1172 int uvalpha1 = 4095 - uvalpha;
1175 for (i = 0; i < (dstW >> 1); i++) {
1176 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1177 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1178 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1179 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1181 const void *r = c->table_rV[V],
1182 *g = (c->table_gU[U] + c->table_gV[V]),
1183 *b = c->table_bU[U];
1186 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1187 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1190 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1191 r, g, b, y, target, hasAlpha);
/*
 * Single-line (unscaled vertical) YUV->RGB template. Luma comes straight
 * from buf0 (>>7 rescales 15-bit samples to 8 bits). For chroma, a small
 * uvalpha (< 2048) selects one chroma line only, otherwise the two chroma
 * lines are averaged ((a + b) >> 8 combines the /2 with the >>7 rescale).
 */
1195 static av_always_inline void
1196 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1197 const int16_t *ubuf[2], const int16_t *vbuf[2],
1198 const int16_t *abuf0, uint8_t *dest, int dstW,
1199 int uvalpha, int y, enum PixelFormat target,
1202 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1203 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1206 if (uvalpha < 2048) {
1207 for (i = 0; i < (dstW >> 1); i++) {
1208 int Y1 = buf0[i * 2] >> 7;
1209 int Y2 = buf0[i * 2 + 1] >> 7;
1210 int U = ubuf1[i] >> 7;
1211 int V = vbuf1[i] >> 7;
1213 const void *r = c->table_rV[V],
1214 *g = (c->table_gU[U] + c->table_gV[V]),
1215 *b = c->table_bU[U];
1218 A1 = abuf0[i * 2 ] >> 7;
1219 A2 = abuf0[i * 2 + 1] >> 7;
1222 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1223 r, g, b, y, target, hasAlpha);
/* uvalpha >= 2048: average the two chroma source lines */
1226 for (i = 0; i < (dstW >> 1); i++) {
1227 int Y1 = buf0[i * 2] >> 7;
1228 int Y2 = buf0[i * 2 + 1] >> 7;
1229 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1230 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1232 const void *r = c->table_rV[V],
1233 *g = (c->table_gU[U] + c->table_gV[V]),
1234 *b = c->table_bU[U];
1237 A1 = abuf0[i * 2 ] >> 7;
1238 A2 = abuf0[i * 2 + 1] >> 7;
1241 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1242 r, g, b, y, target, hasAlpha);
/*
 * Instantiation helpers: YUV2RGBWRAPPERX generates the N-tap (_X_c) entry
 * point for a given pixel format / alpha mode; YUV2RGBWRAPPER additionally
 * generates the two-line (_2_c) and single-line (_1_c) entry points. All
 * three simply forward to the shared templates above with 'fmt' and
 * 'hasAlpha' bound at compile time.
 * (No comments inside: the macro bodies are backslash-continued.)
 */
1247 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1248 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1249 const int16_t **lumSrc, int lumFilterSize, \
1250 const int16_t *chrFilter, const int16_t **chrUSrc, \
1251 const int16_t **chrVSrc, int chrFilterSize, \
1252 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1255 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1256 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1257 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1259 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1260 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1261 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1262 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1263 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1264 int yalpha, int uvalpha, int y) \
1266 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1267 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1270 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1271 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1272 const int16_t *abuf0, uint8_t *dest, int dstW, \
1273 int uvalpha, int y) \
1275 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1276 dstW, uvalpha, y, fmt, hasAlpha); \
/*
 * Concrete output functions for the chroma-subsampled RGB path. The 32-bit
 * variants come in runtime-alpha (alpha iff CONFIG_SWSCALE_ALPHA and an
 * alpha plane buffer exists), always-alpha (a32*) and never-alpha (x32*)
 * flavors; the remaining formats never carry alpha.
 */
1280 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1281 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1283 #if CONFIG_SWSCALE_ALPHA
1284 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1285 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1287 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1288 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1290 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1291 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1292 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1293 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1294 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1295 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1296 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1297 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-resolution YUV->RGB template: one U/V sample per output
 * pixel (no chroma subsampling on output). R, G and B are computed
 * arithmetically from the context's yuv2rgb_* coefficients instead of
 * lookup tables. 'step' is the bytes-per-pixel advance: 3 for 24-bit
 * RGB/BGR, 4 for the 32-bit formats.
 */
1299 static av_always_inline void
1300 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1301 const int16_t **lumSrc, int lumFilterSize,
1302 const int16_t *chrFilter, const int16_t **chrUSrc,
1303 const int16_t **chrVSrc, int chrFilterSize,
1304 const int16_t **alpSrc, uint8_t *dest,
1305 int dstW, int y, enum PixelFormat target, int hasAlpha)
1308 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1310 for (i = 0; i < dstW; i++) {
/* rounding bias plus removal of the 128 chroma offset, in 19-bit scale */
1313 int U = (1<<9)-(128 << 19);
1314 int V = (1<<9)-(128 << 19);
1318 for (j = 0; j < lumFilterSize; j++) {
1319 Y += lumSrc[j][i] * lumFilter[j];
1321 for (j = 0; j < chrFilterSize; j++) {
1322 U += chrUSrc[j][i] * chrFilter[j];
1323 V += chrVSrc[j][i] * chrFilter[j];
1330 for (j = 0; j < lumFilterSize; j++) {
1331 A += alpSrc[j][i] * lumFilter[j];
1335 A = av_clip_uint8(A);
1337 Y -= c->yuv2rgb_y_offset;
1338 Y *= c->yuv2rgb_y_coeff;
1340 R = Y + V*c->yuv2rgb_v2r_coeff;
1341 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1342 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when a component overflowed the 30-bit working range */
1343 if ((R | G | B) & 0xC0000000) {
1344 R = av_clip_uintp2(R, 30);
1345 G = av_clip_uintp2(G, 30);
1346 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on whether format is A...|...A */
1351 dest[0] = hasAlpha ? A : 255;
1365 dest[3] = hasAlpha ? A : 255;
1368 dest[0] = hasAlpha ? A : 255;
1382 dest[3] = hasAlpha ? A : 255;
/*
 * Concrete output functions for the full-chroma RGB path (N-tap entry
 * points only). Same alpha-flavor scheme as the subsampled wrappers:
 * runtime alpha, forced alpha (a*), forced opaque (x*), plus 24-bit.
 */
1390 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1391 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1392 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1393 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1395 #if CONFIG_SWSCALE_ALPHA
1396 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1397 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1398 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1399 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1401 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1402 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1403 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1404 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1406 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1407 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/* Fill a width x height rectangle of 'plane', starting at row 'y', with the
 * constant byte 'val' (one memset per row, rows 'stride' bytes apart). */
1409 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1410 int width, int height,
1414 uint8_t *ptr = plane + stride*y;
1415 for (i=0; i<height; i++) {
1416 memset(ptr, val, width);
/* Read one 16-bit component honoring the source endianness. */
1421 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* For BGR48 input the first/third components are swapped relative to RGB48;
 * these aliases resolve r_b/b_r to the correct channel for 'origin'. */
1423 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1424 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * 48-bit RGB/BGR -> 16-bit luma, using the BT.601-derived RY/GY/BY
 * coefficients with rounding.
 */
1426 static av_always_inline void
1427 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1428 enum PixelFormat origin)
1431 for (i = 0; i < width; i++) {
1432 unsigned int r_b = input_pixel(&src[i*3+0]);
1433 unsigned int g = input_pixel(&src[i*3+1]);
1434 unsigned int b_r = input_pixel(&src[i*3+2]);
1436 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * 48-bit RGB/BGR -> 16-bit chroma (U, V), one chroma sample per input
 * pixel, reading from src1 (src2 is unused here).
 */
1440 static av_always_inline void
1441 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1442 const uint16_t *src1, const uint16_t *src2,
1443 int width, enum PixelFormat origin)
1447 for (i = 0; i < width; i++) {
1448 int r_b = input_pixel(&src1[i*3+0]);
1449 int g = input_pixel(&src1[i*3+1]);
1450 int b_r = input_pixel(&src1[i*3+2]);
1452 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1453 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Same as rgb48ToUV_c_template but horizontally downsampled by 2:
 * each output chroma sample is computed from the rounded average of two
 * adjacent input pixels.
 */
1457 static av_always_inline void
1458 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1459 const uint16_t *src1, const uint16_t *src2,
1460 int width, enum PixelFormat origin)
1464 for (i = 0; i < width; i++) {
1465 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1466 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1467 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1469 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1470 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generates the three public 48-bit input converters (ToY, ToUV,
 * ToUV_half) for one pattern (rgb/bgr) and endianness, casting the
 * byte-based scaler API pointers to uint16_t and forwarding to the
 * templates above. Instantiated below for all four RGB48/BGR48 variants.
 * (No comments inside: the macro body is backslash-continued.)
 */
1478 #define rgb48funcs(pattern, BE_LE, origin) \
1479 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
1480 int width, uint32_t *unused) \
1482 const uint16_t *src = (const uint16_t *) _src; \
1483 uint16_t *dst = (uint16_t *) _dst; \
1484 rgb48ToY_c_template(dst, src, width, origin); \
1487 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1488 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1489 int width, uint32_t *unused) \
1491 const uint16_t *src1 = (const uint16_t *) _src1, \
1492 *src2 = (const uint16_t *) _src2; \
1493 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1494 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1497 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1498 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1499 int width, uint32_t *unused) \
1501 const uint16_t *src1 = (const uint16_t *) _src1, \
1502 *src2 = (const uint16_t *) _src2; \
1503 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1504 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1507 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1508 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1509 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1510 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Load one source pixel: 32-bit aligned read for the 8888 formats,
 * endian-aware 16-bit read otherwise. */
1512 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1513 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1514 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic 16/32-bit packed RGB -> luma template. The shr/shg/shb shifts,
 * maskr/maskg/maskb masks and shp pre-shift describe the channel layout;
 * rsh/gsh/bsh scale the per-channel coefficients and S is the total
 * fixed-point precision of the sum (output is left at 15-bit via >>(S-6)).
 */
1516 static av_always_inline void
1517 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1518 int width, enum PixelFormat origin,
1519 int shr, int shg, int shb, int shp,
1520 int maskr, int maskg, int maskb,
1521 int rsh, int gsh, int bsh, int S)
1523 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1524 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
1527 for (i = 0; i < width; i++) {
1528 int px = input_pixel(i) >> shp;
1529 int b = (px & maskb) >> shb;
1530 int g = (px & maskg) >> shg;
1531 int r = (px & maskr) >> shr;
1533 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/*
 * Generic 16/32-bit packed RGB -> chroma template, one U/V pair per input
 * pixel. Parameters mirror rgb16_32ToY_c_template; the rounding constant
 * additionally carries the 128-level chroma bias (256 at half precision).
 */
1537 static av_always_inline void
1538 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1539 const uint8_t *src, int width,
1540 enum PixelFormat origin,
1541 int shr, int shg, int shb, int shp,
1542 int maskr, int maskg, int maskb,
1543 int rsh, int gsh, int bsh, int S)
1545 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1546 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1547 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
1550 for (i = 0; i < width; i++) {
1551 int px = input_pixel(i) >> shp;
1552 int b = (px & maskb) >> shb;
1553 int g = (px & maskg) >> shg;
1554 int r = (px & maskr) >> shr;
1556 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1557 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/*
 * 2:1 horizontally-downsampled variant of rgb16_32ToUV_c_template.
 * Two adjacent pixels are summed before the mask/shift extraction: the
 * green mask is widened (maskgx covers everything outside R and B) so
 * the sum of two pixels can be split back into G and R+B without the
 * per-pixel extraction; masks are doubled to match the summed range.
 */
1561 static av_always_inline void
1562 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1563 const uint8_t *src, int width,
1564 enum PixelFormat origin,
1565 int shr, int shg, int shb, int shp,
1566 int maskr, int maskg, int maskb,
1567 int rsh, int gsh, int bsh, int S)
1569 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1570 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1571 maskgx = ~(maskr | maskb);
1572 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
/* widen masks: a two-pixel sum needs one extra bit per channel */
1575 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1576 for (i = 0; i < width; i++) {
1577 int px0 = input_pixel(2 * i + 0) >> shp;
1578 int px1 = input_pixel(2 * i + 1) >> shp;
1579 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1580 int rb = px0 + px1 - g;
1582 b = (rb & maskb) >> shb;
/* 565-style layouts (and pre-shifted ones) need the extra green handling */
1583 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1584 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1587 g = (g & maskg) >> shg;
1589 r = (rb & maskr) >> shr;
1591 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1592 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/*
 * Generates the three public 16/32-bit packed RGB input converters
 * (ToY, ToUV, ToUV_half) for one layout, binding its shifts, masks and
 * precision to the templates above.
 * (No comments inside: the macro body is backslash-continued.)
 */
1598 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1599 maskg, maskb, rsh, gsh, bsh, S) \
1600 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
1601 int width, uint32_t *unused) \
1603 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
1604 shr, shg, shb, shp, \
1605 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1608 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1609 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1610 int width, uint32_t *unused) \
1612 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1613 shr, shg, shb, shp, \
1614 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1617 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1618 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1619 int width, uint32_t *unused) \
1621 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1622 shr, shg, shb, shp, \
1623 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/*
 * Layout table: per-format channel shifts, masks and coefficient scaling.
 * 32-bit formats get S = RGB2YUV_SHIFT+8 (8-bit channels), 565 formats
 * +8 with 5/6/5 masks, 555 formats +7 with 5/5/5 masks; the _1 variants
 * pre-shift by 8 to skip the leading alpha byte.
 */
1626 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1627 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1628 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1629 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1630 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1631 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1632 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1633 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1634 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1635 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1636 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1637 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
/*
 * Planar GBR (8-bit, separate g/b/r planes) -> chroma, horizontally
 * downsampled 2:1: each output sample uses the sum of two adjacent
 * plane samples (hence the extra +1 in the shift at the end).
 */
1639 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
1640 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1641 int width, enum PixelFormat origin)
1644 for (i = 0; i < width; i++) {
1645 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
1646 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
1647 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
1649 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
1650 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/* Extract the alpha channel (first byte of each ABGR pixel), scaled from
 * 8-bit to the internal 14-bit range via <<6. */
1654 static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1657 for (i=0; i<width; i++) {
1658 dst[i]= src[4*i]<<6;
/* Extract the alpha channel (fourth byte of each RGBA pixel), scaled from
 * 8-bit to the internal 14-bit range via <<6. */
1662 static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1665 for (i=0; i<width; i++) {
1666 dst[i]= src[4*i+3]<<6;
/* Paletted input -> alpha: look each index up in 'pal' and take the top
 * (alpha) byte of the 32-bit palette entry, scaled to 14 bits. */
1670 static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
1673 for (i=0; i<width; i++) {
1676 dst[i]= (pal[d] >> 24)<<6;
/* Paletted input -> luma: low byte of each palette entry, scaled to 14
 * bits. NOTE(review): 'width' is declared long here while the sibling
 * converters use int — looks like a leftover; confirm against the
 * function-pointer type it is assigned to. */
1680 static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
1683 for (i=0; i<width; i++) {
1686 dst[i]= (pal[d] & 0xFF)<<6;
/* Paletted input -> chroma: bytes 1 and 2 of each palette entry are U and
 * V, scaled to 14 bits. Both source pointers must alias the same data.
 * NOTE(review): dstU is uint16_t* but dstV is int16_t* — asymmetric;
 * verify against the declared lumToYV12/chrToYV12 pointer types. */
1690 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1691 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1692 int width, uint32_t *pal)
1695 assert(src1 == src2);
1696 for (i=0; i<width; i++) {
1697 int p= pal[src1[i]];
1699 dstU[i]= (uint8_t)(p>> 8)<<6;
1700 dstV[i]= (uint8_t)(p>>16)<<6;
/* 1 bpp (monowhite) -> luma: expand each bit MSB-first to 0 or 16383
 * (14-bit full scale); the tail loop handles widths not divisible by 8.
 * (The white/black polarity handling is in lines not shown here.) */
1704 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1707 for (i=0; i<width/8; i++) {
1710 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1714 for(j=0; j<(width&7); j++)
1715 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* 1 bpp (monoblack) -> luma: same bit expansion as monowhite2Y_c, for the
 * opposite polarity format (the differing byte load is not shown here). */
1719 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1722 for (i=0; i<width/8; i++) {
1725 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1729 for(j=0; j<(width&7); j++)
1730 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1734 //FIXME yuy2* can read up to 7 samples too much
/* YUYV packed -> luma plane (Y is every second byte, starting at offset 0). */
1736 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1740 for (i=0; i<width; i++)
/* YUYV packed -> separate U/V planes: U at byte 1, V at byte 3 of each
 * 4-byte pair. Both source pointers must alias the same data. */
1744 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1745 const uint8_t *src2, int width, uint32_t *unused)
1748 for (i=0; i<width; i++) {
1749 dstU[i]= src1[4*i + 1];
1750 dstV[i]= src1[4*i + 3];
1752 assert(src1 == src2);
/* Byte-swap a line of 16-bit luma samples (endianness conversion). */
1755 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1758 const uint16_t *src = (const uint16_t *) _src;
1759 uint16_t *dst = (uint16_t *) _dst;
1760 for (i=0; i<width; i++) {
1761 dst[i] = av_bswap16(src[i]);
/* Byte-swap a line of 16-bit chroma samples from two source planes. */
1765 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
1766 const uint8_t *_src2, int width, uint32_t *unused)
1769 const uint16_t *src1 = (const uint16_t *) _src1,
1770 *src2 = (const uint16_t *) _src2;
1771 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1772 for (i=0; i<width; i++) {
1773 dstU[i] = av_bswap16(src1[i]);
1774 dstV[i] = av_bswap16(src2[i]);
1778 /* This is almost identical to the previous, and exists only because
1779 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY packed -> luma plane (Y is every second byte, starting at offset 1). */
1780 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1784 for (i=0; i<width; i++)
/* UYVY packed -> separate U/V planes: U at byte 0, V at byte 2 of each
 * 4-byte pair. Both source pointers must alias the same data. */
1788 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1789 const uint8_t *src2, int width, uint32_t *unused)
1792 for (i=0; i<width; i++) {
1793 dstU[i]= src1[4*i + 0];
1794 dstV[i]= src1[4*i + 2];
1796 assert(src1 == src2);
/* De-interleave a semi-planar chroma line: even bytes go to dst1, odd
 * bytes to dst2. Shared helper for NV12 (UV order) and NV21 (VU order). */
1799 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1800 const uint8_t *src, int width)
1803 for (i = 0; i < width; i++) {
1804 dst1[i] = src[2*i+0];
1805 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U,V — U lands in dstU, V in dstV. */
1809 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1810 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1811 int width, uint32_t *unused)
1813 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved chroma is V,U — destinations swapped vs. NV12. */
1816 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1817 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1818 int width, uint32_t *unused)
1820 nvXXtoUV_c(dstV, dstU, src1, width);
/* Redefined here for the following converters (endian-aware 16-bit load). */
1823 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed 24-bit BGR -> 15-bit luma (output scaled via the >>(SHIFT-6)). */
1825 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
1826 int width, uint32_t *unused)
1829 for (i=0; i<width; i++) {
1834 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed 24-bit BGR -> 15-bit chroma, one U/V pair per input pixel.
 * The 256<<(SHIFT-1) term is the 128 chroma bias at output scale. */
1838 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1839 const uint8_t *src2, int width, uint32_t *unused)
1842 for (i=0; i<width; i++) {
1843 int b= src1[3*i + 0];
1844 int g= src1[3*i + 1];
1845 int r= src1[3*i + 2];
1847 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1848 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1850 assert(src1 == src2);
/* Packed 24-bit BGR -> chroma, horizontally downsampled 2:1: each output
 * sample is built from the sum of two adjacent pixels (shift reduced by 1). */
1853 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1854 const uint8_t *src2, int width, uint32_t *unused)
1857 for (i=0; i<width; i++) {
1858 int b= src1[6*i + 0] + src1[6*i + 3];
1859 int g= src1[6*i + 1] + src1[6*i + 4];
1860 int r= src1[6*i + 2] + src1[6*i + 5];
1862 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1863 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1865 assert(src1 == src2);
/* Packed 24-bit RGB -> 15-bit luma (channel order mirrored vs. bgr24ToY_c). */
1868 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1872 for (i=0; i<width; i++) {
1877 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Packed 24-bit RGB -> 15-bit chroma, one U/V pair per input pixel. */
1881 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1882 const uint8_t *src2, int width, uint32_t *unused)
1886 for (i=0; i<width; i++) {
1887 int r= src1[3*i + 0];
1888 int g= src1[3*i + 1];
1889 int b= src1[3*i + 2];
1891 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1892 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* Packed 24-bit RGB -> chroma, horizontally downsampled 2:1 (sums two
 * adjacent pixels per output sample, like bgr24ToUV_half_c). */
1896 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1897 const uint8_t *src2, int width, uint32_t *unused)
1901 for (i=0; i<width; i++) {
1902 int r= src1[6*i + 0] + src1[6*i + 3];
1903 int g= src1[6*i + 1] + src1[6*i + 4];
1904 int b= src1[6*i + 2] + src1[6*i + 5];
1906 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1907 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Planar 8-bit RGB (src[0..2] planes) -> 15-bit luma. */
1911 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
1914 for (i = 0; i < width; i++) {
1919 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar 16-bit little-endian GBR (plane order g, b, r) -> 16-bit luma. */
1923 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1926 const uint16_t **src = (const uint16_t **) _src;
1927 uint16_t *dst = (uint16_t *) _dst;
1928 for (i = 0; i < width; i++) {
1929 int g = AV_RL16(src[0] + i);
1930 int b = AV_RL16(src[1] + i);
1931 int r = AV_RL16(src[2] + i);
1933 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 16-bit big-endian GBR (plane order g, b, r) -> 16-bit luma. */
1937 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1940 const uint16_t **src = (const uint16_t **) _src;
1941 uint16_t *dst = (uint16_t *) _dst;
1942 for (i = 0; i < width; i++) {
1943 int g = AV_RB16(src[0] + i);
1944 int b = AV_RB16(src[1] + i);
1945 int r = AV_RB16(src[2] + i);
1947 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 8-bit RGB -> 15-bit chroma (U, V), one sample per input pixel. */
1951 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
1954 for (i = 0; i < width; i++) {
1959 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
1960 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar 16-bit little-endian GBR -> 16-bit chroma (U, V). */
1964 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1967 const uint16_t **src = (const uint16_t **) _src;
1968 uint16_t *dstU = (uint16_t *) _dstU;
1969 uint16_t *dstV = (uint16_t *) _dstV;
1970 for (i = 0; i < width; i++) {
1971 int g = AV_RL16(src[0] + i);
1972 int b = AV_RL16(src[1] + i);
1973 int r = AV_RL16(src[2] + i);
1975 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1976 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar 16-bit big-endian GBR -> 16-bit chroma (U, V). */
1980 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1983 const uint16_t **src = (const uint16_t **) _src;
1984 uint16_t *dstU = (uint16_t *) _dstU;
1985 uint16_t *dstV = (uint16_t *) _dstV;
1986 for (i = 0; i < width; i++) {
1987 int g = AV_RB16(src[0] + i);
1988 int b = AV_RB16(src[1] + i);
1989 int r = AV_RB16(src[2] + i);
1991 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1992 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/*
 * Horizontal scaler for 16-bit input producing 19-bit output: each output
 * sample is the filter dot-product over 'filterSize' taps starting at
 * filterPos[i]. The shift 'sh' depends on the source bit depth; low-depth
 * RGB/PAL8 sources get special handling (branch body not shown here).
 */
1996 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1997 const int16_t *filter,
1998 const int16_t *filterPos, int filterSize)
2001 int32_t *dst = (int32_t *) _dst;
2002 const uint16_t *src = (const uint16_t *) _src;
2003 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2006 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2009 for (i = 0; i < dstW; i++) {
2011 int srcPos = filterPos[i];
2014 for (j = 0; j < filterSize; j++) {
2015 val += src[srcPos + j] * filter[filterSize * i + j];
2017 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2018 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/*
 * Horizontal scaler for 16-bit input producing 15-bit output; same filter
 * dot-product as hScale16To19_c, with 'sh' forced to 13 for RGB/PAL8
 * sources and otherwise derived from the source depth.
 */
2022 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2023 const int16_t *filter,
2024 const int16_t *filterPos, int filterSize)
2027 const uint16_t *src = (const uint16_t *) _src;
2028 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2031 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2033 for (i = 0; i < dstW; i++) {
2035 int srcPos = filterPos[i];
2038 for (j = 0; j < filterSize; j++) {
2039 val += src[srcPos + j] * filter[filterSize * i + j];
2041 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2042 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2046 // bilinear / bicubic scaling
/* Horizontal scaler for 8-bit input -> 15-bit output: 14-bit filter taps,
 * >>7 rescale, clamped because the cubic filter can overshoot. */
2047 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2048 const int16_t *filter, const int16_t *filterPos,
2052 for (i=0; i<dstW; i++) {
2054 int srcPos= filterPos[i];
2056 for (j=0; j<filterSize; j++) {
2057 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2059 //filter += hFilterSize;
2060 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Horizontal scaler for 8-bit input -> 19-bit output (int32 destination);
 * same structure as hScale8To15_c with a >>3 rescale. */
2065 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2066 const int16_t *filter, const int16_t *filterPos,
2070 int32_t *dst = (int32_t *) _dst;
2071 for (i=0; i<dstW; i++) {
2073 int srcPos= filterPos[i];
2075 for (j=0; j<filterSize; j++) {
2076 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2078 //filter += hFilterSize;
2079 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2084 //FIXME all pal and rgb srcFormats could do this conversion as well
2085 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand 15-bit chroma from limited (MPEG) to full (JPEG) range in place;
 * the FFMIN guards against overflow of the fixed-point multiply. */
2086 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2089 for (i = 0; i < width; i++) {
2090 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
2091 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/* Compress 15-bit chroma from full (JPEG) to limited (MPEG) range in place. */
2094 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
2097 for (i = 0; i < width; i++) {
2098 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
2099 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/* Expand 15-bit luma from limited (MPEG) to full (JPEG) range in place. */
2102 static void lumRangeToJpeg_c(int16_t *dst, int width)
2105 for (i = 0; i < width; i++)
2106 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress 15-bit luma from full (JPEG) to limited (MPEG) range in place. */
2108 static void lumRangeFromJpeg_c(int16_t *dst, int width)
2111 for (i = 0; i < width; i++)
2112 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* 19-bit (int32 buffer) variant of chrRangeToJpeg_c; thresholds and
 * offsets are the 15-bit constants scaled by 16 (<<4). */
2115 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2118 int32_t *dstU = (int32_t *) _dstU;
2119 int32_t *dstV = (int32_t *) _dstV;
2120 for (i = 0; i < width; i++) {
2121 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
2122 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/* 19-bit-intermediate variant of chrRangeFromJpeg_c (constants scaled <<4),
 * operating on the int16_t* buffers viewed as int32_t*. */
2125 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
2128 int32_t *dstU = (int32_t *) _dstU;
2129 int32_t *dstV = (int32_t *) _dstV;
2130 for (i = 0; i < width; i++) {
2131 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2132 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/* 19-bit-intermediate variant of lumRangeToJpeg_c; multiplier and shift are
 * reduced by 4x/2 bits relative to the 15-bit version to avoid overflow. */
2135 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2138 int32_t *dst = (int32_t *) _dst;
2139 for (i = 0; i < width; i++)
2140 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/* 19-bit-intermediate variant of lumRangeFromJpeg_c; constants rescaled so
 * the intermediate products stay within int range. */
2142 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2145 int32_t *dst = (int32_t *) _dst;
2146 for (i = 0; i < width; i++)
2147 dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
/* Fast bilinear horizontal luma scaler using a 16.16 fixed-point position
 * (xpos); xalpha is the 7-bit interpolation weight between src[xx] and
 * src[xx+1]. The trailing loop rewrites output pixels whose right neighbour
 * would fall past srcW-1, replicating the last source sample.
 * NOTE(review): the listing omits the xpos += xInc advance and braces. */
2150 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2151 const uint8_t *src, int srcW, int xInc)
2154 unsigned int xpos=0; // 16.16 fixed-point source position
2155 for (i=0;i<dstWidth;i++) {
2156 register unsigned int xx=xpos>>16; // integer source index
2157 register unsigned int xalpha=(xpos&0xFFFF)>>9; // 7-bit blend weight
2158 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2161 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2162 dst[i] = src[srcW-1]*128; // edge replication for out-of-range taps
2165 // *** horizontal scale Y line to temp buffer
/* Horizontally scales one luma (or alpha, if isAlpha) line: first converts
 * the input line to 8-bit Y via toYV12/readLumPlanar into formatConvBuffer
 * when the source format needs it, then runs either the generic hyScale or
 * the fast bilinear path, and finally applies the range conversion (luma
 * only — convertRange is NULL for alpha).
 * NOTE(review): the listing omits some lines (the if() around toYV12,
 * closing braces, the convertRange NULL check); comments cover visible code. */
2166 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2167 const uint8_t *src_in[4], int srcW, int xInc,
2168 const int16_t *hLumFilter,
2169 const int16_t *hLumFilterPos, int hLumFilterSize,
2170 uint8_t *formatConvBuffer,
2171 uint32_t *pal, int isAlpha)
2173 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2174 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2175 const uint8_t *src = src_in[isAlpha ? 3 : 0]; // plane 3 holds alpha
2178 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
2179 src= formatConvBuffer;
2180 } else if (c->readLumPlanar && !isAlpha) {
2181 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2182 src = formatConvBuffer;
2185 if (!c->hyscale_fast) {
2186 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2187 } else { // fast bilinear upscale / crap downscale
2188 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2192 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c, applied to the U (src1/dst1) and V (src2/dst2) planes in
 * lockstep. Uses (xalpha^127) as the complementary 7-bit weight.
 * NOTE(review): the listing omits the xpos += xInc advance and braces. */
2195 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2196 int dstWidth, const uint8_t *src1,
2197 const uint8_t *src2, int srcW, int xInc)
2200 unsigned int xpos=0; // 16.16 fixed-point source position
2201 for (i=0;i<dstWidth;i++) {
2202 register unsigned int xx=xpos>>16;
2203 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2204 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2205 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2208 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2209 dst1[i] = src1[srcW-1]*128; // edge replication for out-of-range taps
2210 dst2[i] = src2[srcW-1]*128;
/* Horizontally scales one chroma line pair (U and V): converts the input via
 * chrToYV12/readChrPlanar into formatConvBuffer (U) and buf2 (V, placed
 * FFALIGN(srcW*2+78, 16) bytes in) when needed, then runs either the generic
 * per-plane hcScale or the combined fast bilinear path, and finally applies
 * the chroma range conversion if configured.
 * NOTE(review): the listing omits some lines (the if() around chrToYV12,
 * the src2 = buf2 assignments, closing braces); comments cover visible code. */
2214 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2215 const uint8_t *src_in[4],
2216 int srcW, int xInc, const int16_t *hChrFilter,
2217 const int16_t *hChrFilterPos, int hChrFilterSize,
2218 uint8_t *formatConvBuffer, uint32_t *pal)
2220 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2222 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); // V half of the scratch buffer
2223 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
2224 src1= formatConvBuffer;
2226 } else if (c->readChrPlanar) {
2227 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2228 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2229 src1= formatConvBuffer;
2233 if (!c->hcscale_fast) {
2234 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2235 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2236 } else { // fast bilinear upscale / crap downscale
2237 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2240 if (c->chrConvertRange)
2241 c->chrConvertRange(dst1, dst2, dstWidth);
/* Selects the C output functions for the destination format:
 *  - planar writers yuv2plane1 (1-tap) / yuv2planeX (multi-tap), picked by
 *    output bit depth (8 / 9-10 / 16) and endianness;
 *  - yuv2nv12cX for interleaved NV12/NV21 chroma;
 *  - packed writers yuv2packed1/2/X for RGB/BGR/gray/mono/YUYV/UYVY outputs,
 *    with a separate full-chroma-interpolation table under SWS_FULL_CHR_H_INT.
 * NOTE(review): the listing omits many lines (case labels, break statements,
 * #if !CONFIG_SMALL guards, braces); comments cover only the visible code. */
2244 static av_always_inline void
2245 find_c_packed_planar_out_funcs(SwsContext *c,
2246 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2247 yuv2interleavedX_fn *yuv2nv12cX,
2248 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2249 yuv2packedX_fn *yuv2packedX)
2251 enum PixelFormat dstFormat = c->dstFormat;
2253 if (is16BPS(dstFormat)) {
2254 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2255 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2256 } else if (is9_OR_10BPS(dstFormat)) {
2257 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) { // 9-bit
2258 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2259 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2261 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2262 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2265 *yuv2plane1 = yuv2plane1_8_c;
2266 *yuv2planeX = yuv2planeX_8_c;
2267 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2268 *yuv2nv12cX = yuv2nv12cX_c;
/* Full horizontal chroma interpolation: only the multi-tap packed writer
 * exists for these formats. */
2271 if(c->flags & SWS_FULL_CHR_H_INT) {
2272 switch (dstFormat) {
2275 *yuv2packedX = yuv2rgba32_full_X_c;
2277 #if CONFIG_SWSCALE_ALPHA
2279 *yuv2packedX = yuv2rgba32_full_X_c;
2281 #endif /* CONFIG_SWSCALE_ALPHA */
2283 *yuv2packedX = yuv2rgbx32_full_X_c;
2285 #endif /* !CONFIG_SMALL */
2289 *yuv2packedX = yuv2argb32_full_X_c;
2291 #if CONFIG_SWSCALE_ALPHA
2293 *yuv2packedX = yuv2argb32_full_X_c;
2295 #endif /* CONFIG_SWSCALE_ALPHA */
2297 *yuv2packedX = yuv2xrgb32_full_X_c;
2299 #endif /* !CONFIG_SMALL */
2303 *yuv2packedX = yuv2bgra32_full_X_c;
2305 #if CONFIG_SWSCALE_ALPHA
2307 *yuv2packedX = yuv2bgra32_full_X_c;
2309 #endif /* CONFIG_SWSCALE_ALPHA */
2311 *yuv2packedX = yuv2bgrx32_full_X_c;
2313 #endif /* !CONFIG_SMALL */
2317 *yuv2packedX = yuv2abgr32_full_X_c;
2319 #if CONFIG_SWSCALE_ALPHA
2321 *yuv2packedX = yuv2abgr32_full_X_c;
2323 #endif /* CONFIG_SWSCALE_ALPHA */
2325 *yuv2packedX = yuv2xbgr32_full_X_c;
2327 #endif /* !CONFIG_SMALL */
2330 *yuv2packedX = yuv2rgb24_full_X_c;
2333 *yuv2packedX = yuv2bgr24_full_X_c;
/* Normal (non-full-chroma) packed outputs: each format gets the 1-tap,
 * 2-tap, and multi-tap writers. */
2340 switch (dstFormat) {
2341 case PIX_FMT_RGB48LE:
2342 *yuv2packed1 = yuv2rgb48le_1_c;
2343 *yuv2packed2 = yuv2rgb48le_2_c;
2344 *yuv2packedX = yuv2rgb48le_X_c;
2346 case PIX_FMT_RGB48BE:
2347 *yuv2packed1 = yuv2rgb48be_1_c;
2348 *yuv2packed2 = yuv2rgb48be_2_c;
2349 *yuv2packedX = yuv2rgb48be_X_c;
2351 case PIX_FMT_BGR48LE:
2352 *yuv2packed1 = yuv2bgr48le_1_c;
2353 *yuv2packed2 = yuv2bgr48le_2_c;
2354 *yuv2packedX = yuv2bgr48le_X_c;
2356 case PIX_FMT_BGR48BE:
2357 *yuv2packed1 = yuv2bgr48be_1_c;
2358 *yuv2packed2 = yuv2bgr48be_2_c;
2359 *yuv2packedX = yuv2bgr48be_X_c;
2364 *yuv2packed1 = yuv2rgb32_1_c;
2365 *yuv2packed2 = yuv2rgb32_2_c;
2366 *yuv2packedX = yuv2rgb32_X_c;
2368 #if CONFIG_SWSCALE_ALPHA
2370 *yuv2packed1 = yuv2rgba32_1_c;
2371 *yuv2packed2 = yuv2rgba32_2_c;
2372 *yuv2packedX = yuv2rgba32_X_c;
2374 #endif /* CONFIG_SWSCALE_ALPHA */
2376 *yuv2packed1 = yuv2rgbx32_1_c;
2377 *yuv2packed2 = yuv2rgbx32_2_c;
2378 *yuv2packedX = yuv2rgbx32_X_c;
2380 #endif /* !CONFIG_SMALL */
2382 case PIX_FMT_RGB32_1:
2383 case PIX_FMT_BGR32_1:
2385 *yuv2packed1 = yuv2rgb32_1_1_c;
2386 *yuv2packed2 = yuv2rgb32_1_2_c;
2387 *yuv2packedX = yuv2rgb32_1_X_c;
2389 #if CONFIG_SWSCALE_ALPHA
2391 *yuv2packed1 = yuv2rgba32_1_1_c;
2392 *yuv2packed2 = yuv2rgba32_1_2_c;
2393 *yuv2packedX = yuv2rgba32_1_X_c;
2395 #endif /* CONFIG_SWSCALE_ALPHA */
2397 *yuv2packed1 = yuv2rgbx32_1_1_c;
2398 *yuv2packed2 = yuv2rgbx32_1_2_c;
2399 *yuv2packedX = yuv2rgbx32_1_X_c;
2401 #endif /* !CONFIG_SMALL */
2404 *yuv2packed1 = yuv2rgb24_1_c;
2405 *yuv2packed2 = yuv2rgb24_2_c;
2406 *yuv2packedX = yuv2rgb24_X_c;
2409 *yuv2packed1 = yuv2bgr24_1_c;
2410 *yuv2packed2 = yuv2bgr24_2_c;
2411 *yuv2packedX = yuv2bgr24_X_c;
2413 case PIX_FMT_RGB565LE:
2414 case PIX_FMT_RGB565BE:
2415 case PIX_FMT_BGR565LE:
2416 case PIX_FMT_BGR565BE:
2417 *yuv2packed1 = yuv2rgb16_1_c;
2418 *yuv2packed2 = yuv2rgb16_2_c;
2419 *yuv2packedX = yuv2rgb16_X_c;
2421 case PIX_FMT_RGB555LE:
2422 case PIX_FMT_RGB555BE:
2423 case PIX_FMT_BGR555LE:
2424 case PIX_FMT_BGR555BE:
2425 *yuv2packed1 = yuv2rgb15_1_c;
2426 *yuv2packed2 = yuv2rgb15_2_c;
2427 *yuv2packedX = yuv2rgb15_X_c;
2429 case PIX_FMT_RGB444LE:
2430 case PIX_FMT_RGB444BE:
2431 case PIX_FMT_BGR444LE:
2432 case PIX_FMT_BGR444BE:
2433 *yuv2packed1 = yuv2rgb12_1_c;
2434 *yuv2packed2 = yuv2rgb12_2_c;
2435 *yuv2packedX = yuv2rgb12_X_c;
2439 *yuv2packed1 = yuv2rgb8_1_c;
2440 *yuv2packed2 = yuv2rgb8_2_c;
2441 *yuv2packedX = yuv2rgb8_X_c;
2445 *yuv2packed1 = yuv2rgb4_1_c;
2446 *yuv2packed2 = yuv2rgb4_2_c;
2447 *yuv2packedX = yuv2rgb4_X_c;
2449 case PIX_FMT_RGB4_BYTE:
2450 case PIX_FMT_BGR4_BYTE:
2451 *yuv2packed1 = yuv2rgb4b_1_c;
2452 *yuv2packed2 = yuv2rgb4b_2_c;
2453 *yuv2packedX = yuv2rgb4b_X_c;
/* Non-RGB packed outputs (gray16, mono, packed 4:2:2). */
2457 switch (dstFormat) {
2458 case PIX_FMT_GRAY16BE:
2459 *yuv2packed1 = yuv2gray16BE_1_c;
2460 *yuv2packed2 = yuv2gray16BE_2_c;
2461 *yuv2packedX = yuv2gray16BE_X_c;
2463 case PIX_FMT_GRAY16LE:
2464 *yuv2packed1 = yuv2gray16LE_1_c;
2465 *yuv2packed2 = yuv2gray16LE_2_c;
2466 *yuv2packedX = yuv2gray16LE_X_c;
2468 case PIX_FMT_MONOWHITE:
2469 *yuv2packed1 = yuv2monowhite_1_c;
2470 *yuv2packed2 = yuv2monowhite_2_c;
2471 *yuv2packedX = yuv2monowhite_X_c;
2473 case PIX_FMT_MONOBLACK:
2474 *yuv2packed1 = yuv2monoblack_1_c;
2475 *yuv2packed2 = yuv2monoblack_2_c;
2476 *yuv2packedX = yuv2monoblack_X_c;
2478 case PIX_FMT_YUYV422:
2479 *yuv2packed1 = yuv2yuyv422_1_c;
2480 *yuv2packed2 = yuv2yuyv422_2_c;
2481 *yuv2packedX = yuv2yuyv422_X_c;
2483 case PIX_FMT_UYVY422:
2484 *yuv2packed1 = yuv2uyvy422_1_c;
2485 *yuv2packed2 = yuv2uyvy422_2_c;
2486 *yuv2packedX = yuv2uyvy422_X_c;
/* Compile-time switch for ring-buffer tracing; with DEBUG_SWSCALE_BUFFERS=0
 * the DEBUG_BUFFERS() calls compile to dead code the optimizer removes. */
2491 #define DEBUG_SWSCALE_BUFFERS 0
2492 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling entry point for one input slice.
 * Horizontally scales the source lines that are needed into the luma/chroma
 * ring buffers (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf), then vertically
 * scales + packs/interleaves them into dst, one destination line per
 * iteration. Returns the number of destination lines written.
 * NOTE(review): this listing omits many original lines (declarations, braces,
 * some statements); comments below cover only the visible code. */
2494 static int swScale(SwsContext *c, const uint8_t* src[],
2495 int srcStride[], int srcSliceY,
2496 int srcSliceH, uint8_t* dst[], int dstStride[])
2498 /* load a few things into local vars to make the code more readable? and faster */
2499 const int srcW= c->srcW;
2500 const int dstW= c->dstW;
2501 const int dstH= c->dstH;
2502 const int chrDstW= c->chrDstW;
2503 const int chrSrcW= c->chrSrcW;
2504 const int lumXInc= c->lumXInc;
2505 const int chrXInc= c->chrXInc;
2506 const enum PixelFormat dstFormat= c->dstFormat;
2507 const int flags= c->flags;
2508 int16_t *vLumFilterPos= c->vLumFilterPos;
2509 int16_t *vChrFilterPos= c->vChrFilterPos;
2510 int16_t *hLumFilterPos= c->hLumFilterPos;
2511 int16_t *hChrFilterPos= c->hChrFilterPos;
2512 int16_t *hLumFilter= c->hLumFilter;
2513 int16_t *hChrFilter= c->hChrFilter;
2514 int32_t *lumMmxFilter= c->lumMmxFilter;
2515 int32_t *chrMmxFilter= c->chrMmxFilter;
2516 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2517 const int vLumFilterSize= c->vLumFilterSize;
2518 const int vChrFilterSize= c->vChrFilterSize;
2519 const int hLumFilterSize= c->hLumFilterSize;
2520 const int hChrFilterSize= c->hChrFilterSize;
2521 int16_t **lumPixBuf= c->lumPixBuf;
2522 int16_t **chrUPixBuf= c->chrUPixBuf;
2523 int16_t **chrVPixBuf= c->chrVPixBuf;
2524 int16_t **alpPixBuf= c->alpPixBuf;
2525 const int vLumBufSize= c->vLumBufSize;
2526 const int vChrBufSize= c->vChrBufSize;
2527 uint8_t *formatConvBuffer= c->formatConvBuffer;
2528 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2529 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); // ceil-divide by the chroma subsampling
2531 uint32_t *pal=c->pal_yuv;
2532 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2534 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2535 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2536 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2537 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2538 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2539 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2541 /* vars which will change and which we need to store back in the context */
2543 int lumBufIndex= c->lumBufIndex;
2544 int chrBufIndex= c->chrBufIndex;
2545 int lastInLumBuf= c->lastInLumBuf;
2546 int lastInChrBuf= c->lastInChrBuf;
2548 if (isPacked(c->srcFormat)) {
2556 srcStride[3]= srcStride[0];
2558 srcStride[1]<<= c->vChrDrop;
2559 srcStride[2]<<= c->vChrDrop;
2561 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2562 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2563 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2564 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2565 srcSliceY, srcSliceH, dstY, dstH);
2566 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2567 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2569 if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
2570 static int warnedAlready=0; //FIXME move this into the context perhaps
2571 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2572 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2573 " ->cannot do aligned memory accesses anymore\n");
/* FIX(review): was (int) — casting a pointer to int truncates on 64-bit
 * targets and is implementation-defined (C11 6.3.2.3); uintptr_t is the
 * correct pointer-sized type for the alignment check. */
2578 if ((uintptr_t)dst[0]%16 || (uintptr_t)dst[1]%16 || (uintptr_t)dst[2]%16 || (uintptr_t)src[0]%16 || (uintptr_t)src[1]%16 || (uintptr_t)src[2]%16
2579 || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
2580 || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
2582 static int warnedAlready=0;
2583 int cpu_flags = av_get_cpu_flags();
2584 if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
2585 av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
2590 /* Note the user might start scaling the picture in the middle so this
2591 will not get executed. This is not really intended but works
2592 currently, so people might do it. */
2593 if (srcSliceY ==0) {
2601 if (!should_dither) {
2602 c->chrDither8 = c->lumDither8 = ff_sws_pb_64; // flat dither table for 8-bit sources
/* --- main per-destination-line loop --- */
2606 for (;dstY < dstH; dstY++) {
2607 const int chrDstY= dstY>>c->chrDstVSubSample;
2608 uint8_t *dest[4] = {
2609 dst[0] + dstStride[0] * dstY,
2610 dst[1] + dstStride[1] * chrDstY,
2611 dst[2] + dstStride[2] * chrDstY,
2612 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2614 int use_mmx_vfilter= c->use_mmx_vfilter;
2616 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2617 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2618 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2619 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2620 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2621 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2624 //handle holes (FAST_BILINEAR & weird filters)
2625 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2626 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2627 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2628 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2630 DEBUG_BUFFERS("dstY: %d\n", dstY);
2631 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2632 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2633 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2634 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2636 // Do we have enough lines in this slice to output the dstY line
2637 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2639 if (!enough_lines) {
2640 lastLumSrcY = srcSliceY + srcSliceH - 1; // buffer whatever the slice does provide
2641 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2642 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2643 lastLumSrcY, lastChrSrcY);
2646 //Do horizontal scaling
2647 while(lastInLumBuf < lastLumSrcY) {
2648 const uint8_t *src1[4] = {
2649 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2650 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2651 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2652 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2655 assert(lumBufIndex < 2*vLumBufSize);
2656 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2657 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2658 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2659 hLumFilter, hLumFilterPos, hLumFilterSize,
2662 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2663 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2664 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2668 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2669 lumBufIndex, lastInLumBuf);
2671 while(lastInChrBuf < lastChrSrcY) {
2672 const uint8_t *src1[4] = {
2673 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2674 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2675 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2676 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2679 assert(chrBufIndex < 2*vChrBufSize);
2680 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2681 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2682 //FIXME replace parameters through context struct (some at least)
2684 if (c->needs_hcscale)
2685 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2686 chrDstW, src1, chrSrcW, chrXInc,
2687 hChrFilter, hChrFilterPos, hChrFilterSize,
2688 formatConvBuffer, pal);
2690 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2691 chrBufIndex, lastInChrBuf);
2693 //wrap buf index around to stay inside the ring buffer
2694 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2695 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2697 break; //we can't output a dstY line so let's try with the next slice
2700 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2702 if (should_dither) {
2703 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2704 c->lumDither8 = dither_8x8_128[dstY & 7];
2706 if (dstY >= dstH-2) {
2707 // hmm looks like we can't use MMX here without overwriting this array's tail
2708 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2709 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Pointers into the ring buffers for the taps feeding this output line. */
2714 const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2715 const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2716 const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2717 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2718 int16_t *vLumFilter= c->vLumFilter;
2719 int16_t *vChrFilter= c->vChrFilter;
2721 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2722 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2724 vLumFilter += dstY * vLumFilterSize;
2725 vChrFilter += chrDstY * vChrFilterSize;
2727 av_assert0(use_mmx_vfilter != (
2728 yuv2planeX == yuv2planeX_10BE_c
2729 || yuv2planeX == yuv2planeX_10LE_c
2730 || yuv2planeX == yuv2planeX_9BE_c
2731 || yuv2planeX == yuv2planeX_9LE_c
2732 || yuv2planeX == yuv2planeX_16BE_c
2733 || yuv2planeX == yuv2planeX_16LE_c
2734 || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
2736 if(use_mmx_vfilter){
2737 vLumFilter= c->lumMmxFilter;
2738 vChrFilter= c->chrMmxFilter;
2741 if (vLumFilterSize == 1) {
2742 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2744 yuv2planeX(vLumFilter, vLumFilterSize,
2745 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2748 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2750 yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2751 } else if (vChrFilterSize == 1) {
2752 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2753 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2755 yuv2planeX(vChrFilter, vChrFilterSize,
2756 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2757 yuv2planeX(vChrFilter, vChrFilterSize,
2758 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
2762 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2763 if(use_mmx_vfilter){
2764 vLumFilter= c->alpMmxFilter;
2766 if (vLumFilterSize == 1) {
2767 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2769 yuv2planeX(vLumFilter, vLumFilterSize,
2770 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2774 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2775 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2776 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2777 int chrAlpha = vChrFilter[2 * dstY + 1];
2778 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2779 alpPixBuf ? *alpSrcPtr : NULL,
2780 dest[0], dstW, chrAlpha, dstY);
2781 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2782 int lumAlpha = vLumFilter[2 * dstY + 1];
2783 int chrAlpha = vChrFilter[2 * dstY + 1];
2785 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2787 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2788 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2789 alpPixBuf ? alpSrcPtr : NULL,
2790 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2791 } else { //general RGB
2792 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2793 lumSrcPtr, vLumFilterSize,
2794 vChrFilter + dstY * vChrFilterSize,
2795 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2796 alpSrcPtr, dest[0], dstW, dstY);
2802 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2803 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); // opaque alpha when source has none
2806 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2807 __asm__ volatile("sfence":::"memory"); // flush non-temporal stores before returning
2811 /* store changed local vars back in the context */
2813 c->lumBufIndex= lumBufIndex;
2814 c->chrBufIndex= chrBufIndex;
2815 c->lastInLumBuf= lastInLumBuf;
2816 c->lastInChrBuf= lastInChrBuf;
2818 return dstY - lastDstY;
/* One-time initialization of the C code paths: selects output writers, the
 * per-source-format input converters (chrToYV12 / lumToYV12 / alpToYV12 /
 * planar RGB readers), the horizontal scalers by src/dst bit depth, the
 * range-conversion functions, and the needs_hcscale flag.
 * NOTE(review): the listing omits many lines (switch statements around some
 * case lists, braces, #if guards); comments cover only the visible code. */
2821 static av_cold void sws_init_swScale_c(SwsContext *c)
2823 enum PixelFormat srcFormat = c->srcFormat;
2825 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2826 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma input converters, keyed on the source format. */
2829 c->chrToYV12 = NULL;
2831 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2832 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2833 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2834 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2838 case PIX_FMT_BGR4_BYTE:
2839 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2840 case PIX_FMT_GBRP9LE:
2841 case PIX_FMT_GBRP10LE:
2842 case PIX_FMT_GBRP16LE: c->chrToYV12 = planar_rgb16le_to_uv; break;
2843 case PIX_FMT_GBRP9BE:
2844 case PIX_FMT_GBRP10BE:
2845 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2846 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* >8-bit planar YUV sources in the opposite endianness get byteswapped. */
2848 case PIX_FMT_YUV444P9LE:
2849 case PIX_FMT_YUV422P9LE:
2850 case PIX_FMT_YUV420P9LE:
2851 case PIX_FMT_YUV422P10LE:
2852 case PIX_FMT_YUV420P10LE:
2853 case PIX_FMT_YUV444P10LE:
2854 case PIX_FMT_YUV420P16LE:
2855 case PIX_FMT_YUV422P16LE:
2856 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2858 case PIX_FMT_YUV444P9BE:
2859 case PIX_FMT_YUV422P9BE:
2860 case PIX_FMT_YUV420P9BE:
2861 case PIX_FMT_YUV444P10BE:
2862 case PIX_FMT_YUV422P10BE:
2863 case PIX_FMT_YUV420P10BE:
2864 case PIX_FMT_YUV420P16BE:
2865 case PIX_FMT_YUV422P16BE:
2866 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB->UV converters: the _half variants average two source pixels per
 * chroma sample when the output is horizontally subsampled. */
2869 if (c->chrSrcHSubSample) {
2871 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2872 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2873 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2874 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2875 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2876 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2877 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2878 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2879 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2880 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2881 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2882 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2883 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2884 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2885 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2886 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2887 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2888 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2889 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
2893 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2894 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2895 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2896 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2897 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2898 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2899 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2900 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2901 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2902 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2903 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2904 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2905 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2906 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2907 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2908 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2909 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2910 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma / alpha input converters. */
2914 c->lumToYV12 = NULL;
2915 c->alpToYV12 = NULL;
2916 switch (srcFormat) {
2917 case PIX_FMT_GBRP9LE:
2918 case PIX_FMT_GBRP10LE:
2919 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2920 case PIX_FMT_GBRP9BE:
2921 case PIX_FMT_GBRP10BE:
2922 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2923 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2925 case PIX_FMT_YUV444P9LE:
2926 case PIX_FMT_YUV422P9LE:
2927 case PIX_FMT_YUV420P9LE:
2928 case PIX_FMT_YUV422P10LE:
2929 case PIX_FMT_YUV420P10LE:
2930 case PIX_FMT_YUV444P10LE:
2931 case PIX_FMT_YUV420P16LE:
2932 case PIX_FMT_YUV422P16LE:
2933 case PIX_FMT_YUV444P16LE:
2934 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2936 case PIX_FMT_YUV444P9BE:
2937 case PIX_FMT_YUV422P9BE:
2938 case PIX_FMT_YUV420P9BE:
2939 case PIX_FMT_YUV444P10BE:
2940 case PIX_FMT_YUV422P10BE:
2941 case PIX_FMT_YUV420P10BE:
2942 case PIX_FMT_YUV420P16BE:
2943 case PIX_FMT_YUV422P16BE:
2944 case PIX_FMT_YUV444P16BE:
2945 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2947 case PIX_FMT_YUYV422 :
2948 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2949 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2950 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2951 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2952 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2953 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2954 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2955 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2956 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2957 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2958 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2959 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2963 case PIX_FMT_BGR4_BYTE:
2964 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2965 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2966 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2967 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2968 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2969 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2970 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2971 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2972 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2973 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2974 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2977 switch (srcFormat) {
2979 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2981 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2982 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2983 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
/* Horizontal scaler selection by source/destination bit depth; the fast
 * bilinear path is only used for 8-bit sources with <=10-bit output. */
2988 if (c->srcBpc == 8) {
2989 if (c->dstBpc <= 10) {
2990 c->hyScale = c->hcScale = hScale8To15_c;
2991 if (c->flags & SWS_FAST_BILINEAR) {
2992 c->hyscale_fast = hyscale_fast_c;
2993 c->hcscale_fast = hcscale_fast_c;
2996 c->hyScale = c->hcScale = hScale8To19_c;
2999 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range conversion: only needed when src/dst ranges differ and the output
 * is not RGB (RGB output handles range in the packers). */
3002 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
3003 if (c->dstBpc <= 10) {
3005 c->lumConvertRange = lumRangeFromJpeg_c;
3006 c->chrConvertRange = chrRangeFromJpeg_c;
3008 c->lumConvertRange = lumRangeToJpeg_c;
3009 c->chrConvertRange = chrRangeToJpeg_c;
3013 c->lumConvertRange = lumRangeFromJpeg16_c;
3014 c->chrConvertRange = chrRangeFromJpeg16_c;
3016 c->lumConvertRange = lumRangeToJpeg16_c;
3017 c->chrConvertRange = chrRangeToJpeg16_c;
/* Grayscale and mono formats carry no chroma, so chroma scaling is skipped. */
3022 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
3023 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
3024 c->needs_hcscale = 1;
3027 SwsFunc ff_getSwsFunc(SwsContext *c)
3029 sws_init_swScale_c(c);
3032 ff_sws_init_swScale_mmx(c);
3034 ff_sws_init_swScale_altivec(c);