/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"

#include "swscale_internal.h"
/* BT.601 RGB -> YUV conversion coefficients, fixed-point scaled by
 * 2^RGB2YUV_SHIFT.  Luma (Y) weights are scaled to the 219-step limited
 * range, chroma (U/V) weights to the 224-step limited range. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
   Special versions: fast Y 1:1 scaling (no interpolation in y direction)

   TODO:
   more intelligent misalignment avoidance for the horizontal scaler
   write special vertical cubic upscale version
   optimize C code (YV12 / minmax)
   add support for packed pixel YUV input & output
   add support for Y8 output
   optimize BGR24 & BGR32
   add BGR4 output support
   write special BGR->BGR scaler
*/
65 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66 { 1, 3, 1, 3, 1, 3, 1, 3, },
67 { 2, 0, 2, 0, 2, 0, 2, 0, },
70 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71 { 6, 2, 6, 2, 6, 2, 6, 2, },
72 { 0, 4, 0, 4, 0, 4, 0, 4, },
75 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76 { 8, 4, 11, 7, 8, 4, 11, 7, },
77 { 2, 14, 1, 13, 2, 14, 1, 13, },
78 { 10, 6, 9, 5, 10, 6, 9, 5, },
79 { 0, 12, 3, 15, 0, 12, 3, 15, },
82 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83 { 17, 9, 23, 15, 16, 8, 22, 14, },
84 { 5, 29, 3, 27, 4, 28, 2, 26, },
85 { 21, 13, 19, 11, 20, 12, 18, 10, },
86 { 0, 24, 6, 30, 1, 25, 7, 31, },
87 { 16, 8, 22, 14, 17, 9, 23, 15, },
88 { 4, 28, 2, 26, 5, 29, 3, 27, },
89 { 20, 12, 18, 10, 21, 13, 19, 11, },
90 { 1, 25, 7, 31, 0, 24, 6, 30, },
93 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94 { 0, 55, 14, 68, 3, 58, 17, 72, },
95 { 37, 18, 50, 32, 40, 22, 54, 35, },
96 { 9, 64, 5, 59, 13, 67, 8, 63, },
97 { 46, 27, 41, 23, 49, 31, 44, 26, },
98 { 2, 57, 16, 71, 1, 56, 15, 70, },
99 { 39, 21, 52, 34, 38, 19, 51, 33, },
100 { 11, 66, 7, 62, 10, 65, 6, 60, },
101 { 48, 30, 43, 25, 47, 29, 42, 24, },
105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106 {117, 62, 158, 103, 113, 58, 155, 100, },
107 { 34, 199, 21, 186, 31, 196, 17, 182, },
108 {144, 89, 131, 76, 141, 86, 127, 72, },
109 { 0, 165, 41, 206, 10, 175, 52, 217, },
110 {110, 55, 151, 96, 120, 65, 162, 107, },
111 { 28, 193, 14, 179, 38, 203, 24, 189, },
112 {138, 83, 124, 69, 148, 93, 134, 79, },
113 { 7, 172, 48, 213, 3, 168, 45, 210, },
116 // tries to correct a gamma of 1.5
117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118 { 0, 143, 18, 200, 2, 156, 25, 215, },
119 { 78, 28, 125, 64, 89, 36, 138, 74, },
120 { 10, 180, 3, 161, 16, 195, 8, 175, },
121 {109, 51, 93, 38, 121, 60, 105, 47, },
122 { 1, 152, 23, 210, 0, 147, 20, 205, },
123 { 85, 33, 134, 71, 81, 30, 130, 67, },
124 { 14, 190, 6, 171, 12, 185, 5, 166, },
125 {117, 57, 101, 44, 113, 54, 97, 41, },
128 // tries to correct a gamma of 2.0
129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130 { 0, 124, 8, 193, 0, 140, 12, 213, },
131 { 55, 14, 104, 42, 66, 19, 119, 52, },
132 { 3, 168, 1, 145, 6, 187, 3, 162, },
133 { 86, 31, 70, 21, 99, 39, 82, 28, },
134 { 0, 134, 11, 206, 0, 129, 9, 200, },
135 { 62, 17, 114, 48, 58, 16, 109, 45, },
136 { 5, 181, 2, 157, 4, 175, 1, 151, },
137 { 95, 36, 78, 26, 90, 34, 74, 24, },
140 // tries to correct a gamma of 2.5
141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142 { 0, 107, 3, 187, 0, 125, 6, 212, },
143 { 39, 7, 86, 28, 49, 11, 102, 36, },
144 { 1, 158, 0, 131, 3, 180, 1, 151, },
145 { 68, 19, 52, 12, 81, 25, 64, 17, },
146 { 0, 119, 5, 203, 0, 113, 4, 195, },
147 { 45, 9, 96, 33, 42, 8, 91, 30, },
148 { 2, 172, 1, 144, 2, 165, 0, 137, },
149 { 77, 23, 60, 15, 72, 21, 56, 14, },
152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153 { 36, 68, 60, 92, 34, 66, 58, 90,},
154 { 100, 4,124, 28, 98, 2,122, 26,},
155 { 52, 84, 44, 76, 50, 82, 42, 74,},
156 { 116, 20,108, 12,114, 18,106, 10,},
157 { 32, 64, 56, 88, 38, 70, 62, 94,},
158 { 96, 0,120, 24,102, 6,126, 30,},
159 { 48, 80, 40, 72, 54, 86, 46, 78,},
160 { 112, 16,104, 8,118, 22,110, 14,},
162 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
163 { 64, 64, 64, 64, 64, 64, 64, 64 };
165 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
167 { 0, 1, 0, 1, 0, 1, 0, 1,},
168 { 1, 0, 1, 0, 1, 0, 1, 0,},
169 { 0, 1, 0, 1, 0, 1, 0, 1,},
170 { 1, 0, 1, 0, 1, 0, 1, 0,},
171 { 0, 1, 0, 1, 0, 1, 0, 1,},
172 { 1, 0, 1, 0, 1, 0, 1, 0,},
173 { 0, 1, 0, 1, 0, 1, 0, 1,},
174 { 1, 0, 1, 0, 1, 0, 1, 0,},
176 { 1, 2, 1, 2, 1, 2, 1, 2,},
177 { 3, 0, 3, 0, 3, 0, 3, 0,},
178 { 1, 2, 1, 2, 1, 2, 1, 2,},
179 { 3, 0, 3, 0, 3, 0, 3, 0,},
180 { 1, 2, 1, 2, 1, 2, 1, 2,},
181 { 3, 0, 3, 0, 3, 0, 3, 0,},
182 { 1, 2, 1, 2, 1, 2, 1, 2,},
183 { 3, 0, 3, 0, 3, 0, 3, 0,},
185 { 2, 4, 3, 5, 2, 4, 3, 5,},
186 { 6, 0, 7, 1, 6, 0, 7, 1,},
187 { 3, 5, 2, 4, 3, 5, 2, 4,},
188 { 7, 1, 6, 0, 7, 1, 6, 0,},
189 { 2, 4, 3, 5, 2, 4, 3, 5,},
190 { 6, 0, 7, 1, 6, 0, 7, 1,},
191 { 3, 5, 2, 4, 3, 5, 2, 4,},
192 { 7, 1, 6, 0, 7, 1, 6, 0,},
194 { 4, 8, 7, 11, 4, 8, 7, 11,},
195 { 12, 0, 15, 3, 12, 0, 15, 3,},
196 { 6, 10, 5, 9, 6, 10, 5, 9,},
197 { 14, 2, 13, 1, 14, 2, 13, 1,},
198 { 4, 8, 7, 11, 4, 8, 7, 11,},
199 { 12, 0, 15, 3, 12, 0, 15, 3,},
200 { 6, 10, 5, 9, 6, 10, 5, 9,},
201 { 14, 2, 13, 1, 14, 2, 13, 1,},
203 { 9, 17, 15, 23, 8, 16, 14, 22,},
204 { 25, 1, 31, 7, 24, 0, 30, 6,},
205 { 13, 21, 11, 19, 12, 20, 10, 18,},
206 { 29, 5, 27, 3, 28, 4, 26, 2,},
207 { 8, 16, 14, 22, 9, 17, 15, 23,},
208 { 24, 0, 30, 6, 25, 1, 31, 7,},
209 { 12, 20, 10, 18, 13, 21, 11, 19,},
210 { 28, 4, 26, 2, 29, 5, 27, 3,},
212 { 18, 34, 30, 46, 17, 33, 29, 45,},
213 { 50, 2, 62, 14, 49, 1, 61, 13,},
214 { 26, 42, 22, 38, 25, 41, 21, 37,},
215 { 58, 10, 54, 6, 57, 9, 53, 5,},
216 { 16, 32, 28, 44, 19, 35, 31, 47,},
217 { 48, 0, 60, 12, 51, 3, 63, 15,},
218 { 24, 40, 20, 36, 27, 43, 23, 39,},
219 { 56, 8, 52, 4, 59, 11, 55, 7,},
221 { 18, 34, 30, 46, 17, 33, 29, 45,},
222 { 50, 2, 62, 14, 49, 1, 61, 13,},
223 { 26, 42, 22, 38, 25, 41, 21, 37,},
224 { 58, 10, 54, 6, 57, 9, 53, 5,},
225 { 16, 32, 28, 44, 19, 35, 31, 47,},
226 { 48, 0, 60, 12, 51, 3, 63, 15,},
227 { 24, 40, 20, 36, 27, 43, 23, 39,},
228 { 56, 8, 52, 4, 59, 11, 55, 7,},
230 { 36, 68, 60, 92, 34, 66, 58, 90,},
231 { 100, 4,124, 28, 98, 2,122, 26,},
232 { 52, 84, 44, 76, 50, 82, 42, 74,},
233 { 116, 20,108, 12,114, 18,106, 10,},
234 { 32, 64, 56, 88, 38, 70, 62, 94,},
235 { 96, 0,120, 24,102, 6,126, 30,},
236 { 48, 80, 40, 72, 54, 86, 46, 78,},
237 { 112, 16,104, 8,118, 22,110, 14,},
/* Constant vector of 64s — NOTE(review): presumably a flat (identity)
 * filter used elsewhere in this file; verify against its consumers. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/* Scale factors for dithering, indexed by [depth-1][scale].
 * NOTE(review): the exact semantics of the entries are defined by their
 * users elsewhere in libswscale — confirm before relying on this summary. */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
};
260 #define output_pixel(pos, val, bias, signedness) \
262 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
264 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
267 static av_always_inline void
268 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
269 int big_endian, int output_bits)
273 av_assert0(output_bits == 16);
275 for (i = 0; i < dstW; i++) {
276 int val = src[i] + (1 << (shift - 1));
277 output_pixel(&dest[i], val, 0, uint);
281 static av_always_inline void
282 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
283 const int32_t **src, uint16_t *dest, int dstW,
284 int big_endian, int output_bits)
288 av_assert0(output_bits == 16);
290 for (i = 0; i < dstW; i++) {
291 int val = 1 << (shift - 1);
294 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
295 * filters (or anything with negative coeffs, the range can be slightly
296 * wider in both directions. To account for this overflow, we subtract
297 * a constant so it always fits in the signed range (assuming a
298 * reasonable filterSize), and re-add that at the end. */
300 for (j = 0; j < filterSize; j++)
301 val += src[j][i] * filter[j];
303 output_pixel(&dest[i], val, 0x8000, int);
309 #define output_pixel(pos, val) \
311 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
313 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
316 static av_always_inline void
317 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
318 int big_endian, int output_bits)
321 int shift = 15 - output_bits;
323 for (i = 0; i < dstW; i++) {
324 int val = src[i] + (1 << (shift - 1));
325 output_pixel(&dest[i], val);
329 static av_always_inline void
330 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
331 const int16_t **src, uint16_t *dest, int dstW,
332 int big_endian, int output_bits)
335 int shift = 11 + 16 - output_bits;
337 for (i = 0; i < dstW; i++) {
338 int val = 1 << (shift - 1);
341 for (j = 0; j < filterSize; j++)
342 val += src[j][i] * filter[j];
344 output_pixel(&dest[i], val);
350 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
351 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
352 uint8_t *dest, int dstW, \
353 const uint8_t *dither, int offset)\
355 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
356 (uint16_t *) dest, dstW, is_be, bits); \
358 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
359 const int16_t **src, uint8_t *dest, int dstW, \
360 const uint8_t *dither, int offset)\
362 yuv2planeX_## template_size ## _c_template(filter, \
363 filterSize, (const typeX_t **) src, \
364 (uint16_t *) dest, dstW, is_be, bits); \
366 yuv2NBPS( 9, BE, 1, 10, int16_t)
367 yuv2NBPS( 9, LE, 0, 10, int16_t)
368 yuv2NBPS(10, BE, 1, 10, int16_t)
369 yuv2NBPS(10, LE, 0, 10, int16_t)
370 yuv2NBPS(16, BE, 1, 16, int32_t)
371 yuv2NBPS(16, LE, 0, 16, int32_t)
373 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
374 const int16_t **src, uint8_t *dest, int dstW,
375 const uint8_t *dither, int offset)
378 for (i=0; i<dstW; i++) {
379 int val = dither[(i + offset) & 7] << 12;
381 for (j=0; j<filterSize; j++)
382 val += src[j][i] * filter[j];
384 dest[i]= av_clip_uint8(val>>19);
388 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
389 const uint8_t *dither, int offset)
392 for (i=0; i<dstW; i++) {
393 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
394 dest[i]= av_clip_uint8(val);
398 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
399 const int16_t **chrUSrc, const int16_t **chrVSrc,
400 uint8_t *dest, int chrDstW)
402 enum PixelFormat dstFormat = c->dstFormat;
403 const uint8_t *chrDither = c->chrDither8;
406 if (dstFormat == PIX_FMT_NV12)
407 for (i=0; i<chrDstW; i++) {
408 int u = chrDither[i & 7] << 12;
409 int v = chrDither[(i + 3) & 7] << 12;
411 for (j=0; j<chrFilterSize; j++) {
412 u += chrUSrc[j][i] * chrFilter[j];
413 v += chrVSrc[j][i] * chrFilter[j];
416 dest[2*i]= av_clip_uint8(u>>19);
417 dest[2*i+1]= av_clip_uint8(v>>19);
420 for (i=0; i<chrDstW; i++) {
421 int u = chrDither[i & 7] << 12;
422 int v = chrDither[(i + 3) & 7] << 12;
424 for (j=0; j<chrFilterSize; j++) {
425 u += chrUSrc[j][i] * chrFilter[j];
426 v += chrVSrc[j][i] * chrFilter[j];
429 dest[2*i]= av_clip_uint8(v>>19);
430 dest[2*i+1]= av_clip_uint8(u>>19);
434 #define output_pixel(pos, val) \
435 if (target == PIX_FMT_GRAY16BE) { \
441 static av_always_inline void
442 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
443 const int32_t **lumSrc, int lumFilterSize,
444 const int16_t *chrFilter, const int32_t **chrUSrc,
445 const int32_t **chrVSrc, int chrFilterSize,
446 const int32_t **alpSrc, uint16_t *dest, int dstW,
447 int y, enum PixelFormat target)
451 for (i = 0; i < (dstW >> 1); i++) {
453 int Y1 = (1 << 14) - 0x40000000;
454 int Y2 = (1 << 14) - 0x40000000;
456 for (j = 0; j < lumFilterSize; j++) {
457 Y1 += lumSrc[j][i * 2] * lumFilter[j];
458 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
462 Y1 = av_clip_int16(Y1);
463 Y2 = av_clip_int16(Y2);
464 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
465 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
469 static av_always_inline void
470 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
471 const int32_t *ubuf[2], const int32_t *vbuf[2],
472 const int32_t *abuf[2], uint16_t *dest, int dstW,
473 int yalpha, int uvalpha, int y,
474 enum PixelFormat target)
476 int yalpha1 = 4095 - yalpha;
478 const int32_t *buf0 = buf[0], *buf1 = buf[1];
480 for (i = 0; i < (dstW >> 1); i++) {
481 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
482 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
484 output_pixel(&dest[i * 2 + 0], Y1);
485 output_pixel(&dest[i * 2 + 1], Y2);
489 static av_always_inline void
490 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
491 const int32_t *ubuf[2], const int32_t *vbuf[2],
492 const int32_t *abuf0, uint16_t *dest, int dstW,
493 int uvalpha, int y, enum PixelFormat target)
497 for (i = 0; i < (dstW >> 1); i++) {
498 int Y1 = (buf0[i * 2 ]+4)>>3;
499 int Y2 = (buf0[i * 2 + 1]+4)>>3;
501 output_pixel(&dest[i * 2 + 0], Y1);
502 output_pixel(&dest[i * 2 + 1], Y2);
508 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
509 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
510 const int16_t **_lumSrc, int lumFilterSize, \
511 const int16_t *chrFilter, const int16_t **_chrUSrc, \
512 const int16_t **_chrVSrc, int chrFilterSize, \
513 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
516 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
517 **chrUSrc = (const int32_t **) _chrUSrc, \
518 **chrVSrc = (const int32_t **) _chrVSrc, \
519 **alpSrc = (const int32_t **) _alpSrc; \
520 uint16_t *dest = (uint16_t *) _dest; \
521 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
522 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
523 alpSrc, dest, dstW, y, fmt); \
526 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
527 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
528 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
529 int yalpha, int uvalpha, int y) \
531 const int32_t **buf = (const int32_t **) _buf, \
532 **ubuf = (const int32_t **) _ubuf, \
533 **vbuf = (const int32_t **) _vbuf, \
534 **abuf = (const int32_t **) _abuf; \
535 uint16_t *dest = (uint16_t *) _dest; \
536 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
537 dest, dstW, yalpha, uvalpha, y, fmt); \
540 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
541 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
542 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
543 int uvalpha, int y) \
545 const int32_t *buf0 = (const int32_t *) _buf0, \
546 **ubuf = (const int32_t **) _ubuf, \
547 **vbuf = (const int32_t **) _vbuf, \
548 *abuf0 = (const int32_t *) _abuf0; \
549 uint16_t *dest = (uint16_t *) _dest; \
550 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
551 dstW, uvalpha, y, fmt); \
554 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
555 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
557 #define output_pixel(pos, acc) \
558 if (target == PIX_FMT_MONOBLACK) { \
564 static av_always_inline void
565 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
566 const int16_t **lumSrc, int lumFilterSize,
567 const int16_t *chrFilter, const int16_t **chrUSrc,
568 const int16_t **chrVSrc, int chrFilterSize,
569 const int16_t **alpSrc, uint8_t *dest, int dstW,
570 int y, enum PixelFormat target)
572 const uint8_t * const d128=dither_8x8_220[y&7];
573 uint8_t *g = c->table_gU[128] + c->table_gV[128];
577 for (i = 0; i < dstW - 1; i += 2) {
582 for (j = 0; j < lumFilterSize; j++) {
583 Y1 += lumSrc[j][i] * lumFilter[j];
584 Y2 += lumSrc[j][i+1] * lumFilter[j];
588 if ((Y1 | Y2) & 0x100) {
589 Y1 = av_clip_uint8(Y1);
590 Y2 = av_clip_uint8(Y2);
592 acc += acc + g[Y1 + d128[(i + 0) & 7]];
593 acc += acc + g[Y2 + d128[(i + 1) & 7]];
595 output_pixel(*dest++, acc);
600 static av_always_inline void
601 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
602 const int16_t *ubuf[2], const int16_t *vbuf[2],
603 const int16_t *abuf[2], uint8_t *dest, int dstW,
604 int yalpha, int uvalpha, int y,
605 enum PixelFormat target)
607 const int16_t *buf0 = buf[0], *buf1 = buf[1];
608 const uint8_t * const d128 = dither_8x8_220[y & 7];
609 uint8_t *g = c->table_gU[128] + c->table_gV[128];
610 int yalpha1 = 4095 - yalpha;
613 for (i = 0; i < dstW - 7; i += 8) {
614 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
615 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
616 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
617 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
618 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
619 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
620 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
621 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
622 output_pixel(*dest++, acc);
626 static av_always_inline void
627 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
628 const int16_t *ubuf[2], const int16_t *vbuf[2],
629 const int16_t *abuf0, uint8_t *dest, int dstW,
630 int uvalpha, int y, enum PixelFormat target)
632 const uint8_t * const d128 = dither_8x8_220[y & 7];
633 uint8_t *g = c->table_gU[128] + c->table_gV[128];
636 for (i = 0; i < dstW - 7; i += 8) {
637 int acc = g[(buf0[i ] >> 7) + d128[0]];
638 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
639 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
640 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
641 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
642 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
643 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
644 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
645 output_pixel(*dest++, acc);
651 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
652 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
653 const int16_t **lumSrc, int lumFilterSize, \
654 const int16_t *chrFilter, const int16_t **chrUSrc, \
655 const int16_t **chrVSrc, int chrFilterSize, \
656 const int16_t **alpSrc, uint8_t *dest, int dstW, \
659 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
660 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
661 alpSrc, dest, dstW, y, fmt); \
664 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
665 const int16_t *ubuf[2], const int16_t *vbuf[2], \
666 const int16_t *abuf[2], uint8_t *dest, int dstW, \
667 int yalpha, int uvalpha, int y) \
669 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
670 dest, dstW, yalpha, uvalpha, y, fmt); \
673 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
674 const int16_t *ubuf[2], const int16_t *vbuf[2], \
675 const int16_t *abuf0, uint8_t *dest, int dstW, \
676 int uvalpha, int y) \
678 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
679 abuf0, dest, dstW, uvalpha, \
683 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
684 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
686 #define output_pixels(pos, Y1, U, Y2, V) \
687 if (target == PIX_FMT_YUYV422) { \
688 dest[pos + 0] = Y1; \
690 dest[pos + 2] = Y2; \
694 dest[pos + 1] = Y1; \
696 dest[pos + 3] = Y2; \
699 static av_always_inline void
700 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
701 const int16_t **lumSrc, int lumFilterSize,
702 const int16_t *chrFilter, const int16_t **chrUSrc,
703 const int16_t **chrVSrc, int chrFilterSize,
704 const int16_t **alpSrc, uint8_t *dest, int dstW,
705 int y, enum PixelFormat target)
709 for (i = 0; i < (dstW >> 1); i++) {
716 for (j = 0; j < lumFilterSize; j++) {
717 Y1 += lumSrc[j][i * 2] * lumFilter[j];
718 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
720 for (j = 0; j < chrFilterSize; j++) {
721 U += chrUSrc[j][i] * chrFilter[j];
722 V += chrVSrc[j][i] * chrFilter[j];
728 if ((Y1 | Y2 | U | V) & 0x100) {
729 Y1 = av_clip_uint8(Y1);
730 Y2 = av_clip_uint8(Y2);
731 U = av_clip_uint8(U);
732 V = av_clip_uint8(V);
734 output_pixels(4*i, Y1, U, Y2, V);
738 static av_always_inline void
739 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
740 const int16_t *ubuf[2], const int16_t *vbuf[2],
741 const int16_t *abuf[2], uint8_t *dest, int dstW,
742 int yalpha, int uvalpha, int y,
743 enum PixelFormat target)
745 const int16_t *buf0 = buf[0], *buf1 = buf[1],
746 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
747 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
748 int yalpha1 = 4095 - yalpha;
749 int uvalpha1 = 4095 - uvalpha;
752 for (i = 0; i < (dstW >> 1); i++) {
753 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
754 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
755 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
756 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
758 output_pixels(i * 4, Y1, U, Y2, V);
762 static av_always_inline void
763 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
769 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
772 if (uvalpha < 2048) {
773 for (i = 0; i < (dstW >> 1); i++) {
774 int Y1 = buf0[i * 2] >> 7;
775 int Y2 = buf0[i * 2 + 1] >> 7;
776 int U = ubuf1[i] >> 7;
777 int V = vbuf1[i] >> 7;
779 output_pixels(i * 4, Y1, U, Y2, V);
782 for (i = 0; i < (dstW >> 1); i++) {
783 int Y1 = buf0[i * 2] >> 7;
784 int Y2 = buf0[i * 2 + 1] >> 7;
785 int U = (ubuf0[i] + ubuf1[i]) >> 8;
786 int V = (vbuf0[i] + vbuf1[i]) >> 8;
788 output_pixels(i * 4, Y1, U, Y2, V);
795 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
796 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
798 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
799 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
800 #define output_pixel(pos, val) \
801 if (isBE(target)) { \
807 static av_always_inline void
808 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
809 const int32_t **lumSrc, int lumFilterSize,
810 const int16_t *chrFilter, const int32_t **chrUSrc,
811 const int32_t **chrVSrc, int chrFilterSize,
812 const int32_t **alpSrc, uint16_t *dest, int dstW,
813 int y, enum PixelFormat target)
817 for (i = 0; i < (dstW >> 1); i++) {
819 int Y1 = -0x40000000;
820 int Y2 = -0x40000000;
821 int U = -128 << 23; // 19
825 for (j = 0; j < lumFilterSize; j++) {
826 Y1 += lumSrc[j][i * 2] * lumFilter[j];
827 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
829 for (j = 0; j < chrFilterSize; j++) {
830 U += chrUSrc[j][i] * chrFilter[j];
831 V += chrVSrc[j][i] * chrFilter[j];
834 // 8bit: 12+15=27; 16-bit: 12+19=31
842 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
843 Y1 -= c->yuv2rgb_y_offset;
844 Y2 -= c->yuv2rgb_y_offset;
845 Y1 *= c->yuv2rgb_y_coeff;
846 Y2 *= c->yuv2rgb_y_coeff;
849 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
851 R = V * c->yuv2rgb_v2r_coeff;
852 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
853 B = U * c->yuv2rgb_u2b_coeff;
855 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
856 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
857 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
858 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
859 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
860 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
861 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
866 static av_always_inline void
867 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
868 const int32_t *ubuf[2], const int32_t *vbuf[2],
869 const int32_t *abuf[2], uint16_t *dest, int dstW,
870 int yalpha, int uvalpha, int y,
871 enum PixelFormat target)
873 const int32_t *buf0 = buf[0], *buf1 = buf[1],
874 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
875 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
876 int yalpha1 = 4095 - yalpha;
877 int uvalpha1 = 4095 - uvalpha;
880 for (i = 0; i < (dstW >> 1); i++) {
881 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
882 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
883 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
884 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
887 Y1 -= c->yuv2rgb_y_offset;
888 Y2 -= c->yuv2rgb_y_offset;
889 Y1 *= c->yuv2rgb_y_coeff;
890 Y2 *= c->yuv2rgb_y_coeff;
894 R = V * c->yuv2rgb_v2r_coeff;
895 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
896 B = U * c->yuv2rgb_u2b_coeff;
898 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
899 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
900 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
901 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
902 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
903 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
908 static av_always_inline void
909 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
910 const int32_t *ubuf[2], const int32_t *vbuf[2],
911 const int32_t *abuf0, uint16_t *dest, int dstW,
912 int uvalpha, int y, enum PixelFormat target)
914 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
915 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
918 if (uvalpha < 2048) {
919 for (i = 0; i < (dstW >> 1); i++) {
920 int Y1 = (buf0[i * 2] ) >> 2;
921 int Y2 = (buf0[i * 2 + 1]) >> 2;
922 int U = (ubuf0[i] + (-128 << 11)) >> 2;
923 int V = (vbuf0[i] + (-128 << 11)) >> 2;
926 Y1 -= c->yuv2rgb_y_offset;
927 Y2 -= c->yuv2rgb_y_offset;
928 Y1 *= c->yuv2rgb_y_coeff;
929 Y2 *= c->yuv2rgb_y_coeff;
933 R = V * c->yuv2rgb_v2r_coeff;
934 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
935 B = U * c->yuv2rgb_u2b_coeff;
937 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
938 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
939 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
940 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
941 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
942 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
946 for (i = 0; i < (dstW >> 1); i++) {
947 int Y1 = (buf0[i * 2] ) >> 2;
948 int Y2 = (buf0[i * 2 + 1]) >> 2;
949 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
950 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
953 Y1 -= c->yuv2rgb_y_offset;
954 Y2 -= c->yuv2rgb_y_offset;
955 Y1 *= c->yuv2rgb_y_coeff;
956 Y2 *= c->yuv2rgb_y_coeff;
960 R = V * c->yuv2rgb_v2r_coeff;
961 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
962 B = U * c->yuv2rgb_u2b_coeff;
964 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
965 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
966 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
967 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
968 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
969 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
979 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
980 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
981 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
982 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/**
 * Write out 2 RGB pixels in the target pixel format. This function takes a
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 */
/*
 * Write two horizontally adjacent output pixels (indices i*2 and i*2+1) in the
 * requested packed-RGB format, using the per-component lookup tables _r/_g/_b
 * that were pre-offset by the chroma values.  Y1/Y2 index into those tables;
 * A1/A2 are the (already clipped) alpha samples, y is the output scanline used
 * for ordered dithering.  Compiled with a compile-time-constant `target` so
 * each wrapper collapses to a single branch-free variant.
 * NOTE(review): this chunk appears subsampled — braces/else lines and some
 * statements are missing between the visible lines; do not edit blind.
 */
992 static av_always_inline void
993 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
994 unsigned A1, unsigned A2,
995 const void *_r, const void *_g, const void *_b, int y,
996 enum PixelFormat target, int hasAlpha)
/* 32-bit packed formats: each table entry already holds the component shifted
 * into its final byte lane, so one add per component assembles the pixel. */
998 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
999 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1000 uint32_t *dest = (uint32_t *) _dest;
1001 const uint32_t *r = (const uint32_t *) _r;
1002 const uint32_t *g = (const uint32_t *) _g;
1003 const uint32_t *b = (const uint32_t *) _b;
/* Alpha goes in byte 0 for the *_1 layouts, byte 3 otherwise. */
1006 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1008 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1009 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
/* presumably an alternate (compile-time hasAlpha) path — TODO confirm against full source */
1012 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1014 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1015 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1017 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1018 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24-bit packed: three separate byte stores per pixel; r_b/b_r swap the
 * red/blue channel order between RGB24 and BGR24. */
1021 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1022 uint8_t *dest = (uint8_t *) _dest;
1023 const uint8_t *r = (const uint8_t *) _r;
1024 const uint8_t *g = (const uint8_t *) _g;
1025 const uint8_t *b = (const uint8_t *) _b;
1027 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1028 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1030 dest[i * 6 + 0] = r_b[Y1];
1031 dest[i * 6 + 1] = g[Y1];
1032 dest[i * 6 + 2] = b_r[Y1];
1033 dest[i * 6 + 3] = r_b[Y2];
1034 dest[i * 6 + 4] = g[Y2];
1035 dest[i * 6 + 5] = b_r[Y2];
/* 16/15/12-bit packed: ordered dither offsets are added to the table index
 * (the tables are laid out so index+dither performs the rounding). */
1038 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1039 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1040 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1041 uint16_t *dest = (uint16_t *) _dest;
1042 const uint16_t *r = (const uint16_t *) _r;
1043 const uint16_t *g = (const uint16_t *) _g;
1044 const uint16_t *b = (const uint16_t *) _b;
1045 int dr1, dg1, db1, dr2, dg2, db2;
1047 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1048 dr1 = dither_2x2_8[ y & 1 ][0];
1049 dg1 = dither_2x2_4[ y & 1 ][0];
1050 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1051 dr2 = dither_2x2_8[ y & 1 ][1];
1052 dg2 = dither_2x2_4[ y & 1 ][1];
1053 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1054 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1055 dr1 = dither_2x2_8[ y & 1 ][0];
1056 dg1 = dither_2x2_8[ y & 1 ][1];
1057 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1058 dr2 = dither_2x2_8[ y & 1 ][1];
1059 dg2 = dither_2x2_8[ y & 1 ][0];
1060 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* 444: a 4x4 dither matrix is needed for the coarser 4-bit quantization */
1062 dr1 = dither_4x4_16[ y & 3 ][0];
1063 dg1 = dither_4x4_16[ y & 3 ][1];
1064 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1065 dr2 = dither_4x4_16[ y & 3 ][1];
1066 dg2 = dither_4x4_16[ y & 3 ][0];
1067 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1070 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1071 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* 8/4-bit paletted-style output: dither selected per output column (i*2+k)&7 */
1072 } else /* 8/4-bit */ {
1073 uint8_t *dest = (uint8_t *) _dest;
1074 const uint8_t *r = (const uint8_t *) _r;
1075 const uint8_t *g = (const uint8_t *) _g;
1076 const uint8_t *b = (const uint8_t *) _b;
1077 int dr1, dg1, db1, dr2, dg2, db2;
1079 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1080 const uint8_t * const d64 = dither_8x8_73[y & 7];
1081 const uint8_t * const d32 = dither_8x8_32[y & 7];
1082 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1083 db1 = d64[(i * 2 + 0) & 7];
1084 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1085 db2 = d64[(i * 2 + 1) & 7];
1087 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1088 const uint8_t * const d128 = dither_8x8_220[y & 7];
1089 dr1 = db1 = d128[(i * 2 + 0) & 7];
1090 dg1 = d64[(i * 2 + 0) & 7];
1091 dr2 = db2 = d128[(i * 2 + 1) & 7];
1092 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 packs both pixels into one byte (second pixel in the high nibble) */
1095 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1096 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1097 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1099 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1100 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/*
 * Multi-tap vertical scaling + YUV->RGB output: for each pair of output
 * pixels, run the vertical FIR (lumFilter/chrFilter) over the input slices,
 * clip to 8 bits, look up the chroma-offset tables and hand off to
 * yuv2rgb_write().  Alpha is filtered the same way when alpSrc is used.
 * NOTE(review): initializers of Y1/Y2/U/V (and A1/A2) fall in lines missing
 * from this extraction — the 0x100 tests below imply a rounding bias plus
 * >> shift happens before them; confirm against the full source.
 */
1105 static av_always_inline void
1106 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1107 const int16_t **lumSrc, int lumFilterSize,
1108 const int16_t *chrFilter, const int16_t **chrUSrc,
1109 const int16_t **chrVSrc, int chrFilterSize,
1110 const int16_t **alpSrc, uint8_t *dest, int dstW,
1111 int y, enum PixelFormat target, int hasAlpha)
/* two output pixels per iteration (chroma is horizontally subsampled 2:1) */
1115 for (i = 0; i < (dstW >> 1); i++) {
1121 int av_unused A1, A2;
1122 const void *r, *g, *b;
/* vertical luma FIR over lumFilterSize source lines */
1124 for (j = 0; j < lumFilterSize; j++) {
1125 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1126 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* vertical chroma FIR */
1128 for (j = 0; j < chrFilterSize; j++) {
1129 U += chrUSrc[j][i] * chrFilter[j];
1130 V += chrVSrc[j][i] * chrFilter[j];
/* cheap out-of-range test: only clip when some value overflowed 8 bits */
1136 if ((Y1 | Y2 | U | V) & 0x100) {
1137 Y1 = av_clip_uint8(Y1);
1138 Y2 = av_clip_uint8(Y2);
1139 U = av_clip_uint8(U);
1140 V = av_clip_uint8(V);
1145 for (j = 0; j < lumFilterSize; j++) {
1146 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1147 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1151 if ((A1 | A2) & 0x100) {
1152 A1 = av_clip_uint8(A1);
1153 A2 = av_clip_uint8(A2);
1157 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1159 g = (c->table_gU[U] + c->table_gV[V]);
1162 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1163 r, g, b, y, target, hasAlpha);
/*
 * Two-line vertical bilinear blend + YUV->RGB output.  buf[0]/buf[1] are the
 * two neighbouring source lines; yalpha/uvalpha are 12-bit blend weights
 * (0..4095), so x*w0 + y*w1 with w0+w1 = 4095 followed by >> 19 yields an
 * 8-bit sample from the 15-bit intermediates.
 */
1167 static av_always_inline void
1168 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1169 const int16_t *ubuf[2], const int16_t *vbuf[2],
1170 const int16_t *abuf[2], uint8_t *dest, int dstW,
1171 int yalpha, int uvalpha, int y,
1172 enum PixelFormat target, int hasAlpha)
1174 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1175 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1176 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1177 *abuf0 = hasAlpha ? abuf[0] : NULL,
1178 *abuf1 = hasAlpha ? abuf[1] : NULL;
/* complementary weights for the first line */
1179 int yalpha1 = 4095 - yalpha;
1180 int uvalpha1 = 4095 - uvalpha;
1183 for (i = 0; i < (dstW >> 1); i++) {
1184 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1185 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1186 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1187 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1189 const void *r = c->table_rV[V],
1190 *g = (c->table_gU[U] + c->table_gV[V]),
1191 *b = c->table_bU[U];
/* alpha blended with the luma weights (declaration of A1/A2 not visible here) */
1194 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1195 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1198 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1199 r, g, b, y, target, hasAlpha);
/*
 * Single-source-line ("unscaled vertical") YUV->RGB output.  Luma comes from
 * buf0 alone (>> 7 converts the 15-bit intermediate to 8 bits).  For chroma,
 * uvalpha < 2048 means the nearest chroma line (ubuf1/vbuf1) is used as-is;
 * otherwise the two chroma lines are averaged ((a+b) >> 8 from 15-bit).
 */
1203 static av_always_inline void
1204 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1205 const int16_t *ubuf[2], const int16_t *vbuf[2],
1206 const int16_t *abuf0, uint8_t *dest, int dstW,
1207 int uvalpha, int y, enum PixelFormat target,
1210 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1211 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* nearest-neighbour chroma path */
1214 if (uvalpha < 2048) {
1215 for (i = 0; i < (dstW >> 1); i++) {
1216 int Y1 = buf0[i * 2] >> 7;
1217 int Y2 = buf0[i * 2 + 1] >> 7;
1218 int U = ubuf1[i] >> 7;
1219 int V = vbuf1[i] >> 7;
1221 const void *r = c->table_rV[V],
1222 *g = (c->table_gU[U] + c->table_gV[V]),
1223 *b = c->table_bU[U];
1226 A1 = abuf0[i * 2 ] >> 7;
1227 A2 = abuf0[i * 2 + 1] >> 7;
1230 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1231 r, g, b, y, target, hasAlpha);
/* averaged chroma path */
1234 for (i = 0; i < (dstW >> 1); i++) {
1235 int Y1 = buf0[i * 2] >> 7;
1236 int Y2 = buf0[i * 2 + 1] >> 7;
1237 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1238 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1240 const void *r = c->table_rV[V],
1241 *g = (c->table_gU[U] + c->table_gV[V]),
1242 *b = c->table_bU[U];
1245 A1 = abuf0[i * 2 ] >> 7;
1246 A2 = abuf0[i * 2 + 1] >> 7;
1249 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1250 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: stamp out concrete _X_c / _2_c / _1_c output functions
 * from the av_always_inline templates above, baking in the pixel format and
 * the hasAlpha condition so the compiler specializes each variant.
 * (No comments are inserted inside the macros: they are backslash-continued.)
 */
1255 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1256 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1257 const int16_t **lumSrc, int lumFilterSize, \
1258 const int16_t *chrFilter, const int16_t **chrUSrc, \
1259 const int16_t **chrVSrc, int chrFilterSize, \
1260 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1263 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1264 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1265 alpSrc, dest, dstW, y, fmt, hasAlpha); \
/* YUV2RGBWRAPPER additionally generates the 2-line-blend and 1-line variants */
1267 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1268 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1269 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1270 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1271 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1272 int yalpha, int uvalpha, int y) \
1274 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1275 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1278 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1279 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1280 const int16_t *abuf0, uint8_t *dest, int dstW, \
1281 int uvalpha, int y) \
1283 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1284 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiations: runtime-alpha 32-bit variants, forced-alpha (a32*) and
 * no-alpha (x32*) variants, then 24/16/15/12/8/4-bit targets. */
1288 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1289 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1291 #if CONFIG_SWSCALE_ALPHA
1292 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1293 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1295 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1296 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1298 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1299 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1300 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1301 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1302 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1303 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1304 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1305 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/*
 * Full-chroma-resolution YUV->RGB: one chroma sample per output pixel (no
 * 2:1 subsampling), computed arithmetically from the per-context coefficients
 * instead of lookup tables.  `step` is the output pixel stride in bytes
 * (3 for 24-bit, 4 for 32-bit formats).
 * NOTE(review): the R/G/B byte stores (>> from 30-bit) fall in lines missing
 * from this extraction; only the alpha stores are visible below.
 */
1307 static av_always_inline void
1308 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1309 const int16_t **lumSrc, int lumFilterSize,
1310 const int16_t *chrFilter, const int16_t **chrUSrc,
1311 const int16_t **chrVSrc, int chrFilterSize,
1312 const int16_t **alpSrc, uint8_t *dest,
1313 int dstW, int y, enum PixelFormat target, int hasAlpha)
1316 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1318 for (i = 0; i < dstW; i++) {
/* rounding bias minus the 128 chroma offset, pre-scaled to the FIR domain */
1321 int U = (1<<9)-(128 << 19);
1322 int V = (1<<9)-(128 << 19);
1326 for (j = 0; j < lumFilterSize; j++) {
1327 Y += lumSrc[j][i] * lumFilter[j];
1329 for (j = 0; j < chrFilterSize; j++) {
1330 U += chrUSrc[j][i] * chrFilter[j];
1331 V += chrVSrc[j][i] * chrFilter[j];
1338 for (j = 0; j < lumFilterSize; j++) {
1339 A += alpSrc[j][i] * lumFilter[j];
1343 A = av_clip_uint8(A);
/* matrix multiply using the context's fixed-point BT.601-style coefficients */
1345 Y -= c->yuv2rgb_y_offset;
1346 Y *= c->yuv2rgb_y_coeff;
1348 R = Y + V*c->yuv2rgb_v2r_coeff;
1349 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1350 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip only when some 30-bit intermediate went out of range */
1351 if ((R | G | B) & 0xC0000000) {
1352 R = av_clip_uintp2(R, 30);
1353 G = av_clip_uintp2(G, 30);
1354 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on the target layout (leading vs trailing) */
1359 dest[0] = hasAlpha ? A : 255;
1373 dest[3] = hasAlpha ? A : 255;
1376 dest[0] = hasAlpha ? A : 255;
1390 dest[3] = hasAlpha ? A : 255;
/* Instantiations of the full-chroma writers for the 32- and 24-bit targets */
1398 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1399 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1400 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1401 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1403 #if CONFIG_SWSCALE_ALPHA
1404 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1405 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1406 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1407 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
1409 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1410 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1411 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1412 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1414 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1415 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/*
 * Fill a `width` x `height` rectangle of a plane with the constant byte
 * value, starting at scanline y.  (The parameter list continues on lines not
 * visible in this extraction: presumably `int y, uint8_t val` — confirm.)
 */
1417 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1418 int width, int height,
1422 uint8_t *ptr = plane + stride*y;
1423 for (i=0; i<height; i++) {
1424 memset(ptr, val, width);
/* Read one 16-bit component with the byte order implied by `origin`. */
1429 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* For BGR48 layouts the first/third components swap roles, so `r`/`b`
 * transparently select between the r_b / b_r locals read below. */
1431 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1432 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/*
 * 48-bit RGB/BGR -> 16-bit luma, using the BT.601 RY/GY/BY coefficients and
 * a rounding bias that also adds the 16*256 luma offset.
 */
1434 static av_always_inline void
1435 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1436 enum PixelFormat origin)
1439 for (i = 0; i < width; i++) {
1440 unsigned int r_b = input_pixel(&src[i*3+0]);
1441 unsigned int g = input_pixel(&src[i*3+1]);
1442 unsigned int b_r = input_pixel(&src[i*3+2]);
1444 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * 48-bit RGB/BGR -> 16-bit chroma at full horizontal resolution.
 * (src2 is unused here; presumably asserted equal to src1 elsewhere.)
 */
1448 static av_always_inline void
1449 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1450 const uint16_t *src1, const uint16_t *src2,
1451 int width, enum PixelFormat origin)
1455 for (i = 0; i < width; i++) {
1456 int r_b = input_pixel(&src1[i*3+0]);
1457 int g = input_pixel(&src1[i*3+1]);
1458 int b_r = input_pixel(&src1[i*3+2]);
1460 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1461 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Same, but with 2:1 horizontal chroma decimation: each output sample is the
 * rounded average of two adjacent input pixels.
 */
1465 static av_always_inline void
1466 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1467 const uint16_t *src1, const uint16_t *src2,
1468 int width, enum PixelFormat origin)
1472 for (i = 0; i < width; i++) {
1473 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1474 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1475 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1477 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1478 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * Generate the concrete ToY / ToUV / ToUV_half entry points for one
 * 48-bit format (rgb/bgr x LE/BE), forwarding to the templates above with
 * the pixel format baked in.  Wrappers only cast the byte pointers to
 * uint16_t pointers.  (Backslash-continued macro: no inline comments.)
 */
1486 #define rgb48funcs(pattern, BE_LE, origin) \
1487 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
1488 int width, uint32_t *unused) \
1490 const uint16_t *src = (const uint16_t *) _src; \
1491 uint16_t *dst = (uint16_t *) _dst; \
1492 rgb48ToY_c_template(dst, src, width, origin); \
1495 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1496 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1497 int width, uint32_t *unused) \
1499 const uint16_t *src1 = (const uint16_t *) _src1, \
1500 *src2 = (const uint16_t *) _src2; \
1501 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1502 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1505 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1506 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
1507 int width, uint32_t *unused) \
1509 const uint16_t *src1 = (const uint16_t *) _src1, \
1510 *src2 = (const uint16_t *) _src2; \
1511 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1512 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
/* all four 48-bit variants */
1515 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1516 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1517 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1518 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Fetch one packed pixel: native 32-bit load for the 4-byte-per-pixel
 * formats, endian-aware 16-bit load otherwise. */
1520 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1521 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1522 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/*
 * Generic packed 16/32-bit RGB -> luma.  The shr/shg/shb + mask parameters
 * describe the channel layout, rsh/gsh/bsh pre-scale the BT.601 coefficients
 * so the sum is taken at precision S, producing 9-bit-headroom output
 * (>> (S-6) leaves 15-bit samples).
 */
1524 static av_always_inline void
1525 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1526 int width, enum PixelFormat origin,
1527 int shr, int shg, int shb, int shp,
1528 int maskr, int maskg, int maskb,
1529 int rsh, int gsh, int bsh, int S)
1531 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
/* rounding constant: luma offset (16 << ...) plus half an output LSB */
1532 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
1535 for (i = 0; i < width; i++) {
1536 int px = input_pixel(i) >> shp;
1537 int b = (px & maskb) >> shb;
1538 int g = (px & maskg) >> shg;
1539 int r = (px & maskr) >> shr;
1541 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/*
 * Packed 16/32-bit RGB -> chroma at full horizontal resolution.
 */
1545 static av_always_inline void
1546 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1547 const uint8_t *src, int width,
1548 enum PixelFormat origin,
1549 int shr, int shg, int shb, int shp,
1550 int maskr, int maskg, int maskb,
1551 int rsh, int gsh, int bsh, int S)
1553 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1554 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
/* 128-centred chroma offset plus half an output LSB */
1555 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
1558 for (i = 0; i < width; i++) {
1559 int px = input_pixel(i) >> shp;
1560 int b = (px & maskb) >> shb;
1561 int g = (px & maskg) >> shg;
1562 int r = (px & maskr) >> shr;
1564 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1565 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/*
 * Packed 16/32-bit RGB -> chroma with 2:1 horizontal decimation.  Two
 * adjacent pixels are summed while still packed: green is extracted via
 * maskgx, red+blue remain in `rb`; the masks are widened by one bit
 * (mask |= mask << 1) to hold the two-pixel sums.
 */
1569 static av_always_inline void
1570 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1571 const uint8_t *src, int width,
1572 enum PixelFormat origin,
1573 int shr, int shg, int shb, int shp,
1574 int maskr, int maskg, int maskb,
1575 int rsh, int gsh, int bsh, int S)
1577 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1578 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1579 maskgx = ~(maskr | maskb);
1580 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
1583 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1584 for (i = 0; i < width; i++) {
1585 int px0 = input_pixel(2 * i + 0) >> shp;
1586 int px1 = input_pixel(2 * i + 1) >> shp;
1587 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1588 int rb = px0 + px1 - g;
1590 b = (rb & maskb) >> shb;
/* 565-style layouts need the green sum re-masked (carry into red field) */
1591 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1592 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1595 g = (g & maskg) >> shg;
1597 r = (rb & maskr) >> shr;
1599 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1600 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/*
 * Generate ToY / ToUV / ToUV_half entry points for one packed RGB layout,
 * passing the layout's shifts/masks/precision to the generic templates.
 * (Backslash-continued macro: no inline comments.)
 */
1606 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1607 maskg, maskb, rsh, gsh, bsh, S) \
1608 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
1609 int width, uint32_t *unused) \
1611 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
1612 shr, shg, shb, shp, \
1613 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1616 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1617 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1618 int width, uint32_t *unused) \
1620 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1621 shr, shg, shb, shp, \
1622 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1625 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1626 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
1627 int width, uint32_t *unused) \
1629 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
1630 shr, shg, shb, shp, \
1631 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* One instantiation per supported packed layout; columns are
 * shr/shg/shb/shp, the three channel masks, coefficient shifts, precision. */
1634 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1635 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
1636 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1637 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
1638 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1639 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1640 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1641 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1642 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1643 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
1644 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
1645 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
1646 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
1647 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
1648 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
1649 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
/*
 * Planar GBR (8-bit, G/B/R planes) -> chroma with 2:1 horizontal decimation:
 * each component is the sum of two adjacent samples (averaging folded into
 * the final shift, hence the extra +1 in the shift amount).
 */
1651 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
1652 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
1653 int width, enum PixelFormat origin)
1656 for (i = 0; i < width; i++) {
1657 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
1658 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
1659 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
/* chroma offset + rounding, output left at 15-bit intermediate precision */
1661 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
1662 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/*
 * Extract the alpha channel from packed pixels whose alpha is the FIRST of
 * each group of four bytes (ABGR/ARGB byte order), widening each 8-bit
 * sample to the 14-bit intermediate range used by the scaler (<< 6).
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = src[4 * i] << 6;
}
/*
 * Extract the alpha channel from packed pixels whose alpha is the LAST of
 * each group of four bytes (RGBA/BGRA byte order), widening each 8-bit
 * sample to the 14-bit intermediate range used by the scaler (<< 6).
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = src[4 * i + 3] << 6;
}
/*
 * Paletted input: look up each index in the 32-bit AYUV palette `pal` and
 * extract one component, widened to 14-bit (<< 6).
 * NOTE(review): the palette-index load (presumably `int d = src[i];`) falls
 * in lines missing from this extraction.
 */
1682 static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
1685 for (i=0; i<width; i++) {
1688 dst[i]= (pal[d] >> 24)<<6;
/* NOTE(review): `long width` differs from the `int width` used by every
 * sibling converter — looks like an oversight; confirm before unifying. */
1692 static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
1695 for (i=0; i<width; i++) {
1698 dst[i]= (pal[d] & 0xFF)<<6;
/* U from palette byte 1, V from byte 2.
 * NOTE(review): dstU is uint16_t* while dstV is int16_t* — inconsistent
 * signedness in the signature; verify against callers before changing. */
1702 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
1703 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1704 int width, uint32_t *pal)
1707 assert(src1 == src2);
1708 for (i=0; i<width; i++) {
1709 int p= pal[src1[i]];
1711 dstU[i]= (uint8_t)(p>> 8)<<6;
1712 dstV[i]= (uint8_t)(p>>16)<<6;
/*
 * 1-bpp monochrome -> 15-bit luma: each input byte holds 8 pixels, MSB
 * first; every bit becomes 0 or 16383.  monowhite / monoblack differ only in
 * the polarity of the byte read (the `int d = ...` load is in lines missing
 * from this extraction — presumably inverted for monowhite).  The trailing
 * loop handles the final partial byte when width is not a multiple of 8.
 */
1716 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1719 for (i=0; i<width/8; i++) {
1722 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1726 for(j=0; j<(width&7); j++)
1727 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Same bit-unpacking with the opposite black/white polarity. */
1731 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1734 for (i=0; i<width/8; i++) {
1737 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1741 for(j=0; j<(width&7); j++)
1742 dst[8*i+j]= ((d>>(7-j))&1)*16383;
1746 //FIXME yuy2* can read up to 7 samples too much
/* YUYV packed -> luma plane (the `dst[i] = src[2*i];` body line is missing
 * from this extraction). */
1748 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1752 for (i=0; i<width; i++)
/* YUYV packed -> U/V planes: U at byte offset 1, V at offset 3 of each
 * 4-byte Y0-U-Y1-V group.  src2 must alias src1 (asserted below). */
1756 static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1757 const uint8_t *src2, int width, uint32_t *unused)
1760 for (i=0; i<width; i++) {
1761 dstU[i]= src1[4*i + 1];
1762 dstV[i]= src1[4*i + 3];
1764 assert(src1 == src2);
/* Byte-swap a 16-bit-per-sample luma row (for foreign-endian input). */
1767 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
1770 const uint16_t *src = (const uint16_t *) _src;
1771 uint16_t *dst = (uint16_t *) _dst;
1772 for (i=0; i<width; i++) {
1773 dst[i] = av_bswap16(src[i]);
/* Byte-swap 16-bit U and V rows; the two sources are independent planes. */
1777 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
1778 const uint8_t *_src2, int width, uint32_t *unused)
1781 const uint16_t *src1 = (const uint16_t *) _src1,
1782 *src2 = (const uint16_t *) _src2;
1783 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1784 for (i=0; i<width; i++) {
1785 dstU[i] = av_bswap16(src1[i]);
1786 dstV[i] = av_bswap16(src2[i]);
1790 /* This is almost identical to the previous, and exists only because
1791 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY packed -> luma plane (the body line reading src[2*i+1] is missing
 * from this extraction). */
1792 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1796 for (i=0; i<width; i++)
/* UYVY packed -> U/V planes: U at byte offset 0, V at offset 2 of each
 * 4-byte U-Y0-V-Y1 group.  src2 must alias src1 (asserted below). */
1800 static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1801 const uint8_t *src2, int width, uint32_t *unused)
1804 for (i=0; i<width; i++) {
1805 dstU[i]= src1[4*i + 0];
1806 dstV[i]= src1[4*i + 2];
1808 assert(src1 == src2);
/* De-interleave a semi-planar (NV12/NV21-style) chroma row into two planes. */
1811 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1812 const uint8_t *src, int width)
1815 for (i = 0; i < width; i++) {
1816 dst1[i] = src[2*i+0];
1817 dst2[i] = src[2*i+1];
/* NV12: interleaved order is U,V */
1821 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1822 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1823 int width, uint32_t *unused)
1825 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21: interleaved order is V,U — note the swapped destination pointers */
1828 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1829 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
1830 int width, uint32_t *unused)
1832 nvXXtoUV_c(dstV, dstU, src1, width);
1835 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/*
 * Packed 24-bit BGR -> luma; output stays at 15-bit intermediate precision
 * (>> (RGB2YUV_SHIFT-6)); the bias folds in the 16-level luma offset plus
 * rounding.  (The b/g/r byte loads fall in lines missing from this
 * extraction.)
 */
1837 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
1838 int width, uint32_t *unused)
1841 for (i=0; i<width; i++) {
1846 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* BGR24 -> chroma, full horizontal resolution; src2 must alias src1. */
1850 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1851 const uint8_t *src2, int width, uint32_t *unused)
1854 for (i=0; i<width; i++) {
1855 int b= src1[3*i + 0];
1856 int g= src1[3*i + 1];
1857 int r= src1[3*i + 2];
1859 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1860 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1862 assert(src1 == src2);
/* BGR24 -> chroma with 2:1 decimation: adjacent pixel pairs are summed,
 * averaging folded into the wider bias/shift. */
1865 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1866 const uint8_t *src2, int width, uint32_t *unused)
1869 for (i=0; i<width; i++) {
1870 int b= src1[6*i + 0] + src1[6*i + 3];
1871 int g= src1[6*i + 1] + src1[6*i + 4];
1872 int r= src1[6*i + 2] + src1[6*i + 5];
1874 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1875 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1877 assert(src1 == src2);
/* RGB24 variants: identical math, opposite channel order in memory. */
1880 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
1884 for (i=0; i<width; i++) {
1889 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1893 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1894 const uint8_t *src2, int width, uint32_t *unused)
1898 for (i=0; i<width; i++) {
1899 int r= src1[3*i + 0];
1900 int g= src1[3*i + 1];
1901 int b= src1[3*i + 2];
1903 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1904 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1908 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
1909 const uint8_t *src2, int width, uint32_t *unused)
1913 for (i=0; i<width; i++) {
1914 int r= src1[6*i + 0] + src1[6*i + 3];
1915 int g= src1[6*i + 1] + src1[6*i + 4];
1916 int b= src1[6*i + 2] + src1[6*i + 5];
1918 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1919 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/*
 * Planar 8-bit RGB (G/B/R plane order, as src[0]/src[1]/src[2]) -> luma at
 * 15-bit intermediate precision.  (The per-plane r/g/b loads fall in lines
 * missing from this extraction.)
 */
1923 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
1926 for (i = 0; i < width; i++) {
1931 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar 16-bit little-endian RGB -> 16-bit luma (full-precision output). */
1935 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1938 const uint16_t **src = (const uint16_t **) _src;
1939 uint16_t *dst = (uint16_t *) _dst;
1940 for (i = 0; i < width; i++) {
1941 int g = AV_RL16(src[0] + i);
1942 int b = AV_RL16(src[1] + i);
1943 int r = AV_RL16(src[2] + i);
1945 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Big-endian twin of the above. */
1949 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1952 const uint16_t **src = (const uint16_t **) _src;
1953 uint16_t *dst = (uint16_t *) _dst;
1954 for (i = 0; i < width; i++) {
1955 int g = AV_RB16(src[0] + i);
1956 int b = AV_RB16(src[1] + i);
1957 int r = AV_RB16(src[2] + i);
1959 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar 8-bit RGB -> chroma at 15-bit intermediate precision. */
1963 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
1966 for (i = 0; i < width; i++) {
1971 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
1972 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar 16-bit LE RGB -> 16-bit chroma. */
1976 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1979 const uint16_t **src = (const uint16_t **) _src;
1980 uint16_t *dstU = (uint16_t *) _dstU;
1981 uint16_t *dstV = (uint16_t *) _dstV;
1982 for (i = 0; i < width; i++) {
1983 int g = AV_RL16(src[0] + i);
1984 int b = AV_RL16(src[1] + i);
1985 int r = AV_RL16(src[2] + i);
1987 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1988 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Big-endian twin of the above. */
1992 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1995 const uint16_t **src = (const uint16_t **) _src;
1996 uint16_t *dstU = (uint16_t *) _dstU;
1997 uint16_t *dstV = (uint16_t *) _dstV;
1998 for (i = 0; i < width; i++) {
1999 int g = AV_RB16(src[0] + i);
2000 int b = AV_RB16(src[1] + i);
2001 int r = AV_RB16(src[2] + i);
2003 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
2004 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/*
 * Horizontal FIR scaling of a 16-bit input row to 19-bit output samples
 * (stored in int32).  filterPos[i] is the first source sample for output i;
 * filter holds filterSize 14-bit taps per output sample.  The shift `sh`
 * is derived from the source bit depth (its assignment is in lines missing
 * from this extraction; the `bits`/RGB special case below feeds it).
 */
2008 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
2009 const int16_t *filter,
2010 const int16_t *filterPos, int filterSize)
2013 int32_t *dst = (int32_t *) _dst;
2014 const uint16_t *src = (const uint16_t *) _src;
2015 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2018 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2021 for (i = 0; i < dstW; i++) {
2023 int srcPos = filterPos[i];
2026 for (j = 0; j < filterSize; j++) {
2027 val += src[srcPos + j] * filter[filterSize * i + j];
2029 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2030 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Same, but producing 15-bit output in int16. */
2034 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2035 const int16_t *filter,
2036 const int16_t *filterPos, int filterSize)
2039 const uint16_t *src = (const uint16_t *) _src;
2040 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
/* RGB/PAL8 inputs are pre-scaled, hence the fixed shift of 13 */
2043 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2045 for (i = 0; i < dstW; i++) {
2047 int srcPos = filterPos[i];
2050 for (j = 0; j < filterSize; j++) {
2051 val += src[srcPos + j] * filter[filterSize * i + j];
2053 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2054 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2058 // bilinear / bicubic scaling
/*
 * Horizontal FIR scaling of an 8-bit input row to 15-bit output; the FFMIN
 * clamp guards against overflow from negative-lobe (bicubic) filters.
 */
2059 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2060 const int16_t *filter, const int16_t *filterPos,
2064 for (i=0; i<dstW; i++) {
2066 int srcPos= filterPos[i];
2068 for (j=0; j<filterSize; j++) {
2069 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2071 //filter += hFilterSize;
2072 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Same, but producing 19-bit output in int32 (high-bit-depth pipeline). */
2077 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2078 const int16_t *filter, const int16_t *filterPos,
2082 int32_t *dst = (int32_t *) _dst;
2083 for (i=0; i<dstW; i++) {
2085 int srcPos= filterPos[i];
2087 for (j=0; j<filterSize; j++) {
2088 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2090 //filter += hFilterSize;
2091 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2096 //FIXME all pal and rgb srcFormats could do this conversion as well
2097 //FIXME all scalers more complex than bilinear could do half of this transform
/*
 * Expand MPEG-range (16..240) chroma to JPEG full range, in place, on
 * 15-bit intermediate samples: out = (min(in, 30775) * 4663 - 9289992) >> 12
 * (about -264 at the black level).  The clamp keeps the multiply from
 * overflowing past full-scale input.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;

        dstU[i] = (u * 4663 - 9289992) >> 12;
        dstV[i] = (v * 4663 - 9289992) >> 12;
    }
}
/**
 * Compress one line of full-range (JPEG) chroma samples back to limited
 * (MPEG) range, in place.  Inverse of chrRangeToJpeg_c().
 *
 * @param dstU  U samples in the 15-bit intermediate format, modified in place
 * @param dstV  V samples in the 15-bit intermediate format, modified in place
 * @param width number of samples per plane
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; // +1469 after the shift
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11; // +1469
    }
}
/**
 * Expand one line of 15-bit limited-range (MPEG) luma samples to
 * full-range (JPEG) levels, in place.
 *
 * @param dst   luma samples in the 15-bit intermediate format, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        /* clamp first so the scaled value cannot exceed 15 bits */
        int y = dst[i] > 30189 ? 30189 : dst[i];
        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/**
 * Compress one line of full-range (JPEG) luma samples back to limited
 * (MPEG) range, in place.  Inverse of lumRangeToJpeg_c().
 *
 * @param dst   luma samples in the 15-bit intermediate format, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
}
/**
 * Expand one line of 19-bit limited-range (MPEG) chroma samples to
 * full-range (JPEG) levels, in place.  16-bit-depth variant of
 * chrRangeToJpeg_c(); the data is really int32_t behind the int16_t*
 * interface.
 *
 * The multiply is performed in unsigned arithmetic: at the clamp value
 * (30775 << 4) the product 492400 * 4663 = 2296061200 exceeds INT32_MAX,
 * so a signed multiply would be undefined behavior.  After subtracting
 * the offset the result is back below INT32_MAX, so the cast to int and
 * the arithmetic shift are well defined for in-range inputs.
 *
 * @param _dstU U samples (int32_t data), modified in place
 * @param _dstV V samples (int32_t data), modified in place
 * @param width number of samples per plane
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        /* clamp first so the unsigned product minus the offset fits in an int */
        int32_t u = dstU[i] > (30775 << 4) ? (30775 << 4) : dstU[i];
        int32_t v = dstV[i] > (30775 << 4) ? (30775 << 4) : dstV[i];

        dstU[i] = (int)(u * 4663U - (9289992 << 4)) >> 12; // -264 after the shift
        dstV[i] = (int)(v * 4663U - (9289992 << 4)) >> 12; // -264
    }
}
/**
 * Compress one line of full-range (JPEG) chroma samples back to limited
 * (MPEG) range, in place.  16-bit-depth variant of chrRangeFromJpeg_c();
 * the data is really int32_t behind the int16_t* interface.
 *
 * @param _dstU U samples (int32_t data), modified in place
 * @param _dstV V samples (int32_t data), modified in place
 * @param width number of samples per plane
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = (dstU[n] * 1799 + (4081085 << 4)) >> 11; // +1469 after the shift
        dstV[n] = (dstV[n] * 1799 + (4081085 << 4)) >> 11; // +1469
    }
}
/**
 * Expand one line of 19-bit limited-range (MPEG) luma samples to
 * full-range (JPEG) levels, in place.  16-bit-depth variant of
 * lumRangeToJpeg_c(); the data is really int32_t behind the int16_t*
 * interface.
 *
 * The multiply is performed in unsigned arithmetic: at the clamp value
 * (30189 << 4) the product 483024 * 4769 = 2303541456 exceeds INT32_MAX,
 * so a signed multiply would be undefined behavior.  After subtracting
 * the offset the result is back below INT32_MAX, so the cast to int and
 * the arithmetic shift are well defined for in-range inputs.
 *
 * @param _dst  luma samples (int32_t data), modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        /* clamp first so the unsigned product minus the offset fits in an int */
        int32_t y = dst[i] > (30189 << 4) ? (30189 << 4) : dst[i];
        dst[i] = (int)(y * 4769U - (39057361 << 2)) >> 12;
    }
}
/**
 * Compress one line of full-range (JPEG) luma samples back to limited
 * (MPEG) range, in place.  16-bit-depth variant of lumRangeFromJpeg_c();
 * the data is really int32_t behind the int16_t* interface.
 *
 * @param _dst  luma samples (int32_t data), modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int n;

    /* coefficient and offset are pre-divided by 4 so the sum stays in 32 bits */
    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * (14071 / 4) + (33561947 << 4) / 4) >> 12;
}
/**
 * Fast-bilinear horizontal luma scaler: walks the source with a 16.16
 * fixed-point position, linearly interpolating between adjacent samples
 * into the 15-bit intermediate format, then rewrites the tail pixels
 * that would read past srcW-1 with the last source sample.
 *
 * NOTE(review): the per-iteration advance of xpos (by xInc) is missing
 * from this copy of the file — TODO confirm against upstream.
 */
2162 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2163 const uint8_t *src, int srcW, int xInc)
2166 unsigned int xpos=0;
2167 for (i=0;i<dstWidth;i++) {
2168 register unsigned int xx=xpos>>16;
2169 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2170 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
// tail fixup: clamp output pixels whose source position reaches srcW-1
2173 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2174 dst[i] = src[srcW-1]*128;
2177 // *** horizontal scale Y line to temp buffer
/**
 * Scale one luma (or alpha, when isAlpha) line horizontally into dst:
 * optionally convert the input to a planar 8/16-bit line first (packed
 * formats via toYV12, planar RGB via readLumPlanar), then run either the
 * generic FIR scaler or the fast-bilinear path, and finally apply the
 * luma range conversion if one is configured.
 *
 * NOTE(review): this copy of the file is missing interior lines (the
 * opening braces and the if() conditions around the convert/range steps).
 */
2178 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2179 const uint8_t *src_in[4], int srcW, int xInc,
2180 const int16_t *hLumFilter,
2181 const int16_t *hLumFilterPos, int hLumFilterSize,
2182 uint8_t *formatConvBuffer,
2183 uint32_t *pal, int isAlpha)
2185 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2186 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2187 const uint8_t *src = src_in[isAlpha ? 3 : 0];
2190 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
2191 src= formatConvBuffer;
2192 } else if (c->readLumPlanar && !isAlpha) {
2193 c->readLumPlanar(formatConvBuffer, src_in, srcW);
2194 src = formatConvBuffer;
2197 if (!c->hyscale_fast) {
2198 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2199 } else { // fast bilinear upscale / crap downscale
2200 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
// range conversion applies to luma only; convertRange is NULL for alpha
2204 convertRange(dst, dstWidth);
/**
 * Fast-bilinear horizontal chroma scaler: same 16.16 fixed-point walk as
 * hyscale_fast_c() but interpolating the U and V planes in lockstep,
 * then rewriting the tail pixels that would read past srcW-1.
 *
 * NOTE(review): the per-iteration advance of xpos (by xInc) is missing
 * from this copy of the file — TODO confirm against upstream.
 */
2207 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2208 int dstWidth, const uint8_t *src1,
2209 const uint8_t *src2, int srcW, int xInc)
2212 unsigned int xpos=0;
2213 for (i=0;i<dstWidth;i++) {
2214 register unsigned int xx=xpos>>16;
2215 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2216 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2217 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
// tail fixup: clamp output pixels whose source position reaches srcW-1
2220 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2221 dst1[i] = src1[srcW-1]*128;
2222 dst2[i] = src2[srcW-1]*128;
/**
 * Scale one chroma line pair (U into dst1, V into dst2) horizontally:
 * optionally unpack/convert the input into formatConvBuffer (+ a second
 * aligned buffer for the other plane) first, then run either the generic
 * FIR scaler per plane or the fast-bilinear path, and finally apply the
 * chroma range conversion if one is configured.
 *
 * NOTE(review): this copy of the file is missing interior lines (the
 * opening braces, the if() conditions, and the src2 reassignments after
 * conversion).
 */
2226 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2227 const uint8_t *src_in[4],
2228 int srcW, int xInc, const int16_t *hChrFilter,
2229 const int16_t *hChrFilterPos, int hChrFilterSize,
2230 uint8_t *formatConvBuffer, uint32_t *pal)
2232 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
// second plane goes into a 16-byte-aligned buffer past the first one
2234 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2235 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
2236 src1= formatConvBuffer;
2238 } else if (c->readChrPlanar) {
2239 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2240 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2241 src1= formatConvBuffer;
2245 if (!c->hcscale_fast) {
2246 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2247 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2248 } else { // fast bilinear upscale / crap downscale
2249 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2252 if (c->chrConvertRange)
2253 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the C output functions for the destination format: planar
 * writers (yuv2plane1/yuv2planeX, plus yuv2nv12cX for interleaved
 * chroma) chosen by destination bit depth, and packed writers
 * (yuv2packed1/2/X) chosen by destination pixel format, with a separate
 * table when SWS_FULL_CHR_H_INT (full horizontal chroma interpolation)
 * is enabled.
 *
 * NOTE(review): this copy of the file is missing many interior lines —
 * case labels, break statements, and the CONFIG_SMALL/#if alternatives —
 * so the switch bodies below are fragmentary.
 */
2256 static av_always_inline void
2257 find_c_packed_planar_out_funcs(SwsContext *c,
2258 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2259 yuv2interleavedX_fn *yuv2nv12cX,
2260 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2261 yuv2packedX_fn *yuv2packedX)
2263 enum PixelFormat dstFormat = c->dstFormat;
// planar writers: pick by destination bit depth (16 / 9-10 / 8)
2265 if (is16BPS(dstFormat)) {
2266 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2267 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2268 } else if (is9_OR_10BPS(dstFormat)) {
2269 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2270 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2271 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2273 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2274 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2277 *yuv2plane1 = yuv2plane1_8_c;
2278 *yuv2planeX = yuv2planeX_8_c;
2279 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2280 *yuv2nv12cX = yuv2nv12cX_c;
// packed writers, full horizontal chroma interpolation variants
2283 if(c->flags & SWS_FULL_CHR_H_INT) {
2284 switch (dstFormat) {
2287 *yuv2packedX = yuv2rgba32_full_X_c;
2289 #if CONFIG_SWSCALE_ALPHA
2291 *yuv2packedX = yuv2rgba32_full_X_c;
2293 #endif /* CONFIG_SWSCALE_ALPHA */
2295 *yuv2packedX = yuv2rgbx32_full_X_c;
2297 #endif /* !CONFIG_SMALL */
2301 *yuv2packedX = yuv2argb32_full_X_c;
2303 #if CONFIG_SWSCALE_ALPHA
2305 *yuv2packedX = yuv2argb32_full_X_c;
2307 #endif /* CONFIG_SWSCALE_ALPHA */
2309 *yuv2packedX = yuv2xrgb32_full_X_c;
2311 #endif /* !CONFIG_SMALL */
2315 *yuv2packedX = yuv2bgra32_full_X_c;
2317 #if CONFIG_SWSCALE_ALPHA
2319 *yuv2packedX = yuv2bgra32_full_X_c;
2321 #endif /* CONFIG_SWSCALE_ALPHA */
2323 *yuv2packedX = yuv2bgrx32_full_X_c;
2325 #endif /* !CONFIG_SMALL */
2329 *yuv2packedX = yuv2abgr32_full_X_c;
2331 #if CONFIG_SWSCALE_ALPHA
2333 *yuv2packedX = yuv2abgr32_full_X_c;
2335 #endif /* CONFIG_SWSCALE_ALPHA */
2337 *yuv2packedX = yuv2xbgr32_full_X_c;
2339 #endif /* !CONFIG_SMALL */
2342 *yuv2packedX = yuv2rgb24_full_X_c;
2345 *yuv2packedX = yuv2bgr24_full_X_c;
// packed writers, normal (non-full-chroma) RGB/BGR variants
2352 switch (dstFormat) {
2353 case PIX_FMT_RGB48LE:
2354 *yuv2packed1 = yuv2rgb48le_1_c;
2355 *yuv2packed2 = yuv2rgb48le_2_c;
2356 *yuv2packedX = yuv2rgb48le_X_c;
2358 case PIX_FMT_RGB48BE:
2359 *yuv2packed1 = yuv2rgb48be_1_c;
2360 *yuv2packed2 = yuv2rgb48be_2_c;
2361 *yuv2packedX = yuv2rgb48be_X_c;
2363 case PIX_FMT_BGR48LE:
2364 *yuv2packed1 = yuv2bgr48le_1_c;
2365 *yuv2packed2 = yuv2bgr48le_2_c;
2366 *yuv2packedX = yuv2bgr48le_X_c;
2368 case PIX_FMT_BGR48BE:
2369 *yuv2packed1 = yuv2bgr48be_1_c;
2370 *yuv2packed2 = yuv2bgr48be_2_c;
2371 *yuv2packedX = yuv2bgr48be_X_c;
2376 *yuv2packed1 = yuv2rgb32_1_c;
2377 *yuv2packed2 = yuv2rgb32_2_c;
2378 *yuv2packedX = yuv2rgb32_X_c;
2380 #if CONFIG_SWSCALE_ALPHA
2382 *yuv2packed1 = yuv2rgba32_1_c;
2383 *yuv2packed2 = yuv2rgba32_2_c;
2384 *yuv2packedX = yuv2rgba32_X_c;
2386 #endif /* CONFIG_SWSCALE_ALPHA */
2388 *yuv2packed1 = yuv2rgbx32_1_c;
2389 *yuv2packed2 = yuv2rgbx32_2_c;
2390 *yuv2packedX = yuv2rgbx32_X_c;
2392 #endif /* !CONFIG_SMALL */
2394 case PIX_FMT_RGB32_1:
2395 case PIX_FMT_BGR32_1:
2397 *yuv2packed1 = yuv2rgb32_1_1_c;
2398 *yuv2packed2 = yuv2rgb32_1_2_c;
2399 *yuv2packedX = yuv2rgb32_1_X_c;
2401 #if CONFIG_SWSCALE_ALPHA
2403 *yuv2packed1 = yuv2rgba32_1_1_c;
2404 *yuv2packed2 = yuv2rgba32_1_2_c;
2405 *yuv2packedX = yuv2rgba32_1_X_c;
2407 #endif /* CONFIG_SWSCALE_ALPHA */
2409 *yuv2packed1 = yuv2rgbx32_1_1_c;
2410 *yuv2packed2 = yuv2rgbx32_1_2_c;
2411 *yuv2packedX = yuv2rgbx32_1_X_c;
2413 #endif /* !CONFIG_SMALL */
2416 *yuv2packed1 = yuv2rgb24_1_c;
2417 *yuv2packed2 = yuv2rgb24_2_c;
2418 *yuv2packedX = yuv2rgb24_X_c;
2421 *yuv2packed1 = yuv2bgr24_1_c;
2422 *yuv2packed2 = yuv2bgr24_2_c;
2423 *yuv2packedX = yuv2bgr24_X_c;
2425 case PIX_FMT_RGB565LE:
2426 case PIX_FMT_RGB565BE:
2427 case PIX_FMT_BGR565LE:
2428 case PIX_FMT_BGR565BE:
2429 *yuv2packed1 = yuv2rgb16_1_c;
2430 *yuv2packed2 = yuv2rgb16_2_c;
2431 *yuv2packedX = yuv2rgb16_X_c;
2433 case PIX_FMT_RGB555LE:
2434 case PIX_FMT_RGB555BE:
2435 case PIX_FMT_BGR555LE:
2436 case PIX_FMT_BGR555BE:
2437 *yuv2packed1 = yuv2rgb15_1_c;
2438 *yuv2packed2 = yuv2rgb15_2_c;
2439 *yuv2packedX = yuv2rgb15_X_c;
2441 case PIX_FMT_RGB444LE:
2442 case PIX_FMT_RGB444BE:
2443 case PIX_FMT_BGR444LE:
2444 case PIX_FMT_BGR444BE:
2445 *yuv2packed1 = yuv2rgb12_1_c;
2446 *yuv2packed2 = yuv2rgb12_2_c;
2447 *yuv2packedX = yuv2rgb12_X_c;
2451 *yuv2packed1 = yuv2rgb8_1_c;
2452 *yuv2packed2 = yuv2rgb8_2_c;
2453 *yuv2packedX = yuv2rgb8_X_c;
2457 *yuv2packed1 = yuv2rgb4_1_c;
2458 *yuv2packed2 = yuv2rgb4_2_c;
2459 *yuv2packedX = yuv2rgb4_X_c;
2461 case PIX_FMT_RGB4_BYTE:
2462 case PIX_FMT_BGR4_BYTE:
2463 *yuv2packed1 = yuv2rgb4b_1_c;
2464 *yuv2packed2 = yuv2rgb4b_2_c;
2465 *yuv2packedX = yuv2rgb4b_X_c;
// non-RGB packed outputs: gray16, mono, packed 4:2:2 YUV
2469 switch (dstFormat) {
2470 case PIX_FMT_GRAY16BE:
2471 *yuv2packed1 = yuv2gray16BE_1_c;
2472 *yuv2packed2 = yuv2gray16BE_2_c;
2473 *yuv2packedX = yuv2gray16BE_X_c;
2475 case PIX_FMT_GRAY16LE:
2476 *yuv2packed1 = yuv2gray16LE_1_c;
2477 *yuv2packed2 = yuv2gray16LE_2_c;
2478 *yuv2packedX = yuv2gray16LE_X_c;
2480 case PIX_FMT_MONOWHITE:
2481 *yuv2packed1 = yuv2monowhite_1_c;
2482 *yuv2packed2 = yuv2monowhite_2_c;
2483 *yuv2packedX = yuv2monowhite_X_c;
2485 case PIX_FMT_MONOBLACK:
2486 *yuv2packed1 = yuv2monoblack_1_c;
2487 *yuv2packed2 = yuv2monoblack_2_c;
2488 *yuv2packedX = yuv2monoblack_X_c;
2490 case PIX_FMT_YUYV422:
2491 *yuv2packed1 = yuv2yuyv422_1_c;
2492 *yuv2packed2 = yuv2yuyv422_2_c;
2493 *yuv2packedX = yuv2yuyv422_X_c;
2495 case PIX_FMT_UYVY422:
2496 *yuv2packed1 = yuv2uyvy422_1_c;
2497 *yuv2packed2 = yuv2uyvy422_2_c;
2498 *yuv2packedX = yuv2uyvy422_X_c;
/* Buffer-tracing macro: compiled out unless DEBUG_SWSCALE_BUFFERS is set. */
2503 #define DEBUG_SWSCALE_BUFFERS 0
2504 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Main scaling loop: consumes the slice src[0..3]/srcStride starting at
 * srcSliceY (srcSliceH lines) and writes every destination line that can
 * be completed.  Works line-by-line: horizontally scales source lines
 * into the lum/chr ring buffers, then vertically filters the buffered
 * lines into the destination via the yuv2* output functions.
 *
 * @return the number of destination lines written (dstY - lastDstY)
 *
 * NOTE(review): this copy of the file is missing many interior lines
 * (variable declarations such as dstY/lastDstY/enough_lines, several
 * braces and increments like lumBufIndex++/lastInLumBuf++) — compare
 * against upstream before editing.
 */
2506 static int swScale(SwsContext *c, const uint8_t* src[],
2507 int srcStride[], int srcSliceY,
2508 int srcSliceH, uint8_t* dst[], int dstStride[])
2510 /* load a few things into local vars to make the code more readable and faster */
2511 const int srcW= c->srcW;
2512 const int dstW= c->dstW;
2513 const int dstH= c->dstH;
2514 const int chrDstW= c->chrDstW;
2515 const int chrSrcW= c->chrSrcW;
2516 const int lumXInc= c->lumXInc;
2517 const int chrXInc= c->chrXInc;
2518 const enum PixelFormat dstFormat= c->dstFormat;
2519 const int flags= c->flags;
2520 int16_t *vLumFilterPos= c->vLumFilterPos;
2521 int16_t *vChrFilterPos= c->vChrFilterPos;
2522 int16_t *hLumFilterPos= c->hLumFilterPos;
2523 int16_t *hChrFilterPos= c->hChrFilterPos;
2524 int16_t *hLumFilter= c->hLumFilter;
2525 int16_t *hChrFilter= c->hChrFilter;
2526 int32_t *lumMmxFilter= c->lumMmxFilter;
2527 int32_t *chrMmxFilter= c->chrMmxFilter;
2528 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2529 const int vLumFilterSize= c->vLumFilterSize;
2530 const int vChrFilterSize= c->vChrFilterSize;
2531 const int hLumFilterSize= c->hLumFilterSize;
2532 const int hChrFilterSize= c->hChrFilterSize;
2533 int16_t **lumPixBuf= c->lumPixBuf;
2534 int16_t **chrUPixBuf= c->chrUPixBuf;
2535 int16_t **chrVPixBuf= c->chrVPixBuf;
2536 int16_t **alpPixBuf= c->alpPixBuf;
2537 const int vLumBufSize= c->vLumBufSize;
2538 const int vChrBufSize= c->vChrBufSize;
2539 uint8_t *formatConvBuffer= c->formatConvBuffer;
2540 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2541 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2543 uint32_t *pal=c->pal_yuv;
2544 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2546 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2547 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2548 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2549 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2550 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2551 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2553 /* vars which will change and which we need to store back in the context */
2555 int lumBufIndex= c->lumBufIndex;
2556 int chrBufIndex= c->chrBufIndex;
2557 int lastInLumBuf= c->lastInLumBuf;
2558 int lastInChrBuf= c->lastInChrBuf;
// packed input: all four plane pointers/strides alias plane 0
2560 if (isPacked(c->srcFormat)) {
2568 srcStride[3]= srcStride[0];
2570 srcStride[1]<<= c->vChrDrop;
2571 srcStride[2]<<= c->vChrDrop;
2573 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2574 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2575 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2576 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2577 srcSliceY, srcSliceH, dstY, dstH);
2578 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2579 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2581 if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
2582 static int warnedAlready=0; //FIXME move this into the context perhaps
2583 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2584 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2585 " ->cannot do aligned memory accesses anymore\n");
2590 if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
2591 || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
2592 || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
2594 static int warnedAlready=0;
2595 int cpu_flags = av_get_cpu_flags();
2596 if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
2597 av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
2602 /* Note the user might start scaling the picture in the middle so this
2603 will not get executed. This is not really intended but works
2604 currently, so people might do it. */
2605 if (srcSliceY ==0) {
2613 if (!should_dither) {
2614 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
// main loop: emit one destination line per iteration while inputs suffice
2618 for (;dstY < dstH; dstY++) {
2619 const int chrDstY= dstY>>c->chrDstVSubSample;
2620 uint8_t *dest[4] = {
2621 dst[0] + dstStride[0] * dstY,
2622 dst[1] + dstStride[1] * chrDstY,
2623 dst[2] + dstStride[2] * chrDstY,
2624 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2626 int use_mmx_vfilter= c->use_mmx_vfilter;
2628 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2629 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2630 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2631 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2632 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2633 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2636 //handle holes (FAST_BILINEAR & weird filters)
2637 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2638 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2639 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2640 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2642 DEBUG_BUFFERS("dstY: %d\n", dstY);
2643 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2644 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2645 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2646 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2648 // Do we have enough lines in this slice to output the dstY line
2649 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2651 if (!enough_lines) {
2652 lastLumSrcY = srcSliceY + srcSliceH - 1;
2653 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2654 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2655 lastLumSrcY, lastChrSrcY);
2658 //Do horizontal scaling
2659 while(lastInLumBuf < lastLumSrcY) {
2660 const uint8_t *src1[4] = {
2661 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2662 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2663 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2664 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2667 assert(lumBufIndex < 2*vLumBufSize);
2668 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2669 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2670 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2671 hLumFilter, hLumFilterPos, hLumFilterSize,
2674 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2675 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2676 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2680 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2681 lumBufIndex, lastInLumBuf);
2683 while(lastInChrBuf < lastChrSrcY) {
2684 const uint8_t *src1[4] = {
2685 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2686 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2687 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2688 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2691 assert(chrBufIndex < 2*vChrBufSize);
2692 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2693 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2694 //FIXME replace parameters through context struct (some at least)
2696 if (c->needs_hcscale)
2697 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2698 chrDstW, src1, chrSrcW, chrXInc,
2699 hChrFilter, hChrFilterPos, hChrFilterSize,
2700 formatConvBuffer, pal);
2702 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2703 chrBufIndex, lastInChrBuf);
2705 //wrap buf index around to stay inside the ring buffer
2706 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2707 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2709 break; //we can't output a dstY line so let's try with the next slice
2712 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2714 if (should_dither) {
2715 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2716 c->lumDither8 = dither_8x8_128[dstY & 7];
2718 if (dstY >= dstH-2) {
2719 // hmm looks like we can't use MMX here without overwriting this array's tail
2720 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2721 &yuv2packed1, &yuv2packed2, &yuv2packedX);
// vertical filtering: pointers into the ring buffers for this output line
2726 const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2727 const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2728 const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2729 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2730 int16_t *vLumFilter= c->vLumFilter;
2731 int16_t *vChrFilter= c->vChrFilter;
2733 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2734 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2736 vLumFilter += dstY * vLumFilterSize;
2737 vChrFilter += chrDstY * vChrFilterSize;
2739 av_assert0(use_mmx_vfilter != (
2740 yuv2planeX == yuv2planeX_10BE_c
2741 || yuv2planeX == yuv2planeX_10LE_c
2742 || yuv2planeX == yuv2planeX_9BE_c
2743 || yuv2planeX == yuv2planeX_9LE_c
2744 || yuv2planeX == yuv2planeX_16BE_c
2745 || yuv2planeX == yuv2planeX_16LE_c
2746 || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
2748 if(use_mmx_vfilter){
2749 vLumFilter= c->lumMmxFilter;
2750 vChrFilter= c->chrMmxFilter;
2753 if (vLumFilterSize == 1) {
2754 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2756 yuv2planeX(vLumFilter, vLumFilterSize,
2757 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2760 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2762 yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2763 } else if (vChrFilterSize == 1) {
2764 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2765 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2767 yuv2planeX(vChrFilter, vChrFilterSize,
2768 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2769 yuv2planeX(vChrFilter, vChrFilterSize,
2770 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
2774 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2775 if(use_mmx_vfilter){
2776 vLumFilter= c->alpMmxFilter;
2778 if (vLumFilterSize == 1) {
2779 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2781 yuv2planeX(vLumFilter, vLumFilterSize,
2782 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2786 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2787 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2788 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2789 int chrAlpha = vChrFilter[2 * dstY + 1];
2790 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2791 alpPixBuf ? *alpSrcPtr : NULL,
2792 dest[0], dstW, chrAlpha, dstY);
2793 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2794 int lumAlpha = vLumFilter[2 * dstY + 1];
2795 int chrAlpha = vChrFilter[2 * dstY + 1];
2797 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2799 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2800 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2801 alpPixBuf ? alpSrcPtr : NULL,
2802 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2803 } else { //general RGB
2804 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2805 lumSrcPtr, vLumFilterSize,
2806 vChrFilter + dstY * vChrFilterSize,
2807 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2808 alpSrcPtr, dest[0], dstW, dstY);
// YUVA output without alpha input: fill the alpha plane with opaque
2814 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2815 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2818 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2819 __asm__ volatile("sfence":::"memory");
2823 /* store changed local vars back in the context */
2825 c->lumBufIndex= lumBufIndex;
2826 c->chrBufIndex= chrBufIndex;
2827 c->lastInLumBuf= lastInLumBuf;
2828 c->lastInChrBuf= lastInChrBuf;
2830 return dstY - lastDstY;
/**
 * One-time C-path initialization: selects the output functions, the
 * input unpack/conversion functions (chrToYV12 / lumToYV12 / alpToYV12 /
 * readLumPlanar / readChrPlanar), the horizontal scalers by source and
 * destination bit depth, and the range-conversion functions.
 *
 * NOTE(review): this copy of the file is missing many interior lines —
 * several case labels, if/else keywords, and closing braces — so the
 * switch bodies below are fragmentary.
 */
2833 static av_cold void sws_init_swScale_c(SwsContext *c)
2835 enum PixelFormat srcFormat = c->srcFormat;
2837 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2838 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
// chroma input conversion, picked by source format
2841 c->chrToYV12 = NULL;
2843 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2844 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2845 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2846 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2850 case PIX_FMT_BGR4_BYTE:
2851 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2852 case PIX_FMT_GBRP9LE:
2853 case PIX_FMT_GBRP10LE:
2854 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
2855 case PIX_FMT_GBRP9BE:
2856 case PIX_FMT_GBRP10BE:
2857 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
2858 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
// >8-bit planar YUV whose endianness differs from native: byteswap
2860 case PIX_FMT_YUV444P9LE:
2861 case PIX_FMT_YUV422P9LE:
2862 case PIX_FMT_YUV420P9LE:
2863 case PIX_FMT_YUV422P10LE:
2864 case PIX_FMT_YUV420P10LE:
2865 case PIX_FMT_YUV444P10LE:
2866 case PIX_FMT_YUV420P16LE:
2867 case PIX_FMT_YUV422P16LE:
2868 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2870 case PIX_FMT_YUV444P9BE:
2871 case PIX_FMT_YUV422P9BE:
2872 case PIX_FMT_YUV420P9BE:
2873 case PIX_FMT_YUV444P10BE:
2874 case PIX_FMT_YUV422P10BE:
2875 case PIX_FMT_YUV420P10BE:
2876 case PIX_FMT_YUV420P16BE:
2877 case PIX_FMT_YUV422P16BE:
2878 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
// when chroma is horizontally subsampled, use the *_half_c averaging readers
2881 if (c->chrSrcHSubSample) {
2883 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2884 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2885 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2886 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2887 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2888 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2889 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2890 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2891 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2892 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2893 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2894 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
2895 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
2896 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2897 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2898 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2899 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2900 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2901 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2902 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2903 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
2904 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
2905 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
// no horizontal subsampling: one UV pair per input pixel
2909 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2910 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2911 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2912 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2913 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2914 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2915 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2916 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2917 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2918 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2919 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2920 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
2921 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
2922 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2923 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2924 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2925 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2926 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2927 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2928 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2929 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
2930 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
// luma (and alpha) input conversion, picked by source format
2934 c->lumToYV12 = NULL;
2935 c->alpToYV12 = NULL;
2936 switch (srcFormat) {
2937 case PIX_FMT_GBRP9LE:
2938 case PIX_FMT_GBRP10LE:
2939 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2940 case PIX_FMT_GBRP9BE:
2941 case PIX_FMT_GBRP10BE:
2942 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2943 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2945 case PIX_FMT_YUV444P9LE:
2946 case PIX_FMT_YUV422P9LE:
2947 case PIX_FMT_YUV420P9LE:
2948 case PIX_FMT_YUV422P10LE:
2949 case PIX_FMT_YUV420P10LE:
2950 case PIX_FMT_YUV444P10LE:
2951 case PIX_FMT_YUV420P16LE:
2952 case PIX_FMT_YUV422P16LE:
2953 case PIX_FMT_YUV444P16LE:
2954 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2956 case PIX_FMT_YUV444P9BE:
2957 case PIX_FMT_YUV422P9BE:
2958 case PIX_FMT_YUV420P9BE:
2959 case PIX_FMT_YUV444P10BE:
2960 case PIX_FMT_YUV422P10BE:
2961 case PIX_FMT_YUV420P10BE:
2962 case PIX_FMT_YUV420P16BE:
2963 case PIX_FMT_YUV422P16BE:
2964 case PIX_FMT_YUV444P16BE:
2965 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2967 case PIX_FMT_YUYV422 :
2968 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2969 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2970 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2971 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2972 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2973 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2974 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2975 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
2976 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
2977 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2978 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2979 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2980 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2981 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2982 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
2983 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
2987 case PIX_FMT_BGR4_BYTE:
2988 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2989 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2990 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2991 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2992 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2993 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2994 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2995 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2996 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2997 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2998 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
3001 switch (srcFormat) {
3003 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
3005 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
3006 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
3007 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
// horizontal scalers: chosen by source bpc and destination bpc
3012 if (c->srcBpc == 8) {
3013 if (c->dstBpc <= 10) {
3014 c->hyScale = c->hcScale = hScale8To15_c;
3015 if (c->flags & SWS_FAST_BILINEAR) {
3016 c->hyscale_fast = hyscale_fast_c;
3017 c->hcscale_fast = hcscale_fast_c;
3020 c->hyScale = c->hcScale = hScale8To19_c;
3023 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
// range conversion only for YUV output when src/dst ranges differ
3026 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
3027 if (c->dstBpc <= 10) {
3029 c->lumConvertRange = lumRangeFromJpeg_c;
3030 c->chrConvertRange = chrRangeFromJpeg_c;
3032 c->lumConvertRange = lumRangeToJpeg_c;
3033 c->chrConvertRange = chrRangeToJpeg_c;
3037 c->lumConvertRange = lumRangeFromJpeg16_c;
3038 c->chrConvertRange = chrRangeFromJpeg16_c;
3040 c->lumConvertRange = lumRangeToJpeg16_c;
3041 c->chrConvertRange = chrRangeToJpeg16_c;
3046 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
3047 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
3048 c->needs_hcscale = 1;
3051 SwsFunc ff_getSwsFunc(SwsContext *c)
3053 sws_init_swScale_c(c);
3056 ff_sws_init_swScale_mmx(c);
3058 ff_sws_init_swScale_altivec(c);