2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
39 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
42 more intelligent misalignment avoidance for the horizontal scaler
43 write special vertical cubic upscale version
44 optimize C code (YV12 / minmax)
45 add support for packed pixel YUV input & output
46 add support for Y8 output
47 optimize BGR24 & BGR32
48 add BGR4 output support
49 write special BGR->BGR scaler
/* 2x2 ordered-dither offsets in the range 0..3. Each row repeats its
 * two-entry pattern to fill 8 bytes. yuv2rgb_write() reads columns 0 and 1
 * of row (y & 1) as the green offsets for RGB565/BGR565 output. */
52 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
53 { 1, 3, 1, 3, 1, 3, 1, 3, },
54 { 2, 0, 2, 0, 2, 0, 2, 0, },
/* 2x2 ordered-dither offsets in the range 0..6 (even values only), each row
 * repeating its two-entry pattern. yuv2rgb_write() uses it for the red and
 * blue offsets of RGB565/BGR565 and for all three components of
 * RGB555/BGR555; blue reads the opposite row ((y & 1) ^ 1). */
57 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
58 { 6, 2, 6, 2, 6, 2, 6, 2, },
59 { 0, 4, 0, 4, 0, 4, 0, 4, },
/* 4x4 ordered-dither offsets in the range 0..15; every row stores its four
 * entries twice to fill 8 bytes. yuv2rgb_write() indexes it with (y & 3) in
 * the last branch of its 16-bit path (presumably the RGB444/BGR444 case,
 * the branch header is not visible in this listing). */
62 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
63 { 8, 4, 11, 7, 8, 4, 11, 7, },
64 { 2, 14, 1, 13, 2, 14, 1, 13, },
65 { 10, 6, 9, 5, 10, 6, 9, 5, },
66 { 0, 12, 3, 15, 0, 12, 3, 15, },
/* 8x8 ordered-dither offsets in the range 0..31. yuv2rgb_write() selects the
 * row with (y & 7) and uses it (as d32) for the red and green offsets when
 * the target is RGB8/BGR8. */
69 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
70 { 17, 9, 23, 15, 16, 8, 22, 14, },
71 { 5, 29, 3, 27, 4, 28, 2, 26, },
72 { 21, 13, 19, 11, 20, 12, 18, 10, },
73 { 0, 24, 6, 30, 1, 25, 7, 31, },
74 { 16, 8, 22, 14, 17, 9, 23, 15, },
75 { 4, 28, 2, 26, 5, 29, 3, 27, },
76 { 20, 12, 18, 10, 21, 13, 19, 11, },
77 { 1, 25, 7, 31, 0, 24, 6, 30, },
/* 8x8 ordered-dither offsets in the range 0..72. yuv2rgb_write() selects the
 * row with (y & 7) and uses it (as d64) for the blue offset of RGB8/BGR8 and
 * for the green offset of the other 8/4-bit targets. */
80 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
81 { 0, 55, 14, 68, 3, 58, 17, 72, },
82 { 37, 18, 50, 32, 40, 22, 54, 35, },
83 { 9, 64, 5, 59, 13, 67, 8, 63, },
84 { 46, 27, 41, 23, 49, 31, 44, 26, },
85 { 2, 57, 16, 71, 1, 56, 15, 70, },
86 { 39, 21, 52, 34, 38, 19, 51, 33, },
87 { 11, 66, 7, 62, 10, 65, 6, 60, },
88 { 48, 30, 43, 25, 47, 29, 42, 24, },
/* 8x8 ordered-dither offsets used (as d128) by the monochrome writers and by
 * the 4-bit path of yuv2rgb_write(); the row is selected with (y & 7).
 *
 * Four alternative tables with the same name follow: a plain one (values
 * 0..217) and three variants labelled as gamma corrections for 1.5, 2.0
 * and 2.5.
 * NOTE(review): only one definition can be active in a build; the
 * preprocessor conditionals that presumably choose between them are not
 * visible in this listing -- confirm which table is compiled in. */
92 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
93 {117, 62, 158, 103, 113, 58, 155, 100, },
94 { 34, 199, 21, 186, 31, 196, 17, 182, },
95 {144, 89, 131, 76, 141, 86, 127, 72, },
96 { 0, 165, 41, 206, 10, 175, 52, 217, },
97 {110, 55, 151, 96, 120, 65, 162, 107, },
98 { 28, 193, 14, 179, 38, 203, 24, 189, },
99 {138, 83, 124, 69, 148, 93, 134, 79, },
100 { 7, 172, 48, 213, 3, 168, 45, 210, },
103 // tries to correct a gamma of 1.5
104 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
105 { 0, 143, 18, 200, 2, 156, 25, 215, },
106 { 78, 28, 125, 64, 89, 36, 138, 74, },
107 { 10, 180, 3, 161, 16, 195, 8, 175, },
108 {109, 51, 93, 38, 121, 60, 105, 47, },
109 { 1, 152, 23, 210, 0, 147, 20, 205, },
110 { 85, 33, 134, 71, 81, 30, 130, 67, },
111 { 14, 190, 6, 171, 12, 185, 5, 166, },
112 {117, 57, 101, 44, 113, 54, 97, 41, },
115 // tries to correct a gamma of 2.0
116 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
117 { 0, 124, 8, 193, 0, 140, 12, 213, },
118 { 55, 14, 104, 42, 66, 19, 119, 52, },
119 { 3, 168, 1, 145, 6, 187, 3, 162, },
120 { 86, 31, 70, 21, 99, 39, 82, 28, },
121 { 0, 134, 11, 206, 0, 129, 9, 200, },
122 { 62, 17, 114, 48, 58, 16, 109, 45, },
123 { 5, 181, 2, 157, 4, 175, 1, 151, },
124 { 95, 36, 78, 26, 90, 34, 74, 24, },
127 // tries to correct a gamma of 2.5
128 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
129 { 0, 107, 3, 187, 0, 125, 6, 212, },
130 { 39, 7, 86, 28, 49, 11, 102, 36, },
131 { 1, 158, 0, 131, 3, 180, 1, 151, },
132 { 68, 19, 52, 12, 81, 25, 64, 17, },
133 { 0, 119, 5, 203, 0, 113, 4, 195, },
134 { 45, 9, 96, 33, 42, 8, 91, 30, },
135 { 2, 172, 1, 144, 2, 165, 0, 137, },
136 { 77, 23, 60, 15, 72, 21, 56, 14, },
/* 8x8 ordered-dither offsets holding the even values 0..126. Declared
 * without `static`; no use of it appears in the visible part of this file. */
139 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
140 { 36, 68, 60, 92, 34, 66, 58, 90,},
141 { 100, 4,124, 28, 98, 2,122, 26,},
142 { 52, 84, 44, 76, 50, 82, 42, 74,},
143 { 116, 20,108, 12,114, 18,106, 10,},
144 { 32, 64, 56, 88, 38, 70, 62, 94,},
145 { 96, 0,120, 24,102, 6,126, 30,},
146 { 48, 80, 40, 72, 54, 86, 46, 78,},
147 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight bytes, each equal to 64, aligned to 8 bytes. */
149 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
150 { 64, 64, 64, 64, 64, 64, 64, 64 };
/* Store helper for the 16-bit plane writers: shifts `val` right by the
 * caller's local `shift`, clips it with av_clip_int16 or av_clip_uint16
 * (chosen by `signedness`), adds `bias`, and stores the result as a 16-bit
 * value with AV_WB16 or AV_WL16.
 * NOTE(review): the condition choosing between the two stores is not visible
 * in this listing; presumably it tests the caller's `big_endian` -- confirm. */
152 #define output_pixel(pos, val, bias, signedness) \
154 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
156 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
/* Unfiltered writer for one 16-bit output plane line.
 *
 * src         - dstW intermediate samples, 32 bits each
 * dest        - destination line, one uint16_t per sample
 * dstW        - number of samples to write
 * big_endian  - not used directly here; see output_pixel
 * output_bits - output depth; the final right shift is 19 - output_bits */
159 static av_always_inline void
160 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
161 int big_endian, int output_bits)
164 int shift = 19 - output_bits;
166 for (i = 0; i < dstW; i++) {
/* add half of (1 << shift) so the shift inside output_pixel rounds */
167 int val = src[i] + (1 << (shift - 1));
/* unsigned 16-bit clip, no bias */
168 output_pixel(&dest[i], val, 0, uint);
/* Vertical multi-tap filter writing one 16-bit output plane line.
 *
 * filter / filterSize - filter coefficients and their count
 * src                 - filterSize source lines of 32-bit samples
 * dest                - destination line, one uint16_t per sample
 * dstW                - number of samples to write
 * big_endian          - not used directly here; see output_pixel
 * output_bits         - output depth; the final right shift is
 *                       15 + 16 - output_bits */
172 static av_always_inline void
173 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
174 const int32_t **src, uint16_t *dest, int dstW,
175 int big_endian, int output_bits)
178 int shift = 15 + 16 - output_bits;
180 for (i = 0; i < dstW; i++) {
/* rounding term: half of (1 << shift) */
181 int val = 1 << (30-output_bits);
184 /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
185 * filters (or anything with negative coeffs), the range can be slightly
186 * wider in both directions. To account for this overflow, we subtract
187 * a constant so it always fits in the signed range (assuming a
188 * reasonable filterSize), and re-add that at the end. */
/* NOTE(review): the subtraction described above is not visible in this
 * listing (no statement appears between the comment and the loop) --
 * confirm it is present, since output_pixel below re-adds 0x8000 after a
 * signed 16-bit clip. */
190 for (j = 0; j < filterSize; j++)
191 val += src[j][i] * filter[j];
/* signed 16-bit clip, then bias by 0x8000 */
193 output_pixel(&dest[i], val, 0x8000, int);
/* Store helper for the 9/10-bit plane writers: shifts `val` right by the
 * caller's local `shift`, clips it to `output_bits` unsigned bits with
 * av_clip_uintp2, and stores it as a 16-bit value with AV_WB16 or AV_WL16.
 * NOTE(review): the condition choosing between the two stores is not visible
 * in this listing; presumably it tests `big_endian` -- confirm. */
199 #define output_pixel(pos, val) \
201 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
203 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
/* Unfiltered writer for one 9/10-bit output plane line stored in 16-bit
 * words.
 *
 * src         - dstW intermediate samples, 16 bits each
 * dest        - destination line, one uint16_t per sample
 * dstW        - number of samples to write
 * big_endian  - not used directly here; see output_pixel
 * output_bits - output depth; the final right shift is 15 - output_bits */
206 static av_always_inline void
207 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
208 int big_endian, int output_bits)
211 int shift = 15 - output_bits;
213 for (i = 0; i < dstW; i++) {
/* add half of (1 << shift) so the shift inside output_pixel rounds */
214 int val = src[i] + (1 << (shift - 1));
215 output_pixel(&dest[i], val);
/* Vertical multi-tap filter writing one 9/10-bit output plane line stored
 * in 16-bit words.
 *
 * filter / filterSize - filter coefficients and their count
 * src                 - filterSize source lines of 16-bit samples
 * dest                - destination line, one uint16_t per sample
 * dstW                - number of samples to write
 * big_endian          - not used directly here; see output_pixel
 * output_bits         - output depth; the final right shift is
 *                       11 + 16 - output_bits */
219 static av_always_inline void
220 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
221 const int16_t **src, uint16_t *dest, int dstW,
222 int big_endian, int output_bits)
225 int shift = 11 + 16 - output_bits;
227 for (i = 0; i < dstW; i++) {
/* rounding term: half of (1 << shift) */
228 int val = 1 << (26-output_bits);
/* accumulate coefficient * sample over all taps */
231 for (j = 0; j < filterSize; j++)
232 val += src[j][i] * filter[j];
234 output_pixel(&dest[i], val);
/* Generates the two public-signature plane writers for one bit depth and
 * byte order:
 *   yuv2plane1_<bits><BE_LE>_c  -> yuv2plane1_<template_size>_c_template
 *   yuv2planeX_<bits><BE_LE>_c  -> yuv2planeX_<template_size>_c_template
 * The wrappers cast the int16_t source pointers to typeX_t and the uint8_t
 * destination to uint16_t, and pass is_be and bits through. The `dither` and
 * `offset` parameters are accepted but not forwarded. */
240 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
241 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
242 uint8_t *dest, int dstW, \
243 const uint8_t *dither, int offset)\
245 yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
246 (uint16_t *) dest, dstW, is_be, bits); \
248 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
249 const int16_t **src, uint8_t *dest, int dstW, \
250 const uint8_t *dither, int offset)\
252 yuv2planeX_## template_size ## _c_template(filter, \
253 filterSize, (const typeX_t **) src, \
254 (uint16_t *) dest, dstW, is_be, bits); \
/* 9- and 10-bit outputs use the "10" templates with int16_t samples; 16-bit
 * outputs use the "16" templates with int32_t samples. */
256 yuv2NBPS( 9, BE, 1, 10, int16_t)
257 yuv2NBPS( 9, LE, 0, 10, int16_t)
258 yuv2NBPS(10, BE, 1, 10, int16_t)
259 yuv2NBPS(10, LE, 0, 10, int16_t)
260 yuv2NBPS(16, BE, 1, 16, int32_t)
261 yuv2NBPS(16, LE, 0, 16, int32_t)
/* Vertical multi-tap filter writing one 8-bit output plane line.
 *
 * filter / filterSize - filter coefficients and their count
 * src                 - filterSize source lines of 16-bit samples
 * dest                - destination line, one byte per sample
 * dstW                - number of samples to write
 * dither              - 8-entry dither row
 * offset              - added to the sample index before it is wrapped
 *                       into the dither row */
263 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
264 const int16_t **src, uint8_t *dest, int dstW,
265 const uint8_t *dither, int offset)
268 for (i=0; i<dstW; i++) {
/* seed the accumulator with the dither byte scaled by 1 << 12 */
269 int val = dither[(i + offset) & 7] << 12;
271 for (j=0; j<filterSize; j++)
272 val += src[j][i] * filter[j];
/* drop 19 fractional bits and saturate to 0..255 */
274 dest[i]= av_clip_uint8(val>>19);
/* Unfiltered writer for one 8-bit output plane line: each 16-bit sample gets
 * the dither byte at ((i + offset) & 7) added, is shifted right by 7 and
 * saturated to 0..255.
 *
 * src    - dstW intermediate samples
 * dest   - destination line, one byte per sample
 * dstW   - number of samples to write
 * dither - 8-entry dither row
 * offset - added to the sample index before it is wrapped into the row */
278 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
279 const uint8_t *dither, int offset)
282 for (i=0; i<dstW; i++) {
283 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
284 dest[i]= av_clip_uint8(val);
/* Vertical multi-tap filter writing one interleaved 8-bit chroma line
 * (two bytes per chroma sample).
 *
 * c                       - context; supplies dstFormat and the chroma
 *                           dither row chrDither8
 * chrFilter/chrFilterSize - filter coefficients and their count
 * chrUSrc / chrVSrc       - chrFilterSize source lines for U and for V
 * dest                    - destination line, 2 * chrDstW bytes
 * chrDstW                 - number of chroma sample pairs to write */
288 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
289 const int16_t **chrUSrc, const int16_t **chrVSrc,
290 uint8_t *dest, int chrDstW)
292 enum PixelFormat dstFormat = c->dstFormat;
293 const uint8_t *chrDither = c->chrDither8;
/* NV12: U goes to the even byte, V to the odd byte */
296 if (dstFormat == PIX_FMT_NV12)
297 for (i=0; i<chrDstW; i++) {
/* V uses the dither row shifted by 3 entries relative to U */
298 int u = chrDither[i & 7] << 12;
299 int v = chrDither[(i + 3) & 7] << 12;
301 for (j=0; j<chrFilterSize; j++) {
302 u += chrUSrc[j][i] * chrFilter[j];
303 v += chrVSrc[j][i] * chrFilter[j];
306 dest[2*i]= av_clip_uint8(u>>19);
307 dest[2*i+1]= av_clip_uint8(v>>19);
/* Same computation with the byte order swapped (V first, then U).
 * NOTE(review): presumably the else branch for non-NV12 targets; the `else`
 * itself is not visible in this listing -- confirm. */
310 for (i=0; i<chrDstW; i++) {
311 int u = chrDither[i & 7] << 12;
312 int v = chrDither[(i + 3) & 7] << 12;
314 for (j=0; j<chrFilterSize; j++) {
315 u += chrUSrc[j][i] * chrFilter[j];
316 v += chrVSrc[j][i] * chrFilter[j];
319 dest[2*i]= av_clip_uint8(v>>19);
320 dest[2*i+1]= av_clip_uint8(u>>19);
/* Store helper for the gray16 writers; branches on whether `target` is
 * PIX_FMT_GRAY16BE.
 * NOTE(review): the store statements of both branches are not visible in
 * this listing; presumably a big-endian and a little-endian 16-bit write --
 * confirm. */
324 #define output_pixel(pos, val) \
325 if (target == PIX_FMT_GRAY16BE) { \
/* Vertical multi-tap luma filter writing 16-bit gray output, two pixels per
 * loop iteration. Only lumFilter, lumSrc, lumFilterSize, dest, dstW and
 * target are used in the visible code; the chroma and alpha parameters are
 * part of the common packed-writer signature. */
331 static av_always_inline void
332 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
333 const int32_t **lumSrc, int lumFilterSize,
334 const int16_t *chrFilter, const int32_t **chrUSrc,
335 const int32_t **chrVSrc, int chrFilterSize,
336 const int32_t **alpSrc, uint16_t *dest, int dstW,
337 int y, enum PixelFormat target)
341 for (i = 0; i < (dstW >> 1); i++) {
/* rounding term (1 << 14) combined with a -0x40000000 offset on the
 * accumulator; the 0x8000 added at the store compensates for the signed
 * clip below */
343 int Y1 = (1 << 14) - 0x40000000;
344 int Y2 = (1 << 14) - 0x40000000;
346 for (j = 0; j < lumFilterSize; j++) {
347 Y1 += lumSrc[j][i * 2] * lumFilter[j];
348 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* NOTE(review): any down-shift of Y1/Y2 between the accumulation and the
 * clip is not visible in this listing -- confirm. */
352 Y1 = av_clip_int16(Y1);
353 Y2 = av_clip_int16(Y2);
354 output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
355 output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
/* Two-line blend writing 16-bit gray output, two pixels per loop iteration.
 * Each output is (buf[0] * (4095 - yalpha) + buf[1] * yalpha) >> 15.
 * ubuf, vbuf, abuf, uvalpha and y are not used in the visible code. */
359 static av_always_inline void
360 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
361 const int32_t *ubuf[2], const int32_t *vbuf[2],
362 const int32_t *abuf[2], uint16_t *dest, int dstW,
363 int yalpha, int uvalpha, int y,
364 enum PixelFormat target)
/* weight of the first line; the two weights sum to 4095 */
366 int yalpha1 = 4095 - yalpha;
368 const int32_t *buf0 = buf[0], *buf1 = buf[1];
370 for (i = 0; i < (dstW >> 1); i++) {
371 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
372 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
374 output_pixel(&dest[i * 2 + 0], Y1);
375 output_pixel(&dest[i * 2 + 1], Y2);
/* Single-line writer for 16-bit gray output, two pixels per loop iteration.
 * Each output is the source sample shifted left by one bit.
 * ubuf, vbuf, abuf0, uvalpha and y are not used in the visible code. */
379 static av_always_inline void
380 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
381 const int32_t *ubuf[2], const int32_t *vbuf[2],
382 const int32_t *abuf0, uint16_t *dest, int dstW,
383 int uvalpha, int y, enum PixelFormat target)
387 for (i = 0; i < (dstW >> 1); i++) {
388 int Y1 = buf0[i * 2 ] << 1;
389 int Y2 = buf0[i * 2 + 1] << 1;
391 output_pixel(&dest[i * 2 + 0], Y1);
392 output_pixel(&dest[i * 2 + 1], Y2);
/* Generates the three entry points <name><ext>_X_c, <name><ext>_2_c and
 * <name><ext>_1_c for a packed writer whose intermediate samples are 32 bits
 * wide and whose output is written as uint16_t. Each wrapper takes the
 * common int16_t/uint8_t pointer signature, casts the sample pointers to
 * int32_t and the destination to uint16_t, and forwards to
 * <name><base>_{X,2,1}_c_template with `fmt` as the target format. */
398 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
399 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
400 const int16_t **_lumSrc, int lumFilterSize, \
401 const int16_t *chrFilter, const int16_t **_chrUSrc, \
402 const int16_t **_chrVSrc, int chrFilterSize, \
403 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
406 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
407 **chrUSrc = (const int32_t **) _chrUSrc, \
408 **chrVSrc = (const int32_t **) _chrVSrc, \
409 **alpSrc = (const int32_t **) _alpSrc; \
410 uint16_t *dest = (uint16_t *) _dest; \
411 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
412 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
413 alpSrc, dest, dstW, y, fmt); \
416 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
417 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
418 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
419 int yalpha, int uvalpha, int y) \
421 const int32_t **buf = (const int32_t **) _buf, \
422 **ubuf = (const int32_t **) _ubuf, \
423 **vbuf = (const int32_t **) _vbuf, \
424 **abuf = (const int32_t **) _abuf; \
425 uint16_t *dest = (uint16_t *) _dest; \
426 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
427 dest, dstW, yalpha, uvalpha, y, fmt); \
430 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
431 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
432 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
433 int uvalpha, int y) \
435 const int32_t *buf0 = (const int32_t *) _buf0, \
436 **ubuf = (const int32_t **) _ubuf, \
437 **vbuf = (const int32_t **) _vbuf, \
438 *abuf0 = (const int32_t *) _abuf0; \
439 uint16_t *dest = (uint16_t *) _dest; \
440 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
441 dstW, uvalpha, y, fmt); \
/* gray16 entry points: yuv2gray16LE_{X,2,1}_c and yuv2gray16BE_{X,2,1}_c
 * (empty `base`, so they forward to yuv2gray16_*_c_template) */
444 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
445 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
/* Store helper for the monochrome writers; branches on whether `target` is
 * PIX_FMT_MONOBLACK.
 * NOTE(review): the statements of both branches are not visible in this
 * listing; presumably one stores `acc` and the other its complement --
 * confirm. */
447 #define output_pixel(pos, acc) \
448 if (target == PIX_FMT_MONOBLACK) { \
/* Vertical multi-tap luma filter producing 1-bit-per-pixel output. Pixels
 * are handled in pairs; each pixel contributes one table value to the
 * accumulator `acc`, which is written through output_pixel.
 * Chroma and alpha parameters are not used in the visible code. */
454 static av_always_inline void
455 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
456 const int16_t **lumSrc, int lumFilterSize,
457 const int16_t *chrFilter, const int16_t **chrUSrc,
458 const int16_t **chrVSrc, int chrFilterSize,
459 const int16_t **alpSrc, uint8_t *dest, int dstW,
460 int y, enum PixelFormat target)
/* dither row for this output line */
462 const uint8_t * const d128=dither_8x8_220[y&7];
/* lookup table addressed with the chroma-128 entries of table_gU/table_gV */
463 uint8_t *g = c->table_gU[128] + c->table_gV[128];
467 for (i = 0; i < dstW - 1; i += 2) {
472 for (j = 0; j < lumFilterSize; j++) {
473 Y1 += lumSrc[j][i] * lumFilter[j];
474 Y2 += lumSrc[j][i+1] * lumFilter[j];
/* bit 8 set in either value means at least one is outside 0..255, so
 * saturate both */
478 if ((Y1 | Y2) & 0x100) {
479 Y1 = av_clip_uint8(Y1);
480 Y2 = av_clip_uint8(Y2);
/* acc = 2 * acc + table value: shift in one value per pixel */
482 acc += acc + g[Y1 + d128[(i + 0) & 7]];
483 acc += acc + g[Y2 + d128[(i + 1) & 7]];
/* NOTE(review): the condition guarding this store is not visible in this
 * listing; presumably it fires once eight pixels are accumulated --
 * confirm. */
485 output_pixel(*dest++, acc);
/* Two-line blend producing 1-bit-per-pixel output, eight pixels (one
 * destination byte) per loop iteration. Luma for each pixel is
 * (buf[0] * (4095 - yalpha) + buf[1] * yalpha) >> 19, offset by the dither
 * value for its column and looked up in `g`.
 * ubuf, vbuf, abuf and uvalpha are not used in the visible code. */
490 static av_always_inline void
491 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
492 const int16_t *ubuf[2], const int16_t *vbuf[2],
493 const int16_t *abuf[2], uint8_t *dest, int dstW,
494 int yalpha, int uvalpha, int y,
495 enum PixelFormat target)
497 const int16_t *buf0 = buf[0], *buf1 = buf[1];
/* dither row for this output line */
498 const uint8_t * const d128 = dither_8x8_220[y & 7];
499 uint8_t *g = c->table_gU[128] + c->table_gV[128];
/* weight of the first line; the two weights sum to 4095 */
500 int yalpha1 = 4095 - yalpha;
/* only whole groups of eight pixels are processed */
503 for (i = 0; i < dstW - 7; i += 8) {
/* each `acc += acc + x` doubles acc and adds the next table value, so the
 * first pixel of the group ends up in the most significant position */
504 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
505 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
506 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
507 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
508 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
509 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
510 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
511 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
512 output_pixel(*dest++, acc);
/* Single-line writer producing 1-bit-per-pixel output, eight pixels (one
 * destination byte) per loop iteration. Luma for each pixel is buf0 >> 7,
 * offset by the dither value for its column and looked up in `g`.
 * ubuf, vbuf, abuf0 and uvalpha are not used in the visible code. */
516 static av_always_inline void
517 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
518 const int16_t *ubuf[2], const int16_t *vbuf[2],
519 const int16_t *abuf0, uint8_t *dest, int dstW,
520 int uvalpha, int y, enum PixelFormat target)
/* dither row for this output line */
522 const uint8_t * const d128 = dither_8x8_220[y & 7];
523 uint8_t *g = c->table_gU[128] + c->table_gV[128];
/* only whole groups of eight pixels are processed */
526 for (i = 0; i < dstW - 7; i += 8) {
/* each `acc += acc + x` doubles acc and adds the next table value */
527 int acc = g[(buf0[i ] >> 7) + d128[0]];
528 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
529 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
530 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
531 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
532 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
533 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
534 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
535 output_pixel(*dest++, acc);
/* Generates the three entry points <name><ext>_X_c, <name><ext>_2_c and
 * <name><ext>_1_c for a packed writer with 16-bit intermediate samples and
 * byte output. Each wrapper forwards its arguments unchanged to
 * <name><base>_{X,2,1}_c_template and appends `fmt` as the target format. */
541 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
542 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
543 const int16_t **lumSrc, int lumFilterSize, \
544 const int16_t *chrFilter, const int16_t **chrUSrc, \
545 const int16_t **chrVSrc, int chrFilterSize, \
546 const int16_t **alpSrc, uint8_t *dest, int dstW, \
549 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
550 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
551 alpSrc, dest, dstW, y, fmt); \
554 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
555 const int16_t *ubuf[2], const int16_t *vbuf[2], \
556 const int16_t *abuf[2], uint8_t *dest, int dstW, \
557 int yalpha, int uvalpha, int y) \
559 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
560 dest, dstW, yalpha, uvalpha, y, fmt); \
563 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
564 const int16_t *ubuf[2], const int16_t *vbuf[2], \
565 const int16_t *abuf0, uint8_t *dest, int dstW, \
566 int uvalpha, int y) \
568 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
569 abuf0, dest, dstW, uvalpha, \
/* monochrome entry points: yuv2monowhite_{X,2,1}_c and
 * yuv2monoblack_{X,2,1}_c (empty `base`, so they forward to
 * yuv2mono_*_c_template) */
573 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
574 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
/* Stores one packed 4:2:2 group (two luma, one U, one V) at dest[pos..pos+3].
 * For PIX_FMT_YUYV422 the luma bytes go to offsets 0 and 2; in the other
 * branch they go to offsets 1 and 3.
 * NOTE(review): the U and V stores and the branch separator are not visible
 * in this listing; presumably chroma fills the remaining two offsets --
 * confirm. */
576 #define output_pixels(pos, Y1, U, Y2, V) \
577 if (target == PIX_FMT_YUYV422) { \
578 dest[pos + 0] = Y1; \
580 dest[pos + 2] = Y2; \
584 dest[pos + 1] = Y1; \
586 dest[pos + 3] = Y2; \
/* Vertical multi-tap filter producing packed 8-bit 4:2:2 output. Each loop
 * iteration filters two luma samples and one U/V pair and writes one 4-byte
 * group. alpSrc and y are not used in the visible code. */
589 static av_always_inline void
590 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
591 const int16_t **lumSrc, int lumFilterSize,
592 const int16_t *chrFilter, const int16_t **chrUSrc,
593 const int16_t **chrVSrc, int chrFilterSize,
594 const int16_t **alpSrc, uint8_t *dest, int dstW,
595 int y, enum PixelFormat target)
599 for (i = 0; i < (dstW >> 1); i++) {
/* NOTE(review): the declarations and initial values of Y1, Y2, U and V, and
 * any down-shift after the accumulation, are not visible in this listing. */
606 for (j = 0; j < lumFilterSize; j++) {
607 Y1 += lumSrc[j][i * 2] * lumFilter[j];
608 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
610 for (j = 0; j < chrFilterSize; j++) {
611 U += chrUSrc[j][i] * chrFilter[j];
612 V += chrVSrc[j][i] * chrFilter[j];
/* bit 8 set in any value means at least one is outside 0..255, so saturate
 * all four */
618 if ((Y1 | Y2 | U | V) & 0x100) {
619 Y1 = av_clip_uint8(Y1);
620 Y2 = av_clip_uint8(Y2);
621 U = av_clip_uint8(U);
622 V = av_clip_uint8(V);
624 output_pixels(4*i, Y1, U, Y2, V);
/* Two-line blend producing packed 8-bit 4:2:2 output, one 4-byte group per
 * loop iteration. Luma is blended with weights (4095 - yalpha, yalpha) and
 * chroma with (4095 - uvalpha, uvalpha); both results are shifted right by
 * 19. abuf and y are not used in the visible code. */
628 static av_always_inline void
629 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
630 const int16_t *ubuf[2], const int16_t *vbuf[2],
631 const int16_t *abuf[2], uint8_t *dest, int dstW,
632 int yalpha, int uvalpha, int y,
633 enum PixelFormat target)
635 const int16_t *buf0 = buf[0], *buf1 = buf[1],
636 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
637 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* weights of the first line for luma and chroma */
638 int yalpha1 = 4095 - yalpha;
639 int uvalpha1 = 4095 - uvalpha;
642 for (i = 0; i < (dstW >> 1); i++) {
643 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
644 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
645 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
646 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
/* no clipping is applied before the store in the visible code */
648 output_pixels(i * 4, Y1, U, Y2, V);
/* Single-luma-line writer producing packed 8-bit 4:2:2 output, one 4-byte
 * group per loop iteration. Luma is buf0 >> 7. Chroma comes from one chroma
 * line when uvalpha < 2048 and from the average of both lines otherwise.
 * abuf0 and y are not used in the visible code. */
652 static av_always_inline void
653 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
654 const int16_t *ubuf[2], const int16_t *vbuf[2],
655 const int16_t *abuf0, uint8_t *dest, int dstW,
656 int uvalpha, int y, enum PixelFormat target)
658 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
659 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
662 if (uvalpha < 2048) {
663 for (i = 0; i < (dstW >> 1); i++) {
664 int Y1 = buf0[i * 2] >> 7;
665 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): this branch reads ubuf1/vbuf1 (index 1), whereas
 * yuv2rgb48_1_c_template reads ubuf0/vbuf0 in its corresponding branch --
 * confirm which line is intended. */
666 int U = ubuf1[i] >> 7;
667 int V = vbuf1[i] >> 7;
669 output_pixels(i * 4, Y1, U, Y2, V);
/* averaging path: the sum of both chroma lines is shifted by 8 instead of 7 */
672 for (i = 0; i < (dstW >> 1); i++) {
673 int Y1 = buf0[i * 2] >> 7;
674 int Y2 = buf0[i * 2 + 1] >> 7;
675 int U = (ubuf0[i] + ubuf1[i]) >> 8;
676 int V = (vbuf0[i] + vbuf1[i]) >> 8;
678 output_pixels(i * 4, Y1, U, Y2, V);
/* packed 4:2:2 entry points: yuv2yuyv422_{X,2,1}_c and yuv2uyvy422_{X,2,1}_c,
 * all forwarding to yuv2422_*_c_template */
685 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
686 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
/* Component-order helpers for the 48-bit writers. R_B evaluates to the local
 * R for RGB48LE/RGB48BE targets and to B otherwise; B_R is the opposite.
 * They let one template emit both RGB and BGR ordering. */
688 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
689 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
/* Store helper for the 48-bit writers; branches on isBE(target).
 * NOTE(review): the store statements of both branches are not visible in
 * this listing; presumably big- and little-endian 16-bit writes -- confirm. */
690 #define output_pixel(pos, val) \
691 if (isBE(target)) { \
/* Vertical multi-tap filter producing 16-bit-per-component RGB/BGR output.
 * Each loop iteration filters two luma samples and one U/V pair and emits
 * two pixels (six 16-bit components) through output_pixel.
 *
 * c                       - context; supplies yuv2rgb_y_offset,
 *                           yuv2rgb_y_coeff and the four chroma coefficients
 * lumFilter/lumFilterSize - luma filter coefficients and their count
 * lumSrc                  - lumFilterSize luma lines of 32-bit samples
 * chrFilter/chrFilterSize - chroma filter coefficients and their count
 * chrUSrc / chrVSrc       - chrFilterSize chroma lines each
 * alpSrc, y               - not used in the visible code
 * dest                    - destination line of uint16_t components
 * dstW                    - output width in pixels
 * target                  - selects component order (R_B / B_R) and byte
 *                           order (output_pixel) */
697 static av_always_inline void
698 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
699 const int32_t **lumSrc, int lumFilterSize,
700 const int16_t *chrFilter, const int32_t **chrUSrc,
701 const int32_t **chrVSrc, int chrFilterSize,
702 const int32_t **alpSrc, uint16_t *dest, int dstW,
703 int y, enum PixelFormat target)
707 for (i = 0; i < (dstW >> 1); i++) {
/* luma accumulators start with a -0x40000000 offset */
709 int Y1 = -0x40000000;
710 int Y2 = -0x40000000;
/* Chroma accumulator starts at minus the mid-point (128 << 23). Written as
 * -(128 << 23) because left-shifting a negative value is undefined behaviour
 * in ISO C; the value is unchanged.
 * NOTE(review): the V initialiser is not visible in this listing; if it uses
 * the `-128 << 23` form it needs the same change. */
711 int U = -(128 << 23); // 19
715 for (j = 0; j < lumFilterSize; j++) {
716 Y1 += lumSrc[j][i * 2] * lumFilter[j];
717 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
719 for (j = 0; j < chrFilterSize; j++) {
720 U += chrUSrc[j][i] * chrFilter[j];
721 V += chrVSrc[j][i] * chrFilter[j];
724 // 8bit: 12+15=27; 16-bit: 12+19=31
732 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
/* remove the luma offset, then scale by the luma coefficient */
733 Y1 -= c->yuv2rgb_y_offset;
734 Y2 -= c->yuv2rgb_y_offset;
735 Y1 *= c->yuv2rgb_y_coeff;
736 Y2 *= c->yuv2rgb_y_coeff;
739 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
/* chroma contributions to each colour component */
741 R = V * c->yuv2rgb_v2r_coeff;
742 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
743 B = U * c->yuv2rgb_u2b_coeff;
745 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
/* clip luma + chroma to 30 unsigned bits, then keep the top 16 */
746 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
747 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
748 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
749 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
750 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
751 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Two-line blend producing 16-bit-per-component RGB/BGR output, two pixels
 * (six 16-bit components) per loop iteration.
 *
 * c             - context; supplies yuv2rgb_y_offset, yuv2rgb_y_coeff and
 *                 the four chroma coefficients
 * buf/ubuf/vbuf - two lines each of luma, U and V 32-bit samples
 * abuf, y       - not used in the visible code
 * dest          - destination line of uint16_t components
 * dstW          - output width in pixels
 * yalpha        - weight of buf[1]; buf[0] gets 4095 - yalpha
 * uvalpha       - weight of ubuf[1]/vbuf[1]; index 0 gets 4095 - uvalpha
 * target        - selects component order and byte order */
756 static av_always_inline void
757 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
758 const int32_t *ubuf[2], const int32_t *vbuf[2],
759 const int32_t *abuf[2], uint16_t *dest, int dstW,
760 int yalpha, int uvalpha, int y,
761 enum PixelFormat target)
763 const int32_t *buf0 = buf[0], *buf1 = buf[1],
764 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
765 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
766 int yalpha1 = 4095 - yalpha;
767 int uvalpha1 = 4095 - uvalpha;
770 for (i = 0; i < (dstW >> 1); i++) {
771 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
772 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
/* Subtract the chroma mid-point (128 << 23) before the shift. Written as a
 * subtraction because left-shifting a negative value is undefined behaviour
 * in ISO C; the value is unchanged. */
773 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - (128 << 23)) >> 14;
774 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - (128 << 23)) >> 14;
/* remove the luma offset, then scale by the luma coefficient */
777 Y1 -= c->yuv2rgb_y_offset;
778 Y2 -= c->yuv2rgb_y_offset;
779 Y1 *= c->yuv2rgb_y_coeff;
780 Y2 *= c->yuv2rgb_y_coeff;
/* chroma contributions to each colour component */
784 R = V * c->yuv2rgb_v2r_coeff;
785 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
786 B = U * c->yuv2rgb_u2b_coeff;
/* clip luma + chroma to 30 unsigned bits, then keep the top 16 */
788 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
789 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
790 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
791 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
792 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
793 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Single-luma-line writer producing 16-bit-per-component RGB/BGR output, two
 * pixels (six 16-bit components) per loop iteration. Chroma comes from
 * ubuf[0]/vbuf[0] alone when uvalpha < 2048 and from the average of both
 * chroma lines otherwise.
 *
 * c          - context; supplies yuv2rgb_y_offset, yuv2rgb_y_coeff and the
 *              four chroma coefficients
 * buf0       - luma line of 32-bit samples
 * ubuf/vbuf  - two chroma lines each
 * abuf0, y   - not used in the visible code
 * dest       - destination line of uint16_t components
 * dstW       - output width in pixels
 * uvalpha    - selects the single-line or the averaging chroma path
 * target     - selects component order and byte order */
798 static av_always_inline void
799 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
800 const int32_t *ubuf[2], const int32_t *vbuf[2],
801 const int32_t *abuf0, uint16_t *dest, int dstW,
802 int uvalpha, int y, enum PixelFormat target)
804 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
805 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
808 if (uvalpha < 2048) {
809 for (i = 0; i < (dstW >> 1); i++) {
810 int Y1 = (buf0[i * 2] ) >> 2;
811 int Y2 = (buf0[i * 2 + 1]) >> 2;
/* Subtract the chroma mid-point of one sample (128 << 11). Written as a
 * subtraction because left-shifting a negative value is undefined behaviour
 * in ISO C; the value is unchanged. */
812 int U = (ubuf0[i] - (128 << 11)) >> 2;
813 int V = (vbuf0[i] - (128 << 11)) >> 2;
/* remove the luma offset, then scale by the luma coefficient */
816 Y1 -= c->yuv2rgb_y_offset;
817 Y2 -= c->yuv2rgb_y_offset;
818 Y1 *= c->yuv2rgb_y_coeff;
819 Y2 *= c->yuv2rgb_y_coeff;
823 R = V * c->yuv2rgb_v2r_coeff;
824 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
825 B = U * c->yuv2rgb_u2b_coeff;
/* clip luma + chroma to 30 unsigned bits, then keep the top 16 */
827 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
828 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
829 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
830 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
831 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
832 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
836 for (i = 0; i < (dstW >> 1); i++) {
837 int Y1 = (buf0[i * 2] ) >> 2;
838 int Y2 = (buf0[i * 2 + 1]) >> 2;
/* Two chroma samples are summed here, so the mid-point to remove is twice
 * that of the single-sample path: 128 << 12. The extra shift bit (>> 3
 * instead of >> 2) then averages them. With the former bias of 128 << 11,
 * neutral chroma (both samples at 128 << 11) produced a non-zero U/V. */
839 int U = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
840 int V = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
843 Y1 -= c->yuv2rgb_y_offset;
844 Y2 -= c->yuv2rgb_y_offset;
845 Y1 *= c->yuv2rgb_y_coeff;
846 Y2 *= c->yuv2rgb_y_coeff;
850 R = V * c->yuv2rgb_v2r_coeff;
851 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
852 B = U * c->yuv2rgb_u2b_coeff;
854 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
855 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
856 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
857 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
858 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
859 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* 48-bit entry points: yuv2rgb48be/rgb48le/bgr48be/bgr48le _{X,2,1}_c, all
 * forwarding to yuv2rgb48_*_c_template with the matching target format */
869 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
870 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
871 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
872 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
875 * Write out 2 RGB pixels in the target pixel format. This function takes a
876 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
877 * things like endianness conversion and shifting. The caller takes care of
878 * setting the correct offset in these tables from the chroma (U/V) values.
879 * This function then uses the luminance (Y1/Y2) values to write out the
880 * correct RGB values into the destination buffer.
/* Parameters:
 *   _dest      - start of the destination line
 *   i          - index of the pixel pair; pixels 2*i and 2*i+1 are written
 *   Y1, Y2     - luma values used as indices into the tables
 *   A1, A2     - alpha values, used only by the 32-bit formats
 *   _r, _g, _b - lookup tables; their element width depends on target
 *                (uint32_t, uint16_t or uint8_t, see the casts below)
 *   y          - output line number, selects the dither row
 *   target     - destination pixel format
 *   hasAlpha   - whether A1/A2 are merged into 32-bit output */
882 static av_always_inline void
883 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
884 unsigned A1, unsigned A2,
885 const void *_r, const void *_g, const void *_b, int y,
886 enum PixelFormat target, int hasAlpha)
/* 32-bit formats: table entries are summed into one uint32_t per pixel */
888 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
889 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
890 uint32_t *dest = (uint32_t *) _dest;
891 const uint32_t *r = (const uint32_t *) _r;
892 const uint32_t *g = (const uint32_t *) _g;
893 const uint32_t *b = (const uint32_t *) _b;
/* NOTE(review): three variants of the 32-bit store follow (alpha chosen at
 * run time, alpha always merged, no alpha). The conditionals that presumably
 * select exactly one of them are not visible in this listing -- confirm.
 * `sh` is the alpha bit position: 0 for the *_1 formats, 24 otherwise. */
896 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
898 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
899 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
902 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
904 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
905 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
907 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
908 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* 24-bit formats: three bytes per pixel, six per pair */
911 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
912 uint8_t *dest = (uint8_t *) _dest;
913 const uint8_t *r = (const uint8_t *) _r;
914 const uint8_t *g = (const uint8_t *) _g;
915 const uint8_t *b = (const uint8_t *) _b;
/* r_b is the red table for RGB24 and the blue table for BGR24; b_r is the
 * opposite */
917 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
918 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
919 dest[i * 6 + 0] = r_b[Y1];
920 dest[i * 6 + 1] = g[Y1];
921 dest[i * 6 + 2] = b_r[Y1];
922 dest[i * 6 + 3] = r_b[Y2];
923 dest[i * 6 + 4] = g[Y2];
924 dest[i * 6 + 5] = b_r[Y2];
/* 16-bit formats: a per-component dither offset is added to the luma index
 * before the table lookup */
927 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
928 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
929 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
930 uint16_t *dest = (uint16_t *) _dest;
931 const uint16_t *r = (const uint16_t *) _r;
932 const uint16_t *g = (const uint16_t *) _g;
933 const uint16_t *b = (const uint16_t *) _b;
934 int dr1, dg1, db1, dr2, dg2, db2;
/* 565: green uses the 0..3 matrix; red and blue use the 0..6 matrix, blue
 * from the opposite row */
936 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
937 dr1 = dither_2x2_8[ y & 1 ][0];
938 dg1 = dither_2x2_4[ y & 1 ][0];
939 db1 = dither_2x2_8[(y & 1) ^ 1][0];
940 dr2 = dither_2x2_8[ y & 1 ][1];
941 dg2 = dither_2x2_4[ y & 1 ][1];
942 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* 555: all components use the 0..6 matrix; green takes the other column
 * than red */
943 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
944 dr1 = dither_2x2_8[ y & 1 ][0];
945 dg1 = dither_2x2_8[ y & 1 ][1];
946 db1 = dither_2x2_8[(y & 1) ^ 1][0];
947 dr2 = dither_2x2_8[ y & 1 ][1];
948 dg2 = dither_2x2_8[ y & 1 ][0];
949 db2 = dither_2x2_8[(y & 1) ^ 1][1];
/* remaining 16-bit case (presumably RGB444/BGR444; the branch header is not
 * visible in this listing): 4x4 matrix, blue from the mirrored row */
951 dr1 = dither_4x4_16[ y & 3 ][0];
952 dg1 = dither_4x4_16[ y & 3 ][1];
953 db1 = dither_4x4_16[(y & 3) ^ 3][0];
954 dr2 = dither_4x4_16[ y & 3 ][1];
955 dg2 = dither_4x4_16[ y & 3 ][0];
956 db2 = dither_4x4_16[(y & 3) ^ 3][1];
959 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
960 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
961 } else /* 8/4-bit */ {
962 uint8_t *dest = (uint8_t *) _dest;
963 const uint8_t *r = (const uint8_t *) _r;
964 const uint8_t *g = (const uint8_t *) _g;
965 const uint8_t *b = (const uint8_t *) _b;
966 int dr1, dg1, db1, dr2, dg2, db2;
/* 8-bit: red and green share the 0..31 matrix, blue uses the 0..72 matrix;
 * the column is the pixel's x position modulo 8 */
968 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
969 const uint8_t * const d64 = dither_8x8_73[y & 7];
970 const uint8_t * const d32 = dither_8x8_32[y & 7];
971 dr1 = dg1 = d32[(i * 2 + 0) & 7];
972 db1 = d64[(i * 2 + 0) & 7];
973 dr2 = dg2 = d32[(i * 2 + 1) & 7];
974 db2 = d64[(i * 2 + 1) & 7];
/* other targets of this branch: red and blue share dither_8x8_220, green
 * uses the 0..72 matrix */
976 const uint8_t * const d64 = dither_8x8_73 [y & 7];
977 const uint8_t * const d128 = dither_8x8_220[y & 7];
978 dr1 = db1 = d128[(i * 2 + 0) & 7];
979 dg1 = d64[(i * 2 + 0) & 7];
980 dr2 = db2 = d128[(i * 2 + 1) & 7];
981 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 pack both pixels into one byte: first pixel in the low nibble,
 * second pixel shifted into the high nibble */
984 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
985 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
986 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
/* otherwise one byte per pixel */
988 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
989 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Vertical multi-tap filter producing table-driven RGB output. Each loop
 * iteration filters two luma samples, one U/V pair and (when the alpha path
 * is taken) two alpha samples, then writes one pixel pair through
 * yuv2rgb_write(). Alpha is filtered with the luma filter. */
994 static av_always_inline void
995 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
996 const int16_t **lumSrc, int lumFilterSize,
997 const int16_t *chrFilter, const int16_t **chrUSrc,
998 const int16_t **chrVSrc, int chrFilterSize,
999 const int16_t **alpSrc, uint8_t *dest, int dstW,
1000 int y, enum PixelFormat target, int hasAlpha)
1004 for (i = 0; i < (dstW >> 1); i++) {
/* NOTE(review): the declarations and initial values of Y1, Y2, U and V, the
 * initial values of A1/A2, the shifts after each accumulation and the
 * condition guarding the alpha section are not visible in this listing. */
1010 int av_unused A1, A2;
1011 const void *r, *g, *b;
1013 for (j = 0; j < lumFilterSize; j++) {
1014 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1015 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1017 for (j = 0; j < chrFilterSize; j++) {
1018 U += chrUSrc[j][i] * chrFilter[j];
1019 V += chrVSrc[j][i] * chrFilter[j];
/* bit 8 set in any value means at least one is outside 0..255, so saturate
 * all four */
1025 if ((Y1 | Y2 | U | V) & 0x100) {
1026 Y1 = av_clip_uint8(Y1);
1027 Y2 = av_clip_uint8(Y2);
1028 U = av_clip_uint8(U);
1029 V = av_clip_uint8(V);
/* alpha uses the luma filter coefficients */
1034 for (j = 0; j < lumFilterSize; j++) {
1035 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1036 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1040 if ((A1 | A2) & 0x100) {
1041 A1 = av_clip_uint8(A1);
1042 A2 = av_clip_uint8(A2);
1046 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
/* green table base is the sum of the U- and V-indexed table entries; the r
 * and b assignments are not visible in this listing */
1048 g = (c->table_gU[U] + c->table_gV[V]);
/* alpha is passed as 0 when hasAlpha is false */
1051 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1052 r, g, b, y, target, hasAlpha);
/* Two-line blend producing table-driven RGB output, one pixel pair per loop
 * iteration. Luma and alpha are blended with weights (4095 - yalpha, yalpha)
 * and chroma with (4095 - uvalpha, uvalpha); every blend is shifted right by
 * 19. The blended U and V select the colour tables and the pair is written
 * through yuv2rgb_write(). */
1056 static av_always_inline void
1057 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1058 const int16_t *ubuf[2], const int16_t *vbuf[2],
1059 const int16_t *abuf[2], uint8_t *dest, int dstW,
1060 int yalpha, int uvalpha, int y,
1061 enum PixelFormat target, int hasAlpha)
/* abuf is dereferenced only when hasAlpha is set */
1063 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1064 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1065 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1066 *abuf0 = hasAlpha ? abuf[0] : NULL,
1067 *abuf1 = hasAlpha ? abuf[1] : NULL;
1068 int yalpha1 = 4095 - yalpha;
1069 int uvalpha1 = 4095 - uvalpha;
1072 for (i = 0; i < (dstW >> 1); i++) {
1073 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1074 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1075 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1076 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
/* red table chosen by V, blue by U, green by the sum of both entries */
1078 const void *r = c->table_rV[V],
1079 *g = (c->table_gU[U] + c->table_gV[V]),
1080 *b = c->table_bU[U];
/* NOTE(review): the declaration of A1/A2 and the condition guarding these
 * two assignments are not visible in this listing; presumably
 * `if (hasAlpha)`, since abuf0/abuf1 are NULL otherwise -- confirm. */
1083 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1084 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1087 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1088 r, g, b, y, target, hasAlpha);
/**
 * Convert a single luma row (plus chroma and optional alpha) to packed RGB
 * through the context lookup tables, one pixel pair per loop iteration.
 *
 * Luma and alpha samples are shifted right by 7. Chroma is taken either from
 * one row (uvalpha < 2048) or as the sum of both rows shifted right by 8.
 *
 * @param c        scaler context; supplies table_rV, table_gU, table_gV and
 *                 table_bU
 * @param buf0     luma row
 * @param ubuf     two U rows
 * @param vbuf     two V rows
 * @param abuf0    alpha row
 * @param dest     output row
 * @param dstW     output width; dstW >> 1 pixel pairs are produced
 * @param uvalpha  selects the chroma path (threshold 2048)
 * @param y        output row index, forwarded to yuv2rgb_write()
 * @param target   destination pixel format, forwarded to yuv2rgb_write()
 *
 * NOTE(review): the last parameter line (hasAlpha), brace-only lines, the
 * else keyword between the two loops and the guards around the alpha reads
 * are missing from this extract.
 */
1092 static av_always_inline void
1093 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1094 const int16_t *ubuf[2], const int16_t *vbuf[2],
1095 const int16_t *abuf0, uint8_t *dest, int dstW,
1096 int uvalpha, int y, enum PixelFormat target,
1099 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1100 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* single-row chroma path */
1103 if (uvalpha < 2048) {
1104 for (i = 0; i < (dstW >> 1); i++) {
1105 int Y1 = buf0[i * 2] >> 7;
1106 int Y2 = buf0[i * 2 + 1] >> 7;
/* NOTE(review): reads row index 1 (ubuf1/vbuf1), not row 0 - confirm this is intended */
1107 int U = ubuf1[i] >> 7;
1108 int V = vbuf1[i] >> 7;
1110 const void *r = c->table_rV[V],
1111 *g = (c->table_gU[U] + c->table_gV[V]),
1112 *b = c->table_bU[U];
1115 A1 = abuf0[i * 2 ] >> 7;
1116 A2 = abuf0[i * 2 + 1] >> 7;
1119 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1120 r, g, b, y, target, hasAlpha);
/* two-row chroma path: sum of both rows with one extra bit of shift */
1123 for (i = 0; i < (dstW >> 1); i++) {
1124 int Y1 = buf0[i * 2] >> 7;
1125 int Y2 = buf0[i * 2 + 1] >> 7;
1126 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1127 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1129 const void *r = c->table_rV[V],
1130 *g = (c->table_gU[U] + c->table_gV[V]),
1131 *b = c->table_bU[U];
1134 A1 = abuf0[i * 2 ] >> 7;
1135 A2 = abuf0[i * 2 + 1] >> 7;
1138 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1139 r, g, b, y, target, hasAlpha);
/*
 * YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)
 *
 * Defines the static function <name><ext>_X_c, which forwards all of its
 * arguments to <name><base>_X_c_template and appends the constant arguments
 * fmt (pixel format) and hasAlpha.
 *
 * NOTE(review): the last parameter line and the brace lines of the generated
 * function are missing from this extract.
 */
1144 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1145 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1146 const int16_t **lumSrc, int lumFilterSize, \
1147 const int16_t *chrFilter, const int16_t **chrUSrc, \
1148 const int16_t **chrVSrc, int chrFilterSize, \
1149 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1152 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1153 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1154 alpSrc, dest, dstW, y, fmt, hasAlpha); \
/*
 * YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha)
 *
 * Expands YUV2RGBWRAPPERX with the same arguments and additionally defines
 * the static functions <name><ext>_2_c and <name><ext>_1_c, which forward to
 * <name><base>_2_c_template and <name><base>_1_c_template with fmt and
 * hasAlpha appended as constant arguments.
 *
 * NOTE(review): the brace lines of the generated functions are missing from
 * this extract.
 */
1156 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1157 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1159 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1160 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1161 int yalpha, int uvalpha, int y) \
1163 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1164 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1167 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1168 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1169 const int16_t *abuf0, uint8_t *dest, int dstW, \
1170 int uvalpha, int y) \
1172 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1173 dstW, uvalpha, y, fmt, hasAlpha); \
/*
 * Instantiations of the _X_c/_2_c/_1_c writers on top of the
 * yuv2rgb_*_c_template functions. Each line generates three functions named
 * <name><ext>_{X,2,1}_c, e.g. yuv2rgb32_X_c or yuv2rgb24_1_c.
 *
 * The first two variants decide hasAlpha at run time from c->alpPixBuf; the
 * a32/x32 variants fix it to 1 and 0 respectively.
 *
 * NOTE(review): only one of the preprocessor directives that select between
 * these groups is visible in this extract.
 */
1177 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1178 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1180 #if CONFIG_SWSCALE_ALPHA
1181 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1182 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1184 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1185 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
/* formats without an alpha channel: hasAlpha is the constant 0 */
1187 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1188 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1189 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1190 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1191 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1192 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1193 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1194 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
/**
 * Vertically filter luma/chroma (and optionally alpha) rows and convert
 * every output pixel to RGB arithmetically, using the yuv2rgb_* coefficients
 * stored in the context; one pixel per loop iteration.
 *
 * @param c             scaler context; supplies yuv2rgb_y_offset,
 *                      yuv2rgb_y_coeff and the v2r/v2g/u2g/u2b coefficients
 * @param lumFilter     luma filter coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma source rows
 * @param lumFilterSize number of luma (and alpha) taps
 * @param chrFilter     chroma filter coefficients, chrFilterSize entries
 * @param chrUSrc       chrFilterSize U source rows, indexed per output pixel
 * @param chrVSrc       chrFilterSize V source rows, indexed per output pixel
 * @param chrFilterSize number of chroma taps
 * @param alpSrc        alpha source rows, weighted with lumFilter
 * @param dest          output row
 * @param dstW          output width in pixels
 * @param y             output row index (no use is visible in this extract)
 * @param target        destination pixel format; decides the bytes per pixel
 * @param hasAlpha      when zero, the alpha byte is written as 255
 *
 * NOTE(review): accumulator initialisation, normalising shifts, the switch
 * on target with its colour-byte stores, the dest advance and all brace-only
 * lines are missing from this extract.
 */
1196 static av_always_inline void
1197 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1198 const int16_t **lumSrc, int lumFilterSize,
1199 const int16_t *chrFilter, const int16_t **chrUSrc,
1200 const int16_t **chrVSrc, int chrFilterSize,
1201 const int16_t **alpSrc, uint8_t *dest,
1202 int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 3 bytes per pixel for the 24-bit targets, 4 otherwise */
1205 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1207 for (i = 0; i < dstW; i++) {
1215 for (j = 0; j < lumFilterSize; j++) {
1216 Y += lumSrc[j][i] * lumFilter[j];
1218 for (j = 0; j < chrFilterSize; j++) {
1219 U += chrUSrc[j][i] * chrFilter[j];
1220 V += chrVSrc[j][i] * chrFilter[j];
/* alpha is filtered with the luma coefficients */
1227 for (j = 0; j < lumFilterSize; j++) {
1228 A += alpSrc[j][i] * lumFilter[j];
1232 A = av_clip_uint8(A);
/* remove the luma offset, then apply the luma gain */
1234 Y -= c->yuv2rgb_y_offset;
1235 Y *= c->yuv2rgb_y_coeff;
1237 R = Y + V*c->yuv2rgb_v2r_coeff;
1238 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1239 B = Y + U*c->yuv2rgb_u2b_coeff;
/* either of the two top bits set in any component: clip all three to 30 bits */
1240 if ((R | G | B) & 0xC0000000) {
1241 R = av_clip_uintp2(R, 30);
1242 G = av_clip_uintp2(G, 30);
1243 B = av_clip_uintp2(B, 30);
/* alpha byte goes first or last depending on the (not visible) target case */
1248 dest[0] = hasAlpha ? A : 255;
1262 dest[3] = hasAlpha ? A : 255;
1265 dest[0] = hasAlpha ? A : 255;
1280 dest[3] = hasAlpha ? A : 255;
/*
 * Instantiations of the multi-tap (_X_c) writers on top of
 * yuv2rgb_full_X_c_template; each line generates one function named
 * yuv2<ext>_X_c, e.g. yuv2bgra32_full_X_c.
 *
 * The first group decides hasAlpha at run time from c->alpPixBuf; the second
 * group defines the same names with hasAlpha fixed to 1.
 *
 * NOTE(review): the two groups define identical function names, so they must
 * be mutually exclusive; the directives that separate them are only partly
 * visible in this extract.
 */
1288 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1289 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1290 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1291 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1293 #if CONFIG_SWSCALE_ALPHA
1294 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
1295 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
1296 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
1297 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
/* variants with hasAlpha fixed to 0 */
1299 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
1300 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
1304 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
1305 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
/**
 * Fill rows of an image plane with a constant byte.
 *
 * Starting at row y of plane, width bytes of a row are set to val by
 * memset(), repeated height times.
 *
 * NOTE(review): the last parameter line (y and val), the declaration of i
 * and the per-row pointer advance are missing from this extract.
 */
1307 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1308 int width, int height,
/* first row to fill */
1312 uint8_t *ptr = plane + stride*y;
1313 for (i=0; i<height; i++) {
1314 memset(ptr, val, width);
/**
 * Horizontal FIR scaler for 16-bit-per-sample input.
 *
 * _dst is written as an int32_t array and _src is read as a uint16_t array.
 * For each output sample i, filterSize input samples starting at
 * filterPos[i] are weighted with filter[filterSize * i + j]; the sum is
 * shifted right by sh and capped at (1 << 19) - 1.
 *
 * NOTE(review): the declarations of i, j, val and sh are missing from this
 * extract. `bits` is read from the source format descriptor, and sh is
 * presumably derived from it, so the fixed ">> 11" in the existing comment
 * may only hold for one bit depth - confirm.
 */
1319 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1320 const int16_t *filter,
1321 const int16_t *filterPos, int filterSize)
1324 int32_t *dst = (int32_t *) _dst;
1325 const uint16_t *src = (const uint16_t *) _src;
1326 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1329 for (i = 0; i < dstW; i++) {
1331 int srcPos = filterPos[i];
1334 for (j = 0; j < filterSize; j++) {
1335 val += src[srcPos + j] * filter[filterSize * i + j];
1337 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1338 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/**
 * Horizontal FIR scaler for 16-bit-per-sample input and int16_t output.
 *
 * _src is read as a uint16_t array. For each output sample i, filterSize
 * input samples starting at filterPos[i] are weighted with
 * filter[filterSize * i + j]; the sum is shifted right by sh, which is
 * depth_minus1 of the first component of the source format, and capped at
 * (1 << 15) - 1.
 *
 * NOTE(review): the declarations of i, j and val are missing from this
 * extract.
 */
1342 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1343 const int16_t *filter,
1344 const int16_t *filterPos, int filterSize)
1347 const uint16_t *src = (const uint16_t *) _src;
1348 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1350 for (i = 0; i < dstW; i++) {
1352 int srcPos = filterPos[i];
1355 for (j = 0; j < filterSize; j++) {
1356 val += src[srcPos + j] * filter[filterSize * i + j];
1358 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1359 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1363 // bilinear / bicubic scaling
/**
 * Horizontal FIR scaler for 8-bit input and int16_t output.
 *
 * For each output sample i, filterSize input bytes starting at filterPos[i]
 * are weighted with filter[filterSize * i + j]; the sum is shifted right by
 * 7 and capped at (1 << 15) - 1.
 *
 * NOTE(review): the last parameter line and the declarations of i, j and val
 * are missing from this extract.
 */
1364 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1365 const int16_t *filter, const int16_t *filterPos,
1369 for (i=0; i<dstW; i++) {
1371 int srcPos= filterPos[i];
1373 for (j=0; j<filterSize; j++) {
1374 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1376 //filter += hFilterSize;
1377 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/**
 * Horizontal FIR scaler for 8-bit input with int32_t output.
 *
 * _dst is written as an int32_t array. For each output sample i, filterSize
 * input bytes starting at filterPos[i] are weighted with
 * filter[filterSize * i + j]; the sum is shifted right by 3 and capped at
 * (1 << 19) - 1.
 *
 * NOTE(review): the last parameter line and the declarations of i, j and val
 * are missing from this extract.
 */
1382 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1383 const int16_t *filter, const int16_t *filterPos,
1387 int32_t *dst = (int32_t *) _dst;
1388 for (i=0; i<dstW; i++) {
1390 int srcPos= filterPos[i];
1392 for (j=0; j<filterSize; j++) {
1393 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1395 //filter += hFilterSize;
1396 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1401 //FIXME all pal and rgb srcFormats could do this conversion as well
1402 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place linear remap of two int16_t chroma rows:
 * x -> (min(x, 30775) * 4663 - 9289992) >> 12.
 *
 * Presumably converts limited-range chroma to full (JPEG) range, as the name
 * suggests - confirm against the caller.
 *
 * @param dstU  U row, modified in place
 * @param dstV  V row, modified in place
 * @param width number of samples per row
 */
1403 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1406 for (i = 0; i < width; i++) {
/* the input is capped at 30775 before the multiply */
1407 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1408 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
/**
 * In-place linear remap of two int16_t chroma rows:
 * x -> (x * 1799 + 4081085) >> 11.
 *
 * Presumably converts full (JPEG) range chroma to limited range, as the name
 * suggests - confirm against the caller.
 *
 * @param dstU  U row, modified in place
 * @param dstV  V row, modified in place
 * @param width number of samples per row
 */
1411 static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1414 for (i = 0; i < width; i++) {
1415 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1416 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
/**
 * In-place linear remap of an int16_t luma row:
 * x -> (min(x, 30189) * 19077 - 39057361) >> 14.
 *
 * Presumably converts limited-range luma to full (JPEG) range, as the name
 * suggests - confirm against the caller.
 *
 * @param dst   luma row, modified in place
 * @param width number of samples
 */
1419 static void lumRangeToJpeg_c(int16_t *dst, int width)
1422 for (i = 0; i < width; i++)
1423 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/**
 * In-place linear remap of an int16_t luma row:
 * x -> (x * 14071 + 33561947) >> 14.
 *
 * Presumably converts full (JPEG) range luma to limited range, as the name
 * suggests - confirm against the caller.
 *
 * @param dst   luma row, modified in place
 * @param width number of samples
 */
1425 static void lumRangeFromJpeg_c(int16_t *dst, int width)
1428 for (i = 0; i < width; i++)
1429 dst[i] = (dst[i]*14071 + 33561947)>>14;
/**
 * Variant of chrRangeToJpeg_c operating on int32_t samples: both row
 * pointers are reinterpreted as int32_t arrays and the clamp limit and
 * offset are the 16-bit-path constants shifted left by 4.
 *
 * @param _dstU U row (accessed as int32_t), modified in place
 * @param _dstV V row (accessed as int32_t), modified in place
 * @param width number of samples per row
 */
1432 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1435 int32_t *dstU = (int32_t *) _dstU;
1436 int32_t *dstV = (int32_t *) _dstV;
1437 for (i = 0; i < width; i++) {
1438 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1439 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/**
 * Variant of chrRangeFromJpeg_c operating on int32_t samples: both row
 * pointers are reinterpreted as int32_t arrays and the additive offset is
 * the 16-bit-path constant shifted left by 4.
 *
 * @param _dstU U row (accessed as int32_t), modified in place
 * @param _dstV V row (accessed as int32_t), modified in place
 * @param width number of samples per row
 */
1442 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1445 int32_t *dstU = (int32_t *) _dstU;
1446 int32_t *dstV = (int32_t *) _dstV;
1447 for (i = 0; i < width; i++) {
1448 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1449 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/**
 * Variant of lumRangeToJpeg_c operating on int32_t samples:
 * x -> (min(x, 30189 << 4) * 4769 - (39057361 << 2)) >> 12.
 *
 * The multiplier and shift differ from the int16_t version (4769 and 12
 * instead of 19077 and 14).
 *
 * @param _dst  luma row (accessed as int32_t), modified in place
 * @param width number of samples
 */
1452 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1455 int32_t *dst = (int32_t *) _dst;
1456 for (i = 0; i < width; i++)
1457 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/**
 * Variant of lumRangeFromJpeg_c operating on int32_t samples:
 * x -> (x * 14071 + (33561947 << 4)) >> 14.
 *
 * @param _dst  luma row (accessed as int32_t), modified in place
 * @param width number of samples
 */
1459 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
1462 int32_t *dst = (int32_t *) _dst;
1463 for (i = 0; i < width; i++)
1464 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/**
 * Fast horizontal luma scaler using linear interpolation between two
 * neighbouring input bytes.
 *
 * xpos is split into an integer sample index (xpos >> 16) and a 7-bit
 * interpolation weight taken from the low 16 bits. The output is
 * src[xx] * 128 plus the weighted difference to src[xx + 1], so src[xx + 1]
 * is read for every output sample.
 *
 * NOTE(review): the declaration of i and the per-sample update of xpos
 * (presumably by xInc) are missing from this extract.
 *
 * @param dst      output row
 * @param dstWidth number of output samples
 * @param src      input row of bytes
 */
1467 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1468 const uint8_t *src, int srcW, int xInc)
1471 unsigned int xpos=0;
1472 for (i=0;i<dstWidth;i++) {
1473 register unsigned int xx=xpos>>16;
1474 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1475 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1480 // *** horizontal scale Y line to temp buffer
/**
 * Horizontally scale one luma or alpha row into dst.
 *
 * Steps visible here: optionally convert the input row into
 * formatConvBuffer (through the alpha/luma toYV12 callback or, for luma
 * only, c->readLumPlanar), scale with c->hyScale or c->hyscale_fast, then
 * apply the luma range conversion callback.
 *
 * @param src_in  source plane pointers; plane 3 is used when isAlpha is set,
 *                plane 0 otherwise
 * @param isAlpha selects the alpha callbacks and disables range conversion
 *
 * NOTE(review): the conditions guarding the toYV12 call and the
 * convertRange call, and the brace-only lines, are missing from this
 * extract.
 */
1481 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1482 const uint8_t *src_in[4], int srcW, int xInc,
1483 const int16_t *hLumFilter,
1484 const int16_t *hLumFilterPos, int hLumFilterSize,
1485 uint8_t *formatConvBuffer,
1486 uint32_t *pal, int isAlpha)
1488 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
/* no range conversion for alpha */
1489 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1490 const uint8_t *src = src_in[isAlpha ? 3 : 0];
/* input conversion: the scaler then reads from formatConvBuffer */
1493 toYV12(formatConvBuffer, src, srcW, pal);
1494 src= formatConvBuffer;
1495 } else if (c->readLumPlanar && !isAlpha) {
1496 c->readLumPlanar(formatConvBuffer, src_in, srcW);
1497 src = formatConvBuffer;
/* the generic filter is used unless a fast scaler is installed */
1500 if (!c->hyscale_fast) {
1501 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1502 } else { // fast bilinear upscale / crap downscale
1503 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1507 convertRange(dst, dstWidth);
/**
 * Fast horizontal chroma scaler: linearly interpolates two input rows
 * (src1 -> dst1, src2 -> dst2) at the same positions.
 *
 * xpos is split into an integer sample index (xpos >> 16) and a 7-bit
 * weight xalpha; the left neighbour is weighted with xalpha ^ 127 and the
 * right neighbour (index xx + 1) with xalpha.
 *
 * NOTE(review): the declaration of i and the per-sample update of xpos
 * (presumably by xInc) are missing from this extract.
 */
1510 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1511 int dstWidth, const uint8_t *src1,
1512 const uint8_t *src2, int srcW, int xInc)
1515 unsigned int xpos=0;
1516 for (i=0;i<dstWidth;i++) {
1517 register unsigned int xx=xpos>>16;
1518 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1519 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1520 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/**
 * Horizontally scale one pair of chroma rows into dst1 (U) and dst2 (V).
 *
 * Steps visible here: optionally convert the input into formatConvBuffer
 * (through c->chrToYV12 or c->readChrPlanar), scale both rows with
 * c->hcScale or c->hcscale_fast, then apply c->chrConvertRange when it is
 * set.
 *
 * @param src_in source plane pointers; planes 1 and 2 are the chroma inputs
 *
 * NOTE(review): the condition guarding the chrToYV12 call, the src2
 * reassignments and the brace-only lines are missing from this extract.
 */
1525 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
1526 const uint8_t *src_in[4],
1527 int srcW, int xInc, const int16_t *hChrFilter,
1528 const int16_t *hChrFilterPos, int hChrFilterSize,
1529 uint8_t *formatConvBuffer, uint32_t *pal)
1531 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
/* second output row starts after the first one inside formatConvBuffer, at a 16-byte aligned offset */
1533 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
1534 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1535 src1= formatConvBuffer;
1537 } else if (c->readChrPlanar) {
1538 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
1539 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
1540 src1= formatConvBuffer;
/* the generic filter is used unless a fast scaler is installed */
1544 if (!c->hcscale_fast) {
1545 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1546 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1547 } else { // fast bilinear upscale / crap downscale
1548 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1551 if (c->chrConvertRange)
1552 c->chrConvertRange(dst1, dst2, dstWidth);
/**
 * Select the C implementations of the vertical scaler / output writer
 * functions for c->dstFormat and store them through the output pointers.
 *
 * @param c           scaler context; dstFormat and flags are read
 * @param yuv2plane1  receives the single-row planar writer
 * @param yuv2planeX  receives the multi-tap planar writer
 * @param yuv2nv12cX  receives the interleaved chroma writer (NV12/NV21 only)
 * @param yuv2packed1 receives the single-row packed writer
 * @param yuv2packed2 receives the two-row packed writer
 * @param yuv2packedX receives the multi-tap packed writer
 *
 * NOTE(review): many case labels, break statements, preprocessor directives
 * and brace-only lines are missing from this extract, so the exact mapping
 * of format to function cannot be read off the lines below; confirm against
 * the complete file.
 */
1555 static av_always_inline void
1556 find_c_packed_planar_out_funcs(SwsContext *c,
1557 yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
1558 yuv2interleavedX_fn *yuv2nv12cX,
1559 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
1560 yuv2packedX_fn *yuv2packedX)
1562 enum PixelFormat dstFormat = c->dstFormat;
/* planar writers: chosen by destination bit depth and endianness */
1564 if (is16BPS(dstFormat)) {
1565 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
1566 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
1567 } else if (is9_OR_10BPS(dstFormat)) {
/* depth_minus1 == 8 means 9 bits per sample */
1568 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
1569 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
1570 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
1572 *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
1573 *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
1576 *yuv2plane1 = yuv2plane1_8_c;
1577 *yuv2planeX = yuv2planeX_8_c;
1578 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
1579 *yuv2nv12cX = yuv2nv12cX_c;
/* full horizontal chroma interpolation: only the multi-tap packed writer is assigned in this section */
1582 if(c->flags & SWS_FULL_CHR_H_INT) {
1583 switch (dstFormat) {
1586 *yuv2packedX = yuv2rgba32_full_X_c;
1588 #if CONFIG_SWSCALE_ALPHA
1590 *yuv2packedX = yuv2rgba32_full_X_c;
1592 #endif /* CONFIG_SWSCALE_ALPHA */
1594 *yuv2packedX = yuv2rgbx32_full_X_c;
1596 #endif /* !CONFIG_SMALL */
1600 *yuv2packedX = yuv2argb32_full_X_c;
1602 #if CONFIG_SWSCALE_ALPHA
1604 *yuv2packedX = yuv2argb32_full_X_c;
1606 #endif /* CONFIG_SWSCALE_ALPHA */
1608 *yuv2packedX = yuv2xrgb32_full_X_c;
1610 #endif /* !CONFIG_SMALL */
1614 *yuv2packedX = yuv2bgra32_full_X_c;
1616 #if CONFIG_SWSCALE_ALPHA
1618 *yuv2packedX = yuv2bgra32_full_X_c;
1620 #endif /* CONFIG_SWSCALE_ALPHA */
1622 *yuv2packedX = yuv2bgrx32_full_X_c;
1624 #endif /* !CONFIG_SMALL */
1628 *yuv2packedX = yuv2abgr32_full_X_c;
1630 #if CONFIG_SWSCALE_ALPHA
1632 *yuv2packedX = yuv2abgr32_full_X_c;
1634 #endif /* CONFIG_SWSCALE_ALPHA */
1636 *yuv2packedX = yuv2xbgr32_full_X_c;
1638 #endif /* !CONFIG_SMALL */
1641 *yuv2packedX = yuv2rgb24_full_X_c;
1644 *yuv2packedX = yuv2bgr24_full_X_c;
/* packed RGB writers in 1-row, 2-row and multi-tap flavours (presumably the else branch of the flag test above) */
1648 switch (dstFormat) {
1649 case PIX_FMT_RGB48LE:
1650 *yuv2packed1 = yuv2rgb48le_1_c;
1651 *yuv2packed2 = yuv2rgb48le_2_c;
1652 *yuv2packedX = yuv2rgb48le_X_c;
1654 case PIX_FMT_RGB48BE:
1655 *yuv2packed1 = yuv2rgb48be_1_c;
1656 *yuv2packed2 = yuv2rgb48be_2_c;
1657 *yuv2packedX = yuv2rgb48be_X_c;
1659 case PIX_FMT_BGR48LE:
1660 *yuv2packed1 = yuv2bgr48le_1_c;
1661 *yuv2packed2 = yuv2bgr48le_2_c;
1662 *yuv2packedX = yuv2bgr48le_X_c;
1664 case PIX_FMT_BGR48BE:
1665 *yuv2packed1 = yuv2bgr48be_1_c;
1666 *yuv2packed2 = yuv2bgr48be_2_c;
1667 *yuv2packedX = yuv2bgr48be_X_c;
1672 *yuv2packed1 = yuv2rgb32_1_c;
1673 *yuv2packed2 = yuv2rgb32_2_c;
1674 *yuv2packedX = yuv2rgb32_X_c;
1676 #if CONFIG_SWSCALE_ALPHA
1678 *yuv2packed1 = yuv2rgba32_1_c;
1679 *yuv2packed2 = yuv2rgba32_2_c;
1680 *yuv2packedX = yuv2rgba32_X_c;
1682 #endif /* CONFIG_SWSCALE_ALPHA */
1684 *yuv2packed1 = yuv2rgbx32_1_c;
1685 *yuv2packed2 = yuv2rgbx32_2_c;
1686 *yuv2packedX = yuv2rgbx32_X_c;
1688 #endif /* !CONFIG_SMALL */
1690 case PIX_FMT_RGB32_1:
1691 case PIX_FMT_BGR32_1:
1693 *yuv2packed1 = yuv2rgb32_1_1_c;
1694 *yuv2packed2 = yuv2rgb32_1_2_c;
1695 *yuv2packedX = yuv2rgb32_1_X_c;
1697 #if CONFIG_SWSCALE_ALPHA
1699 *yuv2packed1 = yuv2rgba32_1_1_c;
1700 *yuv2packed2 = yuv2rgba32_1_2_c;
1701 *yuv2packedX = yuv2rgba32_1_X_c;
1703 #endif /* CONFIG_SWSCALE_ALPHA */
1705 *yuv2packed1 = yuv2rgbx32_1_1_c;
1706 *yuv2packed2 = yuv2rgbx32_1_2_c;
1707 *yuv2packedX = yuv2rgbx32_1_X_c;
1709 #endif /* !CONFIG_SMALL */
1712 *yuv2packed1 = yuv2rgb24_1_c;
1713 *yuv2packed2 = yuv2rgb24_2_c;
1714 *yuv2packedX = yuv2rgb24_X_c;
1717 *yuv2packed1 = yuv2bgr24_1_c;
1718 *yuv2packed2 = yuv2bgr24_2_c;
1719 *yuv2packedX = yuv2bgr24_X_c;
/* RGB and BGR orderings of both endiannesses share one set of writers per bit depth */
1721 case PIX_FMT_RGB565LE:
1722 case PIX_FMT_RGB565BE:
1723 case PIX_FMT_BGR565LE:
1724 case PIX_FMT_BGR565BE:
1725 *yuv2packed1 = yuv2rgb16_1_c;
1726 *yuv2packed2 = yuv2rgb16_2_c;
1727 *yuv2packedX = yuv2rgb16_X_c;
1729 case PIX_FMT_RGB555LE:
1730 case PIX_FMT_RGB555BE:
1731 case PIX_FMT_BGR555LE:
1732 case PIX_FMT_BGR555BE:
1733 *yuv2packed1 = yuv2rgb15_1_c;
1734 *yuv2packed2 = yuv2rgb15_2_c;
1735 *yuv2packedX = yuv2rgb15_X_c;
1737 case PIX_FMT_RGB444LE:
1738 case PIX_FMT_RGB444BE:
1739 case PIX_FMT_BGR444LE:
1740 case PIX_FMT_BGR444BE:
1741 *yuv2packed1 = yuv2rgb12_1_c;
1742 *yuv2packed2 = yuv2rgb12_2_c;
1743 *yuv2packedX = yuv2rgb12_X_c;
1747 *yuv2packed1 = yuv2rgb8_1_c;
1748 *yuv2packed2 = yuv2rgb8_2_c;
1749 *yuv2packedX = yuv2rgb8_X_c;
1753 *yuv2packed1 = yuv2rgb4_1_c;
1754 *yuv2packed2 = yuv2rgb4_2_c;
1755 *yuv2packedX = yuv2rgb4_X_c;
1757 case PIX_FMT_RGB4_BYTE:
1758 case PIX_FMT_BGR4_BYTE:
1759 *yuv2packed1 = yuv2rgb4b_1_c;
1760 *yuv2packed2 = yuv2rgb4b_2_c;
1761 *yuv2packedX = yuv2rgb4b_X_c;
/* second switch: 16-bit gray, 1-bit mono and packed 4:2:2 YUV writers */
1765 switch (dstFormat) {
1766 case PIX_FMT_GRAY16BE:
1767 *yuv2packed1 = yuv2gray16BE_1_c;
1768 *yuv2packed2 = yuv2gray16BE_2_c;
1769 *yuv2packedX = yuv2gray16BE_X_c;
1771 case PIX_FMT_GRAY16LE:
1772 *yuv2packed1 = yuv2gray16LE_1_c;
1773 *yuv2packed2 = yuv2gray16LE_2_c;
1774 *yuv2packedX = yuv2gray16LE_X_c;
1776 case PIX_FMT_MONOWHITE:
1777 *yuv2packed1 = yuv2monowhite_1_c;
1778 *yuv2packed2 = yuv2monowhite_2_c;
1779 *yuv2packedX = yuv2monowhite_X_c;
1781 case PIX_FMT_MONOBLACK:
1782 *yuv2packed1 = yuv2monoblack_1_c;
1783 *yuv2packed2 = yuv2monoblack_2_c;
1784 *yuv2packedX = yuv2monoblack_X_c;
1786 case PIX_FMT_YUYV422:
1787 *yuv2packed1 = yuv2yuyv422_1_c;
1788 *yuv2packed2 = yuv2yuyv422_2_c;
1789 *yuv2packedX = yuv2yuyv422_X_c;
1791 case PIX_FMT_UYVY422:
1792 *yuv2packed1 = yuv2uyvy422_1_c;
1793 *yuv2packed2 = yuv2uyvy422_2_c;
1794 *yuv2packedX = yuv2uyvy422_X_c;
/*
 * Buffer-tracing switch: set DEBUG_SWSCALE_BUFFERS to a non-zero value to
 * make every DEBUG_BUFFERS(...) call log at AV_LOG_DEBUG level. The macro
 * uses a variable named c from the calling scope as the log context.
 *
 * NOTE(review): the expansion is a bare `if` statement, so it would capture
 * an `else` that follows it; use it only as a standalone statement.
 */
1799 #define DEBUG_SWSCALE_BUFFERS 0
1800 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/**
 * Scale one slice of the source picture into the destination picture.
 *
 * For each destination row, starting from a dstY that is kept across calls,
 * the function works out which source rows the vertical filters need,
 * horizontally scales missing rows into the luma/chroma/alpha ring buffers
 * (hyscale/hcscale), and, once enough rows are available, runs the vertical
 * scaler / output writer for that row. If the slice does not hold enough
 * source rows, everything available is buffered and the loop stops until
 * the next slice arrives.
 *
 * @param c         scaler context; ring-buffer state is read at entry and
 *                  written back before returning
 * @param src       source plane pointers for this slice
 * @param srcStride source strides; elements are modified in place below
 * @param srcSliceY first source row of the slice
 * @param srcSliceH number of source rows in the slice
 * @param dst       destination plane pointers
 * @param dstStride destination strides
 * @return number of destination rows written by this call (dstY - lastDstY)
 *
 * NOTE(review): numerous lines are missing from this extract, among them
 * the declarations of dstY, lastDstY and enough_lines, the ring-buffer
 * index increments, several conditions and all brace-only lines. Confirm
 * the details against the complete file.
 */
1802 static int swScale(SwsContext *c, const uint8_t* src[],
1803 int srcStride[], int srcSliceY,
1804 int srcSliceH, uint8_t* dst[], int dstStride[])
1806 /* load a few things into local vars to make the code more readable? and faster */
1807 const int srcW= c->srcW;
1808 const int dstW= c->dstW;
1809 const int dstH= c->dstH;
1810 const int chrDstW= c->chrDstW;
1811 const int chrSrcW= c->chrSrcW;
1812 const int lumXInc= c->lumXInc;
1813 const int chrXInc= c->chrXInc;
1814 const enum PixelFormat dstFormat= c->dstFormat;
1815 const int flags= c->flags;
1816 int16_t *vLumFilterPos= c->vLumFilterPos;
1817 int16_t *vChrFilterPos= c->vChrFilterPos;
1818 int16_t *hLumFilterPos= c->hLumFilterPos;
1819 int16_t *hChrFilterPos= c->hChrFilterPos;
1820 int16_t *vLumFilter= c->vLumFilter;
1821 int16_t *vChrFilter= c->vChrFilter;
1822 int16_t *hLumFilter= c->hLumFilter;
1823 int16_t *hChrFilter= c->hChrFilter;
1824 int32_t *lumMmxFilter= c->lumMmxFilter;
1825 int32_t *chrMmxFilter= c->chrMmxFilter;
1826 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
1827 const int vLumFilterSize= c->vLumFilterSize;
1828 const int vChrFilterSize= c->vChrFilterSize;
1829 const int hLumFilterSize= c->hLumFilterSize;
1830 const int hChrFilterSize= c->hChrFilterSize;
1831 int16_t **lumPixBuf= c->lumPixBuf;
1832 int16_t **chrUPixBuf= c->chrUPixBuf;
1833 int16_t **chrVPixBuf= c->chrVPixBuf;
1834 int16_t **alpPixBuf= c->alpPixBuf;
1835 const int vLumBufSize= c->vLumBufSize;
1836 const int vChrBufSize= c->vChrBufSize;
1837 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* chroma slice geometry: the start is shifted down, the height is rounded up via the double negation */
1838 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
1839 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
1841 uint32_t *pal=c->pal_yuv;
/* local copies of the output functions; they may be replaced near the bottom of the picture */
1842 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
1843 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
1844 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
1845 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
1846 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
1847 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* dithering tables are switched per row only for sources deeper than 8 bits */
1848 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
1850 /* vars which will change and which we need to store back in the context */
1852 int lumBufIndex= c->lumBufIndex;
1853 int chrBufIndex= c->chrBufIndex;
1854 int lastInLumBuf= c->lastInLumBuf;
1855 int lastInChrBuf= c->lastInChrBuf;
/* packed input: the visible assignment gives plane 3 the stride of plane 0 */
1857 if (isPacked(c->srcFormat)) {
1865 srcStride[3]= srcStride[0];
/* vChrDrop: chroma strides are multiplied by 2^vChrDrop */
1867 srcStride[1]<<= c->vChrDrop;
1868 srcStride[2]<<= c->vChrDrop;
1870 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
1871 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
1872 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
1873 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
1874 srcSliceY, srcSliceH, dstY, dstH);
1875 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
1876 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn about destination strides that are not multiples of 8; warnedAlready is a function-level static shared by all contexts */
1878 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
1879 static int warnedAlready=0; //FIXME move this into the context perhaps
1880 if (flags & SWS_PRINT_INFO && !warnedAlready) {
1881 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
1882 " ->cannot do aligned memory accesses anymore\n");
1887 /* Note the user might start scaling the picture in the middle so this
1888 will not get executed. This is not really intended but works
1889 currently, so people might do it. */
1890 if (srcSliceY ==0) {
/* without per-row dithering both dither pointers refer to the constant ff_sws_pb_64 table */
1898 if (!should_dither) {
1899 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* main loop: one destination row per iteration */
1903 for (;dstY < dstH; dstY++) {
1904 const int chrDstY= dstY>>c->chrDstVSubSample;
1905 uint8_t *dest[4] = {
1906 dst[0] + dstStride[0] * dstY,
1907 dst[1] + dstStride[1] * chrDstY,
1908 dst[2] + dstStride[2] * chrDstY,
1909 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
1912 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
/* first luma row needed by the last destination row that shares this chroma row (capped at the last picture row) */
1913 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
1914 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
1916 // Last line needed as input
1917 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
1918 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
1919 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
1922 //handle holes (FAST_BILINEAR & weird filters)
1923 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
1924 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
1925 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
1926 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
1928 DEBUG_BUFFERS("dstY: %d\n", dstY);
1929 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
1930 firstLumSrcY, lastLumSrcY, lastInLumBuf);
1931 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
1932 firstChrSrcY, lastChrSrcY, lastInChrBuf);
1934 // Do we have enough lines in this slice to output the dstY line
1935 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input: buffer everything up to the end of this slice instead */
1937 if (!enough_lines) {
1938 lastLumSrcY = srcSliceY + srcSliceH - 1;
1939 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
1940 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
1941 lastLumSrcY, lastChrSrcY);
1944 //Do horizontal scaling
1945 while(lastInLumBuf < lastLumSrcY) {
/* plane pointers for the next unbuffered source row, relative to the slice start */
1946 const uint8_t *src1[4] = {
1947 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
1948 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
1949 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
1950 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
1953 assert(lumBufIndex < 2*vLumBufSize);
1954 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
1955 assert(lastInLumBuf + 1 - srcSliceY >= 0);
1956 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
1957 hLumFilter, hLumFilterPos, hLumFilterSize,
/* the alpha row is scaled with the luma filter into the same ring-buffer slot index */
1960 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
1961 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
1962 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
1966 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
1967 lumBufIndex, lastInLumBuf);
/* same procedure for the chroma rows */
1969 while(lastInChrBuf < lastChrSrcY) {
1970 const uint8_t *src1[4] = {
1971 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
1972 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
1973 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
1974 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
1977 assert(chrBufIndex < 2*vChrBufSize);
1978 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
1979 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
1980 //FIXME replace parameters through context struct (some at least)
1982 if (c->needs_hcscale)
1983 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
1984 chrDstW, src1, chrSrcW, chrXInc,
1985 hChrFilter, hChrFilterPos, hChrFilterSize,
1986 formatConvBuffer, pal);
1988 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
1989 chrBufIndex, lastInChrBuf);
1991 //wrap buf index around to stay inside the ring buffer
1992 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
1993 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
1995 break; //we can't output a dstY line so let's try with the next slice
1998 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* per-row dither rows taken from the 8-row dither_8x8_128 table */
2000 if (should_dither) {
2001 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2002 c->lumDither8 = dither_8x8_128[dstY & 7];
/* last two destination rows: re-select the C output functions */
2004 if (dstY >= dstH-2) {
2005 // hmm looks like we can't use MMX here without overwriting this array's tail
2006 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2007 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* pointers to the first ring-buffer row needed by the vertical filters */
2011 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2012 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2013 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2014 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* filter window reaches outside the picture: build a temporary row list after the ring buffer, repeating the first/last valid row */
2016 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2017 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2018 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2019 for (i = 0; i < neg; i++)
2020 tmpY[i] = lumSrcPtr[neg];
2021 for ( ; i < end; i++)
2022 tmpY[i] = lumSrcPtr[i];
2023 for ( ; i < vLumFilterSize; i++)
2024 tmpY[i] = tmpY[i-1];
2028 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2029 for (i = 0; i < neg; i++)
2030 tmpA[i] = alpSrcPtr[neg];
2031 for ( ; i < end; i++)
2032 tmpA[i] = alpSrcPtr[i];
2033 for ( ; i < vLumFilterSize; i++)
2034 tmpA[i] = tmpA[i - 1];
/* same edge handling for the chroma rows */
2038 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2039 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2040 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2041 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2042 for (i = 0; i < neg; i++) {
2043 tmpU[i] = chrUSrcPtr[neg];
2044 tmpV[i] = chrVSrcPtr[neg];
2046 for ( ; i < end; i++) {
2047 tmpU[i] = chrUSrcPtr[i];
2048 tmpV[i] = chrVSrcPtr[i];
2050 for ( ; i < vChrFilterSize; i++) {
2051 tmpU[i] = tmpU[i - 1];
2052 tmpV[i] = tmpV[i - 1];
2058 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2059 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
/* a one-tap vertical filter uses the single-row writer */
2061 if (vLumFilterSize == 1) {
2062 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2064 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2065 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
/* chroma is written only on rows where the low subsampling bits of dstY are zero, and never for gray output */
2068 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2070 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2071 } else if (vChrFilterSize == 1) {
2072 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2073 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2075 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2076 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2077 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2078 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2082 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2083 if (vLumFilterSize == 1) {
2084 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2086 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2087 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* packed output: choose the 1-row, 2-row or multi-tap writer by filter sizes */
2091 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2092 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2093 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2094 int chrAlpha = vChrFilter[2 * dstY + 1];
2095 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2096 alpPixBuf ? *alpSrcPtr : NULL,
2097 dest[0], dstW, chrAlpha, dstY);
2098 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2099 int lumAlpha = vLumFilter[2 * dstY + 1];
2100 int chrAlpha = vChrFilter[2 * dstY + 1];
2102 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2104 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2105 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2106 alpPixBuf ? alpSrcPtr : NULL,
2107 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2108 } else { //general RGB
/* NOTE(review): the chroma filter is indexed with dstY here (and in the chrAlpha lookups above) while the planar path uses chrDstY - confirm the two are always equal for packed output */
2109 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2110 lumSrcPtr, vLumFilterSize,
2111 vChrFilter + dstY * vChrFilterSize,
2112 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2113 alpSrcPtr, dest[0], dstW, dstY);
/* planar destination with alpha but no alpha input: fill the rows written by this call with 255 */
2119 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
2120 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* store fence, issued only when the CPU reports MMX2 */
2123 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2124 __asm__ volatile("sfence":::"memory");
2128 /* store changed local vars back in the context */
2130 c->lumBufIndex= lumBufIndex;
2131 c->chrBufIndex= chrBufIndex;
2132 c->lastInLumBuf= lastInLumBuf;
2133 c->lastInChrBuf= lastInChrBuf;
2135 return dstY - lastDstY;
/**
 * Install the C implementations of the scaler callbacks into the context:
 * output writers, input readers, horizontal scalers, range converters and
 * the needs_hcscale flag.
 *
 * NOTE(review): the last argument line of the first call, the conditions
 * that choose between the FromJpeg and ToJpeg converters, the else keywords
 * and all brace-only lines are missing from this extract.
 */
2138 static av_cold void sws_init_swScale_c(SwsContext *c)
2140 enum PixelFormat srcFormat = c->srcFormat;
2142 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2143 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
2146 ff_sws_init_input_funcs(c);
/* horizontal scaler: chosen by source bit depth (8 or more) and destination depth (up to 10 or more) */
2148 if (c->srcBpc == 8) {
2149 if (c->dstBpc <= 10) {
2150 c->hyScale = c->hcScale = hScale8To15_c;
/* the fast bilinear scalers are only installed on the 8-bit to 15-bit path */
2151 if (c->flags & SWS_FAST_BILINEAR) {
2152 c->hyscale_fast = hyscale_fast_c;
2153 c->hcscale_fast = hcscale_fast_c;
2156 c->hyScale = c->hcScale = hScale8To19_c;
2159 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* range conversion only when source and destination ranges differ and the destination is not RGB */
2162 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2163 if (c->dstBpc <= 10) {
2165 c->lumConvertRange = lumRangeFromJpeg_c;
2166 c->chrConvertRange = chrRangeFromJpeg_c;
2168 c->lumConvertRange = lumRangeToJpeg_c;
2169 c->chrConvertRange = chrRangeToJpeg_c;
2173 c->lumConvertRange = lumRangeFromJpeg16_c;
2174 c->chrConvertRange = chrRangeFromJpeg16_c;
2176 c->lumConvertRange = lumRangeToJpeg16_c;
2177 c->chrConvertRange = chrRangeToJpeg16_c;
/* chroma scaling is skipped when either side is gray or the source is 1-bit mono */
2182 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2183 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2184 c->needs_hcscale = 1;
2187 SwsFunc ff_getSwsFunc(SwsContext *c)
2189 sws_init_swScale_c(c);
2192 ff_sws_init_swScale_mmx(c);
2194 ff_sws_init_swScale_altivec(c);