2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most of these are tested, but not every one was recorded ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
// RGB -> YUV conversion coefficients in Q15 fixed point (scaled by
// 1 << RGB2YUV_SHIFT, rounded to nearest). The base weights are the
// BT.601 luma coefficients (0.299/0.587/0.114); luma terms are scaled by
// 219/255 (Y range 16..235) and chroma terms by 224/255 (U/V range 16..240).
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
// Ordered-dither offset tables used by the packed-RGB output writers below.
// NOTE(review): several closing "};" lines and the preprocessor guards that
// normally select between the alternate tables are not visible in this
// excerpt — confirm against the full file.

// 2x2 dither, 4 levels: used for the green channel in 565 output
// (see yuv2rgb_write).
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
// 2x2 dither, 8 levels: red/blue in 565 output, all channels in 555 output.
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
// 4x4 dither, 16 levels: used for 444 output (see yuv2rgb_write).
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
// 8x8 dither, 32 levels: red/green channels of 8-bit RGB/BGR output.
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
// 8x8 dither, 73 levels: blue channel of 8-bit output and green of 4-bit.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
// 8x8 dither, 220 levels: used by the monochrome writers (d128) and the
// 4-bit path in yuv2rgb_write. NOTE(review): dither_8x8_220 appears to be
// defined FOUR times below (linear plus gamma 1.5 / 2.0 / 2.5 variants);
// upstream selects exactly one with #if/#elif guards that are missing from
// this excerpt — as shown this would be a redefinition error. Verify.
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
// 8x8 dither, 128 levels; not referenced from the code visible in this
// excerpt — presumably used elsewhere in the file (TODO confirm).
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
// Eight bytes of 64; exported (no static) — presumably consumed by the
// SIMD/asm code, verify against the asm sources.
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
// Vertical scaler output for 9..16 bpc planar formats: accumulate
// 'filterSize' rows of 32-bit intermediates through 'filter', then shift,
// clip to uint16 and store with the requested endianness.
198 static av_always_inline void
199 yuv2yuvX16_c_template(const int16_t *filter, int filterSize,
200 const int32_t **src, uint16_t *dest, int dstW,
201 int big_endian, int output_bits)
203 #define output_pixel(pos, val) \
205 AV_WB16(pos, av_clip_uint16(val >> shift)); \
207 AV_WL16(pos, av_clip_uint16(val >> shift)); \
// Each product is pre-shifted right by 1 (and 'shift' reduced by 1 to
// compensate) so the 32-bit accumulator cannot overflow at 16-bit depth.
211 int shift = 15 + 16 - output_bits - 1;
213 for (i = 0; i < dstW; i++) {
// Rounding bias: half of the final right-shift.
214 int val = 1 << (30-output_bits - 1);
217 for (j = 0; j < filterSize; j++)
218 val += (src[j][i] * filter[j]) >> 1;
220 output_pixel(&dest[i], val);
// output_pixel variant for sub-16-bit depths: clip to 'output_bits' with
// av_clip_uintp2 instead of a full uint16 clip.
225 #define output_pixel(pos, val) \
227 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
229 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
// Vertical scaler output for 9/10 bpc planar formats from 16-bit
// intermediates; no overflow pre-shift is needed at these depths.
232 static av_always_inline void
233 yuv2yuvX10_c_template(const int16_t *filter, int filterSize,
234 const int16_t **src, uint16_t *dest, int dstW,
235 int big_endian, int output_bits)
238 int shift = 11 + 16 - output_bits;
240 for (i = 0; i < dstW; i++) {
// Rounding bias: half of the final right-shift.
241 int val = 1 << (26-output_bits);
244 for (j = 0; j < filterSize; j++)
245 val += src[j][i] * filter[j];
247 output_pixel(&dest[i], val);
// Stamp out concrete yuv2yuvX<bits><BE|LE>_c entry points that forward to
// the templates above; 9/10-bit use the int16_t template, 16-bit the
// int32_t one. NOTE(review): 'dither'/'offset' parameters are accepted for
// signature compatibility but not forwarded here.
253 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
254 static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
255 const int16_t **src, uint16_t *dest, int dstW, \
256 const uint8_t *dither, int offset)\
258 yuv2yuvX_template_fn(filter, filterSize, (const typeX_t **) src, \
259 dest, dstW, is_be, bits); \
261 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
262 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
263 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
264 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
265 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
266 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
// Vertical scaler output for 8-bit planar formats: seed with the ordered
// dither value (scaled into the accumulator's Q domain), accumulate the
// filter taps, then shift down by 19 and clip to uint8.
268 static void yuv2yuvX_c(const int16_t *filter, int filterSize,
269 const int16_t **src, uint8_t *dest, int dstW,
270 const uint8_t *dither, int offset)
273 for (i=0; i<dstW; i++) {
274 int val = dither[(i + offset) & 7] << 12;
276 for (j=0; j<filterSize; j++)
277 val += src[j][i] * filter[j];
279 dest[i]= av_clip_uint8(val>>19);
// Unfiltered (1:1 vertical) 8-bit planar output: add the dither value to
// the 15-bit intermediate and shift down by 7, clipping to uint8.
283 static void yuv2yuv1_c(const int16_t *src, uint8_t *dest, int dstW,
284 const uint8_t *dither, int offset)
287 for (i=0; i<dstW; i++) {
288 int val = (src[i] + dither[(i + offset) & 7]) >> 7;
289 dest[i]= av_clip_uint8(val);
// Interleaved-chroma output for the NV12/NV21 family: vertically filter the
// U and V planes and write them byte-interleaved. For NV12 the order is
// U,V; otherwise (the else branch) V,U — i.e. NV21.
293 static void yuv2nv12X_chroma_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
294 const int16_t **chrUSrc, const int16_t **chrVSrc,
295 uint8_t *dest, int chrDstW)
297 enum PixelFormat dstFormat = c->dstFormat;
298 const uint8_t *chrDither = c->chrDither8;
301 if (dstFormat == PIX_FMT_NV12)
302 for (i=0; i<chrDstW; i++) {
// U and V use offset dither phases (i vs. i+3) to decorrelate them.
303 int u = chrDither[i & 7] << 12;
304 int v = chrDither[(i + 3) & 7] << 12;
306 for (j=0; j<chrFilterSize; j++) {
307 u += chrUSrc[j][i] * chrFilter[j];
308 v += chrVSrc[j][i] * chrFilter[j];
311 dest[2*i]= av_clip_uint8(u>>19);
312 dest[2*i+1]= av_clip_uint8(v>>19);
// NV21: identical accumulation, swapped store order (V first).
315 for (i=0; i<chrDstW; i++) {
316 int u = chrDither[i & 7] << 12;
317 int v = chrDither[(i + 3) & 7] << 12;
319 for (j=0; j<chrFilterSize; j++) {
320 u += chrUSrc[j][i] * chrFilter[j];
321 v += chrVSrc[j][i] * chrFilter[j];
324 dest[2*i]= av_clip_uint8(v>>19);
325 dest[2*i+1]= av_clip_uint8(u>>19);
// output_pixel for 16-bit gray: byte order chosen by the target format
// (big-endian for PIX_FMT_GRAY16BE).
329 #define output_pixel(pos, val) \
330 if (target == PIX_FMT_GRAY16BE) { \
// Full vertical-filter path for 16-bit grayscale: two luma pixels per
// iteration; chroma/alpha inputs are ignored.
336 static av_always_inline void
337 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
338 const int32_t **lumSrc, int lumFilterSize,
339 const int16_t *chrFilter, const int32_t **chrUSrc,
340 const int32_t **chrVSrc, int chrFilterSize,
341 const int32_t **alpSrc, uint16_t *dest, int dstW,
342 int y, enum PixelFormat target)
346 for (i = 0; i < (dstW >> 1); i++) {
351 for (j = 0; j < lumFilterSize; j++) {
352 Y1 += lumSrc[j][i * 2] * lumFilter[j];
353 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
// Cheap overflow test: only clip when a value left the 16-bit range.
357 if ((Y1 | Y2) & 0x10000) {
358 Y1 = av_clip_uint16(Y1);
359 Y2 = av_clip_uint16(Y2);
361 output_pixel(&dest[i * 2 + 0], Y1);
362 output_pixel(&dest[i * 2 + 1], Y2);
// Two-line vertical interpolation path for 16-bit grayscale: blend buf[0]
// and buf[1] with the 12-bit blend factor 'yalpha' (yalpha1 = 4095-yalpha).
366 static av_always_inline void
367 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
368 const int32_t *ubuf[2], const int32_t *vbuf[2],
369 const int32_t *abuf[2], uint16_t *dest, int dstW,
370 int yalpha, int uvalpha, int y,
371 enum PixelFormat target)
373 int yalpha1 = 4095 - yalpha;
375 const int32_t *buf0 = buf[0], *buf1 = buf[1];
377 for (i = 0; i < (dstW >> 1); i++) {
378 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
379 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
381 output_pixel(&dest[i * 2 + 0], Y1);
382 output_pixel(&dest[i * 2 + 1], Y2);
// Single-line (unscaled vertical) path for 16-bit grayscale: just rescale
// the intermediate by << 1 and store.
386 static av_always_inline void
387 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
388 const int32_t *ubuf[2], const int32_t *vbuf[2],
389 const int32_t *abuf0, uint16_t *dest, int dstW,
390 int uvalpha, int y, enum PixelFormat target)
394 for (i = 0; i < (dstW >> 1); i++) {
395 int Y1 = buf0[i * 2 ] << 1;
396 int Y2 = buf0[i * 2 + 1] << 1;
398 output_pixel(&dest[i * 2 + 0], Y1);
399 output_pixel(&dest[i * 2 + 1], Y2);
// Generate the three public output entry points (_X full filter, _2
// two-line blend, _1 single line) for a 16-bit packed format: the wrappers
// only cast the generic int16_t** interface to the int32_t**/uint16_t
// types the templates expect and bake in the pixel format.
405 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
406 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
407 const int16_t **_lumSrc, int lumFilterSize, \
408 const int16_t *chrFilter, const int16_t **_chrUSrc, \
409 const int16_t **_chrVSrc, int chrFilterSize, \
410 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
413 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
414 **chrUSrc = (const int32_t **) _chrUSrc, \
415 **chrVSrc = (const int32_t **) _chrVSrc, \
416 **alpSrc = (const int32_t **) _alpSrc; \
417 uint16_t *dest = (uint16_t *) _dest; \
418 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
419 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
420 alpSrc, dest, dstW, y, fmt); \
423 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
424 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
425 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
426 int yalpha, int uvalpha, int y) \
428 const int32_t **buf = (const int32_t **) _buf, \
429 **ubuf = (const int32_t **) _ubuf, \
430 **vbuf = (const int32_t **) _vbuf, \
431 **abuf = (const int32_t **) _abuf; \
432 uint16_t *dest = (uint16_t *) _dest; \
433 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
434 dest, dstW, yalpha, uvalpha, y, fmt); \
437 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
438 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
439 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
440 int uvalpha, int y) \
442 const int32_t *buf0 = (const int32_t *) _buf0, \
443 **ubuf = (const int32_t **) _ubuf, \
444 **vbuf = (const int32_t **) _vbuf, \
445 *abuf0 = (const int32_t *) _abuf0; \
446 uint16_t *dest = (uint16_t *) _dest; \
447 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
448 dstW, uvalpha, y, fmt); \
// Concrete gray16 writers for both endiannesses.
451 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
452 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// output_pixel for 1 bpp output: the accumulated 8-pixel bit pattern is
// stored as-is for MONOBLACK, presumably inverted in the non-visible else
// branch for MONOWHITE — confirm against the full file.
454 #define output_pixel(pos, acc) \
455 if (target == PIX_FMT_MONOBLACK) { \
// Full vertical-filter path for monochrome output: two luma pixels per
// iteration, dithered through the 220-level 8x8 matrix, thresholded via
// the g (gamma/level) lookup table, and shifted into the bit accumulator.
461 static av_always_inline void
462 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
463 const int16_t **lumSrc, int lumFilterSize,
464 const int16_t *chrFilter, const int16_t **chrUSrc,
465 const int16_t **chrVSrc, int chrFilterSize,
466 const int16_t **alpSrc, uint8_t *dest, int dstW,
467 int y, enum PixelFormat target)
469 const uint8_t * const d128=dither_8x8_220[y&7];
// Gray response table: green lookup at neutral chroma (U=V=128).
470 uint8_t *g = c->table_gU[128] + c->table_gV[128];
474 for (i = 0; i < dstW - 1; i += 2) {
479 for (j = 0; j < lumFilterSize; j++) {
480 Y1 += lumSrc[j][i] * lumFilter[j];
481 Y2 += lumSrc[j][i+1] * lumFilter[j];
// Cheap overflow test: only clip when a value left the 8-bit range.
485 if ((Y1 | Y2) & 0x100) {
486 Y1 = av_clip_uint8(Y1);
487 Y2 = av_clip_uint8(Y2);
// 'acc += acc + bit' shifts the accumulator left and appends one bit.
489 acc += acc + g[Y1 + d128[(i + 0) & 7]];
490 acc += acc + g[Y2 + d128[(i + 1) & 7]];
492 output_pixel(*dest++, acc);
// Two-line blend path for monochrome output: 8 pixels per iteration are
// interpolated between buf0/buf1 (12-bit yalpha factor), dithered, and
// packed MSB-first into one output byte.
497 static av_always_inline void
498 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
499 const int16_t *ubuf[2], const int16_t *vbuf[2],
500 const int16_t *abuf[2], uint8_t *dest, int dstW,
501 int yalpha, int uvalpha, int y,
502 enum PixelFormat target)
504 const int16_t *buf0 = buf[0], *buf1 = buf[1];
505 const uint8_t * const d128 = dither_8x8_220[y & 7];
506 uint8_t *g = c->table_gU[128] + c->table_gV[128];
507 int yalpha1 = 4095 - yalpha;
510 for (i = 0; i < dstW - 7; i += 8) {
511 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
512 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
513 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
514 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
515 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
516 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
517 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
518 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
519 output_pixel(*dest++, acc);
// Single-line path for monochrome output: 8 pixels per iteration straight
// from buf0 (>> 7 to 8 bits), dithered and packed into one byte.
523 static av_always_inline void
524 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
525 const int16_t *ubuf[2], const int16_t *vbuf[2],
526 const int16_t *abuf0, uint8_t *dest, int dstW,
527 int uvalpha, int y, enum PixelFormat target)
529 const uint8_t * const d128 = dither_8x8_220[y & 7];
530 uint8_t *g = c->table_gU[128] + c->table_gV[128];
533 for (i = 0; i < dstW - 7; i += 8) {
534 int acc = g[(buf0[i ] >> 7) + d128[0]];
535 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
536 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
537 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
538 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
539 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
540 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
541 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
542 output_pixel(*dest++, acc);
// Like YUV2PACKED16WRAPPER but for 8-bit packed outputs: no pointer casts
// needed, the wrappers just bind the target pixel format.
548 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
549 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
550 const int16_t **lumSrc, int lumFilterSize, \
551 const int16_t *chrFilter, const int16_t **chrUSrc, \
552 const int16_t **chrVSrc, int chrFilterSize, \
553 const int16_t **alpSrc, uint8_t *dest, int dstW, \
556 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
557 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
558 alpSrc, dest, dstW, y, fmt); \
561 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
562 const int16_t *ubuf[2], const int16_t *vbuf[2], \
563 const int16_t *abuf[2], uint8_t *dest, int dstW, \
564 int yalpha, int uvalpha, int y) \
566 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
567 dest, dstW, yalpha, uvalpha, y, fmt); \
570 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
571 const int16_t *ubuf[2], const int16_t *vbuf[2], \
572 const int16_t *abuf0, uint8_t *dest, int dstW, \
573 int uvalpha, int y) \
575 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
576 abuf0, dest, dstW, uvalpha, \
// Concrete monochrome writers (white = 0 is white, black = 0 is black).
580 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
581 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// Store one 4:2:2 macropixel: YUYV order (Y1 U Y2 V) for PIX_FMT_YUYV422,
// UYVY order (U Y1 V Y2) in the else branch.
583 #define output_pixels(pos, Y1, U, Y2, V) \
584 if (target == PIX_FMT_YUYV422) { \
585 dest[pos + 0] = Y1; \
587 dest[pos + 2] = Y2; \
591 dest[pos + 1] = Y1; \
593 dest[pos + 3] = Y2; \
// Full vertical-filter path for packed 4:2:2 output (YUYV/UYVY): two luma
// and one chroma pair per iteration.
596 static av_always_inline void
597 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
598 const int16_t **lumSrc, int lumFilterSize,
599 const int16_t *chrFilter, const int16_t **chrUSrc,
600 const int16_t **chrVSrc, int chrFilterSize,
601 const int16_t **alpSrc, uint8_t *dest, int dstW,
602 int y, enum PixelFormat target)
606 for (i = 0; i < (dstW >> 1); i++) {
613 for (j = 0; j < lumFilterSize; j++) {
614 Y1 += lumSrc[j][i * 2] * lumFilter[j];
615 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
617 for (j = 0; j < chrFilterSize; j++) {
618 U += chrUSrc[j][i] * chrFilter[j];
619 V += chrVSrc[j][i] * chrFilter[j];
// Cheap overflow test: only clip when a value left the 8-bit range.
625 if ((Y1 | Y2 | U | V) & 0x100) {
626 Y1 = av_clip_uint8(Y1);
627 Y2 = av_clip_uint8(Y2);
628 U = av_clip_uint8(U);
629 V = av_clip_uint8(V);
631 output_pixels(4*i, Y1, U, Y2, V);
// Two-line blend path for packed 4:2:2 output: luma blended with 'yalpha',
// chroma with 'uvalpha' (both 12-bit factors, x1 = 4095 - x).
635 static av_always_inline void
636 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
637 const int16_t *ubuf[2], const int16_t *vbuf[2],
638 const int16_t *abuf[2], uint8_t *dest, int dstW,
639 int yalpha, int uvalpha, int y,
640 enum PixelFormat target)
642 const int16_t *buf0 = buf[0], *buf1 = buf[1],
643 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
644 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
645 int yalpha1 = 4095 - yalpha;
646 int uvalpha1 = 4095 - uvalpha;
649 for (i = 0; i < (dstW >> 1); i++) {
650 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
651 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
652 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
653 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
655 output_pixels(i * 4, Y1, U, Y2, V);
// Single-line path for packed 4:2:2 output. uvalpha < 2048 means chroma is
// vertically aligned: take it from one buffer (ubuf1/vbuf1); otherwise
// average the two chroma lines (>> 8 = >> 7 then / 2).
659 static av_always_inline void
660 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
661 const int16_t *ubuf[2], const int16_t *vbuf[2],
662 const int16_t *abuf0, uint8_t *dest, int dstW,
663 int uvalpha, int y, enum PixelFormat target)
665 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
666 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
669 if (uvalpha < 2048) {
670 for (i = 0; i < (dstW >> 1); i++) {
671 int Y1 = buf0[i * 2] >> 7;
672 int Y2 = buf0[i * 2 + 1] >> 7;
673 int U = ubuf1[i] >> 7;
674 int V = vbuf1[i] >> 7;
676 output_pixels(i * 4, Y1, U, Y2, V);
679 for (i = 0; i < (dstW >> 1); i++) {
680 int Y1 = buf0[i * 2] >> 7;
681 int Y2 = buf0[i * 2 + 1] >> 7;
682 int U = (ubuf0[i] + ubuf1[i]) >> 8;
683 int V = (vbuf0[i] + vbuf1[i]) >> 8;
685 output_pixels(i * 4, Y1, U, Y2, V);
// Concrete 4:2:2 writers for both component orders.
692 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
693 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// R_B/B_R swap the red and blue lookups so one template serves both RGB48
// and BGR48; output_pixel stores 16-bit components in the target's
// endianness.
695 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
696 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
697 #define output_pixel(pos, val) \
698 if (isBE(target)) { \
// Full vertical-filter path for 48-bit RGB/BGR: YUV -> RGB is done with
// the context's fixed-point yuv2rgb coefficients rather than LUTs.
704 static av_always_inline void
705 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
706 const int32_t **lumSrc, int lumFilterSize,
707 const int16_t *chrFilter, const int32_t **chrUSrc,
708 const int32_t **chrVSrc, int chrFilterSize,
709 const int32_t **alpSrc, uint16_t *dest, int dstW,
710 int y, enum PixelFormat target)
714 for (i = 0; i < (dstW >> 1); i++) {
// Chroma accumulator seeded with -128 in the accumulator's Q domain.
718 int U = -128 << 23; // 19
722 for (j = 0; j < lumFilterSize; j++) {
723 Y1 += lumSrc[j][i * 2] * lumFilter[j];
724 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
726 for (j = 0; j < chrFilterSize; j++) {
727 U += chrUSrc[j][i] * chrFilter[j];
728 V += chrVSrc[j][i] * chrFilter[j];
731 // 8bit: 12+15=27; 16-bit: 12+19=31
737 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
738 Y1 -= c->yuv2rgb_y_offset;
739 Y2 -= c->yuv2rgb_y_offset;
740 Y1 *= c->yuv2rgb_y_coeff;
741 Y2 *= c->yuv2rgb_y_coeff;
744 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
746 R = V * c->yuv2rgb_v2r_coeff;
747 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
748 B = U * c->yuv2rgb_u2b_coeff;
750 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
751 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
752 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
753 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
754 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
755 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
756 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Two-line blend path for 48-bit RGB/BGR: interpolate luma/chroma between
// the two input lines (12-bit factors), then run the same fixed-point
// YUV -> RGB conversion as the _X template.
761 static av_always_inline void
762 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
763 const int32_t *ubuf[2], const int32_t *vbuf[2],
764 const int32_t *abuf[2], uint16_t *dest, int dstW,
765 int yalpha, int uvalpha, int y,
766 enum PixelFormat target)
768 const int32_t *buf0 = buf[0], *buf1 = buf[1],
769 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
770 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
771 int yalpha1 = 4095 - yalpha;
772 int uvalpha1 = 4095 - uvalpha;
775 for (i = 0; i < (dstW >> 1); i++) {
776 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
777 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
778 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
779 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
782 Y1 -= c->yuv2rgb_y_offset;
783 Y2 -= c->yuv2rgb_y_offset;
784 Y1 *= c->yuv2rgb_y_coeff;
785 Y2 *= c->yuv2rgb_y_coeff;
789 R = V * c->yuv2rgb_v2r_coeff;
790 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
791 B = U * c->yuv2rgb_u2b_coeff;
793 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
794 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
795 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
796 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
797 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
798 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Single-line path for 48-bit RGB/BGR. uvalpha < 2048: take chroma from
// one line (here ubuf0/vbuf0, unlike the 8-bit path which uses *buf1 —
// NOTE(review): verify this asymmetry against upstream); otherwise average
// both chroma lines.
803 static av_always_inline void
804 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
805 const int32_t *ubuf[2], const int32_t *vbuf[2],
806 const int32_t *abuf0, uint16_t *dest, int dstW,
807 int uvalpha, int y, enum PixelFormat target)
809 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
810 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
813 if (uvalpha < 2048) {
814 for (i = 0; i < (dstW >> 1); i++) {
815 int Y1 = (buf0[i * 2] ) >> 2;
816 int Y2 = (buf0[i * 2 + 1]) >> 2;
// Recenter chroma around zero before applying the matrix.
817 int U = (ubuf0[i] + (-128 << 11)) >> 2;
818 int V = (vbuf0[i] + (-128 << 11)) >> 2;
821 Y1 -= c->yuv2rgb_y_offset;
822 Y2 -= c->yuv2rgb_y_offset;
823 Y1 *= c->yuv2rgb_y_coeff;
824 Y2 *= c->yuv2rgb_y_coeff;
828 R = V * c->yuv2rgb_v2r_coeff;
829 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
830 B = U * c->yuv2rgb_u2b_coeff;
832 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
833 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
834 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
835 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
836 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
837 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Chroma not vertically aligned: average the two chroma lines.
841 for (i = 0; i < (dstW >> 1); i++) {
842 int Y1 = (buf0[i * 2] ) >> 2;
843 int Y2 = (buf0[i * 2 + 1]) >> 2;
844 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
845 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
848 Y1 -= c->yuv2rgb_y_offset;
849 Y2 -= c->yuv2rgb_y_offset;
850 Y1 *= c->yuv2rgb_y_coeff;
851 Y2 *= c->yuv2rgb_y_coeff;
855 R = V * c->yuv2rgb_v2r_coeff;
856 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
857 B = U * c->yuv2rgb_u2b_coeff;
859 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
860 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
861 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
862 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
863 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
864 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Concrete 48-bit writers for RGB/BGR x LE/BE.
874 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
875 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
876 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
877 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
// Store two horizontally adjacent pixels (indices i*2 and i*2+1) in the
// packed RGB format selected by 'target', using the per-component lookup
// tables _r/_g/_b prepared by the yuv2rgb table code. The table element
// type depends on the format family (32-, 16- or 8-bit), hence the casts.
879 static av_always_inline void
880 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
881 int U, int V, int A1, int A2,
882 const void *_r, const void *_g, const void *_b, int y,
883 enum PixelFormat target, int hasAlpha)
// 32-bit formats: each table already holds the component pre-shifted into
// place, so the pixel is the sum of the three lookups (plus alpha).
885 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
886 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
887 uint32_t *dest = (uint32_t *) _dest;
888 const uint32_t *r = (const uint32_t *) _r;
889 const uint32_t *g = (const uint32_t *) _g;
890 const uint32_t *b = (const uint32_t *) _b;
// Alpha goes in bits 24..31 except for the *32_1 layouts (bits 0..7).
893 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
895 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
896 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
899 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
901 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
902 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
904 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
905 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
// 24-bit formats: three separate bytes per pixel; r_b/b_r swap red and
// blue so one code path serves RGB24 and BGR24.
908 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
909 uint8_t *dest = (uint8_t *) _dest;
910 const uint8_t *r = (const uint8_t *) _r;
911 const uint8_t *g = (const uint8_t *) _g;
912 const uint8_t *b = (const uint8_t *) _b;
914 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
915 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
916 dest[i * 6 + 0] = r_b[Y1];
917 dest[i * 6 + 1] = g[Y1];
918 dest[i * 6 + 2] = b_r[Y1];
919 dest[i * 6 + 3] = r_b[Y2];
920 dest[i * 6 + 4] = g[Y2];
921 dest[i * 6 + 5] = b_r[Y2];
// 16-bit formats (565/555/444): ordered dithering is applied by offsetting
// the table index; blue uses the inverted matrix row ((y&n)^n).
924 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
925 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
926 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
927 uint16_t *dest = (uint16_t *) _dest;
928 const uint16_t *r = (const uint16_t *) _r;
929 const uint16_t *g = (const uint16_t *) _g;
930 const uint16_t *b = (const uint16_t *) _b;
931 int dr1, dg1, db1, dr2, dg2, db2;
933 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
934 dr1 = dither_2x2_8[ y & 1 ][0];
935 dg1 = dither_2x2_4[ y & 1 ][0];
936 db1 = dither_2x2_8[(y & 1) ^ 1][0];
937 dr2 = dither_2x2_8[ y & 1 ][1];
938 dg2 = dither_2x2_4[ y & 1 ][1];
939 db2 = dither_2x2_8[(y & 1) ^ 1][1];
940 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
941 dr1 = dither_2x2_8[ y & 1 ][0];
942 dg1 = dither_2x2_8[ y & 1 ][1];
943 db1 = dither_2x2_8[(y & 1) ^ 1][0];
944 dr2 = dither_2x2_8[ y & 1 ][1];
945 dg2 = dither_2x2_8[ y & 1 ][0];
946 db2 = dither_2x2_8[(y & 1) ^ 1][1];
948 dr1 = dither_4x4_16[ y & 3 ][0];
949 dg1 = dither_4x4_16[ y & 3 ][1];
950 db1 = dither_4x4_16[(y & 3) ^ 3][0];
951 dr2 = dither_4x4_16[ y & 3 ][1];
952 dg2 = dither_4x4_16[ y & 3 ][0];
953 db2 = dither_4x4_16[(y & 3) ^ 3][1];
956 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
957 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
// 8- and 4-bit formats: heavier 8x8 dithering; RGB4/BGR4 packs the two
// pixels into one byte (second pixel in the high nibble).
958 } else /* 8/4-bit */ {
959 uint8_t *dest = (uint8_t *) _dest;
960 const uint8_t *r = (const uint8_t *) _r;
961 const uint8_t *g = (const uint8_t *) _g;
962 const uint8_t *b = (const uint8_t *) _b;
963 int dr1, dg1, db1, dr2, dg2, db2;
965 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
966 const uint8_t * const d64 = dither_8x8_73[y & 7];
967 const uint8_t * const d32 = dither_8x8_32[y & 7];
968 dr1 = dg1 = d32[(i * 2 + 0) & 7];
969 db1 = d64[(i * 2 + 0) & 7];
970 dr2 = dg2 = d32[(i * 2 + 1) & 7];
971 db2 = d64[(i * 2 + 1) & 7];
973 const uint8_t * const d64 = dither_8x8_73 [y & 7];
974 const uint8_t * const d128 = dither_8x8_220[y & 7];
975 dr1 = db1 = d128[(i * 2 + 0) & 7];
976 dg1 = d64[(i * 2 + 0) & 7];
977 dr2 = db2 = d128[(i * 2 + 1) & 7];
978 dg2 = d64[(i * 2 + 1) & 7];
981 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
982 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
983 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
985 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
986 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
// Full vertical-filter path for LUT-based packed RGB output: accumulate
// luma/chroma (and optionally alpha), clip to 8 bits, look up the
// per-component tables and hand off to yuv2rgb_write.
991 static av_always_inline void
992 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
993 const int16_t **lumSrc, int lumFilterSize,
994 const int16_t *chrFilter, const int16_t **chrUSrc,
995 const int16_t **chrVSrc, int chrFilterSize,
996 const int16_t **alpSrc, uint8_t *dest, int dstW,
997 int y, enum PixelFormat target, int hasAlpha)
1001 for (i = 0; i < (dstW >> 1); i++) {
1007 int av_unused A1, A2;
1008 const void *r, *g, *b;
1010 for (j = 0; j < lumFilterSize; j++) {
1011 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1012 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1014 for (j = 0; j < chrFilterSize; j++) {
1015 U += chrUSrc[j][i] * chrFilter[j];
1016 V += chrVSrc[j][i] * chrFilter[j];
// Cheap overflow test: only clip when a value left the 8-bit range.
1022 if ((Y1 | Y2 | U | V) & 0x100) {
1023 Y1 = av_clip_uint8(Y1);
1024 Y2 = av_clip_uint8(Y2);
1025 U = av_clip_uint8(U);
1026 V = av_clip_uint8(V);
1031 for (j = 0; j < lumFilterSize; j++) {
1032 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1033 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1037 if ((A1 | A2) & 0x100) {
1038 A1 = av_clip_uint8(A1);
1039 A2 = av_clip_uint8(A2);
1043 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
// Green is split across two tables (U and V contributions).
1045 g = (c->table_gU[U] + c->table_gV[V]);
1048 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1049 r, g, b, y, target, hasAlpha);
// Two-line blend path for LUT-based packed RGB output: interpolate luma,
// chroma and (optionally) alpha between the two input lines, then write
// via the shared table lookups.
1053 static av_always_inline void
1054 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1055 const int16_t *ubuf[2], const int16_t *vbuf[2],
1056 const int16_t *abuf[2], uint8_t *dest, int dstW,
1057 int yalpha, int uvalpha, int y,
1058 enum PixelFormat target, int hasAlpha)
1060 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1061 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1062 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1063 *abuf0 = hasAlpha ? abuf[0] : NULL,
1064 *abuf1 = hasAlpha ? abuf[1] : NULL;
1065 int yalpha1 = 4095 - yalpha;
1066 int uvalpha1 = 4095 - uvalpha;
1069 for (i = 0; i < (dstW >> 1); i++) {
1070 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1071 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1072 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1073 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1075 const void *r = c->table_rV[V],
1076 *g = (c->table_gU[U] + c->table_gV[V]),
1077 *b = c->table_bU[U];
1080 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1081 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1084 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1085 r, g, b, y, target, hasAlpha);
1089 static av_always_inline void
1090 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1091 const int16_t *ubuf[2], const int16_t *vbuf[2],
1092 const int16_t *abuf0, uint8_t *dest, int dstW,
1093 int uvalpha, int y, enum PixelFormat target,
1096 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1097 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1100 if (uvalpha < 2048) {
1101 for (i = 0; i < (dstW >> 1); i++) {
1102 int Y1 = buf0[i * 2] >> 7;
1103 int Y2 = buf0[i * 2 + 1] >> 7;
1104 int U = ubuf1[i] >> 7;
1105 int V = vbuf1[i] >> 7;
1107 const void *r = c->table_rV[V],
1108 *g = (c->table_gU[U] + c->table_gV[V]),
1109 *b = c->table_bU[U];
1112 A1 = abuf0[i * 2 ] >> 7;
1113 A2 = abuf0[i * 2 + 1] >> 7;
1116 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1117 r, g, b, y, target, hasAlpha);
1120 for (i = 0; i < (dstW >> 1); i++) {
1121 int Y1 = buf0[i * 2] >> 7;
1122 int Y2 = buf0[i * 2 + 1] >> 7;
1123 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1124 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1126 const void *r = c->table_rV[V],
1127 *g = (c->table_gU[U] + c->table_gV[V]),
1128 *b = c->table_bU[U];
1131 A1 = abuf0[i * 2 ] >> 7;
1132 A2 = abuf0[i * 2 + 1] >> 7;
1135 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1136 r, g, b, y, target, hasAlpha);
1141 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1142 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1143 const int16_t **lumSrc, int lumFilterSize, \
1144 const int16_t *chrFilter, const int16_t **chrUSrc, \
1145 const int16_t **chrVSrc, int chrFilterSize, \
1146 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1149 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1150 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1151 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1153 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1154 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1155 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1156 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1157 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1158 int yalpha, int uvalpha, int y) \
1160 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1161 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1164 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1165 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1166 const int16_t *abuf0, uint8_t *dest, int dstW, \
1167 int uvalpha, int y) \
1169 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1170 dstW, uvalpha, y, fmt, hasAlpha); \
1174 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1175 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1177 #if CONFIG_SWSCALE_ALPHA
1178 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1179 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1181 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1182 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1184 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1185 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1186 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1187 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1188 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1189 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1190 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1191 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/*
 * Vertically filter one output line directly to a full-chroma (no 2x
 * horizontal chroma subsampling) packed RGB/BGR destination, using the
 * context's integer yuv2rgb coefficients instead of lookup tables.
 * NOTE(review): several interior lines of this function are not visible in
 * this chunk (the per-format component stores); comments annotate only what
 * is shown.
 */
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
                          const int16_t **alpSrc, uint8_t *dest,
                          int dstW, int y, enum PixelFormat target, int hasAlpha)
/* 3 bytes per pixel for 24-bit formats, 4 for the 32-bit ones */
int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
for (i = 0; i < dstW; i++) {
    /* accumulate the vertical filter taps for luma and both chroma planes */
    for (j = 0; j < lumFilterSize; j++) {
        Y += lumSrc[j][i] * lumFilter[j];
    for (j = 0; j < chrFilterSize; j++) {
        U += chrUSrc[j][i] * chrFilter[j];
        V += chrVSrc[j][i] * chrFilter[j];
    /* optional alpha plane uses the luma filter */
    for (j = 0; j < lumFilterSize; j++) {
        A += alpSrc[j][i] * lumFilter[j];
    A = av_clip_uint8(A);
    Y -= c->yuv2rgb_y_offset;
    Y *= c->yuv2rgb_y_coeff;
    /* integer matrix multiply: YUV -> 30-bit R/G/B */
    R = Y + V*c->yuv2rgb_v2r_coeff;
    G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
    B = Y + U*c->yuv2rgb_u2b_coeff;
    /* clip only when some component left the 30-bit range */
    if ((R | G | B) & 0xC0000000) {
        R = av_clip_uintp2(R, 30);
        G = av_clip_uintp2(G, 30);
        B = av_clip_uintp2(B, 30);
    /* alpha-first formats (ARGB/ABGR) store A in byte 0 ... */
    dest[0] = hasAlpha ? A : 255;
    dest[3] = hasAlpha ? A : 255;
    /* ... alpha-last formats (RGBA/BGRA) store A in byte 3 */
    dest[0] = hasAlpha ? A : 255;
    dest[3] = hasAlpha ? A : 255;
1285 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1286 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1287 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1288 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1290 #if CONFIG_SWSCALE_ALPHA
1291 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1292 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1293 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1294 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1296 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1297 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1298 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1299 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1301 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1302 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1304 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1305 int width, int height,
1309 uint8_t *ptr = plane + stride*y;
1310 for (i=0; i<height; i++) {
1311 memset(ptr, val, width);
1316 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1318 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1319 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1321 static av_always_inline void
1322 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1323 enum PixelFormat origin)
1326 for (i = 0; i < width; i++) {
1327 unsigned int r_b = input_pixel(&src[i*3+0]);
1328 unsigned int g = input_pixel(&src[i*3+1]);
1329 unsigned int b_r = input_pixel(&src[i*3+2]);
1331 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1335 static av_always_inline void
1336 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1337 const uint16_t *src1, const uint16_t *src2,
1338 int width, enum PixelFormat origin)
1342 for (i = 0; i < width; i++) {
1343 int r_b = input_pixel(&src1[i*3+0]);
1344 int g = input_pixel(&src1[i*3+1]);
1345 int b_r = input_pixel(&src1[i*3+2]);
1347 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1348 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1352 static av_always_inline void
1353 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1354 const uint16_t *src1, const uint16_t *src2,
1355 int width, enum PixelFormat origin)
1359 for (i = 0; i < width; i++) {
1360 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1361 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1362 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1364 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1365 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1373 #define rgb48funcs(pattern, BE_LE, origin) \
1374 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1375 int width, uint32_t *unused) \
1377 const uint16_t *src = (const uint16_t *) _src; \
1378 uint16_t *dst = (uint16_t *) _dst; \
1379 rgb48ToY_c_template(dst, src, width, origin); \
1382 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1383 const uint8_t *_src1, const uint8_t *_src2, \
1384 int width, uint32_t *unused) \
1386 const uint16_t *src1 = (const uint16_t *) _src1, \
1387 *src2 = (const uint16_t *) _src2; \
1388 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1389 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1392 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1393 const uint8_t *_src1, const uint8_t *_src2, \
1394 int width, uint32_t *unused) \
1396 const uint16_t *src1 = (const uint16_t *) _src1, \
1397 *src2 = (const uint16_t *) _src2; \
1398 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1399 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1402 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1403 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1404 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1405 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
1407 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1408 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1409 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1411 static av_always_inline void
1412 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1413 int width, enum PixelFormat origin,
1414 int shr, int shg, int shb, int shp,
1415 int maskr, int maskg, int maskb,
1416 int rsh, int gsh, int bsh, int S)
1418 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1419 rnd = 33 << (S - 1);
1422 for (i = 0; i < width; i++) {
1423 int px = input_pixel(i) >> shp;
1424 int b = (px & maskb) >> shb;
1425 int g = (px & maskg) >> shg;
1426 int r = (px & maskr) >> shr;
1428 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1432 static av_always_inline void
1433 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1434 const uint8_t *src, int width,
1435 enum PixelFormat origin,
1436 int shr, int shg, int shb, int shp,
1437 int maskr, int maskg, int maskb,
1438 int rsh, int gsh, int bsh, int S)
1440 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1441 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1442 rnd = 257 << (S - 1);
1445 for (i = 0; i < width; i++) {
1446 int px = input_pixel(i) >> shp;
1447 int b = (px & maskb) >> shb;
1448 int g = (px & maskg) >> shg;
1449 int r = (px & maskr) >> shr;
1451 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1452 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
1456 static av_always_inline void
1457 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1458 const uint8_t *src, int width,
1459 enum PixelFormat origin,
1460 int shr, int shg, int shb, int shp,
1461 int maskr, int maskg, int maskb,
1462 int rsh, int gsh, int bsh, int S)
1464 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1465 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1466 rnd = 257 << S, maskgx = ~(maskr | maskb);
1469 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1470 for (i = 0; i < width; i++) {
1471 int px0 = input_pixel(2 * i + 0) >> shp;
1472 int px1 = input_pixel(2 * i + 1) >> shp;
1473 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1474 int rb = px0 + px1 - g;
1476 b = (rb & maskb) >> shb;
1477 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1478 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1481 g = (g & maskg) >> shg;
1483 r = (rb & maskr) >> shr;
1485 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1486 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
1492 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1493 maskg, maskb, rsh, gsh, bsh, S) \
1494 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1495 int width, uint32_t *unused) \
1497 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1498 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1501 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1502 const uint8_t *src, const uint8_t *dummy, \
1503 int width, uint32_t *unused) \
1505 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1506 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1509 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1510 const uint8_t *src, const uint8_t *dummy, \
1511 int width, uint32_t *unused) \
1513 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1514 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1517 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1518 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1519 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1520 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1521 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1522 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1523 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1524 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1525 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1526 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1527 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1528 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from packed ABGR/ARGB-style data (A in byte 0). */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = src[4 * i];
}
/* Extract the alpha plane from packed RGBA/BGRA-style data (A in byte 3). */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = src[4 * i + 3];
}
/* Map palette indices to luma: low byte of each 32-bit palette entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i = 0; i < width; i++) {
        int d = src[i];
        dst[i] = pal[d] & 0xFF;
    }
}
/* Map palette indices to chroma: U from bits 8-15, V from bits 16-23 of the
 * palette entry. Both source pointers must alias the same index plane. */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        int p = pal[src1[i]];

        dstU[i] = p >> 8;   /* truncated to 8 bits by the store */
        dstV[i] = p >> 16;
    }
}
/**
 * Expand 1 bpp monochrome (white = 0) to 8-bit luma: each input bit becomes
 * 0 or 255 after inversion.
 * Fix: the original only wrote floor(width/8)*8 pixels, leaving the last
 * 1-7 pixels uninitialized when width is not a multiple of 8; handle the
 * partial trailing byte explicitly.
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i = 0; i < width / 8; i++) {
        int d = ~src[i];  /* white-is-zero: invert so set bits become 255 */
        for (j = 0; j < 8; j++)
            dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
    }
    if (width & 7) {  /* leftover pixels in the final, partial byte */
        int d = ~src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
    }
}
/**
 * Expand 1 bpp monochrome (black = 0) to 8-bit luma: each input bit becomes
 * 0 or 255.
 * Fix: the original only wrote floor(width/8)*8 pixels, leaving the last
 * 1-7 pixels uninitialized when width is not a multiple of 8; handle the
 * partial trailing byte explicitly.
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;
    for (i = 0; i < width / 8; i++) {
        int d = src[i];
        for (j = 0; j < 8; j++)
            dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
    }
    if (width & 7) {  /* leftover pixels in the final, partial byte */
        int d = src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
    }
}
1592 //FIXME yuy2* can read up to 7 samples too much
/* Deinterleave luma from YUY2 (Y U Y V ...): Y lives at even byte offsets. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = src[2 * n];
}
/* Deinterleave chroma from YUY2: U at offset 1, V at offset 3 of each
 * 4-byte group. Both source pointers must alias the same packed line. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n + 1];
        dstV[n] = src1[4 * n + 3];
    }
    assert(src1 == src2);
}
/* Byte-swap one line of 16-bit luma samples (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        dst[n] = av_bswap16(src[n]);
}
/* Byte-swap one line of 16-bit chroma samples from two separate planes. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = av_bswap16(src1[n]);
        dstV[n] = av_bswap16(src2[n]);
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/* Deinterleave luma from UYVY (U Y V Y ...): Y lives at odd byte offsets. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++)
        dst[n] = src[2 * n + 1];
}
/* Deinterleave chroma from UYVY: U at offset 0, V at offset 2 of each
 * 4-byte group. Both source pointers must alias the same packed line. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;
    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n + 0];
        dstV[n] = src1[4 * n + 2];
    }
    assert(src1 == src2);
}
1657 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1658 const uint8_t *src, int width)
1661 for (i = 0; i < width; i++) {
1662 dst1[i] = src[2*i+0];
1663 dst2[i] = src[2*i+1];
/* NV12: interleaved chroma is U first, then V. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/* NV21: interleaved chroma is V first, then U (arguments swapped). */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1681 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1683 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1684 int width, uint32_t *unused)
1687 for (i=0; i<width; i++) {
1692 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1696 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1697 const uint8_t *src2, int width, uint32_t *unused)
1700 for (i=0; i<width; i++) {
1701 int b= src1[3*i + 0];
1702 int g= src1[3*i + 1];
1703 int r= src1[3*i + 2];
1705 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1706 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1708 assert(src1 == src2);
1711 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1712 const uint8_t *src2, int width, uint32_t *unused)
1715 for (i=0; i<width; i++) {
1716 int b= src1[6*i + 0] + src1[6*i + 3];
1717 int g= src1[6*i + 1] + src1[6*i + 4];
1718 int r= src1[6*i + 2] + src1[6*i + 5];
1720 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1721 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1723 assert(src1 == src2);
1726 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1730 for (i=0; i<width; i++) {
1735 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1739 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1740 const uint8_t *src2, int width, uint32_t *unused)
1744 for (i=0; i<width; i++) {
1745 int r= src1[3*i + 0];
1746 int g= src1[3*i + 1];
1747 int b= src1[3*i + 2];
1749 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1750 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1754 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1755 const uint8_t *src2, int width, uint32_t *unused)
1759 for (i=0; i<width; i++) {
1760 int r= src1[6*i + 0] + src1[6*i + 3];
1761 int g= src1[6*i + 1] + src1[6*i + 4];
1762 int b= src1[6*i + 2] + src1[6*i + 5];
1764 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1765 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1769 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1770 const int16_t *filter,
1771 const int16_t *filterPos, int filterSize)
1774 int32_t *dst = (int32_t *) _dst;
1775 const uint16_t *src = (const uint16_t *) _src;
1776 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1779 for (i = 0; i < dstW; i++) {
1781 int srcPos = filterPos[i];
1784 for (j = 0; j < filterSize; j++) {
1785 val += src[srcPos + j] * filter[filterSize * i + j];
1787 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1788 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1792 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1793 const int16_t *filter,
1794 const int16_t *filterPos, int filterSize)
1797 const uint16_t *src = (const uint16_t *) _src;
1798 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1800 for (i = 0; i < dstW; i++) {
1802 int srcPos = filterPos[i];
1805 for (j = 0; j < filterSize; j++) {
1806 val += src[srcPos + j] * filter[filterSize * i + j];
1808 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1809 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1813 // bilinear / bicubic scaling
1814 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1815 const int16_t *filter, const int16_t *filterPos,
1819 for (i=0; i<dstW; i++) {
1821 int srcPos= filterPos[i];
1823 for (j=0; j<filterSize; j++) {
1824 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1826 //filter += hFilterSize;
1827 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1832 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1833 const int16_t *filter, const int16_t *filterPos,
1837 int32_t *dst = (int32_t *) _dst;
1838 for (i=0; i<dstW; i++) {
1840 int srcPos= filterPos[i];
1842 for (j=0; j<filterSize; j++) {
1843 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1845 //filter += hFilterSize;
1846 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
1852 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma to full (JPEG) range in place, 15-bit
 * fixed point. The clamp is FFMIN expanded to a plain ternary. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i] > 30775 ? 30775 : dstU[i];
        int v = dstV[i] > 30775 ? 30775 : dstV[i];

        dstU[i] = (u * 4663 - 9289992) >> 12; // -264
        dstV[i] = (v * 4663 - 9289992) >> 12; // -264
    }
}
/* Compress full (JPEG) range chroma to limited range in place, 15-bit
 * fixed point. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    const int scale = 1799, bias = 4081085; /* bias includes rounding */
    int i;

    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * scale + bias) >> 11; // 1469
        dstV[i] = (dstV[i] * scale + bias) >> 11; // 1469
    }
}
/* Expand limited-range luma to full (JPEG) range in place, 15-bit fixed
 * point. The clamp is FFMIN expanded to a plain ternary. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int v = dst[i] > 30189 ? 30189 : dst[i];
        dst[i] = (v * 19077 - 39057361) >> 14;
    }
}
/* Compress full (JPEG) range luma to limited range in place, 15-bit fixed
 * point. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    const int scale = 14071, bias = 33561947; /* bias includes rounding */
    int i;

    for (i = 0; i < width; i++)
        dst[i] = (dst[i] * scale + bias) >> 14;
}
/**
 * 19-bit variant of chrRangeToJpeg_c (operates on int32 samples through an
 * int16_t* interface).
 * Fix: the clamped maximum (30775 << 4) times 4663 is ~2.30e9, which
 * overflows signed 32-bit arithmetic (undefined behavior). Widen the
 * intermediate product to int64_t; results for non-overflowing inputs are
 * unchanged. The clamp is FFMIN expanded to a ternary.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        int64_t u = dstU[i] > (30775 << 4) ? (30775 << 4) : dstU[i];
        int64_t v = dstV[i] > (30775 << 4) ? (30775 << 4) : dstV[i];

        dstU[i] = (int32_t) ((u * 4663 - ((int64_t) 9289992 << 4)) >> 12); // -264
        dstV[i] = (int32_t) ((v * 4663 - ((int64_t) 9289992 << 4)) >> 12); // -264
    }
}
/* 19-bit variant of chrRangeFromJpeg_c: int32 samples behind an int16_t*
 * interface. No overflow possible: 19-bit max * 1799 stays below 2^31. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    const int bias = 4081085 << 4;
    int i;

    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * 1799 + bias) >> 11; // 1469
        dstV[i] = (dstV[i] * 1799 + bias) >> 11; // 1469
    }
}
/**
 * 19-bit variant of lumRangeToJpeg_c (int32 samples through an int16_t*
 * interface).
 * Fix: the clamped maximum (30189 << 4) times 4769 is ~2.30e9, which
 * overflows signed 32-bit arithmetic (undefined behavior). Widen the
 * intermediate product to int64_t; results for non-overflowing inputs are
 * unchanged. The clamp is FFMIN expanded to a ternary.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        int64_t v = dst[i] > (30189 << 4) ? (30189 << 4) : dst[i];
        dst[i] = (int32_t) ((v * 4769 - ((int64_t) 39057361 << 2)) >> 12);
    }
}
/**
 * 19-bit variant of lumRangeFromJpeg_c (int32 samples through an int16_t*
 * interface).
 * Fix: a 19-bit sample (up to 524287) times 14071 is ~7.4e9, which
 * overflows signed 32-bit arithmetic (undefined behavior). Widen the
 * intermediate product to int64_t; results for non-overflowing inputs are
 * unchanged.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++)
        dst[i] = (int32_t) (((int64_t) dst[i] * 14071 + ((int64_t) 33561947 << 4)) >> 14);
}
1917 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
1918 const uint8_t *src, int srcW, int xInc)
1921 unsigned int xpos=0;
1922 for (i=0;i<dstWidth;i++) {
1923 register unsigned int xx=xpos>>16;
1924 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1925 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
1930 // *** horizontal scale Y line to temp buffer
1931 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1932 const uint8_t *src, int srcW, int xInc,
1933 const int16_t *hLumFilter,
1934 const int16_t *hLumFilterPos, int hLumFilterSize,
1935 uint8_t *formatConvBuffer,
1936 uint32_t *pal, int isAlpha)
1938 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1939 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1942 toYV12(formatConvBuffer, src, srcW, pal);
1943 src= formatConvBuffer;
1946 if (!c->hyscale_fast) {
1947 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
1948 } else { // fast bilinear upscale / crap downscale
1949 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
1953 convertRange(dst, dstWidth);
1956 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
1957 int dstWidth, const uint8_t *src1,
1958 const uint8_t *src2, int srcW, int xInc)
1961 unsigned int xpos=0;
1962 for (i=0;i<dstWidth;i++) {
1963 register unsigned int xx=xpos>>16;
1964 register unsigned int xalpha=(xpos&0xFFFF)>>9;
1965 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
1966 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
1971 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
1972 const uint8_t *src1, const uint8_t *src2,
1973 int srcW, int xInc, const int16_t *hChrFilter,
1974 const int16_t *hChrFilterPos, int hChrFilterSize,
1975 uint8_t *formatConvBuffer, uint32_t *pal)
1978 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
1979 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
1980 src1= formatConvBuffer;
1984 if (!c->hcscale_fast) {
1985 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
1986 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
1987 } else { // fast bilinear upscale / crap downscale
1988 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
1991 if (c->chrConvertRange)
1992 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Select the C output (vertical scale / pack) functions for the context's
 * destination format: planar writers first, then the packed writers, with
 * separate tables for full-chroma-interpolation (SWS_FULL_CHR_H_INT) and
 * normal mode. NOTE(review): many case labels and break statements of the
 * switches below are not visible in this chunk; comments annotate only the
 * visible assignments.
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2planeX_luma,
                               yuv2planarX_fn *yuv2planeX_chroma, yuv2interleavedX_fn *yuv2nv12X_chroma,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
    enum PixelFormat dstFormat = c->dstFormat;
    /* planar writers: NV12/NV21 need an interleaved chroma writer */
    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2planeX_luma   = yuv2yuvX_c;
        *yuv2nv12X_chroma  = yuv2nv12X_chroma_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
            *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
        /* 8-bit planar default: unscaled and scaled writers */
        *yuv2yuv1 = yuv2yuv1_c;
        *yuv2planeX_luma = *yuv2planeX_chroma = yuv2yuvX_c;
    /* packed writers, full horizontal chroma interpolation variants */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            *yuv2packedX = yuv2rgba32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2rgba32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2rgbx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2argb32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2argb32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xrgb32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2bgra32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2bgra32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2bgrx32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2abgr32_full_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packedX = yuv2abgr32_full_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packedX = yuv2xbgr32_full_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packedX = yuv2rgb24_full_X_c;
            *yuv2packedX = yuv2bgr24_full_X_c;
        /* packed writers, normal (2x subsampled chroma) variants */
        switch (dstFormat) {
        case PIX_FMT_GRAY16BE:
            *yuv2packed1 = yuv2gray16BE_1_c;
            *yuv2packed2 = yuv2gray16BE_2_c;
            *yuv2packedX = yuv2gray16BE_X_c;
        case PIX_FMT_GRAY16LE:
            *yuv2packed1 = yuv2gray16LE_1_c;
            *yuv2packed2 = yuv2gray16LE_2_c;
            *yuv2packedX = yuv2gray16LE_X_c;
        case PIX_FMT_MONOWHITE:
            *yuv2packed1 = yuv2monowhite_1_c;
            *yuv2packed2 = yuv2monowhite_2_c;
            *yuv2packedX = yuv2monowhite_X_c;
        case PIX_FMT_MONOBLACK:
            *yuv2packed1 = yuv2monoblack_1_c;
            *yuv2packed2 = yuv2monoblack_2_c;
            *yuv2packedX = yuv2monoblack_X_c;
        case PIX_FMT_YUYV422:
            *yuv2packed1 = yuv2yuyv422_1_c;
            *yuv2packed2 = yuv2yuyv422_2_c;
            *yuv2packedX = yuv2yuyv422_X_c;
        case PIX_FMT_UYVY422:
            *yuv2packed1 = yuv2uyvy422_1_c;
            *yuv2packed2 = yuv2uyvy422_2_c;
            *yuv2packedX = yuv2uyvy422_X_c;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            /* 32-bit RGB: alpha-aware variants when an alpha plane exists */
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_c;
            *yuv2packed2 = yuv2rgba32_2_c;
            *yuv2packedX = yuv2rgba32_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_c;
            *yuv2packed2 = yuv2rgbx32_2_c;
            *yuv2packedX = yuv2rgbx32_X_c;
#endif /* !CONFIG_SMALL */
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#if CONFIG_SWSCALE_ALPHA
            *yuv2packed1 = yuv2rgba32_1_1_c;
            *yuv2packed2 = yuv2rgba32_1_2_c;
            *yuv2packedX = yuv2rgba32_1_X_c;
#endif /* CONFIG_SWSCALE_ALPHA */
            *yuv2packed1 = yuv2rgbx32_1_1_c;
            *yuv2packed2 = yuv2rgbx32_1_2_c;
            *yuv2packedX = yuv2rgbx32_1_X_c;
#endif /* !CONFIG_SMALL */
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
2235 #define DEBUG_SWSCALE_BUFFERS 0
2236 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Core scaling worker: horizontally scales the lines of the incoming source
 * slice into per-line ring buffers (lumPixBuf / chrUPixBuf / chrVPixBuf and,
 * with alpha, alpPixBuf), then vertically scales and converts the buffered
 * lines into the destination planes/packed pixels.
 * Returns dstY - lastDstY, i.e. the number of destination lines produced
 * by this call (slices may arrive incrementally, so this can be 0).
 */
2238 static int swScale(SwsContext *c, const uint8_t* src[],
2239 int srcStride[], int srcSliceY,
2240 int srcSliceH, uint8_t* dst[], int dstStride[])
2242 /* load a few things into local vars to make the code more readable? and faster */
2243 const int srcW= c->srcW;
2244 const int dstW= c->dstW;
2245 const int dstH= c->dstH;
2246 const int chrDstW= c->chrDstW;
2247 const int chrSrcW= c->chrSrcW;
2248 const int lumXInc= c->lumXInc;
2249 const int chrXInc= c->chrXInc;
2250 const enum PixelFormat dstFormat= c->dstFormat;
2251 const int flags= c->flags;
/* Precomputed horizontal/vertical filter taps and their source positions. */
2252 int16_t *vLumFilterPos= c->vLumFilterPos;
2253 int16_t *vChrFilterPos= c->vChrFilterPos;
2254 int16_t *hLumFilterPos= c->hLumFilterPos;
2255 int16_t *hChrFilterPos= c->hChrFilterPos;
2256 int16_t *vLumFilter= c->vLumFilter;
2257 int16_t *vChrFilter= c->vChrFilter;
2258 int16_t *hLumFilter= c->hLumFilter;
2259 int16_t *hChrFilter= c->hChrFilter;
2260 int32_t *lumMmxFilter= c->lumMmxFilter;
2261 int32_t *chrMmxFilter= c->chrMmxFilter;
2262 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2263 const int vLumFilterSize= c->vLumFilterSize;
2264 const int vChrFilterSize= c->vChrFilterSize;
2265 const int hLumFilterSize= c->hLumFilterSize;
2266 const int hChrFilterSize= c->hChrFilterSize;
/* Ring buffers of horizontally scaled lines awaiting vertical scaling. */
2267 int16_t **lumPixBuf= c->lumPixBuf;
2268 int16_t **chrUPixBuf= c->chrUPixBuf;
2269 int16_t **chrVPixBuf= c->chrVPixBuf;
2270 int16_t **alpPixBuf= c->alpPixBuf;
2271 const int vLumBufSize= c->vLumBufSize;
2272 const int vChrBufSize= c->vChrBufSize;
2273 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice geometry; the negation trick rounds the height up. */
2274 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2275 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2277 uint32_t *pal=c->pal_yuv;
/* Output-stage function pointers, copied locally so the tail-of-frame
 * special case below can override them without touching the context. */
2278 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2279 yuv2planarX_fn yuv2planeX_luma = c->yuv2planeX_luma;
2280 yuv2planarX_fn yuv2planeX_chroma = c->yuv2planeX_chroma;
2281 yuv2interleavedX_fn yuv2nv12X_chroma = c->yuv2nv12X_chroma;
2282 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2283 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2284 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* High-bit-depth sources get real dithering; 8-bit uses a flat table. */
2285 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2287 /* vars which will change and which we need to store back in the context */
2289 int lumBufIndex= c->lumBufIndex;
2290 int chrBufIndex= c->chrBufIndex;
2291 int lastInLumBuf= c->lastInLumBuf;
2292 int lastInChrBuf= c->lastInChrBuf;
/* Packed input carries all components in plane 0; mirror its stride into
 * the alpha slot so the alpha extraction below can index src[3]. */
2294 if (isPacked(c->srcFormat)) {
2302 srcStride[3]= srcStride[0];
/* vChrDrop widens the chroma strides, presumably to skip chroma lines —
 * NOTE(review): the guard around this is not visible here, confirm. */
2304 srcStride[1]<<= c->vChrDrop;
2305 srcStride[2]<<= c->vChrDrop;
2307 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2308 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2309 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2310 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2311 srcSliceY, srcSliceH, dstY, dstH);
2312 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2313 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* One-time warning: unaligned destination strides defeat aligned stores. */
2315 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2316 static int warnedAlready=0; //FIXME move this into the context perhaps
2317 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2318 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2319 " ->cannot do aligned memory accesses anymore\n");
2324 /* Note the user might start scaling the picture in the middle so this
2325 will not get executed. This is not really intended but works
2326 currently, so people might do it. */
2327 if (srcSliceY ==0) {
2335 if (!should_dither) {
2336 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: emit one destination line per iteration, buffering source
 * lines as needed; breaks out early when the slice runs out of input. */
2340 for (;dstY < dstH; dstY++) {
2341 const int chrDstY= dstY>>c->chrDstVSubSample;
2342 uint8_t *dest[4] = {
2343 dst[0] + dstStride[0] * dstY,
2344 dst[1] + dstStride[1] * chrDstY,
2345 dst[2] + dstStride[2] * chrDstY,
2346 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
/* Input line ranges required to produce this output line; the *2 variants
 * look ahead to the last luma row of the current chroma group. */
2349 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2350 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2351 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2352 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2353 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2354 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2357 //handle holes (FAST_BILINEAR & weird filters)
2358 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2359 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2360 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2361 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2363 DEBUG_BUFFERS("dstY: %d\n", dstY);
2364 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2365 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2366 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2367 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2369 // Do we have enough lines in this slice to output the dstY line
2370 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough input yet: clamp and just buffer whatever the slice has. */
2372 if (!enough_lines) {
2373 lastLumSrcY = srcSliceY + srcSliceH - 1;
2374 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2375 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2376 lastLumSrcY, lastChrSrcY);
2379 //Do horizontal scaling
2380 while(lastInLumBuf < lastLumSrcY) {
2381 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2382 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2384 assert(lumBufIndex < 2*vLumBufSize);
2385 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2386 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2387 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2388 hLumFilter, hLumFilterPos, hLumFilterSize,
/* Alpha is scaled with the luma filter into its own ring buffer. */
2391 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2392 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2393 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2397 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2398 lumBufIndex, lastInLumBuf);
2400 while(lastInChrBuf < lastChrSrcY) {
2401 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2402 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2404 assert(chrBufIndex < 2*vChrBufSize);
2405 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2406 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2407 //FIXME replace parameters through context struct (some at least)
2409 if (c->needs_hcscale)
2410 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2411 chrDstW, src1, src2, chrSrcW, chrXInc,
2412 hChrFilter, hChrFilterPos, hChrFilterSize,
2413 formatConvBuffer, pal);
2415 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2416 chrBufIndex, lastInChrBuf);
2418 //wrap buf index around to stay inside the ring buffer
2419 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2420 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2422 break; //we can't output a dstY line so let's try with the next slice
2425 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Per-line dither tables for high-bit-depth sources. */
2427 if (should_dither) {
2428 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2429 c->lumDither8 = dither_8x8_128[dstY & 7];
/* Near the bottom of the frame, fall back to the C output functions. */
2431 if (dstY >= dstH-2) {
2432 // hmm looks like we can't use MMX here without overwriting this array's tail
2433 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2planeX_luma, &yuv2planeX_chroma,
2434 &yuv2nv12X_chroma, &yuv2packed1, &yuv2packed2,
/* Vertical scaling + output: build pointer windows into the ring buffers
 * for the filter taps of this destination line. */
2439 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2440 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2441 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2442 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2443 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2444 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2445 if ((dstY&chrSkipMask) || isGray(dstFormat))
2446 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2447 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2449 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2450 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2453 yuv2nv12X_chroma(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2455 } else if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2456 yuv2yuv1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2459 yuv2yuv1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2460 yuv2yuv1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2463 if (alpBuf && dest[3])
2464 yuv2yuv1(alpBuf, dest[3], dstW, c->lumDither8, 0);
2465 } else { //General YV12
2466 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2469 yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2470 yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2473 if (alpBuf && dest[3])
2474 yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed (RGB-like) output path; for packed formats chrDstY == dstY,
 * hence the dstY-based indexing of vChrFilter below. */
2477 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2478 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2479 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2480 int chrAlpha = vChrFilter[2 * dstY + 1];
2481 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2482 alpPixBuf ? *alpSrcPtr : NULL,
2483 dest[0], dstW, chrAlpha, dstY);
2484 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2485 int lumAlpha = vLumFilter[2 * dstY + 1];
2486 int chrAlpha = vChrFilter[2 * dstY + 1];
2488 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2490 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2491 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2492 alpPixBuf ? alpSrcPtr : NULL,
2493 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2494 } else { //general RGB
2495 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2496 lumSrcPtr, vLumFilterSize,
2497 vChrFilter + dstY * vChrFilterSize,
2498 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2499 alpSrcPtr, dest[0], dstW, dstY);
/* Requested alpha plane but no alpha in the source: fill with opaque. */
2505 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2506 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* Flush pending non-temporal stores after MMX2 code paths ran. */
2509 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2510 __asm__ volatile("sfence":::"memory");
2514 /* store changed local vars back in the context */
2516 c->lumBufIndex= lumBufIndex;
2517 c->chrBufIndex= chrBufIndex;
2518 c->lastInLumBuf= lastInLumBuf;
2519 c->lastInChrBuf= lastInChrBuf;
/* Number of destination lines written during this call. */
2521 return dstY - lastDstY;
/*
 * One-time C-path initialization: selects the input-unpacking
 * (lumToYV12 / chrToYV12 / alpToYV12), horizontal-scaling and range-conversion
 * function pointers in the context, based on the source/destination pixel
 * formats and bit depths.
 */
2524 static av_cold void sws_init_swScale_c(SwsContext *c)
2526 enum PixelFormat srcFormat = c->srcFormat;
/* Pick the C output-stage functions (may be overridden by asm later). */
2528 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2planeX_luma, &c->yuv2planeX_chroma,
2529 &c->yuv2nv12X_chroma, &c->yuv2packed1, &c->yuv2packed2,
/* Chroma unpacking: convert source chroma into the internal planar form.
 * NULL means the source chroma is already directly usable. */
2532 c->chrToYV12 = NULL;
2534 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2535 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2536 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2537 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2541 case PIX_FMT_BGR4_BYTE:
2542 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* Both the LE and BE lists below map to bswap16UV_c; the #if HAVE_BIGENDIAN
 * guard that selects the non-native set is not visible in this excerpt —
 * NOTE(review): confirm against the full file. */
2544 case PIX_FMT_YUV444P9LE:
2545 case PIX_FMT_YUV422P9LE:
2546 case PIX_FMT_YUV420P9LE:
2547 case PIX_FMT_YUV422P10LE:
2548 case PIX_FMT_YUV444P10LE:
2549 case PIX_FMT_YUV420P10LE:
2550 case PIX_FMT_YUV420P16LE:
2551 case PIX_FMT_YUV422P16LE:
2552 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2554 case PIX_FMT_YUV444P9BE:
2555 case PIX_FMT_YUV422P9BE:
2556 case PIX_FMT_YUV420P9BE:
2557 case PIX_FMT_YUV444P10BE:
2558 case PIX_FMT_YUV422P10BE:
2559 case PIX_FMT_YUV420P10BE:
2560 case PIX_FMT_YUV420P16BE:
2561 case PIX_FMT_YUV422P16BE:
2562 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* RGB sources: when source chroma is horizontally subsampled, use the
 * *_half_c variants which average two source pixels per chroma sample. */
2565 if (c->chrSrcHSubSample) {
2567 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2568 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2569 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2570 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2571 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2572 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2573 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2574 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2575 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2576 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2577 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2578 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2579 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2580 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2581 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2582 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2583 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2584 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* Full-resolution chroma variants (no horizontal subsampling). */
2588 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2589 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2590 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2591 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2592 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2593 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2594 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2595 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2596 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2597 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2598 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2599 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2600 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2601 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2602 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2603 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2604 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2605 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma and alpha unpacking. */
2609 c->lumToYV12 = NULL;
2610 c->alpToYV12 = NULL;
2611 switch (srcFormat) {
2613 case PIX_FMT_YUV444P9LE:
2614 case PIX_FMT_YUV422P9LE:
2615 case PIX_FMT_YUV420P9LE:
2616 case PIX_FMT_YUV444P10LE:
2617 case PIX_FMT_YUV422P10LE:
2618 case PIX_FMT_YUV420P10LE:
2619 case PIX_FMT_YUV420P16LE:
2620 case PIX_FMT_YUV422P16LE:
2621 case PIX_FMT_YUV444P16LE:
2622 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2624 case PIX_FMT_YUV444P9BE:
2625 case PIX_FMT_YUV422P9BE:
2626 case PIX_FMT_YUV420P9BE:
2627 case PIX_FMT_YUV444P10BE:
2628 case PIX_FMT_YUV422P10BE:
2629 case PIX_FMT_YUV420P10BE:
2630 case PIX_FMT_YUV420P16BE:
2631 case PIX_FMT_YUV422P16BE:
2632 case PIX_FMT_YUV444P16BE:
2633 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2635 case PIX_FMT_YUYV422 :
2636 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2637 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2638 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2639 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2640 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2641 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2642 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2643 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2644 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2645 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2646 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2647 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2651 case PIX_FMT_BGR4_BYTE:
2652 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2653 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2654 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2655 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2656 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2657 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2658 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2659 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2660 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2661 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2662 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha extraction for sources that carry an alpha channel. */
2665 switch (srcFormat) {
2667 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2669 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2670 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
/* Horizontal scaler selection by source/destination bit depth; the fast
 * bilinear path is only available for the 8-bit -> <=10-bit case. */
2674 if (c->srcBpc == 8) {
2675 if (c->dstBpc <= 10) {
2676 c->hyScale = c->hcScale = hScale8To15_c;
2677 if (c->flags & SWS_FAST_BILINEAR) {
2678 c->hyscale_fast = hyscale_fast_c;
2679 c->hcscale_fast = hcscale_fast_c;
2682 c->hyScale = c->hcScale = hScale8To19_c;
2685 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* Range (JPEG/full <-> MPEG/limited) conversion, only for YUV output. */
2688 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2689 if (c->dstBpc <= 10) {
2691 c->lumConvertRange = lumRangeFromJpeg_c;
2692 c->chrConvertRange = chrRangeFromJpeg_c;
2694 c->lumConvertRange = lumRangeToJpeg_c;
2695 c->chrConvertRange = chrRangeToJpeg_c;
2699 c->lumConvertRange = lumRangeFromJpeg16_c;
2700 c->chrConvertRange = chrRangeFromJpeg16_c;
2702 c->lumConvertRange = lumRangeToJpeg16_c;
2703 c->chrConvertRange = chrRangeToJpeg16_c;
/* Gray and mono formats have no real chroma, so skip chroma h-scaling. */
2708 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2709 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2710 c->needs_hcscale = 1;
2713 SwsFunc ff_getSwsFunc(SwsContext *c)
2715 sws_init_swScale_c(c);
2718 ff_sws_init_swScale_mmx(c);
2720 ff_sws_init_swScale_altivec(c);