2 * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/avutil.h"
28 #include "libavutil/bswap.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
36 #include "swscale_internal.h"
/* Number of fractional bits carried by the fixed-point weights below. */
38 #define RGB2YUV_SHIFT 15
/*
 * RGB -> YUV weights. The first letter names the input component (R/G/B),
 * the second the output component (Y/U/V). Each one is a floating-point
 * coefficient scaled by 219/255 (Y) or 224/255 (U, V) and by
 * 2^RGB2YUV_SHIFT; 0.5 is added before the int cast so the magnitude is
 * rounded, and negative weights negate that rounded magnitude.
 */
39 #define BY ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
40 #define BV (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
41 #define BU ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
42 #define GY ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
43 #define GV (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
44 #define GU (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
45 #define RY ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
46 #define RV ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
47 #define RU (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
/*
 * Reads one 16-bit value at pos, big-endian when isBE(origin) is true and
 * little-endian otherwise. Expects a variable named `origin` to be in scope
 * where the macro is expanded.
 */
49 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/*
 * r and b pick between the locals r_b and b_r of the expanding function:
 * for the two BGR48 origins r is b_r and b is r_b, for every other origin
 * r is r_b and b is b_r.
 */
51 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
52 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
54 static av_always_inline void
55 rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
56 enum PixelFormat origin)
59 for (i = 0; i < width; i++) {
60 unsigned int r_b = input_pixel(&src[i*4+0]);
61 unsigned int g = input_pixel(&src[i*4+1]);
62 unsigned int b_r = input_pixel(&src[i*4+2]);
64 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
68 static av_always_inline void
69 rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
70 const uint16_t *src1, const uint16_t *src2,
71 int width, enum PixelFormat origin)
75 for (i = 0; i < width; i++) {
76 int r_b = input_pixel(&src1[i*4+0]);
77 int g = input_pixel(&src1[i*4+1]);
78 int b_r = input_pixel(&src1[i*4+2]);
80 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
81 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
85 static av_always_inline void
86 rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
87 const uint16_t *src1, const uint16_t *src2,
88 int width, enum PixelFormat origin)
92 for (i = 0; i < width; i++) {
93 int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
94 int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
95 int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
97 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
98 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
102 #define rgb64funcs(pattern, BE_LE, origin) \
103 static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
104 int width, uint32_t *unused) \
106 const uint16_t *src = (const uint16_t *) _src; \
107 uint16_t *dst = (uint16_t *) _dst; \
108 rgb64ToY_c_template(dst, src, width, origin); \
111 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
112 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
113 int width, uint32_t *unused) \
115 const uint16_t *src1 = (const uint16_t *) _src1, \
116 *src2 = (const uint16_t *) _src2; \
117 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
118 rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
121 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
122 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
123 int width, uint32_t *unused) \
125 const uint16_t *src1 = (const uint16_t *) _src1, \
126 *src2 = (const uint16_t *) _src2; \
127 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
128 rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
131 rgb64funcs(rgb, LE, PIX_FMT_RGBA64LE)
132 rgb64funcs(rgb, BE, PIX_FMT_RGBA64BE)
134 static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
135 const uint16_t *src, int width,
136 enum PixelFormat origin)
139 for (i = 0; i < width; i++) {
140 unsigned int r_b = input_pixel(&src[i * 3 + 0]);
141 unsigned int g = input_pixel(&src[i * 3 + 1]);
142 unsigned int b_r = input_pixel(&src[i * 3 + 2]);
144 dst[i] = (RY * r + GY * g + BY * b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
148 static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
150 const uint16_t *src1,
151 const uint16_t *src2,
153 enum PixelFormat origin)
156 assert(src1 == src2);
157 for (i = 0; i < width; i++) {
158 int r_b = input_pixel(&src1[i * 3 + 0]);
159 int g = input_pixel(&src1[i * 3 + 1]);
160 int b_r = input_pixel(&src1[i * 3 + 2]);
162 dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
163 dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
167 static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
169 const uint16_t *src1,
170 const uint16_t *src2,
172 enum PixelFormat origin)
175 assert(src1 == src2);
176 for (i = 0; i < width; i++) {
177 int r_b = (input_pixel(&src1[6 * i + 0]) +
178 input_pixel(&src1[6 * i + 3]) + 1) >> 1;
179 int g = (input_pixel(&src1[6 * i + 1]) +
180 input_pixel(&src1[6 * i + 4]) + 1) >> 1;
181 int b_r = (input_pixel(&src1[6 * i + 2]) +
182 input_pixel(&src1[6 * i + 5]) + 1) >> 1;
184 dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
185 dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
193 #define rgb48funcs(pattern, BE_LE, origin) \
194 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
195 const uint8_t *_src, \
196 const uint8_t *unused0, const uint8_t *unused1,\
200 const uint16_t *src = (const uint16_t *)_src; \
201 uint16_t *dst = (uint16_t *)_dst; \
202 rgb48ToY_c_template(dst, src, width, origin); \
205 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
207 const uint8_t *unused0, \
208 const uint8_t *_src1, \
209 const uint8_t *_src2, \
213 const uint16_t *src1 = (const uint16_t *)_src1, \
214 *src2 = (const uint16_t *)_src2; \
215 uint16_t *dstU = (uint16_t *)_dstU, \
216 *dstV = (uint16_t *)_dstV; \
217 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
220 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
222 const uint8_t *unused0, \
223 const uint8_t *_src1, \
224 const uint8_t *_src2, \
228 const uint16_t *src1 = (const uint16_t *)_src1, \
229 *src2 = (const uint16_t *)_src2; \
230 uint16_t *dstU = (uint16_t *)_dstU, \
231 *dstV = (uint16_t *)_dstV; \
232 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
235 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
236 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
237 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
238 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/*
 * The 16/32-bit packed readers below declare locals named r and b and use
 * a different input_pixel(), so the earlier helper macros must be gone
 * before this point. #undef on a name that is not currently defined is a
 * no-op, so these lines are safe even if the macros were already removed.
 */
#undef r
#undef b
#undef input_pixel

/*
 * Reads pixel number i from the byte pointer `src` of the expanding
 * function: a native-order 32-bit load (AV_RN32A) for RGBA/BGRA/ARGB/ABGR
 * origins, otherwise a 16-bit load whose byte order follows isBE(origin).
 */
#define input_pixel(i) ((origin == PIX_FMT_RGBA ||                      \
                         origin == PIX_FMT_BGRA ||                      \
                         origin == PIX_FMT_ARGB ||                      \
                         origin == PIX_FMT_ABGR)                        \
                        ? AV_RN32A(&src[(i) * 4])                       \
                        : (isBE(origin) ? AV_RB16(&src[(i) * 2])        \
                                        : AV_RL16(&src[(i) * 2])))
248 static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
251 enum PixelFormat origin,
254 int maskr, int maskg,
256 int gsh, int bsh, int S)
258 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
259 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
262 for (i = 0; i < width; i++) {
263 int px = input_pixel(i) >> shp;
264 int b = (px & maskb) >> shb;
265 int g = (px & maskg) >> shg;
266 int r = (px & maskr) >> shr;
268 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
272 static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
276 enum PixelFormat origin,
279 int maskr, int maskg,
281 int gsh, int bsh, int S)
283 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
284 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
285 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
288 for (i = 0; i < width; i++) {
289 int px = input_pixel(i) >> shp;
290 int b = (px & maskb) >> shb;
291 int g = (px & maskg) >> shg;
292 int r = (px & maskr) >> shr;
294 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
295 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/*
 * Half-horizontal-resolution variant of the packed 16/32-bit RGB -> U/V
 * template: every output sample is computed from the sum of two
 * neighbouring pixels (px0 + px1).
 *
 * NOTE(review): this listing skips lines inside the function (part of the
 * parameter list, the declaration of i, whatever sits between the rnd
 * initialiser and the loop, and the branch bodies around the 565 test), so
 * the comments below cover only what is visible.
 */
299 static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
303 enum PixelFormat origin,
306 int maskr, int maskg,
308 int gsh, int bsh, int S)
/* U/V weights pre-shifted per component; maskgx selects every bit that is
 * neither red nor blue. */
310 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
311 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
312 maskgx = ~(maskr | maskb);
/* Offset/rounding term for a sum of two pixels; the final shift below is
 * one bit larger (S - 6 + 1) than in the full-resolution template. */
313 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
319 for (i = 0; i < width; i++) {
320 int px0 = input_pixel(2 * i + 0) >> shp;
321 int px1 = input_pixel(2 * i + 1) >> shp;
/* g holds the summed non-red/non-blue bits of both pixels; rb is what is
 * left of the pixel sum once g has been removed. */
322 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
323 int rb = px0 + px1 - g;
325 b = (rb & maskb) >> shb;
/* NOTE(review): start of the condition and the branch taken for the 565
 * origins are not visible here — confirm against the full file. */
327 origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
328 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
331 g = (g & maskg) >> shg;
333 r = (rb & maskr) >> shr;
335 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
336 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
342 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
343 maskg, maskb, rsh, gsh, bsh, S) \
344 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
345 int width, uint32_t *unused) \
347 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \
348 maskr, maskg, maskb, rsh, gsh, bsh, S); \
351 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
352 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
353 int width, uint32_t *unused) \
355 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
356 shr, shg, shb, shp, \
357 maskr, maskg, maskb, rsh, gsh, bsh, S); \
360 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
361 const uint8_t *unused0, const uint8_t *src, \
362 const uint8_t *dummy, \
363 int width, uint32_t *unused) \
365 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
366 shr, shg, shb, shp, \
367 maskr, maskg, maskb, \
371 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
372 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
373 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
374 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
375 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
376 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
377 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
378 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
379 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
380 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
381 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
382 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
383 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
384 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
385 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
386 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
388 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
389 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
390 int width, enum PixelFormat origin)
393 for (i = 0; i < width; i++) {
394 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
395 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
396 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
398 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
399 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/**
 * Extract the fourth 16-bit component of every pixel of a packed
 * four-component line.
 *
 * The value is copied as stored in memory (no byte swapping is performed
 * here) and converted to int16_t by assignment.
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, four uint16_t per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void rgba64ToA_c(int16_t *dst, const uint16_t *src, const uint8_t *unused1,
                        const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        const uint16_t *px = src + 4 * i;
        dst[i] = px[3];
    }
}
/**
 * Extract alpha from packed 4-byte pixels whose alpha is the first byte
 * and scale it by 64 (<< 6).
 *
 * Restored as the counterpart of rgbaToA_c, which reads byte 3 of each
 * pixel with the same scaling; here the alpha byte is byte 0.
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, four bytes per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        dst[i] = src[4 * i] << 6;
    }
}
/**
 * Extract alpha from packed 4-byte pixels whose alpha is the last byte and
 * scale it by 64 (<< 6).
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, four bytes per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        const uint8_t *px = src + 4 * i;
        dst[i] = px[3] << 6;
    }
}
/**
 * Look up each source byte in a 32-bit palette and output bits 24..31 of
 * the entry, scaled by 64 (<< 6).
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, one palette index per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param pal     palette, indexed by the source byte
 */
static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        int d = src[i]; /* palette index */

        dst[i] = (pal[d] >> 24) << 6;
    }
}
/**
 * Look up each source byte in a 32-bit palette and output bits 0..7 of the
 * entry, scaled by 64 (<< 6).
 *
 * width is declared int, like every other line reader in this file (it was
 * the only one declared long); the loop index is an int either way.
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, one palette index per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param pal     palette, indexed by the source byte
 */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        int d = src[i]; /* palette index */

        dst[i] = (pal[d] & 0xFF) << 6;
    }
}
/**
 * Look up each source byte in a 32-bit palette and output bits 8..15 of
 * the entry as U and bits 16..23 as V, both scaled by 64 (<< 6).
 *
 * The palette entry is kept unsigned so that entries with the top bit set
 * are shifted as plain bit patterns rather than as negative ints.
 *
 * NOTE(review): dstU is uint16_t * while dstV is int16_t *; every value
 * stored (0..255 << 6) fits both types.
 *
 * @param dstU    output U line, one sample per pixel
 * @param dstV    output V line, one sample per pixel
 * @param unused0 ignored
 * @param src1    input line, one palette index per pixel
 * @param src2    must be equal to src1 (asserted)
 * @param width   number of pixels
 * @param pal     palette, indexed by the source byte
 */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        const uint32_t p = pal[src1[i]];

        dstU[i] = (uint8_t)(p >>  8) << 6;
        dstV[i] = (uint8_t)(p >> 16) << 6;
    }
}
/**
 * Expand a 1-bit-per-pixel line, most significant bit first, to luma:
 * a 0 bit becomes 16383 and a 1 bit becomes 0.
 *
 * Exactly `width` samples are written. Whole source bytes are expanded
 * eight pixels at a time and a final partial byte contributes only its
 * leading (width & 7) bits. The previous form rounded width up to a byte
 * count and then applied (width & 7) to that byte count, which could read
 * a source byte and write samples beyond the requested line.
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, eight pixels per byte
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int full_bytes, tail_bits;
    int i, j;

    if (width <= 0)
        return;

    full_bytes = width >> 3;
    tail_bits  = width & 7;

    for (i = 0; i < full_bytes; i++) {
        int d = src[i] ^ 0xFF; /* invert: a cleared bit is full luma */
        for (j = 0; j < 8; j++)
            dst[8*i+j] = ((d>>(7-j))&1) * 16383;
    }
    if (tail_bits) {
        int d = src[i] ^ 0xFF;
        for (j = 0; j < tail_bits; j++)
            dst[8*i+j] = ((d>>(7-j))&1) * 16383;
    }
}
/**
 * Expand a 1-bit-per-pixel line, most significant bit first, to luma:
 * a 1 bit becomes 16383 and a 0 bit becomes 0.
 *
 * Exactly `width` samples are written. Whole source bytes are expanded
 * eight pixels at a time and a final partial byte contributes only its
 * leading (width & 7) bits. The previous form rounded width up to a byte
 * count and then applied (width & 7) to that byte count, which could read
 * a source byte and write samples beyond the requested line.
 *
 * @param dst     output line, one int16_t per pixel
 * @param src     input line, eight pixels per byte
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int full_bytes, tail_bits;
    int i, j;

    if (width <= 0)
        return;

    full_bytes = width >> 3;
    tail_bits  = width & 7;

    for (i = 0; i < full_bytes; i++) {
        int d = src[i];
        for (j = 0; j < 8; j++)
            dst[8*i+j] = ((d>>(7-j))&1) * 16383;
    }
    if (tail_bits) {
        int d = src[i];
        for (j = 0; j < tail_bits; j++)
            dst[8*i+j] = ((d>>(7-j))&1) * 16383;
    }
}
/**
 * Extract luma from a packed line in which luma occupies the even bytes
 * (yuy2ToUV_c takes U and V from the odd bytes of the same layout).
 *
 * @param dst     output line, one byte per pixel
 * @param src     input line, two bytes per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
                      uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = src[2 * i];
}
/**
 * Extract U and V from a packed line of 4-byte groups: byte 1 of each
 * group is U, byte 3 is V.
 *
 * The src1 == src2 precondition is checked before any output is written
 * (it used to be checked only after the loop had run).
 *
 * @param dstU    output U line, one byte per group
 * @param dstV    output V line, one byte per group
 * @param unused0 ignored
 * @param src1    input line, four bytes per output sample
 * @param src2    must be equal to src1 (asserted)
 * @param width   number of output samples
 * @param unused  ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        const uint8_t *grp = src1 + 4 * i;
        dstU[i] = grp[1];
        dstV[i] = grp[3];
    }
}
/**
 * Copy a line of 16-bit samples, byte-swapping each one with av_bswap16().
 *
 * @param _dst    output line, treated as uint16_t samples
 * @param _src    input line, treated as uint16_t samples
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of 16-bit samples
 * @param unused  ignored
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
                       uint32_t *unused)
{
    const uint16_t *src = (const uint16_t *)_src;
    uint16_t *dst       = (uint16_t *)_dst;
    int i;

    for (i = 0; i < width; i++)
        dst[i] = av_bswap16(src[i]);
}
/**
 * Copy two lines of 16-bit samples, byte-swapping each sample with
 * av_bswap16(): _src1 goes to _dstU and _src2 goes to _dstV.
 *
 * @param _dstU   output U line, treated as uint16_t samples
 * @param _dstV   output V line, treated as uint16_t samples
 * @param unused0 ignored
 * @param _src1   input U line, treated as uint16_t samples
 * @param _src2   input V line, treated as uint16_t samples
 * @param width   number of 16-bit samples per line
 * @param unused  ignored
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *src1 = (const uint16_t *)_src1,
                   *src2 = (const uint16_t *)_src2;
    uint16_t *dstU = (uint16_t *)_dstU, *dstV = (uint16_t *)_dstV;
    int i;

    for (i = 0; i < width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
    }
}
/**
 * Extract luma from a packed line in which luma occupies the odd bytes.
 *
 * This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses.
 *
 * @param dst     output line, one byte per pixel
 * @param src     input line, two bytes per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels
 * @param unused  ignored
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
                      uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = src[2 * i + 1];
}
/**
 * Extract U and V from a packed line of 4-byte groups: byte 0 of each
 * group is U, byte 2 is V.
 *
 * The src1 == src2 precondition is checked before any output is written
 * (it used to be checked only after the loop had run).
 *
 * @param dstU    output U line, one byte per group
 * @param dstV    output V line, one byte per group
 * @param unused0 ignored
 * @param src1    input line, four bytes per output sample
 * @param src2    must be equal to src1 (asserted)
 * @param width   number of output samples
 * @param unused  ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        const uint8_t *grp = src1 + 4 * i;
        dstU[i] = grp[0];
        dstV[i] = grp[2];
    }
}
556 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
557 const uint8_t *src, int width)
560 for (i = 0; i < width; i++) {
561 dst1[i] = src[2 * i + 0];
562 dst2[i] = src[2 * i + 1];
/**
 * Split an interleaved chroma line into U and V with U first: even bytes
 * of src1 go to dstU, odd bytes to dstV. src2 is not used.
 *
 * @param dstU    output U line
 * @param dstV    output V line
 * @param unused0 ignored
 * @param src1    interleaved input line, two bytes per output sample
 * @param src2    ignored
 * @param width   number of output samples
 * @param unused  ignored
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/**
 * Split an interleaved chroma line into U and V with V first: even bytes
 * of src1 go to dstV, odd bytes to dstU. src2 is not used.
 *
 * @param dstU    output U line
 * @param dstV    output V line
 * @param unused0 ignored
 * @param src1    interleaved input line, two bytes per output sample
 * @param src2    ignored
 * @param width   number of output samples
 * @param unused  ignored
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* Destinations are swapped relative to nv12ToUV_c. */
    nvXXtoUV_c(dstV, dstU, src1, width);
}
/*
 * Reads one 16-bit value at pos, big-endian when isBE(origin) is true and
 * little-endian otherwise; needs a variable named `origin` at the point of
 * use. The #undef makes the redefinition valid regardless of which earlier
 * input_pixel definition is still active; it is a no-op if none is.
 */
#undef input_pixel
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
582 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
583 int width, uint32_t *unused)
586 for (i = 0; i < width; i++) {
587 int b = src[i * 3 + 0];
588 int g = src[i * 3 + 1];
589 int r = src[i * 3 + 2];
591 dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
595 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
596 const uint8_t *src2, int width, uint32_t *unused)
599 for (i = 0; i < width; i++) {
600 int b = src1[3 * i + 0];
601 int g = src1[3 * i + 1];
602 int r = src1[3 * i + 2];
604 dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
605 dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
607 assert(src1 == src2);
610 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
611 const uint8_t *src2, int width, uint32_t *unused)
614 for (i = 0; i < width; i++) {
615 int b = src1[6 * i + 0] + src1[6 * i + 3];
616 int g = src1[6 * i + 1] + src1[6 * i + 4];
617 int r = src1[6 * i + 2] + src1[6 * i + 5];
619 dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
620 dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
622 assert(src1 == src2);
625 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
629 for (i = 0; i < width; i++) {
630 int r = src[i * 3 + 0];
631 int g = src[i * 3 + 1];
632 int b = src[i * 3 + 2];
634 dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
638 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
639 const uint8_t *src2, int width, uint32_t *unused)
642 assert(src1 == src2);
643 for (i = 0; i < width; i++) {
644 int r = src1[3 * i + 0];
645 int g = src1[3 * i + 1];
646 int b = src1[3 * i + 2];
648 dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
649 dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
653 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
654 const uint8_t *src2, int width, uint32_t *unused)
657 assert(src1 == src2);
658 for (i = 0; i < width; i++) {
659 int r = src1[6 * i + 0] + src1[6 * i + 3];
660 int g = src1[6 * i + 1] + src1[6 * i + 4];
661 int b = src1[6 * i + 2] + src1[6 * i + 5];
663 dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
664 dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
668 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
671 for (i = 0; i < width; i++) {
676 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
680 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
683 for (i = 0; i < width; i++) {
688 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
689 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/*
 * Reads one 16-bit sample at src, big-endian when the local `is_be` of the
 * expanding function is non-zero and little-endian otherwise. The expansion
 * is parenthesised so it can be used inside larger expressions.
 */
#define rdpx(src) \
    (is_be ? AV_RB16(src) : AV_RL16(src))
695 static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4],
696 int width, int bpc, int is_be)
699 const uint16_t **src = (const uint16_t **)_src;
700 uint16_t *dst = (uint16_t *)_dst;
701 for (i = 0; i < width; i++) {
702 int g = rdpx(src[0] + i);
703 int b = rdpx(src[1] + i);
704 int r = rdpx(src[2] + i);
706 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT);
/** Luma reader for planar RGB: 9 bits per component, little-endian. */
static void planar_rgb9le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 9, 0);
}
/** Luma reader for planar RGB: 9 bits per component, big-endian. */
static void planar_rgb9be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 9, 1);
}
/** Luma reader for planar RGB: 10 bits per component, little-endian. */
static void planar_rgb10le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 10, 0);
}
/** Luma reader for planar RGB: 10 bits per component, big-endian. */
static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 10, 1);
}
/** Luma reader for planar RGB: 16 bits per component, little-endian. */
static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 16, 0);
}
/** Luma reader for planar RGB: 16 bits per component, big-endian. */
static void planar_rgb16be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
    planar_rgb16_to_y(dst, src, w, 16, 1);
}
740 static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV,
741 const uint8_t *_src[4], int width,
745 const uint16_t **src = (const uint16_t **)_src;
746 uint16_t *dstU = (uint16_t *)_dstU;
747 uint16_t *dstV = (uint16_t *)_dstV;
748 for (i = 0; i < width; i++) {
749 int g = rdpx(src[0] + i);
750 int b = rdpx(src[1] + i);
751 int r = rdpx(src[2] + i);
753 dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT;
754 dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT;
/** Chroma reader for planar RGB: 9 bits per component, little-endian. */
static void planar_rgb9le_to_uv(uint8_t *dstU, uint8_t *dstV,
                                const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 9, 0);
}
/** Chroma reader for planar RGB: 9 bits per component, big-endian. */
static void planar_rgb9be_to_uv(uint8_t *dstU, uint8_t *dstV,
                                const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 9, 1);
}
/** Chroma reader for planar RGB: 10 bits per component, little-endian. */
static void planar_rgb10le_to_uv(uint8_t *dstU, uint8_t *dstV,
                                 const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 10, 0);
}
/** Chroma reader for planar RGB: 10 bits per component, big-endian. */
static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV,
                                 const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1);
}
/** Chroma reader for planar RGB: 16 bits per component, little-endian. */
static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV,
                                 const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 16, 0);
}
/** Chroma reader for planar RGB: 16 bits per component, big-endian. */
static void planar_rgb16be_to_uv(uint8_t *dstU, uint8_t *dstV,
                                 const uint8_t *src[4], int w)
{
    planar_rgb16_to_uv(dstU, dstV, src, w, 16, 1);
}
/*
 * Installs the input reader callbacks on the SwsContext according to
 * c->srcFormat: chrToYV12 / readChrPlanar for chroma, lumToYV12 /
 * readLumPlanar for luma and alpToYV12 for alpha.
 *
 * NOTE(review): this listing skips lines inside the function (several case
 * labels, the break statements, closing braces and any preprocessor
 * guards are not visible). Assignments whose case label is not shown are
 * left uncommented below; confirm them against the complete file.
 */
795 av_cold void ff_sws_init_input_funcs(SwsContext *c)
797 enum PixelFormat srcFormat = c->srcFormat;
/* --- chroma readers that do not depend on horizontal subsampling --- */
801 case PIX_FMT_YUYV422:
802 c->chrToYV12 = yuy2ToUV_c;
804 case PIX_FMT_UYVY422:
805 c->chrToYV12 = uyvyToUV_c;
808 c->chrToYV12 = nv12ToUV_c;
811 c->chrToYV12 = nv21ToUV_c;
816 case PIX_FMT_BGR4_BYTE:
817 case PIX_FMT_RGB4_BYTE:
818 c->chrToYV12 = palToUV_c;
/* Planar GBR inputs use the readChrPlanar callback instead of chrToYV12. */
820 case PIX_FMT_GBRP9LE:
821 c->readChrPlanar = planar_rgb9le_to_uv;
823 case PIX_FMT_GBRP10LE:
824 c->readChrPlanar = planar_rgb10le_to_uv;
826 case PIX_FMT_GBRP16LE:
827 c->readChrPlanar = planar_rgb16le_to_uv;
829 case PIX_FMT_GBRP9BE:
830 c->readChrPlanar = planar_rgb9be_to_uv;
832 case PIX_FMT_GBRP10BE:
833 c->readChrPlanar = planar_rgb10be_to_uv;
835 case PIX_FMT_GBRP16BE:
836 c->readChrPlanar = planar_rgb16be_to_uv;
839 c->readChrPlanar = planar_rgb_to_uv;
/* Both the LE group and the BE group of 9/10/16-bit planar YUV select the
 * byte-swapping reader. Presumably only the group that differs from host
 * byte order is compiled in — the guard is not visible here, TODO confirm. */
842 case PIX_FMT_YUV444P9LE:
843 case PIX_FMT_YUV422P9LE:
844 case PIX_FMT_YUV420P9LE:
845 case PIX_FMT_YUV422P10LE:
846 case PIX_FMT_YUV444P10LE:
847 case PIX_FMT_YUV420P10LE:
848 case PIX_FMT_YUV420P16LE:
849 case PIX_FMT_YUV422P16LE:
850 case PIX_FMT_YUV444P16LE:
851 c->chrToYV12 = bswap16UV_c;
854 case PIX_FMT_YUV444P9BE:
855 case PIX_FMT_YUV422P9BE:
856 case PIX_FMT_YUV420P9BE:
857 case PIX_FMT_YUV444P10BE:
858 case PIX_FMT_YUV422P10BE:
859 case PIX_FMT_YUV420P10BE:
860 case PIX_FMT_YUV420P16BE:
861 case PIX_FMT_YUV422P16BE:
862 case PIX_FMT_YUV444P16BE:
863 c->chrToYV12 = bswap16UV_c;
/* --- RGB chroma readers: when c->chrSrcHSubSample is set, the *_half_c
 * variants (two source pixels per output sample) are selected. --- */
867 if (c->chrSrcHSubSample) {
869 case PIX_FMT_RGBA64BE:
870 c->chrToYV12 = rgb64BEToUV_half_c;
872 case PIX_FMT_RGBA64LE:
873 c->chrToYV12 = rgb64LEToUV_half_c;
875 case PIX_FMT_RGB48BE:
876 c->chrToYV12 = rgb48BEToUV_half_c;
878 case PIX_FMT_RGB48LE:
879 c->chrToYV12 = rgb48LEToUV_half_c;
881 case PIX_FMT_BGR48BE:
882 c->chrToYV12 = bgr48BEToUV_half_c;
884 case PIX_FMT_BGR48LE:
885 c->chrToYV12 = bgr48LEToUV_half_c;
888 c->chrToYV12 = bgr32ToUV_half_c;
/* NOTE(review): PIX_FMT_RGB32_1 selects the bgr321* reader here and
 * PIX_FMT_BGR32_1 selects rgb321* further down; the names are crossed
 * relative to the format — confirm this is intentional. */
890 case PIX_FMT_RGB32_1:
891 c->chrToYV12 = bgr321ToUV_half_c;
894 c->chrToYV12 = bgr24ToUV_half_c;
896 case PIX_FMT_BGR565LE:
897 c->chrToYV12 = bgr16leToUV_half_c;
899 case PIX_FMT_BGR565BE:
900 c->chrToYV12 = bgr16beToUV_half_c;
902 case PIX_FMT_BGR555LE:
903 c->chrToYV12 = bgr15leToUV_half_c;
905 case PIX_FMT_BGR555BE:
906 c->chrToYV12 = bgr15beToUV_half_c;
/* NOTE(review): gbr24pToUV_half_c takes three source planes and a trailing
 * enum, unlike the other chrToYV12 readers assigned here — verify the
 * callback signature. */
908 case PIX_FMT_GBR24P :
909 c->chrToYV12 = gbr24pToUV_half_c;
911 case PIX_FMT_BGR444LE:
912 c->chrToYV12 = bgr12leToUV_half_c;
914 case PIX_FMT_BGR444BE:
915 c->chrToYV12 = bgr12beToUV_half_c;
918 c->chrToYV12 = rgb32ToUV_half_c;
920 case PIX_FMT_BGR32_1:
921 c->chrToYV12 = rgb321ToUV_half_c;
924 c->chrToYV12 = rgb24ToUV_half_c;
926 case PIX_FMT_RGB565LE:
927 c->chrToYV12 = rgb16leToUV_half_c;
929 case PIX_FMT_RGB565BE:
930 c->chrToYV12 = rgb16beToUV_half_c;
932 case PIX_FMT_RGB555LE:
933 c->chrToYV12 = rgb15leToUV_half_c;
935 case PIX_FMT_RGB555BE:
936 c->chrToYV12 = rgb15beToUV_half_c;
938 case PIX_FMT_RGB444LE:
939 c->chrToYV12 = rgb12leToUV_half_c;
941 case PIX_FMT_RGB444BE:
942 c->chrToYV12 = rgb12beToUV_half_c;
/* Full-resolution RGB chroma readers; presumably the else branch of the
 * chrSrcHSubSample test (the else itself is not visible) — TODO confirm. */
947 case PIX_FMT_RGBA64BE:
948 c->chrToYV12 = rgb64BEToUV_c;
950 case PIX_FMT_RGBA64LE:
951 c->chrToYV12 = rgb64LEToUV_c;
953 case PIX_FMT_RGB48BE:
954 c->chrToYV12 = rgb48BEToUV_c;
956 case PIX_FMT_RGB48LE:
957 c->chrToYV12 = rgb48LEToUV_c;
959 case PIX_FMT_BGR48BE:
960 c->chrToYV12 = bgr48BEToUV_c;
962 case PIX_FMT_BGR48LE:
963 c->chrToYV12 = bgr48LEToUV_c;
966 c->chrToYV12 = bgr32ToUV_c;
968 case PIX_FMT_RGB32_1:
969 c->chrToYV12 = bgr321ToUV_c;
972 c->chrToYV12 = bgr24ToUV_c;
974 case PIX_FMT_BGR565LE:
975 c->chrToYV12 = bgr16leToUV_c;
977 case PIX_FMT_BGR565BE:
978 c->chrToYV12 = bgr16beToUV_c;
980 case PIX_FMT_BGR555LE:
981 c->chrToYV12 = bgr15leToUV_c;
983 case PIX_FMT_BGR555BE:
984 c->chrToYV12 = bgr15beToUV_c;
986 case PIX_FMT_BGR444LE:
987 c->chrToYV12 = bgr12leToUV_c;
989 case PIX_FMT_BGR444BE:
990 c->chrToYV12 = bgr12beToUV_c;
993 c->chrToYV12 = rgb32ToUV_c;
995 case PIX_FMT_BGR32_1:
996 c->chrToYV12 = rgb321ToUV_c;
999 c->chrToYV12 = rgb24ToUV_c;
1001 case PIX_FMT_RGB565LE:
1002 c->chrToYV12 = rgb16leToUV_c;
1004 case PIX_FMT_RGB565BE:
1005 c->chrToYV12 = rgb16beToUV_c;
1007 case PIX_FMT_RGB555LE:
1008 c->chrToYV12 = rgb15leToUV_c;
1010 case PIX_FMT_RGB555BE:
1011 c->chrToYV12 = rgb15beToUV_c;
1013 case PIX_FMT_RGB444LE:
1014 c->chrToYV12 = rgb12leToUV_c;
1016 case PIX_FMT_RGB444BE:
1017 c->chrToYV12 = rgb12beToUV_c;
/* --- luma and alpha readers: both callbacks are cleared first, so a format
 * with no matching case below ends up with NULL. --- */
1022 c->lumToYV12 = NULL;
1023 c->alpToYV12 = NULL;
1024 switch (srcFormat) {
1025 case PIX_FMT_GBRP9LE:
1026 c->readLumPlanar = planar_rgb9le_to_y;
1028 case PIX_FMT_GBRP10LE:
1029 c->readLumPlanar = planar_rgb10le_to_y;
1031 case PIX_FMT_GBRP16LE:
1032 c->readLumPlanar = planar_rgb16le_to_y;
1034 case PIX_FMT_GBRP9BE:
1035 c->readLumPlanar = planar_rgb9be_to_y;
1037 case PIX_FMT_GBRP10BE:
1038 c->readLumPlanar = planar_rgb10be_to_y;
1040 case PIX_FMT_GBRP16BE:
1041 c->readLumPlanar = planar_rgb16be_to_y;
1044 c->readLumPlanar = planar_rgb_to_y;
/* As for chroma, both byte-order groups select the swapping reader; the
 * guard choosing between them is not visible here — TODO confirm. */
1047 case PIX_FMT_YUV444P9LE:
1048 case PIX_FMT_YUV422P9LE:
1049 case PIX_FMT_YUV420P9LE:
1050 case PIX_FMT_YUV444P10LE:
1051 case PIX_FMT_YUV422P10LE:
1052 case PIX_FMT_YUV420P10LE:
1053 case PIX_FMT_YUV420P16LE:
1054 case PIX_FMT_YUV422P16LE:
1055 case PIX_FMT_YUV444P16LE:
1056 case PIX_FMT_GRAY16LE:
1057 c->lumToYV12 = bswap16Y_c;
1060 case PIX_FMT_YUV444P9BE:
1061 case PIX_FMT_YUV422P9BE:
1062 case PIX_FMT_YUV420P9BE:
1063 case PIX_FMT_YUV444P10BE:
1064 case PIX_FMT_YUV422P10BE:
1065 case PIX_FMT_YUV420P10BE:
1066 case PIX_FMT_YUV420P16BE:
1067 case PIX_FMT_YUV422P16BE:
1068 case PIX_FMT_YUV444P16BE:
1069 case PIX_FMT_GRAY16BE:
1070 c->lumToYV12 = bswap16Y_c;
1073 case PIX_FMT_YUYV422:
1075 c->lumToYV12 = yuy2ToY_c;
1077 case PIX_FMT_UYVY422:
1078 c->lumToYV12 = uyvyToY_c;
1081 c->lumToYV12 = bgr24ToY_c;
1083 case PIX_FMT_BGR565LE:
1084 c->lumToYV12 = bgr16leToY_c;
1086 case PIX_FMT_BGR565BE:
1087 c->lumToYV12 = bgr16beToY_c;
1089 case PIX_FMT_BGR555LE:
1090 c->lumToYV12 = bgr15leToY_c;
1092 case PIX_FMT_BGR555BE:
1093 c->lumToYV12 = bgr15beToY_c;
1095 case PIX_FMT_BGR444LE:
1096 c->lumToYV12 = bgr12leToY_c;
1098 case PIX_FMT_BGR444BE:
1099 c->lumToYV12 = bgr12beToY_c;
1102 c->lumToYV12 = rgb24ToY_c;
1104 case PIX_FMT_RGB565LE:
1105 c->lumToYV12 = rgb16leToY_c;
1107 case PIX_FMT_RGB565BE:
1108 c->lumToYV12 = rgb16beToY_c;
1110 case PIX_FMT_RGB555LE:
1111 c->lumToYV12 = rgb15leToY_c;
1113 case PIX_FMT_RGB555BE:
1114 c->lumToYV12 = rgb15beToY_c;
1116 case PIX_FMT_RGB444LE:
1117 c->lumToYV12 = rgb12leToY_c;
1119 case PIX_FMT_RGB444BE:
1120 c->lumToYV12 = rgb12beToY_c;
1125 case PIX_FMT_BGR4_BYTE:
1126 case PIX_FMT_RGB4_BYTE:
1127 c->lumToYV12 = palToY_c;
1129 case PIX_FMT_MONOBLACK:
1130 c->lumToYV12 = monoblack2Y_c;
1132 case PIX_FMT_MONOWHITE:
1133 c->lumToYV12 = monowhite2Y_c;
1136 c->lumToYV12 = bgr32ToY_c;
1138 case PIX_FMT_RGB32_1:
1139 c->lumToYV12 = bgr321ToY_c;
1142 c->lumToYV12 = rgb32ToY_c;
1144 case PIX_FMT_BGR32_1:
1145 c->lumToYV12 = rgb321ToY_c;
1147 case PIX_FMT_RGB48BE:
1148 c->lumToYV12 = rgb48BEToY_c;
1150 case PIX_FMT_RGB48LE:
1151 c->lumToYV12 = rgb48LEToY_c;
1153 case PIX_FMT_BGR48BE:
1154 c->lumToYV12 = bgr48BEToY_c;
1156 case PIX_FMT_BGR48LE:
1157 c->lumToYV12 = bgr48LEToY_c;
1159 case PIX_FMT_RGBA64BE:
1160 c->lumToYV12 = rgb64BEToY_c;
1162 case PIX_FMT_RGBA64LE:
1163 c->lumToYV12 = rgb64LEToY_c;
/* --- alpha readers; any condition guarding this switch is not visible. --- */
1167 switch (srcFormat) {
1168 case PIX_FMT_RGBA64LE:
1169 case PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break;
1172 c->alpToYV12 = rgbaToA_c;
1176 c->alpToYV12 = abgrToA_c;
/* uyvyToY_c (odd bytes of each byte pair) doubles as an alpha reader for a
 * format whose case label is not visible here. */
1179 c->alpToYV12 = uyvyToY_c;
1182 c->alpToYV12 = palToA_c;