2 * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/avutil.h"
28 #include "libavutil/bswap.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
36 #include "swscale_internal.h"
/* Fixed-point RGB -> YUV coefficients. Each constant is a floating-point
 * weight scaled by (1 << RGB2YUV_SHIFT) and rounded by adding 0.5 before the
 * cast to int; the Y weights carry a 219/255 factor, the U/V weights 224/255.
 * The weights written with a leading minus sign evaluate to negative ints. */
38 #define RGB2YUV_SHIFT 15
39 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
40 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
41 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
42 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
43 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
44 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
45 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
46 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
47 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/* Reads the 16-bit value at pos with AV_RB16 when isBE(origin) is true and
 * with AV_RL16 otherwise. Expects a variable named origin to be in scope
 * wherever the macro is expanded. */
49 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* r and b select between the locals r_b and b_r of the expanding function:
 * for the two BGR48 origins r is b_r and b is r_b; for any other origin
 * r is r_b and b is b_r. */
51 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
52 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Luma reader for input with four 16-bit components per pixel.
 * For each of the width pixels it reads components 0, 1 and 2 (component 3
 * is not read in the lines shown), weights them with RY/GY/BY and stores one
 * 16-bit result in dst[i]. Which of r_b / b_r counts as red or blue is
 * decided by the r and b macros from origin. */
54 static av_always_inline void
55 rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
56 enum PixelFormat origin)
59 for (i = 0; i < width; i++) {
60 unsigned int r_b = input_pixel(&src[i*4+0]);
61 unsigned int g = input_pixel(&src[i*4+1]);
62 unsigned int b_r = input_pixel(&src[i*4+2]);
/* 0x2001 << (SHIFT-1) equals (0x1000 << SHIFT) plus half a unit of the
 * final shift: a constant offset of 0x1000 with round-to-nearest. */
64 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma reader for input with four 16-bit components per pixel.
 * Produces one U and one V value per input pixel from components 0..2 of
 * src1. src2 is not read in the lines shown. */
68 static av_always_inline void
69 rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
70 const uint16_t *src1, const uint16_t *src2,
71 int width, enum PixelFormat origin)
75 for (i = 0; i < width; i++) {
76 int r_b = input_pixel(&src1[i*4+0]);
77 int g = input_pixel(&src1[i*4+1]);
78 int b_r = input_pixel(&src1[i*4+2]);
/* 0x10001 << (SHIFT-1) equals (0x8000 << SHIFT) plus half a unit of the
 * final shift: a constant offset of 0x8000 with round-to-nearest. */
80 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
81 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Horizontally halved chroma reader for input with four 16-bit components
 * per pixel. Each output sample i is computed from input pixels 2*i and
 * 2*i+1 (element offsets 8*i and 8*i+4): every component is the rounded
 * average (a + b + 1) >> 1 of the two pixels. src2 is not read in the lines
 * shown. */
85 static av_always_inline void
86 rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
87 const uint16_t *src1, const uint16_t *src2,
88 int width, enum PixelFormat origin)
92 for (i = 0; i < width; i++) {
93 int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
94 int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
95 int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
97 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
98 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* rgb64funcs(pattern, BE_LE, origin) generates three static functions named
 * <pattern>64<BE_LE>ToY_c, <pattern>64<BE_LE>ToUV_c and
 * <pattern>64<BE_LE>ToUV_half_c. Each one casts its byte pointers to
 * uint16_t pointers and forwards to the matching rgb64*_c_template with the
 * given origin. It is instantiated below for rgb/LE and rgb/BE. */
102 #define rgb64funcs(pattern, BE_LE, origin) \
103 static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
104 int width, uint32_t *unused) \
106 const uint16_t *src = (const uint16_t *) _src; \
107 uint16_t *dst = (uint16_t *) _dst; \
108 rgb64ToY_c_template(dst, src, width, origin); \
111 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
112 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
113 int width, uint32_t *unused) \
115 const uint16_t *src1 = (const uint16_t *) _src1, \
116 *src2 = (const uint16_t *) _src2; \
117 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
118 rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
121 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
122 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
123 int width, uint32_t *unused) \
125 const uint16_t *src1 = (const uint16_t *) _src1, \
126 *src2 = (const uint16_t *) _src2; \
127 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
128 rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
131 rgb64funcs(rgb, LE, PIX_FMT_RGBA64LE)
132 rgb64funcs(rgb, BE, PIX_FMT_RGBA64BE)
/* Luma reader for input with three 16-bit components per pixel.
 * For each of the width pixels it reads src[i*3 + 0..2], weights the
 * components with RY/GY/BY and stores one 16-bit result in dst[i]. The r and
 * b macros pick red/blue from r_b / b_r according to origin. */
134 static av_always_inline void
135 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
136 enum PixelFormat origin)
139 for (i = 0; i < width; i++) {
140 unsigned int r_b = input_pixel(&src[i*3+0]);
141 unsigned int g = input_pixel(&src[i*3+1]);
142 unsigned int b_r = input_pixel(&src[i*3+2]);
/* Constant offset of 0x1000 plus half a unit for round-to-nearest. */
144 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Chroma reader for input with three 16-bit components per pixel.
 * Produces one U and one V value per input pixel from src1[i*3 + 0..2].
 * src2 is not read in the lines shown. */
148 static av_always_inline void
149 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
150 const uint16_t *src1, const uint16_t *src2,
151 int width, enum PixelFormat origin)
155 for (i = 0; i < width; i++) {
156 int r_b = input_pixel(&src1[i*3+0]);
157 int g = input_pixel(&src1[i*3+1]);
158 int b_r = input_pixel(&src1[i*3+2]);
/* Constant offset of 0x8000 plus half a unit for round-to-nearest. */
160 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
161 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Horizontally halved chroma reader for input with three 16-bit components
 * per pixel. Output sample i is computed from input pixels 2*i and 2*i+1
 * (element offsets 6*i and 6*i+3); every component is the rounded average
 * (a + b + 1) >> 1 of the two pixels. src2 is not read in the lines shown. */
165 static av_always_inline void
166 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
167 const uint16_t *src1, const uint16_t *src2,
168 int width, enum PixelFormat origin)
172 for (i = 0; i < width; i++) {
173 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
174 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
175 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
177 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
178 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* rgb48funcs(pattern, BE_LE, origin) generates three static functions named
 * <pattern>48<BE_LE>ToY_c, <pattern>48<BE_LE>ToUV_c and
 * <pattern>48<BE_LE>ToUV_half_c. Each one casts its byte pointers to
 * uint16_t pointers and forwards to the matching rgb48*_c_template with the
 * given origin. It is instantiated below for rgb and bgr, each in LE and BE. */
186 #define rgb48funcs(pattern, BE_LE, origin) \
187 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
188 int width, uint32_t *unused) \
190 const uint16_t *src = (const uint16_t *) _src; \
191 uint16_t *dst = (uint16_t *) _dst; \
192 rgb48ToY_c_template(dst, src, width, origin); \
195 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
196 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
197 int width, uint32_t *unused) \
199 const uint16_t *src1 = (const uint16_t *) _src1, \
200 *src2 = (const uint16_t *) _src2; \
201 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
202 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
205 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
206 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
207 int width, uint32_t *unused) \
209 const uint16_t *src1 = (const uint16_t *) _src1, \
210 *src2 = (const uint16_t *) _src2; \
211 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
212 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
215 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
216 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
217 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
218 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/* Index-based pixel fetch used by the rgb16_32 templates. For the RGBA,
 * BGRA, ARGB and ABGR origins it reads a 32-bit value with AV_RN32A from
 * src[i*4]; for every other origin it reads a 16-bit value from src[i*2],
 * big-endian when isBE(origin) and little-endian otherwise. Expects src and
 * origin to be in scope at the point of expansion. */
220 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
221 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
222 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic luma reader for packed 16- and 32-bit RGB layouts.
 *
 * Each pixel is fetched with input_pixel(i), shifted right by shp, and split
 * into r/g/b by masking with maskr/maskg/maskb and shifting right by
 * shr/shg/shb. The Y weights are pre-shifted left by rsh/gsh/bsh, and the
 * weighted sum plus rnd is shifted right by S-6 before being stored in
 * dst[i]. */
224 static av_always_inline void
225 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
226 int width, enum PixelFormat origin,
227 int shr, int shg, int shb, int shp,
228 int maskr, int maskg, int maskb,
229 int rsh, int gsh, int bsh, int S)
231 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
/* Constant offset (32 << (S-1)) plus half a unit of the final >> (S-6). */
232 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
235 for (i = 0; i < width; i++) {
236 int px = input_pixel(i) >> shp;
237 int b = (px & maskb) >> shb;
238 int g = (px & maskg) >> shg;
239 int r = (px & maskr) >> shr;
241 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Generic full-resolution chroma reader for packed 16- and 32-bit RGB
 * layouts.
 *
 * Pixels are fetched and split into r/g/b exactly as in the luma template
 * (input_pixel(i) >> shp, then mask and shift). The U and V weights are
 * pre-shifted left by rsh/gsh/bsh; each weighted sum plus rnd is shifted
 * right by S-6 and stored in dstU[i] / dstV[i]. */
245 static av_always_inline void
246 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
247 const uint8_t *src, int width,
248 enum PixelFormat origin,
249 int shr, int shg, int shb, int shp,
250 int maskr, int maskg, int maskb,
251 int rsh, int gsh, int bsh, int S)
253 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
254 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
/* Constant offset (256 << (S-1)) plus half a unit of the final >> (S-6). */
255 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
258 for (i = 0; i < width; i++) {
259 int px = input_pixel(i) >> shp;
260 int b = (px & maskb) >> shb;
261 int g = (px & maskg) >> shg;
262 int r = (px & maskr) >> shr;
264 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
265 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* Generic horizontally halved chroma reader for packed 16- and 32-bit RGB
 * layouts.
 *
 * Output sample i is computed from input pixels 2*i and 2*i+1. The two
 * packed pixels are added as whole words: the bits outside maskr|maskb are
 * summed separately into g, and rb = px0 + px1 - g holds the summed red and
 * blue fields. The final right shift is one larger than in the
 * full-resolution template ((S)-6+1), which compensates for the components
 * being sums of two pixels. */
269 static av_always_inline void
270 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
271 const uint8_t *src, int width,
272 enum PixelFormat origin,
273 int shr, int shg, int shb, int shp,
274 int maskr, int maskg, int maskb,
275 int rsh, int gsh, int bsh, int S)
277 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
278 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
279 maskgx = ~(maskr | maskb);
280 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
/* Widen each mask by one bit: a sum of two fields needs one extra bit. */
283 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
284 for (i = 0; i < width; i++) {
285 int px0 = input_pixel(2 * i + 0) >> shp;
286 int px1 = input_pixel(2 * i + 1) >> shp;
287 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
288 int rb = px0 + px1 - g;
290 b = (rb & maskb) >> shb;
/* The bodies of this conditional are only partly visible in this excerpt;
 * the masked extraction of g below is one of its branches. */
291 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
292 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
295 g = (g & maskg) >> shg;
297 r = (rb & maskr) >> shr;
299 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
300 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* rgb16_32_wrapper(fmt, name, ...) generates three static functions,
 * <name>ToY_c, <name>ToUV_c and <name>ToUV_half_c, that forward to the
 * rgb16_32*_c_template functions with fmt as origin and the given
 * shift/mask/scale constants. The destination byte pointers are cast to
 * int16_t pointers.
 *
 * The instantiations below cover the 32-bit formats (with and without an
 * 8-bit shp pre-shift) and the 565/555/444 16-bit formats in LE and BE; the
 * LE and BE rows of a given 16-bit format pass identical constants and
 * differ only in fmt. */
306 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
307 maskg, maskb, rsh, gsh, bsh, S) \
308 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
309 int width, uint32_t *unused) \
311 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
312 shr, shg, shb, shp, \
313 maskr, maskg, maskb, rsh, gsh, bsh, S); \
316 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
317 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
318 int width, uint32_t *unused) \
320 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
321 shr, shg, shb, shp, \
322 maskr, maskg, maskb, rsh, gsh, bsh, S); \
325 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
326 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
327 int width, uint32_t *unused) \
329 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
330 shr, shg, shb, shp, \
331 maskr, maskg, maskb, rsh, gsh, bsh, S); \
334 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
335 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
336 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
337 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
338 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
339 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
340 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
341 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
342 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
343 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
344 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
345 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
346 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
347 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
348 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
349 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
/* Horizontally halved chroma reader for three separate 8-bit planes
 * (gsrc, bsrc, rsrc). Output sample i uses the sum of samples 2*i and 2*i+1
 * of each plane; the extra "+1" in the final shift divides that factor of
 * two back out. origin is not used in the lines shown.
 * NOTE(review): the parameter list differs from the other chroma readers in
 * this file (uint16_t destinations, three source planes, enum PixelFormat as
 * last argument) -- confirm against how it is installed as chrToYV12. */
351 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
352 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
353 int width, enum PixelFormat origin)
356 for (i=0; i<width; i++) {
357 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
358 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
359 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
361 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
362 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
/* Alpha reader installed as alpToYV12 for PIX_FMT_RGBA64LE/BE. It iterates
 * over width output samples; the loop body is not visible in this excerpt. */
366 static void rgba64ToA_c(int16_t *dst, const uint16_t *src, const uint8_t *unused1,
367 const uint8_t *unused2, int width, uint32_t *unused)
370 for (i=0; i<width; i++) {
/* Alpha reader installed as alpToYV12 for PIX_FMT_ARGB. It iterates over
 * width output samples; the loop body is not visible in this excerpt. */
375 static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
378 for (i=0; i<width; i++) {
/* Alpha reader for packed 4-byte pixels whose fourth byte is alpha: for each
 * of the width pixels, dst[i] is byte 3 of the pixel shifted left by 6. */
383 static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
386 for (i=0; i<width; i++) {
387 dst[i]= src[4*i+3]<<6;
/* Alpha reader for palettized input: dst[i] is the top byte (bits 24..31) of
 * the palette entry pal[d], shifted left by 6. The line that defines d is not
 * visible in this excerpt (presumably the source byte for pixel i --
 * confirm). */
391 static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
394 for (i=0; i<width; i++) {
397 dst[i]= (pal[d] >> 24)<<6;
/**
 * Luma reader for palettized input.
 *
 * Every source byte is used as an index into pal; the low 8 bits of the
 * selected entry are the luma value, which is stored shifted left by 6.
 *
 * @param dst     output, width samples
 * @param src     input, one palette index per pixel
 * @param unused1 ignored
 * @param unused2 ignored
 * @param width   number of pixels to convert; declared int like every other
 *                reader in this file (it was long, which did not match the
 *                int the function is called with through lumToYV12)
 * @param pal     palette, indexed by the source byte
 */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;
    for (i = 0; i < width; i++) {
        int d = src[i];

        dst[i] = (pal[d] & 0xFF) << 6;
    }
}
/* Chroma reader for palettized input: U is bits 8..15 and V is bits 16..23
 * of p, each truncated to 8 bits and shifted left by 6. The line that
 * defines p is not visible in this excerpt (presumably the palette entry for
 * pixel i -- confirm). src1 and src2 must be the same pointer.
 * NOTE(review): dstU is declared uint16_t * while dstV is int16_t *; the
 * other readers in this file use the same type for both -- confirm whether
 * the mismatch is intentional. */
411 static void palToUV_c(uint16_t *dstU, int16_t *dstV,
412 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
413 int width, uint32_t *pal)
416 assert(src1 == src2);
417 for (i=0; i<width; i++) {
420 dstU[i]= (uint8_t)(p>> 8)<<6;
421 dstV[i]= (uint8_t)(p>>16)<<6;
/* Luma reader for 1-bit-per-pixel input: each bit of d, taken MSB first via
 * d >> (7 - j), becomes either 0 or 16383 in dst. The lines that define d
 * and the inner loop of the main loop are not visible in this excerpt.
 * NOTE(review): width is overwritten with (width + 7) >> 3 before the main
 * loop, so the bound (width & 7) of the trailing loop is computed from that
 * rounded-up count, not from the pixel width that was passed in -- confirm
 * this is intended. */
425 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
428 width = (width + 7) >> 3;
429 for (i = 0; i < width; i++) {
432 dst[8*i+j]= ((d>>(7-j))&1)*16383;
436 for(j=0; j<(width&7); j++)
437 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Luma reader for 1-bit-per-pixel input: each bit of d, taken MSB first via
 * d >> (7 - j), becomes either 0 or 16383 in dst. The lines that define d
 * and the inner loop of the main loop are not visible in this excerpt.
 * NOTE(review): width is overwritten with (width + 7) >> 3 before the main
 * loop, so the bound (width & 7) of the trailing loop is computed from that
 * rounded-up count, not from the pixel width that was passed in -- confirm
 * this is intended. */
441 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
444 width = (width + 7) >> 3;
445 for (i = 0; i < width; i++) {
448 dst[8*i+j]= ((d>>(7-j))&1)*16383;
452 for(j=0; j<(width&7); j++)
453 dst[8*i+j]= ((d>>(7-j))&1)*16383;
/* Luma reader installed as lumToYV12 for PIX_FMT_YUYV422 and PIX_FMT_Y400A.
 * It iterates over width output samples; the end of the parameter list and
 * the loop body are not visible in this excerpt. */
457 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
461 for (i=0; i<width; i++)
/**
 * Chroma reader for packed input with four bytes per chroma pair, where U is
 * byte 1 and V is byte 3 of each group.
 *
 * @param dstU    output, width U samples
 * @param dstV    output, width V samples
 * @param unused0 ignored
 * @param src1    packed input; 4 * width bytes are read
 * @param src2    must be the same pointer as src1
 * @param width   number of chroma samples to produce
 * @param unused  ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    /* Verify the precondition before any output is written (it used to be
     * checked only after the loop had already run). */
    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        dstU[i] = src1[4*i + 1];
        dstV[i] = src1[4*i + 3];
    }
}
/* Luma reader that copies width 16-bit samples from _src to _dst, passing
 * each one through av_bswap16. Both byte pointers are reinterpreted as
 * uint16_t pointers. */
476 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
479 const uint16_t *src = (const uint16_t *) _src;
480 uint16_t *dst = (uint16_t *) _dst;
481 for (i=0; i<width; i++) {
482 dst[i] = av_bswap16(src[i]);
/* Chroma reader that copies width 16-bit samples from each of two separate
 * source planes (_src1 -> _dstU, _src2 -> _dstV), passing each sample
 * through av_bswap16. All byte pointers are reinterpreted as uint16_t
 * pointers. */
486 static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
487 const uint8_t *_src2, int width, uint32_t *unused)
490 const uint16_t *src1 = (const uint16_t *) _src1,
491 *src2 = (const uint16_t *) _src2;
492 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
493 for (i=0; i<width; i++) {
494 dstU[i] = av_bswap16(src1[i]);
495 dstV[i] = av_bswap16(src2[i]);
499 /* This is almost identical to the previous, and exists only because
500 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* Luma reader installed as lumToYV12 for PIX_FMT_UYVY422 (and as alpToYV12
 * for PIX_FMT_Y400A). It iterates over width output samples; the end of the
 * parameter list and the loop body are not visible in this excerpt. */
501 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
505 for (i=0; i<width; i++)
/**
 * Chroma reader for packed input with four bytes per chroma pair, where U is
 * byte 0 and V is byte 2 of each group.
 *
 * @param dstU    output, width U samples
 * @param dstV    output, width V samples
 * @param unused0 ignored
 * @param src1    packed input; 4 * width bytes are read
 * @param src2    must be the same pointer as src1
 * @param width   number of chroma samples to produce
 * @param unused  ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    /* Verify the precondition before any output is written (it used to be
     * checked only after the loop had already run). */
    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        dstU[i] = src1[4*i + 0];
        dstV[i] = src1[4*i + 2];
    }
}
/* De-interleaves width byte pairs from src: the first byte of each pair goes
 * to dst1[i], the second to dst2[i]. */
520 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
521 const uint8_t *src, int width)
524 for (i = 0; i < width; i++) {
525 dst1[i] = src[2*i+0];
526 dst2[i] = src[2*i+1];
/* Chroma reader that de-interleaves src1 with nvXXtoUV_c: the first byte of
 * each pair goes to dstU, the second to dstV. src2 is not used in the lines
 * shown. */
530 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
531 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
532 int width, uint32_t *unused)
534 nvXXtoUV_c(dstU, dstV, src1, width);
/* Chroma reader that de-interleaves src1 with nvXXtoUV_c with the
 * destinations swapped: the first byte of each pair goes to dstV, the second
 * to dstU. src2 is not used in the lines shown. */
537 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
538 const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
539 int width, uint32_t *unused)
541 nvXXtoUV_c(dstV, dstU, src1, width);
/* Pointer-based 16-bit fetch, with the same text as the definition used by
 * the rgb48/rgb64 templates earlier in the file: AV_RB16 when isBE(origin),
 * AV_RL16 otherwise. */
544 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Luma reader installed as lumToYV12 for PIX_FMT_BGR24. The lines that load
 * r, g and b are not visible in this excerpt. The weighted sum gets a
 * constant offset (32 << (SHIFT-1)) and a half-unit rounding term before the
 * final >> (SHIFT-6). */
546 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
547 int width, uint32_t *unused)
550 for (i=0; i<width; i++) {
555 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
559 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
560 const uint8_t *src2, int width, uint32_t *unused)
563 for (i=0; i<width; i++) {
564 int b= src1[3*i + 0];
565 int g= src1[3*i + 1];
566 int r= src1[3*i + 2];
568 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
569 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
571 assert(src1 == src2);
574 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
575 const uint8_t *src2, int width, uint32_t *unused)
578 for (i=0; i<width; i++) {
579 int b= src1[6*i + 0] + src1[6*i + 3];
580 int g= src1[6*i + 1] + src1[6*i + 4];
581 int r= src1[6*i + 2] + src1[6*i + 5];
583 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
584 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
586 assert(src1 == src2);
/* Luma reader installed as lumToYV12 for PIX_FMT_RGB24. The end of the
 * parameter list and the lines that load r, g and b are not visible in this
 * excerpt. The weighted sum gets a constant offset (32 << (SHIFT-1)) and a
 * half-unit rounding term before the final >> (SHIFT-6). */
589 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
593 for (i=0; i<width; i++) {
598 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
/* Full-resolution chroma reader for packed 3-byte pixels stored in
 * red, green, blue byte order. Each output is the weighted component sum
 * plus a constant offset (256 << (SHIFT-1)) and a half-unit rounding term,
 * shifted right by SHIFT-6. src2 is not read in the lines shown. */
602 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
603 const uint8_t *src2, int width, uint32_t *unused)
607 for (i=0; i<width; i++) {
608 int r= src1[3*i + 0];
609 int g= src1[3*i + 1];
610 int b= src1[3*i + 2];
612 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
613 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
/* Horizontally halved chroma reader for packed 3-byte pixels stored in
 * red, green, blue byte order. Output sample i uses the per-component sum of
 * input pixels 2*i and 2*i+1; the final shift (SHIFT-5) is one larger than
 * in the full-resolution reader, which divides the factor of two back out.
 * src2 is not read in the lines shown. */
617 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
618 const uint8_t *src2, int width, uint32_t *unused)
622 for (i=0; i<width; i++) {
623 int r= src1[6*i + 0] + src1[6*i + 3];
624 int g= src1[6*i + 1] + src1[6*i + 4];
625 int b= src1[6*i + 2] + src1[6*i + 5];
627 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
628 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Planar luma reader installed as readLumPlanar for PIX_FMT_GBRP. The lines
 * that load r, g and b from the src planes are not visible in this excerpt.
 * 0x801 << (SHIFT-7) is a constant offset plus a half-unit rounding term for
 * the final >> (SHIFT-6). */
632 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
635 for (i = 0; i < width; i++) {
640 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar luma reader for 16-bit little-endian samples. Plane 0 is read as
 * green, plane 1 as blue and plane 2 as red, each with AV_RL16. The weighted
 * sum plus (33 << (SHIFT-1)) is shifted right by SHIFT and stored as a
 * 16-bit value. */
644 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
647 const uint16_t **src = (const uint16_t **) _src;
648 uint16_t *dst = (uint16_t *) _dst;
649 for (i = 0; i < width; i++) {
650 int g = AV_RL16(src[0] + i);
651 int b = AV_RL16(src[1] + i);
652 int r = AV_RL16(src[2] + i);
654 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar luma reader for 16-bit big-endian samples. Plane 0 is read as
 * green, plane 1 as blue and plane 2 as red, each with AV_RB16. The weighted
 * sum plus (33 << (SHIFT-1)) is shifted right by SHIFT and stored as a
 * 16-bit value. */
658 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
661 const uint16_t **src = (const uint16_t **) _src;
662 uint16_t *dst = (uint16_t *) _dst;
663 for (i = 0; i < width; i++) {
664 int g = AV_RB16(src[0] + i);
665 int b = AV_RB16(src[1] + i);
666 int r = AV_RB16(src[2] + i);
668 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
/* Planar chroma reader installed as readChrPlanar for PIX_FMT_GBRP. The
 * lines that load r, g and b from the src planes are not visible in this
 * excerpt. 0x4001 << (SHIFT-7) is a constant offset plus a half-unit
 * rounding term for the final >> (SHIFT-6). */
672 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
675 for (i = 0; i < width; i++) {
680 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
681 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
/* Planar chroma reader for 16-bit little-endian samples. Plane 0 is read as
 * green, plane 1 as blue and plane 2 as red, each with AV_RL16. Each
 * weighted sum plus (257 << SHIFT) is shifted right by SHIFT+1 and stored as
 * a 16-bit value. */
685 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
688 const uint16_t **src = (const uint16_t **) _src;
689 uint16_t *dstU = (uint16_t *) _dstU;
690 uint16_t *dstV = (uint16_t *) _dstV;
691 for (i = 0; i < width; i++) {
692 int g = AV_RL16(src[0] + i);
693 int b = AV_RL16(src[1] + i);
694 int r = AV_RL16(src[2] + i);
696 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
697 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Planar chroma reader for 16-bit big-endian samples. Plane 0 is read as
 * green, plane 1 as blue and plane 2 as red, each with AV_RB16. Each
 * weighted sum plus (257 << SHIFT) is shifted right by SHIFT+1 and stored as
 * a 16-bit value. */
701 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
704 const uint16_t **src = (const uint16_t **) _src;
705 uint16_t *dstU = (uint16_t *) _dstU;
706 uint16_t *dstV = (uint16_t *) _dstV;
707 for (i = 0; i < width; i++) {
708 int g = AV_RB16(src[0] + i);
709 int b = AV_RB16(src[1] + i);
710 int r = AV_RB16(src[2] + i);
712 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
713 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
/* Installs the input reader callbacks on the scaler context c according to
 * c->srcFormat. The visible assignments target c->chrToYV12,
 * c->readChrPlanar, c->lumToYV12, c->readLumPlanar and c->alpToYV12.
 * The switch headers, any preprocessor conditionals and the closing lines of
 * this function are not visible in this excerpt. */
717 av_cold void ff_sws_init_input_funcs(SwsContext *c)
719 enum PixelFormat srcFormat = c->srcFormat;
/* Chroma readers that do not depend on horizontal chroma subsampling. */
723 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
724 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
725 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
726 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
730 case PIX_FMT_BGR4_BYTE:
731 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
732 case PIX_FMT_GBRP9LE:
733 case PIX_FMT_GBRP10LE:
734 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
735 case PIX_FMT_GBRP9BE:
736 case PIX_FMT_GBRP10BE:
737 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
738 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
/* NOTE(review): both the LE and the BE groups below install bswap16UV_c;
 * presumably only one group is compiled in, selected by a conditional on
 * lines not shown here -- confirm. */
740 case PIX_FMT_YUV444P9LE:
741 case PIX_FMT_YUV422P9LE:
742 case PIX_FMT_YUV420P9LE:
743 case PIX_FMT_YUV422P10LE:
744 case PIX_FMT_YUV444P10LE:
745 case PIX_FMT_YUV420P10LE:
746 case PIX_FMT_YUV420P16LE:
747 case PIX_FMT_YUV422P16LE:
748 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
750 case PIX_FMT_YUV444P9BE:
751 case PIX_FMT_YUV422P9BE:
752 case PIX_FMT_YUV420P9BE:
753 case PIX_FMT_YUV444P10BE:
754 case PIX_FMT_YUV422P10BE:
755 case PIX_FMT_YUV420P10BE:
756 case PIX_FMT_YUV420P16BE:
757 case PIX_FMT_YUV422P16BE:
758 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* With horizontal chroma subsampling the *_half_c readers are installed.
 * Note the naming crossover for the 32-bit formats: PIX_FMT_RGB32 uses the
 * bgr32 functions and PIX_FMT_BGR32 the rgb32 functions, here and in the
 * tables below.
 * NOTE(review): gbr24pToUV_half_c is declared with a different parameter
 * list than the other functions assigned to chrToYV12 -- confirm. */
761 if (c->chrSrcHSubSample) {
763 case PIX_FMT_RGBA64BE: c->chrToYV12 = rgb64BEToUV_half_c; break;
764 case PIX_FMT_RGBA64LE: c->chrToYV12 = rgb64LEToUV_half_c; break;
765 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
766 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
767 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
768 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
769 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
770 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
771 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
772 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
773 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
774 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
775 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
776 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
777 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
778 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
779 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
780 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
781 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
782 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
783 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
784 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
785 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
786 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
787 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
/* Full-resolution chroma readers. The line that opens this group is not
 * shown (presumably the else branch of the chrSrcHSubSample test --
 * confirm). */
791 case PIX_FMT_RGBA64BE: c->chrToYV12 = rgb64BEToUV_c; break;
792 case PIX_FMT_RGBA64LE: c->chrToYV12 = rgb64LEToUV_c; break;
793 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
794 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
795 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
796 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
797 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
798 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
799 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
800 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
801 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
802 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
803 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
804 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
805 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
806 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
807 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
808 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
809 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
810 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
811 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
812 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
813 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
814 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
/* Luma readers. */
821 case PIX_FMT_GBRP9LE:
822 case PIX_FMT_GBRP10LE:
823 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
824 case PIX_FMT_GBRP9BE:
825 case PIX_FMT_GBRP10BE:
826 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
827 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
829 case PIX_FMT_YUV444P9LE:
830 case PIX_FMT_YUV422P9LE:
831 case PIX_FMT_YUV420P9LE:
832 case PIX_FMT_YUV444P10LE:
833 case PIX_FMT_YUV422P10LE:
834 case PIX_FMT_YUV420P10LE:
835 case PIX_FMT_YUV420P16LE:
836 case PIX_FMT_YUV422P16LE:
837 case PIX_FMT_YUV444P16LE:
838 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
840 case PIX_FMT_YUV444P9BE:
841 case PIX_FMT_YUV422P9BE:
842 case PIX_FMT_YUV420P9BE:
843 case PIX_FMT_YUV444P10BE:
844 case PIX_FMT_YUV422P10BE:
845 case PIX_FMT_YUV420P10BE:
846 case PIX_FMT_YUV420P16BE:
847 case PIX_FMT_YUV422P16BE:
848 case PIX_FMT_YUV444P16BE:
849 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
851 case PIX_FMT_YUYV422 :
852 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
853 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
854 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
855 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
856 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
857 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
858 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
859 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
860 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
861 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
862 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
863 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
864 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
865 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
866 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
867 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
871 case PIX_FMT_BGR4_BYTE:
872 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
873 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
874 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
875 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
876 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
877 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
878 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
879 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
880 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
881 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
882 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
883 case PIX_FMT_RGBA64BE:c->lumToYV12 = rgb64BEToY_c; break;
884 case PIX_FMT_RGBA64LE:c->lumToYV12 = rgb64LEToY_c; break;
/* Alpha readers. Y400A reuses uyvyToY_c for its alpha channel. */
888 case PIX_FMT_RGBA64LE:
889 case PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break;
891 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
893 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
894 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
895 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;