2 * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/avutil.h"
28 #include "libavutil/bswap.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
36 #include "swscale_internal.h"
/* Number of fractional bits carried by the fixed-point coefficients below. */
#define RGB2YUV_SHIFT 15

/*
 * RGB -> YUV weights, scaled by 2^RGB2YUV_SHIFT and rounded to nearest.
 * Naming is <input channel><output component>: RY is the weight of red in Y.
 * Luma weights carry a 219/255 factor and chroma weights a 224/255 factor
 * (presumably the limited/studio range scaling -- confirm against callers).
 */
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
 * Read one 16-bit sample at pos, big- or little-endian according to the
 * `origin` pixel format visible in the expanding function.
 */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/*
 * The templates below load the first and third stored components into the
 * locals r_b and b_r. These macros pick which of the two is red and which is
 * blue: for the BGR48 origins the first component is blue, otherwise red.
 */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
54 static av_always_inline void
55 rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
56 enum PixelFormat origin)
59 for (i = 0; i < width; i++) {
60 unsigned int r_b = input_pixel(&src[i*4+0]);
61 unsigned int g = input_pixel(&src[i*4+1]);
62 unsigned int b_r = input_pixel(&src[i*4+2]);
64 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
68 static av_always_inline void
69 rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
70 const uint16_t *src1, const uint16_t *src2,
71 int width, enum PixelFormat origin)
75 for (i = 0; i < width; i++) {
76 int r_b = input_pixel(&src1[i*4+0]);
77 int g = input_pixel(&src1[i*4+1]);
78 int b_r = input_pixel(&src1[i*4+2]);
80 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
81 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
85 static av_always_inline void
86 rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
87 const uint16_t *src1, const uint16_t *src2,
88 int width, enum PixelFormat origin)
92 for (i = 0; i < width; i++) {
93 int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
94 int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
95 int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
97 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
98 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
102 #define rgb64funcs(pattern, BE_LE, origin) \
103 static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
104 int width, uint32_t *unused) \
106 const uint16_t *src = (const uint16_t *) _src; \
107 uint16_t *dst = (uint16_t *) _dst; \
108 rgb64ToY_c_template(dst, src, width, origin); \
111 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
112 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
113 int width, uint32_t *unused) \
115 const uint16_t *src1 = (const uint16_t *) _src1, \
116 *src2 = (const uint16_t *) _src2; \
117 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
118 rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
121 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
122 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
123 int width, uint32_t *unused) \
125 const uint16_t *src1 = (const uint16_t *) _src1, \
126 *src2 = (const uint16_t *) _src2; \
127 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
128 rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
131 rgb64funcs(rgb, LE, PIX_FMT_RGBA64LE)
132 rgb64funcs(rgb, BE, PIX_FMT_RGBA64BE)
134 static av_always_inline void
135 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
136 enum PixelFormat origin)
139 for (i = 0; i < width; i++) {
140 unsigned int r_b = input_pixel(&src[i*3+0]);
141 unsigned int g = input_pixel(&src[i*3+1]);
142 unsigned int b_r = input_pixel(&src[i*3+2]);
144 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
148 static av_always_inline void
149 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
150 const uint16_t *src1, const uint16_t *src2,
151 int width, enum PixelFormat origin)
155 for (i = 0; i < width; i++) {
156 int r_b = input_pixel(&src1[i*3+0]);
157 int g = input_pixel(&src1[i*3+1]);
158 int b_r = input_pixel(&src1[i*3+2]);
160 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
161 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
165 static av_always_inline void
166 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
167 const uint16_t *src1, const uint16_t *src2,
168 int width, enum PixelFormat origin)
172 for (i = 0; i < width; i++) {
173 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
174 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
175 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
177 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
178 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
186 #define rgb48funcs(pattern, BE_LE, origin) \
187 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
188 int width, uint32_t *unused) \
190 const uint16_t *src = (const uint16_t *) _src; \
191 uint16_t *dst = (uint16_t *) _dst; \
192 rgb48ToY_c_template(dst, src, width, origin); \
195 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
196 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
197 int width, uint32_t *unused) \
199 const uint16_t *src1 = (const uint16_t *) _src1, \
200 *src2 = (const uint16_t *) _src2; \
201 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
202 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
205 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
206 const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
207 int width, uint32_t *unused) \
209 const uint16_t *src1 = (const uint16_t *) _src1, \
210 *src2 = (const uint16_t *) _src2; \
211 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
212 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
215 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
216 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
217 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
218 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/*
 * Fetch packed pixel number i from the byte pointer `src` of the expanding
 * function: a native-endian 32-bit word for the four 8-bit-per-channel
 * RGBA/BGRA/ARGB/ABGR origins, otherwise a 16-bit word in the byte order of
 * `origin`.
 */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
224 static av_always_inline void
225 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
226 int width, enum PixelFormat origin,
227 int shr, int shg, int shb, int shp,
228 int maskr, int maskg, int maskb,
229 int rsh, int gsh, int bsh, int S)
231 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
232 const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
235 for (i = 0; i < width; i++) {
236 int px = input_pixel(i) >> shp;
237 int b = (px & maskb) >> shb;
238 int g = (px & maskg) >> shg;
239 int r = (px & maskr) >> shr;
241 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
245 static av_always_inline void
246 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
247 const uint8_t *src, int width,
248 enum PixelFormat origin,
249 int shr, int shg, int shb, int shp,
250 int maskr, int maskg, int maskb,
251 int rsh, int gsh, int bsh, int S)
253 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
254 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
255 const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
258 for (i = 0; i < width; i++) {
259 int px = input_pixel(i) >> shp;
260 int b = (px & maskb) >> shb;
261 int g = (px & maskg) >> shg;
262 int r = (px & maskr) >> shr;
264 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
265 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
269 static av_always_inline void
270 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
271 const uint8_t *src, int width,
272 enum PixelFormat origin,
273 int shr, int shg, int shb, int shp,
274 int maskr, int maskg, int maskb,
275 int rsh, int gsh, int bsh, int S)
277 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
278 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
279 maskgx = ~(maskr | maskb);
280 const unsigned rnd = (256U<<(S)) + (1<<(S-6));
283 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
284 for (i = 0; i < width; i++) {
285 int px0 = input_pixel(2 * i + 0) >> shp;
286 int px1 = input_pixel(2 * i + 1) >> shp;
287 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
288 int rb = px0 + px1 - g;
290 b = (rb & maskb) >> shb;
291 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
292 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
295 g = (g & maskg) >> shg;
297 r = (rb & maskr) >> shr;
299 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
300 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
306 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
307 maskg, maskb, rsh, gsh, bsh, S) \
308 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
309 int width, uint32_t *unused) \
311 rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
312 shr, shg, shb, shp, \
313 maskr, maskg, maskb, rsh, gsh, bsh, S); \
316 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
317 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
318 int width, uint32_t *unused) \
320 rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
321 shr, shg, shb, shp, \
322 maskr, maskg, maskb, rsh, gsh, bsh, S); \
325 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
326 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
327 int width, uint32_t *unused) \
329 rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
330 shr, shg, shb, shp, \
331 maskr, maskg, maskb, rsh, gsh, bsh, S); \
334 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
335 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8)
336 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
337 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
338 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
339 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
340 rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
341 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
342 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
343 rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
344 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
345 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
346 rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
347 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
348 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
349 rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
351 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
352 const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
353 int width, enum PixelFormat origin)
356 for (i=0; i<width; i++) {
357 unsigned int g = gsrc[2*i] + gsrc[2*i+1];
358 unsigned int b = bsrc[2*i] + bsrc[2*i+1];
359 unsigned int r = rsrc[2*i] + rsrc[2*i+1];
361 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
362 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
366 static void rgba64ToA_c(int16_t *dst, const uint16_t *src, const uint8_t *unused1,
367 const uint8_t *unused2, int width, uint32_t *unused)
370 for (i=0; i<width; i++) {
375 static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
378 for (i=0; i<width; i++) {
/**
 * Extract the alpha channel from packed 4-byte pixels whose alpha is the
 * fourth byte, scaling each 8-bit value up by 6 bits.
 *
 * @param dst     output row, one int16_t per pixel
 * @param src     input row, four bytes per pixel
 * @param unused1 not used
 * @param unused2 not used
 * @param width   number of pixels to convert
 * @param unused  not used
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        dst[i] = src[4 * i + 3] << 6;
    }
}
/**
 * Look up the alpha of palettised pixels: for each pixel, take bits 24..31 of
 * its palette entry and scale them up by 6 bits.
 *
 * @param dst     output row, one int16_t per pixel
 * @param src     input row of palette indices
 * @param unused1 not used
 * @param unused2 not used
 * @param width   number of pixels to convert
 * @param pal     palette of 32-bit entries, indexed by the source byte
 */
static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        /* NOTE(review): the index load was missing from the listing and is
         * inferred from the use of pal[d] -- confirm against upstream. */
        int d = src[i];

        dst[i] = (pal[d] >> 24) << 6;
    }
}
/**
 * Look up the luma of palettised pixels: for each pixel, take the low byte of
 * its palette entry and scale it up by 6 bits.
 *
 * width is an int, like in every other row reader in this file; a `long`
 * here would not match the function-pointer type these readers are installed
 * through.
 *
 * @param dst     output row, one int16_t per pixel
 * @param src     input row of palette indices
 * @param unused1 not used
 * @param unused2 not used
 * @param width   number of pixels to convert
 * @param pal     palette of 32-bit entries, indexed by the source byte
 */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        /* NOTE(review): the index load was missing from the listing and is
         * inferred from the use of pal[d] -- confirm against upstream. */
        int d = src[i];

        dst[i] = (pal[d] & 0xFF) << 6;
    }
}
/**
 * Look up the chroma of palettised pixels: U is bits 8..15 and V is bits
 * 16..23 of the palette entry, each scaled up by 6 bits.
 *
 * NOTE(review): dstU is uint16_t* while dstV is int16_t*; the sibling readers
 * use the same type for both. Left as in the original signature.
 *
 * @param dstU    output U row
 * @param dstV    output V row
 * @param unused0 not used
 * @param src1    input row of palette indices
 * @param src2    must equal src1 (asserted)
 * @param width   number of pixels to convert
 * @param pal     palette of 32-bit entries, indexed by the source byte
 */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        /* NOTE(review): the palette load was missing from the listing and is
         * inferred from the uses of p below -- confirm against upstream. */
        int p = pal[src1[i]];

        dstU[i] = (uint8_t)(p >>  8) << 6;
        dstV[i] = (uint8_t)(p >> 16) << 6;
    }
}
425 static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
428 for (i=0; i<width/8; i++) {
431 dst[8*i+j]= ((d>>(7-j))&1)*16383;
435 for(j=0; j<(width&7); j++)
436 dst[8*i+j]= ((d>>(7-j))&1)*16383;
440 static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
443 for (i=0; i<width/8; i++) {
446 dst[8*i+j]= ((d>>(7-j))&1)*16383;
450 for(j=0; j<(width&7); j++)
451 dst[8*i+j]= ((d>>(7-j))&1)*16383;
455 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
459 for (i=0; i<width; i++)
/**
 * De-interleave chroma from a packed row whose 4-byte groups hold U at byte 1
 * and V at byte 3.
 *
 * @param dstU    output U row, one byte per 4-byte group
 * @param dstV    output V row, one byte per 4-byte group
 * @param unused0 not used
 * @param src1    packed input row
 * @param src2    must equal src1 (asserted)
 * @param width   number of chroma samples to produce
 * @param unused  not used
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    /* Check the precondition before touching any data. */
    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        dstU[i] = src1[4 * i + 1];
        dstV[i] = src1[4 * i + 3];
    }
}
/**
 * Copy a row of 16-bit samples while swapping the two bytes of each sample.
 *
 * @param _dst    output row (treated as uint16_t)
 * @param _src    input row (treated as uint16_t)
 * @param unused1 not used
 * @param unused2 not used
 * @param width   number of 16-bit samples
 * @param unused  not used
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst       = (uint16_t *) _dst;

    for (i = 0; i < width; i++) {
        dst[i] = av_bswap16(src[i]);
    }
}
/**
 * Copy two rows of 16-bit chroma samples (U from _src1, V from _src2) while
 * swapping the two bytes of each sample.
 *
 * @param _dstU   output U row (treated as uint16_t)
 * @param _dstV   output V row (treated as uint16_t)
 * @param unused0 not used
 * @param _src1   input U row
 * @param _src2   input V row
 * @param width   number of 16-bit samples per row
 * @param unused  not used
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;

    for (i = 0; i < width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
    }
}
/* This is almost identical to yuy2ToY_c/yuy2ToUV_c and exists only because
 * calling yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
499 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
503 for (i=0; i<width; i++)
/**
 * De-interleave chroma from a packed row whose 4-byte groups hold U at byte 0
 * and V at byte 2.
 *
 * @param dstU    output U row, one byte per 4-byte group
 * @param dstV    output V row, one byte per 4-byte group
 * @param unused0 not used
 * @param src1    packed input row
 * @param src2    must equal src1 (asserted)
 * @param width   number of chroma samples to produce
 * @param unused  not used
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;

    /* Check the precondition before touching any data. */
    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        dstU[i] = src1[4 * i + 0];
        dstV[i] = src1[4 * i + 2];
    }
}
518 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
519 const uint8_t *src, int width)
522 for (i = 0; i < width; i++) {
523 dst1[i] = src[2*i+0];
524 dst2[i] = src[2*i+1];
/**
 * De-interleave a chroma row stored as U,V byte pairs: even bytes go to dstU,
 * odd bytes to dstV.
 *
 * @param dstU,dstV  output rows
 * @param unused0    not used
 * @param src1       interleaved input row
 * @param src2       not used
 * @param width      number of chroma samples per output row
 * @param unused     not used
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/**
 * De-interleave a chroma row stored as V,U byte pairs: even bytes go to dstV,
 * odd bytes to dstU (the destinations are swapped relative to nv12ToUV_c).
 *
 * @param dstU,dstV  output rows
 * @param unused0    not used
 * @param src1       interleaved input row
 * @param src2       not used
 * @param width      number of chroma samples per output row
 * @param unused     not used
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
/* Read one 16-bit sample at pos in the byte order of the expanding
 * function's `origin` pixel format. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
544 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
545 int width, uint32_t *unused)
548 for (i=0; i<width; i++) {
553 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
557 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
558 const uint8_t *src2, int width, uint32_t *unused)
561 for (i=0; i<width; i++) {
562 int b= src1[3*i + 0];
563 int g= src1[3*i + 1];
564 int r= src1[3*i + 2];
566 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
567 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
569 assert(src1 == src2);
572 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
573 const uint8_t *src2, int width, uint32_t *unused)
576 for (i=0; i<width; i++) {
577 int b= src1[6*i + 0] + src1[6*i + 3];
578 int g= src1[6*i + 1] + src1[6*i + 4];
579 int r= src1[6*i + 2] + src1[6*i + 5];
581 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
582 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
584 assert(src1 == src2);
587 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
591 for (i=0; i<width; i++) {
596 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
600 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
601 const uint8_t *src2, int width, uint32_t *unused)
605 for (i=0; i<width; i++) {
606 int r= src1[3*i + 0];
607 int g= src1[3*i + 1];
608 int b= src1[3*i + 2];
610 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
611 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
615 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
616 const uint8_t *src2, int width, uint32_t *unused)
620 for (i=0; i<width; i++) {
621 int r= src1[6*i + 0] + src1[6*i + 3];
622 int g= src1[6*i + 1] + src1[6*i + 4];
623 int b= src1[6*i + 2] + src1[6*i + 5];
625 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
626 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
630 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
633 for (i = 0; i < width; i++) {
638 dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
642 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
645 const uint16_t **src = (const uint16_t **) _src;
646 uint16_t *dst = (uint16_t *) _dst;
647 for (i = 0; i < width; i++) {
648 int g = AV_RL16(src[0] + i);
649 int b = AV_RL16(src[1] + i);
650 int r = AV_RL16(src[2] + i);
652 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
656 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
659 const uint16_t **src = (const uint16_t **) _src;
660 uint16_t *dst = (uint16_t *) _dst;
661 for (i = 0; i < width; i++) {
662 int g = AV_RB16(src[0] + i);
663 int b = AV_RB16(src[1] + i);
664 int r = AV_RB16(src[2] + i);
666 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
670 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
673 for (i = 0; i < width; i++) {
678 dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
679 dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
683 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
686 const uint16_t **src = (const uint16_t **) _src;
687 uint16_t *dstU = (uint16_t *) _dstU;
688 uint16_t *dstV = (uint16_t *) _dstV;
689 for (i = 0; i < width; i++) {
690 int g = AV_RL16(src[0] + i);
691 int b = AV_RL16(src[1] + i);
692 int r = AV_RL16(src[2] + i);
694 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
695 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
699 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
702 const uint16_t **src = (const uint16_t **) _src;
703 uint16_t *dstU = (uint16_t *) _dstU;
704 uint16_t *dstV = (uint16_t *) _dstV;
705 for (i = 0; i < width; i++) {
706 int g = AV_RB16(src[0] + i);
707 int b = AV_RB16(src[1] + i);
708 int r = AV_RB16(src[2] + i);
710 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
711 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
715 av_cold void ff_sws_init_input_funcs(SwsContext *c)
717 enum PixelFormat srcFormat = c->srcFormat;
721 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
722 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
723 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
724 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
728 case PIX_FMT_BGR4_BYTE:
729 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
730 case PIX_FMT_GBRP9LE:
731 case PIX_FMT_GBRP10LE:
732 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
733 case PIX_FMT_GBRP9BE:
734 case PIX_FMT_GBRP10BE:
735 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
736 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
738 case PIX_FMT_YUV444P9LE:
739 case PIX_FMT_YUV422P9LE:
740 case PIX_FMT_YUV420P9LE:
741 case PIX_FMT_YUV422P10LE:
742 case PIX_FMT_YUV444P10LE:
743 case PIX_FMT_YUV420P10LE:
744 case PIX_FMT_YUV420P16LE:
745 case PIX_FMT_YUV422P16LE:
746 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
748 case PIX_FMT_YUV444P9BE:
749 case PIX_FMT_YUV422P9BE:
750 case PIX_FMT_YUV420P9BE:
751 case PIX_FMT_YUV444P10BE:
752 case PIX_FMT_YUV422P10BE:
753 case PIX_FMT_YUV420P10BE:
754 case PIX_FMT_YUV420P16BE:
755 case PIX_FMT_YUV422P16BE:
756 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
759 if (c->chrSrcHSubSample) {
761 case PIX_FMT_RGBA64BE: c->chrToYV12 = rgb64BEToUV_half_c; break;
762 case PIX_FMT_RGBA64LE: c->chrToYV12 = rgb64LEToUV_half_c; break;
763 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
764 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
765 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
766 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
767 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
768 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
769 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
770 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
771 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
772 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
773 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
774 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
775 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
776 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
777 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
778 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
779 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
780 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
781 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
782 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
783 case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
784 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
785 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
789 case PIX_FMT_RGBA64BE: c->chrToYV12 = rgb64BEToUV_c; break;
790 case PIX_FMT_RGBA64LE: c->chrToYV12 = rgb64LEToUV_c; break;
791 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
792 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
793 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
794 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
795 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
796 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
797 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
798 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
799 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
800 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
801 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
802 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
803 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
804 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
805 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
806 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
807 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
808 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
809 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
810 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
811 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
812 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
819 case PIX_FMT_GBRP9LE:
820 case PIX_FMT_GBRP10LE:
821 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
822 case PIX_FMT_GBRP9BE:
823 case PIX_FMT_GBRP10BE:
824 case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
825 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
827 case PIX_FMT_YUV444P9LE:
828 case PIX_FMT_YUV422P9LE:
829 case PIX_FMT_YUV420P9LE:
830 case PIX_FMT_YUV444P10LE:
831 case PIX_FMT_YUV422P10LE:
832 case PIX_FMT_YUV420P10LE:
833 case PIX_FMT_YUV420P16LE:
834 case PIX_FMT_YUV422P16LE:
835 case PIX_FMT_YUV444P16LE:
836 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
838 case PIX_FMT_YUV444P9BE:
839 case PIX_FMT_YUV422P9BE:
840 case PIX_FMT_YUV420P9BE:
841 case PIX_FMT_YUV444P10BE:
842 case PIX_FMT_YUV422P10BE:
843 case PIX_FMT_YUV420P10BE:
844 case PIX_FMT_YUV420P16BE:
845 case PIX_FMT_YUV422P16BE:
846 case PIX_FMT_YUV444P16BE:
847 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
849 case PIX_FMT_YUYV422 :
850 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
851 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
852 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
853 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
854 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
855 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
856 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
857 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
858 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
859 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
860 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
861 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
862 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
863 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
864 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
865 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
869 case PIX_FMT_BGR4_BYTE:
870 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
871 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
872 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
873 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
874 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
875 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
876 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
877 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
878 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
879 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
880 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
881 case PIX_FMT_RGBA64BE:c->lumToYV12 = rgb64BEToY_c; break;
882 case PIX_FMT_RGBA64LE:c->lumToYV12 = rgb64LEToY_c; break;
886 case PIX_FMT_RGBA64LE:
887 case PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break;
889 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
891 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
892 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
893 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;