2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
22 const int16_t **lumSrc, int lumFilterSize,
23 const int16_t *chrFilter, const int16_t **chrUSrc,
24 const int16_t **chrVSrc,
25 int chrFilterSize, const int16_t **alpSrc,
26 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
27 uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
29 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
30 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
31 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
34 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
35 const int16_t **lumSrc, int lumFilterSize,
36 const int16_t *chrFilter, const int16_t **chrUSrc,
37 const int16_t **chrVSrc,
38 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
39 int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither)
41 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
42 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
43 dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither);
46 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
47 const int16_t *chrUSrc, const int16_t *chrVSrc,
48 const int16_t *alpSrc,
49 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
50 uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
54 for (i=0; i<dstW; i++) {
55 int val= (lumSrc[i]+lumDither[i&7])>>7;
56 dest[i]= av_clip_uint8(val);
60 for (i=0; i<chrDstW; i++) {
61 int u=(chrUSrc[i]+chrDither[i&7])>>7;
62 int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
63 uDest[i]= av_clip_uint8(u);
64 vDest[i]= av_clip_uint8(v);
67 if (CONFIG_SWSCALE_ALPHA && aDest)
68 for (i=0; i<dstW; i++) {
69 int val= (alpSrc[i]+lumDither[i&7])>>7;
70 aDest[i]= av_clip_uint8(val);
76 * vertical scale YV12 to RGB
78 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
79 const int16_t **lumSrc, int lumFilterSize,
80 const int16_t *chrFilter, const int16_t **chrUSrc,
81 const int16_t **chrVSrc,
82 int chrFilterSize, const int16_t **alpSrc,
83 uint8_t *dest, int dstW, int dstY)
85 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
86 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
87 alpSrc, dest, dstW, dstY);
/**
 * Vertical bilinear scaling of YV12 to a packed output: blends two
 * adjacent source lines per plane with fixed-point weights
 * yalpha/uvalpha (range 0..4095) and dispatches on the destination
 * format through the YSCALE_YUV_2_ANYRGB_C macro — left byte-identical
 * because the body is entirely macro-generated.
 */
91 * vertical bilinear scale YV12 to RGB
93 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
94 const uint16_t *buf1, const uint16_t *ubuf0,
95 const uint16_t *ubuf1, const uint16_t *vbuf0,
96 const uint16_t *vbuf1, const uint16_t *abuf0,
97 const uint16_t *abuf1, uint8_t *dest, int dstW,
98 int yalpha, int uvalpha, int y)
/* Complementary weights: a*w + b*(4095-w) keeps the sum scale fixed. */
100 int yalpha1=4095- yalpha;
101 int uvalpha1=4095-uvalpha;
104 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
/**
 * Convert YV12 to a packed output without vertical scaling or
 * interpolation (a single source line per plane). uvalpha < 2048
 * selects the no-chroma-blend path; otherwise the two chroma lines are
 * averaged (the *1B macro variants).
 * NOTE(review): the tail of the parameter list (original lines after
 * `dstFormat,` — presumably flags and y) is missing in this
 * extraction; confirm against the full source.
 */
108 * YV12 to RGB without scaling or interpolating
110 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
111 const uint16_t *ubuf0, const uint16_t *ubuf1,
112 const uint16_t *vbuf0, const uint16_t *vbuf1,
113 const uint16_t *abuf0, uint8_t *dest, int dstW,
114 int uvalpha, enum PixelFormat dstFormat,
/* buf1 aliases buf0: the bilinear macros expect two luma lines. */
120 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
121 const int yalpha= 4096; //FIXME ...
123 if (uvalpha < 2048) {
124 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
126 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
130 //FIXME yuy2* can read up to 7 samples too much
/* Extract the luma channel from packed YUYV (YUY2) input.
 * NOTE(review): the loop body is missing in this extraction —
 * presumably dst[i] = src[2*i]; confirm against the full source. */
132 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
136 for (i=0; i<width; i++)
/* Extract the U and V channels from packed YUYV (YUY2) input:
 * bytes are Y0 U Y1 V per pixel pair, so U lives at 4*i+1 and V at
 * 4*i+3. Only src1 is read; src1 and src2 must point to the same
 * line (asserted). The `unused' pointer is part of the reader
 * function-pointer signature and is ignored. */
static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, int width, uint32_t *unused)
{
    for (int i = 0; i < width; i++) {
        dstU[i] = src1[4 * i + 1];
        dstV[i] = src1[4 * i + 3];
    }
    assert(src1 == src2);
}
/* Read the high byte of little-endian 16-bit chroma samples: U from
 * src1, V from src2, each at offset 2*i+1.
 * FIXME (kept from original): for YUV444/422 sources h is not
 * subsampled, so every second pixel would need to be skipped here.
 * The same applies to BEToUV. */
static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                            const uint8_t *src2, int width, uint32_t *unused)
{
    for (int i = 0; i < width; i++) {
        dstU[i] = src1[2 * i + 1];
        dstV[i] = src2[2 * i + 1];
    }
}
163 /* This is almost identical to the previous, end exists only because
164 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract the luma channel from packed UYVY input (Y at odd offsets).
 * NOTE(review): the loop body is missing in this extraction —
 * presumably dst[i] = src[2*i+1]; confirm against the full source. */
165 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
169 for (i=0; i<width; i++)
/* Extract the U and V channels from packed UYVY input: bytes are
 * U Y0 V Y1 per pixel pair, so U lives at 4*i and V at 4*i+2.
 * Only src1 is read; src1 and src2 must point to the same line
 * (asserted). */
static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, int width, uint32_t *unused)
{
    for (int i = 0; i < width; i++) {
        dstU[i] = src1[4 * i];
        dstV[i] = src1[4 * i + 2];
    }
    assert(src1 == src2);
}
/* Read the high byte of big-endian 16-bit chroma samples (U from src1,
 * V from src2). See the subsampling FIXME on LEToUV_c — it applies
 * here too.
 * NOTE(review): the loop body is missing in this extraction —
 * presumably dstU[i] = src1[2*i]; dstV[i] = src2[2*i]; confirm
 * against the full source. */
184 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
185 const uint8_t *src2, int width, uint32_t *unused)
188 for (i=0; i<width; i++) {
/* De-interleave a packed two-channel chroma line: even bytes go to
 * dst1, odd bytes to dst2. Shared by the NV12 and NV21 readers, which
 * differ only in which output receives U and which receives V. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    for (int i = 0; i < width; i++) {
        dst1[i] = src[2 * i];
        dst2[i] = src[2 * i + 1];
    }
}
204 // FIXME Maybe dither instead.
/* YUV_NBPS(depth, endianness, rfunc): declares the pair of readers
 * <endianness><depth>ToUV_c and <endianness><depth>ToY_c, which load
 * 16-bit container samples with rfunc (AV_RL16/AV_RB16) and shift
 * them down by (depth-8) to 8 significant bits — truncation, no
 * dithering (see FIXME above). Instantiated below for the 9- and
 * 10-bit little/big-endian planar layouts. */
206 #define YUV_NBPS(depth, endianness, rfunc) \
207 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
208 const uint16_t *srcU, const uint16_t *srcV, \
209 int width, uint32_t *unused) \
212 for (i = 0; i < width; i++) { \
213 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
214 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
218 static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \
221 for (i = 0; i < width; i++) \
222 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
225 YUV_NBPS( 9, LE, AV_RL16)
226 YUV_NBPS( 9, BE, AV_RB16)
227 YUV_NBPS(10, LE, AV_RL16)
228 YUV_NBPS(10, BE, AV_RB16)
/* NV12 chroma reader: the interleaved plane is ordered U,V, so the
 * even bytes land in dstU and the odd bytes in dstV. src2/unused are
 * present only to match the reader signature. */
static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/* NV21 chroma reader: same as NV12 but the interleaved plane is
 * ordered V,U, so the destination pointers are swapped. */
static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
245 // bilinear / bicubic scaling
/* Generic horizontal scaler: for each output pixel i, convolve
 * filterSize input samples starting at filterPos[i] with the
 * corresponding row of 14-bit filter coefficients, shift by 7, and
 * clamp to the 15-bit intermediate range (bicubic filters can
 * overshoot — see the FFMIN comment below).
 * NOTE(review): the tail of the parameter list and the declarations
 * of i, j and val are missing in this extraction; confirm against the
 * full source. */
246 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
248 const int16_t *filter, const int16_t *filterPos,
252 for (i=0; i<dstW; i++) {
254 int srcPos= filterPos[i];
256 for (j=0; j<filterSize; j++) {
257 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
259 //filter += hFilterSize;
260 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Compile-time switch for verbose ring-buffer tracing; DEBUG_BUFFERS
 * compiles to nothing unless DEBUG_SWSCALE_BUFFERS is set to 1. */
266 #define DEBUG_SWSCALE_BUFFERS 0
267 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Forward declaration: x86 dither-table updater called from swScale_c. */
270 static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
271 int lastInLumBuf, int lastInChrBuf);
/**
 * Main C scaling entry point: for each destination line, horizontally
 * scale the required source lines into the luma/chroma ring buffers,
 * then run the appropriate vertical scaler or packed-format converter.
 * @return the number of destination lines written (dstY - lastDstY).
 * NOTE(review): braces, some declarations and the first lines of
 * several calls are missing from this extraction; code left
 * byte-identical.
 */
274 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
275 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
277 /* load a few things into local vars to make the code more readable? and faster */
278 const int srcW= c->srcW;
279 const int dstW= c->dstW;
280 const int dstH= c->dstH;
281 const int chrDstW= c->chrDstW;
282 const int chrSrcW= c->chrSrcW;
283 const int lumXInc= c->lumXInc;
284 const int chrXInc= c->chrXInc;
285 const enum PixelFormat dstFormat= c->dstFormat;
286 const int flags= c->flags;
287 int16_t *vLumFilterPos= c->vLumFilterPos;
288 int16_t *vChrFilterPos= c->vChrFilterPos;
289 int16_t *hLumFilterPos= c->hLumFilterPos;
290 int16_t *hChrFilterPos= c->hChrFilterPos;
291 int16_t *vLumFilter= c->vLumFilter;
292 int16_t *vChrFilter= c->vChrFilter;
293 int16_t *hLumFilter= c->hLumFilter;
294 int16_t *hChrFilter= c->hChrFilter;
295 int32_t *lumMmxFilter= c->lumMmxFilter;
296 int32_t *chrMmxFilter= c->chrMmxFilter;
297 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
298 const int vLumFilterSize= c->vLumFilterSize;
299 const int vChrFilterSize= c->vChrFilterSize;
300 const int hLumFilterSize= c->hLumFilterSize;
301 const int hChrFilterSize= c->hChrFilterSize;
302 int16_t **lumPixBuf= c->lumPixBuf;
303 int16_t **chrUPixBuf= c->chrUPixBuf;
304 int16_t **chrVPixBuf= c->chrVPixBuf;
305 int16_t **alpPixBuf= c->alpPixBuf;
306 const int vLumBufSize= c->vLumBufSize;
307 const int vChrBufSize= c->vChrBufSize;
308 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Chroma slice extent: rounded up (the negated shift rounds toward
 * +inf for the height). */
309 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
310 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
312 uint32_t *pal=c->pal_yuv;
314 /* vars which will change and which we need to store back in the context */
316 int lumBufIndex= c->lumBufIndex;
317 int chrBufIndex= c->chrBufIndex;
318 int lastInLumBuf= c->lastInLumBuf;
319 int lastInChrBuf= c->lastInChrBuf;
321 if (isPacked(c->srcFormat)) {
329 srcStride[3]= srcStride[0];
331 srcStride[1]<<= c->vChrDrop;
332 srcStride[2]<<= c->vChrDrop;
334 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
335 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
336 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
337 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
338 srcSliceY, srcSliceH, dstY, dstH);
339 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
340 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
342 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
343 static int warnedAlready=0; //FIXME move this into the context perhaps
344 if (flags & SWS_PRINT_INFO && !warnedAlready) {
345 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
346 " ->cannot do aligned memory accesses anymore\n");
351 /* Note the user might start scaling the picture in the middle so this
352 will not get executed. This is not really intended but works
353 currently, so people might do it. */
/* Main per-output-line loop. */
364 for (;dstY < dstH; dstY++) {
365 unsigned char *dest =dst[0]+dstStride[0]*dstY;
366 const int chrDstY= dstY>>c->chrDstVSubSample;
367 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
368 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
369 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
/* High-bit-depth sources get a real dither matrix row; 8-bit sources
 * use the flat (no-dither) table. */
370 const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY &7] : flat64;
371 const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
373 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
374 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
375 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
376 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
377 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
378 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
381 //handle holes (FAST_BILINEAR & weird filters)
382 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
383 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
384 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
385 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
387 DEBUG_BUFFERS("dstY: %d\n", dstY);
388 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
389 firstLumSrcY, lastLumSrcY, lastInLumBuf);
390 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
391 firstChrSrcY, lastChrSrcY, lastInChrBuf);
393 // Do we have enough lines in this slice to output the dstY line
394 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
397 lastLumSrcY = srcSliceY + srcSliceH - 1;
398 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
399 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
400 lastLumSrcY, lastChrSrcY);
403 //Do horizontal scaling
404 while(lastInLumBuf < lastLumSrcY) {
405 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
406 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
408 assert(lumBufIndex < 2*vLumBufSize);
409 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
410 assert(lastInLumBuf + 1 - srcSliceY >= 0);
411 hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
412 hLumFilter, hLumFilterPos, hLumFilterSize,
415 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
416 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
417 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
421 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
422 lumBufIndex, lastInLumBuf);
424 while(lastInChrBuf < lastChrSrcY) {
425 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
426 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
428 assert(chrBufIndex < 2*vChrBufSize);
429 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
430 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
431 //FIXME replace parameters through context struct (some at least)
433 if (c->needs_hcscale)
434 hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
435 chrDstW, src1, src2, chrSrcW, chrXInc,
436 hChrFilter, hChrFilterPos, hChrFilterSize,
437 formatConvBuffer, pal);
439 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
440 chrBufIndex, lastInChrBuf);
442 //wrap buf index around to stay inside the ring buffer
443 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
444 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
446 break; //we can't output a dstY line so let's try with the next slice
449 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Set up the vertical-filter source-line pointer windows into the
 * ring buffers (biased by vLumBufSize/vChrBufSize to stay positive). */
452 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
453 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
454 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
455 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
456 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
457 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
458 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
460 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
461 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
462 dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
463 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
464 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
465 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
466 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
467 yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
468 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
469 chrVSrcPtr, vChrFilterSize,
470 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
471 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
473 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
474 const int16_t *lumBuf = lumSrcPtr[0];
475 const int16_t *chrUBuf= chrUSrcPtr[0];
476 const int16_t *chrVBuf= chrVSrcPtr[0];
477 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
478 c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
479 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
480 } else { //General YV12
482 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
483 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
484 chrVSrcPtr, vChrFilterSize,
485 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
488 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
489 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
490 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
491 int chrAlpha= vChrFilter[2*dstY+1];
492 if(flags & SWS_FULL_CHR_H_INT) {
493 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
494 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
495 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
496 chrVSrcPtr, vChrFilterSize,
497 alpSrcPtr, dest, dstW, dstY);
499 c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
500 *chrVSrcPtr, *(chrVSrcPtr+1),
501 alpPixBuf ? *alpSrcPtr : NULL,
502 dest, dstW, chrAlpha, dstFormat, flags, dstY);
504 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
505 int lumAlpha= vLumFilter[2*dstY+1];
506 int chrAlpha= vChrFilter[2*dstY+1];
508 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
510 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
511 if(flags & SWS_FULL_CHR_H_INT) {
512 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
513 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
514 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
515 alpSrcPtr, dest, dstW, dstY);
517 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
518 *chrVSrcPtr, *(chrVSrcPtr+1),
519 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
520 dest, dstW, lumAlpha, chrAlpha, dstY);
522 } else { //general RGB
523 if(flags & SWS_FULL_CHR_H_INT) {
525 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
526 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
527 alpSrcPtr, dest, dstW, dstY);
530 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
531 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
532 alpSrcPtr, dest, dstW, dstY);
536 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
537 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
538 const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
539 const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
540 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
541 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
542 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
543 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
545 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
546 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
547 dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
548 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
549 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
550 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
551 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
553 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
554 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
555 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
559 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
560 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
561 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
564 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
565 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
566 if(flags & SWS_FULL_CHR_H_INT) {
568 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
569 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
570 alpSrcPtr, dest, dstW, dstY);
573 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
574 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
575 alpSrcPtr, dest, dstW, dstY);
/* If the destination has an alpha plane but the source provided none,
 * fill the written rows with opaque alpha. */
581 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
582 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* x86: sfence makes any nontemporal stores issued above globally
 * visible before returning. */
585 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
586 __asm__ volatile("sfence":::"memory");
590 /* store changed local vars back in the context */
592 c->lumBufIndex= lumBufIndex;
593 c->chrBufIndex= chrBufIndex;
594 c->lastInLumBuf= lastInLumBuf;
595 c->lastInChrBuf= lastInChrBuf;
597 return dstY - lastDstY;
/**
 * Populate the SwsContext function pointers with the plain-C
 * implementations: vertical output functions, the horizontal scaler,
 * per-source-format luma/chroma/alpha readers, and the
 * full/limited-range converters.
 * NOTE(review): the enclosing `switch (srcFormat)` lines and several
 * case bodies are missing from this extraction; code left
 * byte-identical.
 */
600 static void sws_init_swScale_c(SwsContext *c)
602 enum PixelFormat srcFormat = c->srcFormat;
/* Default vertical output functions (overridden by asm versions
 * elsewhere when available). */
604 c->yuv2nv12X = yuv2nv12X_c;
605 c->yuv2yuv1 = yuv2yuv1_c;
606 c->yuv2yuvX = yuv2yuvX_c;
607 c->yuv2packed1 = yuv2packed1_c;
608 c->yuv2packed2 = yuv2packed2_c;
609 c->yuv2packedX = yuv2packedX_c;
611 c->hScale = hScale_c;
613 if (c->flags & SWS_FAST_BILINEAR)
615 c->hyscale_fast = hyscale_fast_c;
616 c->hcscale_fast = hcscale_fast_c;
/* Select the chroma reader (and, for >8-bit formats, the 16-bit
 * horizontal scaler with matching endianness) for the source format. */
621 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
622 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
623 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
624 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
628 case PIX_FMT_BGR4_BYTE:
629 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
630 case PIX_FMT_GRAY16BE :
631 case PIX_FMT_YUV420P9BE:
632 case PIX_FMT_YUV422P10BE:
633 case PIX_FMT_YUV420P10BE:
634 case PIX_FMT_YUV420P16BE:
635 case PIX_FMT_YUV422P16BE:
636 case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
637 case PIX_FMT_GRAY16LE :
638 case PIX_FMT_YUV420P9LE:
639 case PIX_FMT_YUV422P10LE:
640 case PIX_FMT_YUV420P10LE:
641 case PIX_FMT_YUV420P16LE:
642 case PIX_FMT_YUV422P16LE:
643 case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
/* RGB-family sources: pick half-resolution chroma readers when the
 * chroma plane is horizontally subsampled, full-resolution otherwise. */
645 if (c->chrSrcHSubSample) {
647 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break;
648 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break;
649 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break;
650 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break;
651 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
652 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
653 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
654 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
655 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
656 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
657 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
658 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
659 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
660 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
664 case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break;
665 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break;
666 case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break;
667 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break;
668 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
669 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
670 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
671 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
672 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
673 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
674 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
675 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
676 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
677 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
/* Select the luma reader for the source format. */
684 case PIX_FMT_YUYV422 :
685 case PIX_FMT_GRAY8A :
686 c->lumToYV12 = yuy2ToY_c; break;
687 case PIX_FMT_UYVY422 :
688 c->lumToYV12 = uyvyToY_c; break;
689 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
690 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
691 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
692 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
693 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
694 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
698 case PIX_FMT_BGR4_BYTE:
699 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
700 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
701 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
702 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
703 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
704 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
705 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
706 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break;
707 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break;
708 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break;
709 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break;
/* Alpha reader, for sources that carry an alpha channel. */
714 case PIX_FMT_RGB32_1:
716 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
717 case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break;
718 case PIX_FMT_PAL8 : c->alpToYV12 = palToA; break;
722 if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
723 c->hScale16= hScale16_c;
726 case PIX_FMT_GRAY8A :
/* JPEG (full) <-> MPEG (limited) range conversion for YUV outputs. */
735 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
737 c->lumConvertRange = lumRangeFromJpeg_c;
738 c->chrConvertRange = chrRangeFromJpeg_c;
740 c->lumConvertRange = lumRangeToJpeg_c;
741 c->chrConvertRange = chrRangeToJpeg_c;
/* Grayscale and mono formats have no real chroma to scale. */
745 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
746 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
747 c->needs_hcscale = 1;