/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
22 const int16_t **lumSrc, int lumFilterSize,
23 const int16_t *chrFilter, const int16_t **chrSrc,
24 int chrFilterSize, const int16_t **alpSrc,
25 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
26 uint8_t *aDest, long dstW, long chrDstW)
28 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
29 chrFilter, chrSrc, chrFilterSize,
30 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
33 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
34 const int16_t **lumSrc, int lumFilterSize,
35 const int16_t *chrFilter, const int16_t **chrSrc,
36 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
37 int dstW, int chrDstW, enum PixelFormat dstFormat)
39 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
40 chrFilter, chrSrc, chrFilterSize,
41 dest, uDest, dstW, chrDstW, dstFormat);
44 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
45 const int16_t *chrSrc, const int16_t *alpSrc,
46 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
47 uint8_t *aDest, long dstW, long chrDstW)
50 for (i=0; i<dstW; i++) {
51 int val= (lumSrc[i]+64)>>7;
62 for (i=0; i<chrDstW; i++) {
63 int u=(chrSrc[i ]+64)>>7;
64 int v=(chrSrc[i + VOFW]+64)>>7;
68 else if (u>255) u=255;
70 else if (v>255) v=255;
77 if (CONFIG_SWSCALE_ALPHA && aDest)
78 for (i=0; i<dstW; i++) {
79 int val= (alpSrc[i]+64)>>7;
80 aDest[i]= av_clip_uint8(val);
86 * vertical scale YV12 to RGB
88 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
89 const int16_t **lumSrc, int lumFilterSize,
90 const int16_t *chrFilter, const int16_t **chrSrc,
91 int chrFilterSize, const int16_t **alpSrc,
92 uint8_t *dest, long dstW, long dstY)
94 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
95 chrFilter, chrSrc, chrFilterSize,
96 alpSrc, dest, dstW, dstY);
100 * vertical bilinear scale YV12 to RGB
102 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
103 const uint16_t *buf1, const uint16_t *uvbuf0,
104 const uint16_t *uvbuf1, const uint16_t *abuf0,
105 const uint16_t *abuf1, uint8_t *dest, int dstW,
106 int yalpha, int uvalpha, int y)
108 int yalpha1=4095- yalpha;
109 int uvalpha1=4095-uvalpha;
112 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
116 * YV12 to RGB without scaling or interpolating
118 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
119 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
120 const uint16_t *abuf0, uint8_t *dest, int dstW,
121 int uvalpha, enum PixelFormat dstFormat,
127 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
128 const int yalpha= 4096; //FIXME ...
130 if (flags&SWS_FULL_CHR_H_INT) {
131 c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
135 if (uvalpha < 2048) {
136 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
138 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
142 //FIXME yuy2* can read up to 7 samples too much
144 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
148 for (i=0; i<width; i++)
/**
 * Split the chroma bytes out of a packed line in which every 4-byte group
 * carries U at byte offset 1 and V at byte offset 3. Installed as
 * c->chrToYV12 for PIX_FMT_YUYV422.
 *
 * @param dstU   receives width U samples
 * @param dstV   receives width V samples
 * @param src1   packed input line; bytes 0 .. 4*width-1 may be read
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, long width, uint32_t *unused)
{
    long i; /* same type as width, so the loop comparison never narrows */

    /* Validate the precondition before any output is written. */
    assert(src1 == src2);

    for (i = 0; i < width; i++) {
        dstU[i] = src1[4*i + 1];
        dstV[i] = src1[4*i + 3];
    }
}
/**
 * Reduce two planes of 2-byte samples to 8 bits by keeping the byte at odd
 * offset of each sample: src1 feeds dstU, src2 feeds dstV. Installed as
 * c->chrToYV12 for the PIX_FMT_YUV4xxP16LE formats, where the odd byte is
 * the most significant one.
 *
 * @param dstU   receives width U samples
 * @param dstV   receives width V samples
 * @param src1   U input line; bytes 0 .. 2*width-1 may be read
 * @param src2   V input line; bytes 0 .. 2*width-1 may be read
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                            const uint8_t *src2, long width, uint32_t *unused)
{
    long i; /* same type as width, so the loop comparison never narrows */

    for (i = 0; i < width; i++) {
        dstU[i] = src1[2*i + 1];
        dstV[i] = src2[2*i + 1];
    }
}
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
175 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
179 for (i=0; i<width; i++)
/**
 * Split the chroma bytes out of a packed line in which every 4-byte group
 * carries U at byte offset 0 and V at byte offset 2. Installed as
 * c->chrToYV12 for PIX_FMT_UYVY422.
 *
 * @param dstU   receives width U samples
 * @param dstV   receives width V samples
 * @param src1   packed input line; bytes 0 .. 4*width-2 may be read
 * @param src2   must be the same pointer as src1 (asserted)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, long width, uint32_t *unused)
{
    long i; /* same type as width, so the loop comparison never narrows */

    /* Validate the precondition before any output is written. */
    assert(src1 == src2);

    for (i = 0; i < width; i++) {
        dstU[i] = src1[4*i + 0];
        dstV[i] = src1[4*i + 2];
    }
}
194 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
195 const uint8_t *src2, long width, uint32_t *unused)
198 for (i=0; i<width; i++) {
/**
 * De-interleave a line of byte pairs: the first byte of each pair goes to
 * dst1, the second to dst2. Shared by nv12ToUV_c() and nv21ToUV_c(), which
 * differ only in which destination they pass first.
 *
 * @param dst1   receives bytes at even offsets of src
 * @param dst2   receives bytes at odd offsets of src
 * @param src    interleaved input; bytes 0 .. 2*width-1 are read
 * @param width  number of pairs to split
 */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, long width)
{
    long i; /* same type as width, so the loop comparison never narrows */

    for (i = 0; i < width; i++) {
        dst1[i] = src[2*i + 0];
        dst2[i] = src[2*i + 1];
    }
}
/**
 * Chroma reader installed as c->chrToYV12 for PIX_FMT_NV12: de-interleaves
 * src1 with nvXXtoUV_c(), first byte of each pair to dstU, second to dstV.
 *
 * @param dstU   receives width U samples
 * @param dstV   receives width V samples
 * @param src1   interleaved chroma line; bytes 0 .. 2*width-1 are read
 * @param src2   ignored
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/**
 * Chroma reader installed as c->chrToYV12 for PIX_FMT_NV21: de-interleaves
 * src1 with nvXXtoUV_c(), first byte of each pair to dstV, second to dstU
 * (the reverse of nv12ToUV_c()).
 *
 * @param dstU   receives width U samples
 * @param dstV   receives width V samples
 * @param src1   interleaved chroma line; bytes 0 .. 2*width-1 are read
 * @param src2   ignored
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
{
    /* Destinations are swapped relative to NV12. */
    nvXXtoUV_c(dstV, dstU, src1, width);
}
228 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
229 long width, uint32_t *unused)
232 for (i=0; i<width; i++) {
237 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
241 static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
242 const uint8_t *src2, long width, uint32_t *unused)
245 for (i=0; i<width; i++) {
246 int b= src1[3*i + 0];
247 int g= src1[3*i + 1];
248 int r= src1[3*i + 2];
250 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
251 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
253 assert(src1 == src2);
256 static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
257 const uint8_t *src2, long width, uint32_t *unused)
260 for (i=0; i<width; i++) {
261 int b= src1[6*i + 0] + src1[6*i + 3];
262 int g= src1[6*i + 1] + src1[6*i + 4];
263 int r= src1[6*i + 2] + src1[6*i + 5];
265 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
266 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
268 assert(src1 == src2);
271 static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
275 for (i=0; i<width; i++) {
280 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
284 static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
285 const uint8_t *src2, long width, uint32_t *unused)
289 for (i=0; i<width; i++) {
290 int r= src1[3*i + 0];
291 int g= src1[3*i + 1];
292 int b= src1[3*i + 2];
294 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
295 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
299 static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
300 const uint8_t *src2, long width, uint32_t *unused)
304 for (i=0; i<width; i++) {
305 int r= src1[6*i + 0] + src1[6*i + 3];
306 int g= src1[6*i + 1] + src1[6*i + 4];
307 int b= src1[6*i + 2] + src1[6*i + 5];
309 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
310 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
315 // bilinear / bicubic scaling
316 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
318 const int16_t *filter, const int16_t *filterPos,
322 for (i=0; i<dstW; i++) {
324 int srcPos= filterPos[i];
326 //printf("filterPos: %d\n", filterPos[i]);
327 for (j=0; j<filterSize; j++) {
328 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
329 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
331 //filter += hFilterSize;
332 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
338 //FIXME all scalers more complex than bilinear could do half of this transform
339 static void chrRangeToJpeg_c(uint16_t *dst, int width)
342 for (i = 0; i < width; i++) {
343 dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
344 dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
347 static void chrRangeFromJpeg_c(uint16_t *dst, int width)
350 for (i = 0; i < width; i++) {
351 dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469
352 dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
/**
 * In-place luma range conversion, one of the two candidates installed as
 * c->lumConvertRange by sws_init_swScale_c().
 *
 * Each of the first width samples is replaced by
 * (min(x, 30189) * 19077 - 39057361) >> 14. The clamp to 30189 is applied
 * before the multiply.
 * NOTE(review): the name suggests limited-range to full-range ("JPEG")
 * conversion; confirm against the selecting condition in the init code.
 *
 * @param dst    samples to convert in place
 * @param width  number of samples
 */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
    }
}
/**
 * In-place luma range conversion, one of the two candidates installed as
 * c->lumConvertRange by sws_init_swScale_c().
 *
 * Each of the first width samples is replaced by
 * (x * 14071 + 33561947) >> 14, evaluated in int after integer promotion
 * and truncated back to uint16_t on store.
 * NOTE(review): the name suggests full-range ("JPEG") to limited-range
 * conversion; confirm against the selecting condition in the init code.
 *
 * @param dst    samples to convert in place
 * @param width  number of samples; nothing is touched when width <= 0
 */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        dst[i] = (dst[i]*14071 + 33561947)>>14;
    }
}
368 static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
369 const uint8_t *src, int srcW, int xInc)
373 for (i=0;i<dstWidth;i++) {
374 register unsigned int xx=xpos>>16;
375 register unsigned int xalpha=(xpos&0xFFFF)>>9;
376 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
381 // *** horizontal scale Y line to temp buffer
382 static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
383 const uint8_t *src, int srcW, int xInc,
384 const int16_t *hLumFilter,
385 const int16_t *hLumFilterPos, int hLumFilterSize,
386 uint8_t *formatConvBuffer,
387 uint32_t *pal, int isAlpha)
389 void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
390 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
392 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
395 toYV12(formatConvBuffer, src, srcW, pal);
396 src= formatConvBuffer;
399 if (!c->hyscale_fast) {
400 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
401 } else { // fast bilinear upscale / crap downscale
402 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
406 convertRange(dst, dstWidth);
409 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
410 long dstWidth, const uint8_t *src1,
411 const uint8_t *src2, int srcW, int xInc)
415 for (i=0;i<dstWidth;i++) {
416 register unsigned int xx=xpos>>16;
417 register unsigned int xalpha=(xpos&0xFFFF)>>9;
418 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
419 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
421 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
422 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
428 inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
429 const uint8_t *src1, const uint8_t *src2,
430 int srcW, int xInc, const int16_t *hChrFilter,
431 const int16_t *hChrFilterPos, int hChrFilterSize,
432 uint8_t *formatConvBuffer, uint32_t *pal)
435 src1 += c->chrSrcOffset;
436 src2 += c->chrSrcOffset;
439 c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
440 src1= formatConvBuffer;
441 src2= formatConvBuffer+VOFW;
444 if (!c->hcscale_fast) {
445 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
446 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
447 } else { // fast bilinear upscale / crap downscale
448 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
451 if (c->chrConvertRange)
452 c->chrConvertRange(dst, dstWidth);
/* Compile-time switch: set to 1 to emit the buffer-tracing messages below. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Log a debug message through av_log() when DEBUG_SWSCALE_BUFFERS is non-zero.
 * The expansion refers to a variable named c at the call site and passes it
 * to av_log() as the logging context.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement: an `else` written after a DEBUG_BUFFERS(...) call can no longer
 * attach to the macro's internal `if`.
 */
#define DEBUG_BUFFERS(...) do { if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
458 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
459 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
461 /* load a few things into local vars to make the code more readable? and faster */
462 const int srcW= c->srcW;
463 const int dstW= c->dstW;
464 const int dstH= c->dstH;
465 const int chrDstW= c->chrDstW;
466 const int chrSrcW= c->chrSrcW;
467 const int lumXInc= c->lumXInc;
468 const int chrXInc= c->chrXInc;
469 const enum PixelFormat dstFormat= c->dstFormat;
470 const int flags= c->flags;
471 int16_t *vLumFilterPos= c->vLumFilterPos;
472 int16_t *vChrFilterPos= c->vChrFilterPos;
473 int16_t *hLumFilterPos= c->hLumFilterPos;
474 int16_t *hChrFilterPos= c->hChrFilterPos;
475 int16_t *vLumFilter= c->vLumFilter;
476 int16_t *vChrFilter= c->vChrFilter;
477 int16_t *hLumFilter= c->hLumFilter;
478 int16_t *hChrFilter= c->hChrFilter;
479 int32_t *lumMmxFilter= c->lumMmxFilter;
480 int32_t *chrMmxFilter= c->chrMmxFilter;
481 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
482 const int vLumFilterSize= c->vLumFilterSize;
483 const int vChrFilterSize= c->vChrFilterSize;
484 const int hLumFilterSize= c->hLumFilterSize;
485 const int hChrFilterSize= c->hChrFilterSize;
486 int16_t **lumPixBuf= c->lumPixBuf;
487 int16_t **chrPixBuf= c->chrPixBuf;
488 int16_t **alpPixBuf= c->alpPixBuf;
489 const int vLumBufSize= c->vLumBufSize;
490 const int vChrBufSize= c->vChrBufSize;
491 uint8_t *formatConvBuffer= c->formatConvBuffer;
492 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
493 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
495 uint32_t *pal=c->pal_yuv;
497 /* vars which will change and which we need to store back in the context */
499 int lumBufIndex= c->lumBufIndex;
500 int chrBufIndex= c->chrBufIndex;
501 int lastInLumBuf= c->lastInLumBuf;
502 int lastInChrBuf= c->lastInChrBuf;
504 if (isPacked(c->srcFormat)) {
512 srcStride[3]= srcStride[0];
514 srcStride[1]<<= c->vChrDrop;
515 srcStride[2]<<= c->vChrDrop;
517 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
518 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
519 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
520 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
521 srcSliceY, srcSliceH, dstY, dstH);
522 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
523 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
525 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
526 static int warnedAlready=0; //FIXME move this into the context perhaps
527 if (flags & SWS_PRINT_INFO && !warnedAlready) {
528 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
529 " ->cannot do aligned memory accesses anymore\n");
534 /* Note the user might start scaling the picture in the middle so this
535 will not get executed. This is not really intended but works
536 currently, so people might do it. */
547 for (;dstY < dstH; dstY++) {
548 unsigned char *dest =dst[0]+dstStride[0]*dstY;
549 const int chrDstY= dstY>>c->chrDstVSubSample;
550 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
551 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
552 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
554 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
555 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
556 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
557 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
558 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
559 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
562 //handle holes (FAST_BILINEAR & weird filters)
563 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
564 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
565 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
566 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
568 DEBUG_BUFFERS("dstY: %d\n", dstY);
569 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
570 firstLumSrcY, lastLumSrcY, lastInLumBuf);
571 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
572 firstChrSrcY, lastChrSrcY, lastInChrBuf);
574 // Do we have enough lines in this slice to output the dstY line
575 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
578 lastLumSrcY = srcSliceY + srcSliceH - 1;
579 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
580 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
581 lastLumSrcY, lastChrSrcY);
584 //Do horizontal scaling
585 while(lastInLumBuf < lastLumSrcY) {
586 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
587 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
589 assert(lumBufIndex < 2*vLumBufSize);
590 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
591 assert(lastInLumBuf + 1 - srcSliceY >= 0);
592 hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
593 hLumFilter, hLumFilterPos, hLumFilterSize,
596 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
597 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
598 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
602 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
603 lumBufIndex, lastInLumBuf);
605 while(lastInChrBuf < lastChrSrcY) {
606 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
607 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
609 assert(chrBufIndex < 2*vChrBufSize);
610 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
611 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
612 //FIXME replace parameters through context struct (some at least)
614 if (c->needs_hcscale)
615 hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
616 hChrFilter, hChrFilterPos, hChrFilterSize,
620 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
621 chrBufIndex, lastInChrBuf);
623 //wrap buf index around to stay inside the ring buffer
624 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
625 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
627 break; //we can't output a dstY line so let's try with the next slice
630 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
631 const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
632 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
633 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
634 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
635 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
637 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
638 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
639 dest, uDest, dstW, chrDstW, dstFormat);
640 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
641 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
642 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
643 if (is16BPS(dstFormat)) {
645 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
646 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
647 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
649 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
650 const int16_t *lumBuf = lumSrcPtr[0];
651 const int16_t *chrBuf= chrSrcPtr[0];
652 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
653 c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
654 } else { //General YV12
656 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
657 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
658 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
661 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
662 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
663 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
664 int chrAlpha= vChrFilter[2*dstY+1];
665 if(flags & SWS_FULL_CHR_H_INT) {
666 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
667 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
668 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
669 alpSrcPtr, dest, dstW, dstY);
671 c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
672 alpPixBuf ? *alpSrcPtr : NULL,
673 dest, dstW, chrAlpha, dstFormat, flags, dstY);
675 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
676 int lumAlpha= vLumFilter[2*dstY+1];
677 int chrAlpha= vChrFilter[2*dstY+1];
679 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
681 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
682 if(flags & SWS_FULL_CHR_H_INT) {
683 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
684 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
685 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
686 alpSrcPtr, dest, dstW, dstY);
688 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
689 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
690 dest, dstW, lumAlpha, chrAlpha, dstY);
692 } else { //general RGB
693 if(flags & SWS_FULL_CHR_H_INT) {
695 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
696 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
697 alpSrcPtr, dest, dstW, dstY);
700 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
701 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
702 alpSrcPtr, dest, dstW, dstY);
706 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
707 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
708 const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
709 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
710 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
711 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
712 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
714 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
715 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
716 dest, uDest, dstW, chrDstW, dstFormat);
717 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
718 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
719 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
720 if (is16BPS(dstFormat)) {
722 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
723 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
724 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
728 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
729 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
730 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
733 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
734 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
735 if(flags & SWS_FULL_CHR_H_INT) {
737 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
738 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
739 alpSrcPtr, dest, dstW, dstY);
742 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
743 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
744 alpSrcPtr, dest, dstW, dstY);
750 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
751 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
753 /* store changed local vars back in the context */
755 c->lumBufIndex= lumBufIndex;
756 c->chrBufIndex= chrBufIndex;
757 c->lastInLumBuf= lastInLumBuf;
758 c->lastInChrBuf= lastInChrBuf;
760 return dstY - lastDstY;
763 static void sws_init_swScale_c(SwsContext *c)
765 enum PixelFormat srcFormat = c->srcFormat;
767 c->yuv2nv12X = yuv2nv12X_c;
768 c->yuv2yuv1 = yuv2yuv1_c;
769 c->yuv2yuvX = yuv2yuvX_c;
770 c->yuv2packed1 = yuv2packed1_c;
771 c->yuv2packed2 = yuv2packed2_c;
772 c->yuv2packedX = yuv2packedX_c;
774 c->hScale = hScale_c;
776 if (c->flags & SWS_FAST_BILINEAR)
778 c->hyscale_fast = hyscale_fast_c;
779 c->hcscale_fast = hcscale_fast_c;
784 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
785 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
786 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
787 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
791 case PIX_FMT_BGR4_BYTE:
792 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
793 case PIX_FMT_YUV420P16BE:
794 case PIX_FMT_YUV422P16BE:
795 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
796 case PIX_FMT_YUV420P16LE:
797 case PIX_FMT_YUV422P16LE:
798 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
800 if (c->chrSrcHSubSample) {
802 case PIX_FMT_RGB48BE:
803 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
804 case PIX_FMT_BGR48BE:
805 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
806 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
807 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
808 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
809 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
810 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
811 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
812 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
813 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
814 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
815 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
819 case PIX_FMT_RGB48BE:
820 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
821 case PIX_FMT_BGR48BE:
822 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
823 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
824 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
825 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
826 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
827 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
828 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
829 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
830 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
831 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
832 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
839 case PIX_FMT_YUYV422 :
840 case PIX_FMT_YUV420P16BE:
841 case PIX_FMT_YUV422P16BE:
842 case PIX_FMT_YUV444P16BE:
844 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
845 case PIX_FMT_UYVY422 :
846 case PIX_FMT_YUV420P16LE:
847 case PIX_FMT_YUV422P16LE:
848 case PIX_FMT_YUV444P16LE:
849 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
850 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
851 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
852 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
853 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
854 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
855 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
859 case PIX_FMT_BGR4_BYTE:
860 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
861 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
862 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
863 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
864 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
865 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
866 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
867 case PIX_FMT_RGB48BE:
868 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
869 case PIX_FMT_BGR48BE:
870 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
875 case PIX_FMT_RGB32_1:
877 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
878 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
890 case PIX_FMT_RGB48LE:
891 case PIX_FMT_BGR48LE:
898 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
900 c->lumConvertRange = lumRangeFromJpeg_c;
901 c->chrConvertRange = chrRangeFromJpeg_c;
903 c->lumConvertRange = lumRangeToJpeg_c;
904 c->chrConvertRange = chrRangeToJpeg_c;
908 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
909 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
910 c->needs_hcscale = 1;