2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Vertical scaler for planar YUV(A) output: applies the vertical luma and
 * chroma filters (vLumFilterSize / vChrFilterSize taps) to the arrays of
 * source-line pointers and writes one 8-bit output line per plane.
 * Thin wrapper around the generic yuv2yuvXinC(); the SwsContext parameter
 * exists only so this matches the c->yuv2yuvX function-pointer signature.
 * NOTE(review): braces are elided in the visible text. */
21 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
22 const int16_t **lumSrc, int lumFilterSize,
23 const int16_t *chrFilter, const int16_t **chrSrc,
24 int chrFilterSize, const int16_t **alpSrc,
25 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
26 uint8_t *aDest, long dstW, long chrDstW)
28 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
29 chrFilter, chrSrc, chrFilterSize,
30 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
/* Same vertical scaling as yuv2yuvX_c but for NV12/NV21 output, where the
 * chroma plane is interleaved (dstFormat selects U-first vs V-first).
 * Forwards to the generic yuv2nv12XinC() implementation. */
33 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
34 const int16_t **lumSrc, int lumFilterSize,
35 const int16_t *chrFilter, const int16_t **chrSrc,
36 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
37 int dstW, int chrDstW, enum PixelFormat dstFormat)
39 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
40 chrFilter, chrSrc, chrFilterSize,
41 dest, uDest, dstW, chrDstW, dstFormat);
/* Unscaled vertical pass (filter size 1): converts the 16-bit intermediate
 * samples (8 integer bits + 7 fractional bits) back to 8-bit planes with
 * rounding, (x+64)>>7, then clips to 0..255.  V samples are stored VOFW
 * entries after U inside chrSrc.  Alpha is written only when alpha support
 * is compiled in and an alpha destination is given.
 * NOTE(review): this chunk appears elided — declarations, braces and the
 * lower clip branches (u<0 / v<0 / luma clip) are missing from the
 * visible text. */
44 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
45 const int16_t *chrSrc, const int16_t *alpSrc,
46 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
47 uint8_t *aDest, long dstW, long chrDstW)
50 for (i=0; i<dstW; i++) {
51 int val= (lumSrc[i]+64)>>7;
62 for (i=0; i<chrDstW; i++) {
63 int u=(chrSrc[i ]+64)>>7;
64 int v=(chrSrc[i + VOFW]+64)>>7;
68 else if (u>255) u=255;
70 else if (v>255) v=255;
77 if (CONFIG_SWSCALE_ALPHA && aDest)
78 for (i=0; i<dstW; i++) {
79 int val= (alpSrc[i]+64)>>7;
80 aDest[i]= av_clip_uint8(val);
86 * vertical scale YV12 to RGB
/* Vertical scaling with packed (RGB-family) output: wrapper around the
 * generic yuv2packedXinC(), which also needs the context for its YUV->RGB
 * conversion tables. */
88 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
89 const int16_t **lumSrc, int lumFilterSize,
90 const int16_t *chrFilter, const int16_t **chrSrc,
91 int chrFilterSize, const int16_t **alpSrc,
92 uint8_t *dest, long dstW, long dstY)
94 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
95 chrFilter, chrSrc, chrFilterSize,
96 alpSrc, dest, dstW, dstY);
100 * vertical bilinear scale YV12 to RGB
/* Blend exactly two source lines per plane with 12-bit weights:
 * yalpha1/uvalpha1 are the complementary weights (4096 - alpha, written
 * as 4095-… in the original).  The per-output-format conversion code is
 * generated by the YSCALE_YUV_2_ANYRGB_C macro invocation. */
102 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
103 const uint16_t *buf1, const uint16_t *uvbuf0,
104 const uint16_t *uvbuf1, const uint16_t *abuf0,
105 const uint16_t *abuf1, uint8_t *dest, int dstW,
106 int yalpha, int uvalpha, int y)
108 int yalpha1=4095- yalpha;
109 int uvalpha1=4095-uvalpha;
112 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
116 * YV12 to RGB without scaling or interpolating
118 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
119 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
120 const uint16_t *abuf0, uint8_t *dest, int dstW,
121 int uvalpha, enum PixelFormat dstFormat,
127 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
128 const int yalpha= 4096; //FIXME ...
/* Full horizontal chroma interpolation requested: fall back to the 2-line
 * blender with a zero luma blend instead of a dedicated 1-line routine. */
130 if (flags&SWS_FULL_CHR_H_INT) {
131 c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
/* uvalpha < 2048 -> nearest chroma line; otherwise average the two
 * chroma lines (the *1B macro variants). */
135 if (uvalpha < 2048) {
136 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
138 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
142 //FIXME yuy2* can read up to 7 samples too much
/* Extract the luma samples from packed YUYV input (Y at even byte offsets).
 * NOTE(review): the trailing parameter and the loop body are elided from
 * the visible text — presumably dst[i] = src[2*i]; confirm against the
 * full file. */
144 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
148 for (i=0; i<width; i++)
/* Deinterleave the chroma samples of one packed YUYV (Y U Y V ...) line:
 * U sits at byte 1 and V at byte 3 of every 4-byte group.  src1 and src2
 * must refer to the same line (asserted below); `unused` matches the
 * common converter signature and is ignored. */
static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, long width, uint32_t *unused)
{
    const uint8_t *p = src1;
    long n;

    for (n = 0; n < width; n++, p += 4) {
        dstU[n] = p[1];
        dstV[n] = p[3];
    }
    assert(src1 == src2);
}
/* Reduce little-endian 16-bit chroma samples to 8 bits by keeping the high
 * byte of each sample.  Unlike the YUYV variant, U and V come from two
 * separate planes (src1 feeds dstU, src2 feeds dstV). */
static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                            const uint8_t *src2, long width, uint32_t *unused)
{
    long n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[2*n + 1]; /* MSB of little-endian sample n */
        dstV[n] = src2[2*n + 1];
    }
}
173 /* This is almost identical to the previous, end exists only because
174 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma from packed UYVY input (Y at odd byte offsets).
 * NOTE(review): the trailing parameter and the loop body are elided from
 * the visible text — presumably dst[i] = src[2*i + 1]; confirm against
 * the full file. */
175 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
179 for (i=0; i<width; i++)
/* Deinterleave the chroma samples of one packed UYVY (U Y V Y ...) line:
 * U sits at byte 0 and V at byte 2 of every 4-byte group.  src1 and src2
 * must refer to the same line (asserted below). */
static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, long width, uint32_t *unused)
{
    const uint8_t *p = src1;
    long n;

    for (n = 0; n < width; n++, p += 4) {
        dstU[n] = p[0];
        dstV[n] = p[2];
    }
    assert(src1 == src2);
}
/* Reduce big-endian 16-bit chroma samples to 8 bits (high byte first in
 * memory); src1 feeds dstU, src2 feeds dstV.
 * NOTE(review): the assignment lines are elided from the visible text —
 * presumably dstU[i] = src1[2*i]; dstV[i] = src2[2*i]; confirm against
 * the full file. */
194 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
195 const uint8_t *src2, long width, uint32_t *unused)
198 for (i=0; i<width; i++) {
/* Split one interleaved two-component plane (as in NV12/NV21 chroma) into
 * two separate planes: even bytes go to dst1, odd bytes to dst2. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, long width)
{
    const uint8_t *p = src;
    long n;

    for (n = 0; n < width; n++) {
        dst1[n] = *p++;
        dst2[n] = *p++;
    }
}
/* NV12 chroma input: the plane interleaves U,V pairs.  Deinterleave into
 * dstU/dstV (open-coded equivalent of nvXXtoUV_c(dstU, dstV, src1, width);
 * src2 and unused are ignored, matching the converter signature). */
static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
{
    long n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[2*n];
        dstV[n] = src1[2*n + 1];
    }
}
/* NV21 chroma input: same as NV12 but the plane interleaves V,U pairs, so
 * the destinations are swapped (open-coded equivalent of
 * nvXXtoUV_c(dstV, dstU, src1, width)). */
static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
{
    long n;

    for (n = 0; n < width; n++) {
        dstV[n] = src1[2*n];
        dstU[n] = src1[2*n + 1];
    }
}
228 // FIXME Maybe dither instead.
/* YUV_NBPS(depth) generates the input converters yuv<depth>ToUV_c() and
 * yuv<depth>ToY_c() for 9/10-bit planar YUV: each native-endian 16-bit
 * sample is truncated to 8 bits by shifting right (depth-8).  No comments
 * are inserted inside the macro body because it is continued with
 * backslashes.  NOTE(review): the closing braces of both generated
 * functions are elided from the visible text. */
229 #define YUV_NBPS(depth) \
230 static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
231 const uint8_t *_srcU, const uint8_t *_srcV, \
232 long width, uint32_t *unused) \
235 const uint16_t *srcU = (const uint16_t*)_srcU; \
236 const uint16_t *srcV = (const uint16_t*)_srcV; \
237 for (i = 0; i < width; i++) { \
238 dstU[i] = srcU[i]>>(depth-8); \
239 dstV[i] = srcV[i]>>(depth-8); \
243 static inline void yuv ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
246 const uint16_t *srcY = (const uint16_t*)_srcY; \
247 for (i = 0; i < width; i++) \
248 dstY[i] = srcY[i]>>(depth-8); \
/* Packed BGR24 -> luma: fixed-point weighted sum RY*r + GY*g + BY*b at
 * RGB2YUV_SHIFT precision, with rounding/offset constant 33<<(SHIFT-1).
 * NOTE(review): the per-pixel b/g/r byte loads are elided from the visible
 * text (BGR memory order: b at +0, g at +1, r at +2). */
254 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
255 long width, uint32_t *unused)
258 for (i=0; i<width; i++) {
263 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed BGR24 -> chroma: per-pixel U/V from the RU/GU/BU and RV/GV/BV
 * coefficient tables; the 257<<(SHIFT-1) rounding term centres the result
 * on 128.  src2 must equal src1 (one interleaved plane), asserted below. */
267 static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
268 const uint8_t *src2, long width, uint32_t *unused)
271 for (i=0; i<width; i++) {
272 int b= src1[3*i + 0];
273 int g= src1[3*i + 1];
274 int r= src1[3*i + 2];
276 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
277 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
279 assert(src1 == src2);
/* Like bgr24ToUV_c but with 2:1 horizontal chroma subsampling: each output
 * sample averages two adjacent input pixels — components are summed, and
 * the extra factor of two is removed by >>(RGB2YUV_SHIFT+1) with a doubled
 * rounding term (257<<SHIFT). */
282 static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
283 const uint8_t *src2, long width, uint32_t *unused)
286 for (i=0; i<width; i++) {
287 int b= src1[6*i + 0] + src1[6*i + 3];
288 int g= src1[6*i + 1] + src1[6*i + 4];
289 int r= src1[6*i + 2] + src1[6*i + 5];
291 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
292 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
294 assert(src1 == src2);
/* Packed RGB24 -> luma; mirror of bgr24ToY_c with r at +0, g at +1, b at +2.
 * NOTE(review): the trailing parameter and the r/g/b loads are elided from
 * the visible text. */
297 static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
301 for (i=0; i<width; i++) {
306 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* Packed RGB24 -> chroma; mirror of bgr24ToUV_c with RGB byte order.
 * NOTE(review): the trailing assert(src1 == src2) present in the sibling
 * functions is elided from the visible text here. */
310 static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
311 const uint8_t *src2, long width, uint32_t *unused)
315 for (i=0; i<width; i++) {
316 int r= src1[3*i + 0];
317 int g= src1[3*i + 1];
318 int b= src1[3*i + 2];
320 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
321 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* Packed RGB24 -> chroma with 2:1 horizontal averaging; mirror of
 * bgr24ToUV_half_c with RGB byte order. */
325 static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
326 const uint8_t *src2, long width, uint32_t *unused)
330 for (i=0; i<width; i++) {
331 int r= src1[6*i + 0] + src1[6*i + 3];
332 int g= src1[6*i + 1] + src1[6*i + 4];
333 int b= src1[6*i + 2] + src1[6*i + 5];
335 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
336 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
341 // bilinear / bicubic scaling
/* Generic horizontal FIR scaler: for each output sample i, compute the dot
 * product of filterSize taps, filter[filterSize*i + j], with the source
 * window starting at filterPos[i]; renormalise by >>7 and clamp to
 * INT16_MAX because the bicubic filter can overshoot.
 * NOTE(review): the srcW/xInc/filterSize parameters and the `int val=0;`
 * accumulator line are elided from the visible text. */
342 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
344 const int16_t *filter, const int16_t *filterPos,
348 for (i=0; i<dstW; i++) {
350 int srcPos= filterPos[i];
352 //printf("filterPos: %d\n", filterPos[i]);
353 for (j=0; j<filterSize; j++) {
354 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
355 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
357 //filter += hFilterSize;
358 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
363 //FIXME all pal and rgb srcFormats could do this convertion as well
364 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range chroma to full (JPEG) range, in place, on the 16-bit
 * intermediate samples: U lives at dst[i], V at dst[i+VOFW].  The FFMIN
 * clamp keeps the 32-bit multiply from overflowing. */
365 static void chrRangeToJpeg_c(uint16_t *dst, int width)
368 for (i = 0; i < width; i++) {
369 dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
370 dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
/* Compress full-range (JPEG) chroma back to limited range, in place;
 * U at dst[i], V at dst[i+VOFW]. */
373 static void chrRangeFromJpeg_c(uint16_t *dst, int width)
376 for (i = 0; i < width; i++) {
377 dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469
378 dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
/* Expand limited-range luma to full (JPEG) range, in place, on the 16-bit
 * intermediate samples (19077/2^14 gain with a fixed offset).  The input
 * is clamped to 30189 so the 32-bit multiply cannot overflow. */
static void lumRangeToJpeg_c(uint16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int v = dst[n];
        if (v > 30189)
            v = 30189;
        dst[n] = (v*19077 - 39057361) >> 14;
    }
}
/* Compress full-range (JPEG) luma to limited range, in place, on the
 * 16-bit intermediate samples: fixed-point scale-and-offset at 14-bit
 * precision (y*14071 + 33561947) >> 14. */
static void lumRangeFromJpeg_c(uint16_t *dst, int width)
{
    uint16_t *p = dst;
    const uint16_t *end = dst + width;

    while (p < end) {
        *p = (uint16_t)((*p * 14071 + 33561947) >> 14);
        p++;
    }
}
/* Fast bilinear horizontal luma scale: walks the source with a 16.16
 * fixed-point position (xx = integer part, xalpha = top 7 bits of the
 * fraction) and writes linearly interpolated 15-bit samples.
 * NOTE(review): the xpos initialisation and its per-iteration increment
 * (xpos += xInc) are elided from the visible text. */
394 static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
395 const uint8_t *src, int srcW, int xInc)
399 for (i=0;i<dstWidth;i++) {
400 register unsigned int xx=xpos>>16;
401 register unsigned int xalpha=(xpos&0xFFFF)>>9;
402 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
407 // *** horizontal scale Y line to temp buffer
/* Horizontally scale one luma (or alpha, when isAlpha != 0) input line into
 * the 16-bit intermediate buffer: first an optional pixel-format conversion
 * into formatConvBuffer via c->lumToYV12 / c->alpToYV12, then either the
 * generic FIR scaler (c->hScale) or the fast-bilinear path, then an
 * optional luma range conversion (never applied to alpha).
 * NOTE(review): the `if (toYV12)` guard and surrounding braces appear
 * elided from the visible text. */
408 static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
409 const uint8_t *src, int srcW, int xInc,
410 const int16_t *hLumFilter,
411 const int16_t *hLumFilterPos, int hLumFilterSize,
412 uint8_t *formatConvBuffer,
413 uint32_t *pal, int isAlpha)
415 void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
416 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
418 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
421 toYV12(formatConvBuffer, src, srcW, pal);
422 src= formatConvBuffer;
425 if (!c->hyscale_fast) {
426 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
427 } else { // fast bilinear upscale / crap downscale
428 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
432 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scale: U goes to dst[i], V to
 * dst[i+VOFW], both interpolated from a 16.16 fixed-point source position.
 * NOTE(review): the two pairs of interpolation statements below are
 * alternative implementations selected by preprocessor conditionals that
 * are elided from the visible text — only one pair is actually compiled. */
435 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
436 long dstWidth, const uint8_t *src1,
437 const uint8_t *src2, int srcW, int xInc)
441 for (i=0;i<dstWidth;i++) {
442 register unsigned int xx=xpos>>16;
443 register unsigned int xalpha=(xpos&0xFFFF)>>9;
444 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
445 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
447 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
448 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
/* Horizontally scale one pair of chroma lines into the 16-bit intermediate
 * buffer (U at offset 0, V at offset VOFW): optional pixel-format
 * conversion into formatConvBuffer via c->chrToYV12 first, then either two
 * generic FIR passes or the fast-bilinear path, then an optional chroma
 * range conversion.
 * NOTE(review): the `if (c->chrToYV12)` guard and braces appear elided
 * from the visible text. */
454 inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
455 const uint8_t *src1, const uint8_t *src2,
456 int srcW, int xInc, const int16_t *hChrFilter,
457 const int16_t *hChrFilterPos, int hChrFilterSize,
458 uint8_t *formatConvBuffer, uint32_t *pal)
461 src1 += c->chrSrcOffset;
462 src2 += c->chrSrcOffset;
465 c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
466 src1= formatConvBuffer;
467 src2= formatConvBuffer+VOFW;
470 if (!c->hcscale_fast) {
471 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
472 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
473 } else { // fast bilinear upscale / crap downscale
474 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
477 if (c->chrConvertRange)
478 c->chrConvertRange(dst, dstWidth);
/* Compile-time switch for verbose ring-buffer tracing in swScale_c();
 * DEBUG_BUFFERS expects a SwsContext pointer named `c` in the calling
 * scope for av_log. */
481 #define DEBUG_SWSCALE_BUFFERS 0
482 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling entry point for one input slice.  For every output line it
 * (1) horizontally scales the source lines it needs into the luma/chroma
 * (and optionally alpha) ring buffers, then (2) runs the matching vertical
 * output routine: NV12/NV21, planar YUV (unscaled, 16-bit, or general), or
 * packed RGB (unscaled, bilinear, or general).  Returns the number of
 * output lines produced (dstY - lastDstY).
 * NOTE(review): large parts of this function (declarations such as dstY /
 * lastDstY / enough_lines, braces, the MMX-filter setup, and the two-sided
 * "canMMX2BeUsed"-style split around line 732) are elided from the visible
 * text; the comments below annotate only what is visible. */
484 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
485 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
487 /* load a few things into local vars to make the code more readable? and faster */
488 const int srcW= c->srcW;
489 const int dstW= c->dstW;
490 const int dstH= c->dstH;
491 const int chrDstW= c->chrDstW;
492 const int chrSrcW= c->chrSrcW;
493 const int lumXInc= c->lumXInc;
494 const int chrXInc= c->chrXInc;
495 const enum PixelFormat dstFormat= c->dstFormat;
496 const int flags= c->flags;
497 int16_t *vLumFilterPos= c->vLumFilterPos;
498 int16_t *vChrFilterPos= c->vChrFilterPos;
499 int16_t *hLumFilterPos= c->hLumFilterPos;
500 int16_t *hChrFilterPos= c->hChrFilterPos;
501 int16_t *vLumFilter= c->vLumFilter;
502 int16_t *vChrFilter= c->vChrFilter;
503 int16_t *hLumFilter= c->hLumFilter;
504 int16_t *hChrFilter= c->hChrFilter;
505 int32_t *lumMmxFilter= c->lumMmxFilter;
506 int32_t *chrMmxFilter= c->chrMmxFilter;
507 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
508 const int vLumFilterSize= c->vLumFilterSize;
509 const int vChrFilterSize= c->vChrFilterSize;
510 const int hLumFilterSize= c->hLumFilterSize;
511 const int hChrFilterSize= c->hChrFilterSize;
512 int16_t **lumPixBuf= c->lumPixBuf;
513 int16_t **chrPixBuf= c->chrPixBuf;
514 int16_t **alpPixBuf= c->alpPixBuf;
515 const int vLumBufSize= c->vLumBufSize;
516 const int vChrBufSize= c->vChrBufSize;
517 uint8_t *formatConvBuffer= c->formatConvBuffer;
518 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
/* round the chroma slice height up, not down */
519 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
521 uint32_t *pal=c->pal_yuv;
523 /* vars which will change and which we need to store back in the context */
525 int lumBufIndex= c->lumBufIndex;
526 int chrBufIndex= c->chrBufIndex;
527 int lastInLumBuf= c->lastInLumBuf;
528 int lastInChrBuf= c->lastInChrBuf;
/* packed sources carry everything in plane 0: mirror its stride into the
 * unused plane slots, then apply vertical chroma dropping. */
530 if (isPacked(c->srcFormat)) {
538 srcStride[3]= srcStride[0];
540 srcStride[1]<<= c->vChrDrop;
541 srcStride[2]<<= c->vChrDrop;
543 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
544 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
545 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
546 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
547 srcSliceY, srcSliceH, dstY, dstH);
548 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
549 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
551 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
552 static int warnedAlready=0; //FIXME move this into the context perhaps
553 if (flags & SWS_PRINT_INFO && !warnedAlready) {
554 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
555 " ->cannot do aligned memory accesses anymore\n");
560 /* Note the user might start scaling the picture in the middle so this
561 will not get executed. This is not really intended but works
562 currently, so people might do it. */
/* Per-output-line loop: fill the ring buffers as far as this slice allows,
 * then emit the line if all needed input lines are available. */
573 for (;dstY < dstH; dstY++) {
574 unsigned char *dest =dst[0]+dstStride[0]*dstY;
575 const int chrDstY= dstY>>c->chrDstVSubSample;
576 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
577 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
578 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
580 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
581 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
582 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
583 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
584 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
585 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
588 //handle holes (FAST_BILINEAR & weird filters)
589 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
590 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
591 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
592 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
594 DEBUG_BUFFERS("dstY: %d\n", dstY);
595 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
596 firstLumSrcY, lastLumSrcY, lastInLumBuf);
597 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
598 firstChrSrcY, lastChrSrcY, lastInChrBuf);
600 // Do we have enough lines in this slice to output the dstY line
601 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* Not enough lines: buffer everything this slice provides and stop. */
604 lastLumSrcY = srcSliceY + srcSliceH - 1;
605 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
606 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
607 lastLumSrcY, lastChrSrcY);
610 //Do horizontal scaling
611 while(lastInLumBuf < lastLumSrcY) {
612 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
613 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
615 assert(lumBufIndex < 2*vLumBufSize);
616 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
617 assert(lastInLumBuf + 1 - srcSliceY >= 0);
618 hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
619 hLumFilter, hLumFilterPos, hLumFilterSize,
622 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
623 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
624 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
628 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
629 lumBufIndex, lastInLumBuf);
631 while(lastInChrBuf < lastChrSrcY) {
632 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
633 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
635 assert(chrBufIndex < 2*vChrBufSize);
636 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
637 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
638 //FIXME replace parameters through context struct (some at least)
640 if (c->needs_hcscale)
641 hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
642 hChrFilter, hChrFilterPos, hChrFilterSize,
646 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
647 chrBufIndex, lastInChrBuf);
649 //wrap buf index around to stay inside the ring buffer
650 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
651 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
653 break; //we can't output a dstY line so let's try with the next slice
/* Build pointers to the window of ring-buffer lines this output line
 * filters over; the +vLumBufSize/+vChrBufSize keeps the index positive. */
656 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
657 const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
658 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
659 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
660 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
661 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
663 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
664 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
665 dest, uDest, dstW, chrDstW, dstFormat);
666 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
667 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
668 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
669 if (is16BPS(dstFormat)) {
671 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
672 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
673 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
675 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
676 const int16_t *lumBuf = lumSrcPtr[0];
677 const int16_t *chrBuf= chrSrcPtr[0];
678 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
679 c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
680 } else { //General YV12
682 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
683 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
684 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
687 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
688 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
689 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
690 int chrAlpha= vChrFilter[2*dstY+1];
691 if(flags & SWS_FULL_CHR_H_INT) {
692 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
693 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
694 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
695 alpSrcPtr, dest, dstW, dstY);
697 c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
698 alpPixBuf ? *alpSrcPtr : NULL,
699 dest, dstW, chrAlpha, dstFormat, flags, dstY);
701 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
702 int lumAlpha= vLumFilter[2*dstY+1];
703 int chrAlpha= vChrFilter[2*dstY+1];
705 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
707 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
708 if(flags & SWS_FULL_CHR_H_INT) {
709 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
710 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
711 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
712 alpSrcPtr, dest, dstW, dstY);
714 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
715 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
716 dest, dstW, lumAlpha, chrAlpha, dstY);
718 } else { //general RGB
719 if(flags & SWS_FULL_CHR_H_INT) {
721 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
722 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
723 alpSrcPtr, dest, dstW, dstY);
726 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
727 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
728 alpSrcPtr, dest, dstW, dstY);
732 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
733 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
734 const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
735 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
736 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
737 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
738 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
740 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
741 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
742 dest, uDest, dstW, chrDstW, dstFormat);
743 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
744 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
745 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
746 if (is16BPS(dstFormat)) {
748 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
749 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
750 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
754 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
755 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
756 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
759 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
760 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
761 if(flags & SWS_FULL_CHR_H_INT) {
763 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
764 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
765 alpSrcPtr, dest, dstW, dstY);
768 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
769 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
770 alpSrcPtr, dest, dstW, dstY);
/* Opaque output format with no alpha input: saturate the alpha plane. */
776 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
777 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
779 /* store changed local vars back in the context */
781 c->lumBufIndex= lumBufIndex;
782 c->chrBufIndex= chrBufIndex;
783 c->lastInLumBuf= lastInLumBuf;
784 c->lastInChrBuf= lastInChrBuf;
786 return dstY - lastDstY;
/* Populate the context with the plain-C implementations: vertical output
 * routines, the horizontal FIR scaler, optional fast-bilinear paths, the
 * per-pixel-format input converters (chrToYV12 / lumToYV12 / alpToYV12,
 * with *_half chroma variants when the source is horizontally
 * subsampled), the range converters, and the needs_hcscale flag.
 * NOTE(review): several switch(...) headers, case labels and closing
 * braces are elided from the visible text. */
789 static void sws_init_swScale_c(SwsContext *c)
791 enum PixelFormat srcFormat = c->srcFormat;
793 c->yuv2nv12X = yuv2nv12X_c;
794 c->yuv2yuv1 = yuv2yuv1_c;
795 c->yuv2yuvX = yuv2yuvX_c;
796 c->yuv2packed1 = yuv2packed1_c;
797 c->yuv2packed2 = yuv2packed2_c;
798 c->yuv2packedX = yuv2packedX_c;
800 c->hScale = hScale_c;
802 if (c->flags & SWS_FAST_BILINEAR)
804 c->hyscale_fast = hyscale_fast_c;
805 c->hcscale_fast = hcscale_fast_c;
/* Chroma input converter, selected on the source pixel format. */
810 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
811 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
812 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
813 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
817 case PIX_FMT_BGR4_BYTE:
818 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
819 case PIX_FMT_YUV420P9BE:
820 case PIX_FMT_YUV420P9LE: c->chrToYV12 = yuv9ToUV_c; break;
821 case PIX_FMT_YUV420P10BE:
822 case PIX_FMT_YUV420P10LE: c->chrToYV12 = yuv10ToUV_c; break;
823 case PIX_FMT_YUV420P16BE:
824 case PIX_FMT_YUV422P16BE:
825 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
826 case PIX_FMT_YUV420P16LE:
827 case PIX_FMT_YUV422P16LE:
828 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* Horizontally subsampled chroma: use the *_half converters that
 * average two adjacent input pixels. */
830 if (c->chrSrcHSubSample) {
832 case PIX_FMT_RGB48BE:
833 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
834 case PIX_FMT_BGR48BE:
835 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
836 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
837 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
838 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
839 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
840 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
841 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
842 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
843 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
844 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
845 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
849 case PIX_FMT_RGB48BE:
850 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
851 case PIX_FMT_BGR48BE:
852 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
853 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
854 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
855 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
856 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
857 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
858 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
859 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
860 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
861 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
862 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
/* Luma input converter, selected on the source pixel format. */
869 case PIX_FMT_YUV420P9BE:
870 case PIX_FMT_YUV420P9LE: c->lumToYV12 = yuv9ToY_c; break;
871 case PIX_FMT_YUV420P10BE:
872 case PIX_FMT_YUV420P10LE: c->lumToYV12 = yuv10ToY_c; break;
873 case PIX_FMT_YUYV422 :
874 case PIX_FMT_YUV420P16BE:
875 case PIX_FMT_YUV422P16BE:
876 case PIX_FMT_YUV444P16BE:
878 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
879 case PIX_FMT_UYVY422 :
880 case PIX_FMT_YUV420P16LE:
881 case PIX_FMT_YUV422P16LE:
882 case PIX_FMT_YUV444P16LE:
883 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
884 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
885 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
886 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
887 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
888 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
889 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
893 case PIX_FMT_BGR4_BYTE:
894 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
895 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
896 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
897 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
898 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
899 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
900 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
901 case PIX_FMT_RGB48BE:
902 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
903 case PIX_FMT_BGR48BE:
904 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
/* Alpha input converter (only for formats that carry alpha). */
909 case PIX_FMT_RGB32_1:
911 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
912 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
924 case PIX_FMT_RGB48LE:
925 case PIX_FMT_BGR48LE:
/* Range conversion only applies to YUV outputs; RGB handles it in the
 * YUV->RGB tables instead. */
932 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
934 c->lumConvertRange = lumRangeFromJpeg_c;
935 c->chrConvertRange = chrRangeFromJpeg_c;
937 c->lumConvertRange = lumRangeToJpeg_c;
938 c->chrConvertRange = chrRangeToJpeg_c;
942 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
943 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
944 c->needs_hcscale = 1;