2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Vertical multi-tap scale to planar YUV(A): thin C wrapper that forwards all
 * filter/source/destination arguments to yuv2yuvXinC().
 * NOTE(review): the SwsContext *c parameter is unused here; the signature
 * matches the c->yuv2yuvX function-pointer slot set in sws_init_swScale_c(). */
21 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
22 const int16_t **lumSrc, int lumFilterSize,
23 const int16_t *chrFilter, const int16_t **chrSrc,
24 int chrFilterSize, const int16_t **alpSrc,
25 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
26 uint8_t *aDest, long dstW, long chrDstW)
28 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
29 chrFilter, chrSrc, chrFilterSize,
30 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
/* Vertical multi-tap scale to NV12/NV21 (interleaved-chroma) output:
 * forwards to yuv2nv12XinC(). dstFormat selects NV12 vs NV21 UV ordering. */
33 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
34 const int16_t **lumSrc, int lumFilterSize,
35 const int16_t *chrFilter, const int16_t **chrSrc,
36 int chrFilterSize, uint8_t *dest, uint8_t *uDest,
37 int dstW, int chrDstW, enum PixelFormat dstFormat)
39 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
40 chrFilter, chrSrc, chrFilterSize,
41 dest, uDest, dstW, chrDstW, dstFormat);
/* Unscaled vertical pass (single source line per output line): converts the
 * 15-bit intermediate samples back to 8-bit planar YUV(A).
 * (val+64)>>7 rounds to nearest when narrowing; luma/chroma results are
 * clipped to [0,255]. The second chroma plane lives at offset VOFW in chrSrc.
 * NOTE(review): some statements of this function are not visible in this
 * chunk (initial clamping of u/v and the luma store are sampled out). */
44 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
45 const int16_t *chrSrc, const int16_t *alpSrc,
46 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
47 uint8_t *aDest, long dstW, long chrDstW)
50 for (i=0; i<dstW; i++) {
51 int val= (lumSrc[i]+64)>>7;
62 for (i=0; i<chrDstW; i++) {
63 int u=(chrSrc[i ]+64)>>7;
/* chroma V samples are stored VOFW entries after the U samples */
64 int v=(chrSrc[i + VOFW]+64)>>7;
68 else if (u>255) u=255;
70 else if (v>255) v=255;
/* alpha plane is only written when alpha support is compiled in and requested */
77 if (CONFIG_SWSCALE_ALPHA && aDest)
78 for (i=0; i<dstW; i++) {
79 int val= (alpSrc[i]+64)>>7;
80 aDest[i]= av_clip_uint8(val);
86 * vertical scale YV12 to RGB
/* Vertical multi-tap scale with packed (RGB-style) output: forwards to the
 * generic C implementation yuv2packedXinC(). Matches the c->yuv2packedX slot. */
88 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
89 const int16_t **lumSrc, int lumFilterSize,
90 const int16_t *chrFilter, const int16_t **chrSrc,
91 int chrFilterSize, const int16_t **alpSrc,
92 uint8_t *dest, long dstW, long dstY)
94 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
95 chrFilter, chrSrc, chrFilterSize,
96 alpSrc, dest, dstW, dstY);
100 * vertical bilinear scale YV12 to RGB
/* Two-tap vertical blend to packed output: blends buf0/buf1 (luma),
 * uvbuf0/uvbuf1 (chroma) and abuf0/abuf1 (alpha) with 12-bit weights
 * (yalpha/uvalpha in [0,4096)), then emits via the YSCALE_YUV_2_ANYRGB_C
 * dispatch macro which selects RGB/packed/gray16/mono code paths. */
102 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
103 const uint16_t *buf1, const uint16_t *uvbuf0,
104 const uint16_t *uvbuf1, const uint16_t *abuf0,
105 const uint16_t *abuf1, uint8_t *dest, int dstW,
106 int yalpha, int uvalpha, int y)
/* complementary weights: weight(buf0) + weight(buf1) == 4095 */
108 int yalpha1=4095- yalpha;
109 int uvalpha1=4095-uvalpha;
112 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
116 * YV12 to RGB without scaling or interpolating
/* Single-source-line vertical pass to packed output. For full-chroma
 * horizontal interpolation it delegates to c->yuv2packed2 with zero luma
 * blend; otherwise uvalpha < 2048 selects the nearest chroma line (1B variant
 * averages the two chroma lines). */
118 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
119 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
120 const uint16_t *abuf0, uint8_t *dest, int dstW,
121 int uvalpha, enum PixelFormat dstFormat,
127 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
128 const int yalpha= 4096; //FIXME ...
130 if (flags&SWS_FULL_CHR_H_INT) {
131 c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
/* uvalpha < 2048: chroma line 0 dominates -> use the 1-tap (non-blending) macros */
135 if (uvalpha < 2048) {
136 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
138 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
142 //FIXME yuy2* can read up to 7 samples too much
/* Extract the luma plane from packed YUYV (Y at even byte offsets).
 * NOTE(review): the loop body (the actual store) is not visible in this chunk. */
144 static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
148 for (i=0; i<width; i++)
/* Extract U/V planes from packed YUYV: U at 4*i+1, V at 4*i+3.
 * Both U and V come from src1; the assert documents that callers pass the
 * same pointer for src1 and src2. */
152 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
153 const uint8_t *src2, long width, uint32_t *unused)
156 for (i=0; i<width; i++) {
157 dstU[i]= src1[4*i + 1];
158 dstV[i]= src1[4*i + 3];
160 assert(src1 == src2);
/* Narrow little-endian 16-bit chroma planes to 8 bit by taking the high byte
 * (offset 2*i+1) of each sample, from separate U and V source planes. */
163 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
164 const uint8_t *src2, long width, uint32_t *unused)
167 // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
168 // we need to skip each second pixel. Same for BEToUV.
169 for (i=0; i<width; i++) {
170 dstU[i]= src1[2*i + 1];
171 dstV[i]= src2[2*i + 1];
175 /* This is almost identical to the previous, end exists only because
176 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* Extract luma from packed UYVY (Y at odd byte offsets).
 * NOTE(review): the loop body (the actual store) is not visible in this chunk. */
177 static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
181 for (i=0; i<width; i++)
/* Extract U/V from packed UYVY: U at 4*i+0, V at 4*i+2. As with yuy2ToUV_c,
 * src1 and src2 must alias (asserted). */
185 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
186 const uint8_t *src2, long width, uint32_t *unused)
189 for (i=0; i<width; i++) {
190 dstU[i]= src1[4*i + 0];
191 dstV[i]= src1[4*i + 2];
193 assert(src1 == src2);
/* Big-endian counterpart of LEToUV_c: narrow 16-bit chroma planes to 8 bit.
 * NOTE(review): the loop body is not visible in this chunk; presumably it
 * takes the high byte at offset 2*i — confirm against the full source. */
196 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
197 const uint8_t *src2, long width, uint32_t *unused)
200 for (i=0; i<width; i++) {
/* De-interleave an NV12/NV21-style packed chroma plane: even bytes go to
 * dst1, odd bytes to dst2. Callers swap dst1/dst2 to select NV12 vs NV21. */
206 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
207 const uint8_t *src, long width)
210 for (i = 0; i < width; i++) {
211 dst1[i] = src[2*i+0];
212 dst2[i] = src[2*i+1];
/* NV12 chroma input: interleaved plane is U,V,U,V..., so U lands in dstU. */
216 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
217 const uint8_t *src1, const uint8_t *src2,
218 long width, uint32_t *unused)
220 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma input: interleaved plane is V,U,V,U..., so destinations are
 * swapped relative to nv12ToUV_c. */
223 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
224 const uint8_t *src1, const uint8_t *src2,
225 long width, uint32_t *unused)
227 nvXXtoUV_c(dstV, dstU, src1, width);
230 // FIXME Maybe dither instead.
/* Generate {LE,BE}{9,10}ToY_c / ToUV_c input converters: read 16-bit samples
 * with the given endian reader (AV_RL16/AV_RB16) and truncate a depth-bit
 * value to 8 bits via >>(depth-8). No dithering (see FIXME above). */
231 #define YUV_NBPS(depth, endianness, rfunc) \
232 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
233 const uint8_t *_srcU, const uint8_t *_srcV, \
234 long width, uint32_t *unused) \
237 const uint16_t *srcU = (const uint16_t*)_srcU; \
238 const uint16_t *srcV = (const uint16_t*)_srcV; \
239 for (i = 0; i < width; i++) { \
240 dstU[i] = rfunc(&srcU[i])>>(depth-8); \
241 dstV[i] = rfunc(&srcV[i])>>(depth-8); \
245 static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
248 const uint16_t *srcY = (const uint16_t*)_srcY; \
249 for (i = 0; i < width; i++) \
250 dstY[i] = rfunc(&srcY[i])>>(depth-8); \
/* instantiate converters for 9- and 10-bit, both endiannesses */
253 YUV_NBPS( 9, LE, AV_RL16)
254 YUV_NBPS( 9, BE, AV_RB16)
255 YUV_NBPS(10, LE, AV_RL16)
256 YUV_NBPS(10, BE, AV_RB16)
/* BGR24 -> luma: fixed-point Y = (RY*r + GY*g + BY*b + round) >> RGB2YUV_SHIFT.
 * The 33<<(RGB2YUV_SHIFT-1) term combines rounding with the +16 luma offset.
 * NOTE(review): the b/g/r byte loads are not visible in this chunk. */
258 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
259 long width, uint32_t *unused)
262 for (i=0; i<width; i++) {
267 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* BGR24 -> chroma: fixed-point U/V with the 257<<(RGB2YUV_SHIFT-1) term
 * folding rounding and the +128 chroma bias together. src2 must alias src1. */
271 static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
272 const uint8_t *src2, long width, uint32_t *unused)
275 for (i=0; i<width; i++) {
276 int b= src1[3*i + 0];
277 int g= src1[3*i + 1];
278 int r= src1[3*i + 2];
280 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
281 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
283 assert(src1 == src2);
/* BGR24 -> chroma at half horizontal resolution: averages each pair of
 * adjacent pixels (sums two samples, shift increased by 1 to compensate).
 * Used when the source format is horizontally chroma-subsampled. */
286 static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
287 const uint8_t *src2, long width, uint32_t *unused)
290 for (i=0; i<width; i++) {
291 int b= src1[6*i + 0] + src1[6*i + 3];
292 int g= src1[6*i + 1] + src1[6*i + 4];
293 int r= src1[6*i + 2] + src1[6*i + 5];
295 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
296 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
298 assert(src1 == src2);
/* RGB24 -> luma: same math as bgr24ToY_c with R and B byte order swapped.
 * NOTE(review): the r/g/b byte loads are not visible in this chunk. */
301 static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
305 for (i=0; i<width; i++) {
310 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/* RGB24 -> chroma: same math as bgr24ToUV_c with R and B byte order swapped. */
314 static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
315 const uint8_t *src2, long width, uint32_t *unused)
319 for (i=0; i<width; i++) {
320 int r= src1[3*i + 0];
321 int g= src1[3*i + 1];
322 int b= src1[3*i + 2];
324 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
325 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/* RGB24 -> chroma at half horizontal resolution: pairwise pixel averaging,
 * mirror of bgr24ToUV_half_c with R and B byte order swapped. */
329 static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
330 const uint8_t *src2, long width, uint32_t *unused)
334 for (i=0; i<width; i++) {
335 int r= src1[6*i + 0] + src1[6*i + 3];
336 int g= src1[6*i + 1] + src1[6*i + 4];
337 int b= src1[6*i + 2] + src1[6*i + 5];
339 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
340 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
345 // bilinear / bicubic scaling
/* Generic horizontal FIR scaler: for each output sample, convolve filterSize
 * taps of 8-bit input starting at filterPos[i] with 16-bit coefficients,
 * then >>7 and clamp to the 15-bit intermediate range used by the vertical
 * scalers. Matches the c->hScale function-pointer slot. */
346 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
348 const int16_t *filter, const int16_t *filterPos,
352 for (i=0; i<dstW; i++) {
354 int srcPos= filterPos[i];
356 //printf("filterPos: %d\n", filterPos[i]);
357 for (j=0; j<filterSize; j++) {
358 //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
359 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
361 //filter += hFilterSize;
/* clamp: the cubic filter's accumulated value can exceed 15 bits */
362 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
367 //FIXME all pal and rgb srcFormats could do this convertion as well
368 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand chroma from limited (MPEG) to full (JPEG) range in the 15-bit
 * intermediate domain; U in dst[0..width), V at offset VOFW. The FFMIN guards
 * against overflow of the fixed-point multiply. */
369 static void chrRangeToJpeg_c(uint16_t *dst, int width)
372 for (i = 0; i < width; i++) {
373 dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
374 dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
/* Compress chroma from full (JPEG) to limited (MPEG) range in the 15-bit
 * intermediate domain; inverse of chrRangeToJpeg_c. */
377 static void chrRangeFromJpeg_c(uint16_t *dst, int width)
380 for (i = 0; i < width; i++) {
381 dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469
382 dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
/* Expand luma from limited (MPEG) to full (JPEG) range in the 15-bit
 * intermediate domain; FFMIN guards the fixed-point multiply. */
385 static void lumRangeToJpeg_c(uint16_t *dst, int width)
388 for (i = 0; i < width; i++)
389 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
/* Compress luma from full (JPEG) to limited (MPEG) range; inverse of
 * lumRangeToJpeg_c. */
391 static void lumRangeFromJpeg_c(uint16_t *dst, int width)
394 for (i = 0; i < width; i++)
395 dst[i] = (dst[i]*14071 + 33561947)>>14;
/* Fast bilinear horizontal luma scaler: 16.16 fixed-point position (xpos),
 * 7-bit interpolation weight (xalpha), output in the 15-bit intermediate
 * format. xInc advances xpos per output sample (increment not visible here). */
398 static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
399 const uint8_t *src, int srcW, int xInc)
403 for (i=0;i<dstWidth;i++) {
404 register unsigned int xx=xpos>>16;
405 register unsigned int xalpha=(xpos&0xFFFF)>>9;
406 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
411 // *** horizontal scale Y line to temp buffer
/* Horizontal scaling driver for one luma (or alpha, when isAlpha) line:
 * 1) optional input conversion to 8-bit Y via c->lumToYV12/c->alpToYV12
 *    into formatConvBuffer,
 * 2) horizontal scale via c->hScale (FIR) or c->hyscale_fast (bilinear),
 * 3) optional luma range conversion (skipped for alpha). */
412 static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
413 const uint8_t *src, int srcW, int xInc,
414 const int16_t *hLumFilter,
415 const int16_t *hLumFilterPos, int hLumFilterSize,
416 uint8_t *formatConvBuffer,
417 uint32_t *pal, int isAlpha)
419 void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
420 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
422 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
425 toYV12(formatConvBuffer, src, srcW, pal);
426 src= formatConvBuffer;
429 if (!c->hyscale_fast) {
430 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
431 } else { // fast bilinear upscale / crap downscale
432 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
436 convertRange(dst, dstWidth);
/* Fast bilinear horizontal chroma scaler: processes U (src1 -> dst[i]) and
 * V (src2 -> dst[i+VOFW]) in lockstep with 16.16 fixed-point stepping.
 * NOTE(review): two loop-body variants appear here (xalpha^127 weighted sum
 * vs. (<<7)+delta*xalpha); intervening preprocessor lines are not visible in
 * this chunk — confirm the conditional structure against the full source. */
439 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
440 long dstWidth, const uint8_t *src1,
441 const uint8_t *src2, int srcW, int xInc)
445 for (i=0;i<dstWidth;i++) {
446 register unsigned int xx=xpos>>16;
447 register unsigned int xalpha=(xpos&0xFFFF)>>9;
448 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
449 dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
451 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
452 dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
/* Horizontal scaling driver for one chroma line pair (U in src1, V in src2):
 * 1) optional input conversion via c->chrToYV12 into formatConvBuffer
 *    (U plane at offset 0, V plane at offset VOFW),
 * 2) horizontal scale via c->hScale (FIR, called once per plane) or
 *    c->hcscale_fast (bilinear, both planes at once),
 * 3) optional chroma range conversion. */
458 inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
459 const uint8_t *src1, const uint8_t *src2,
460 int srcW, int xInc, const int16_t *hChrFilter,
461 const int16_t *hChrFilterPos, int hChrFilterSize,
462 uint8_t *formatConvBuffer, uint32_t *pal)
465 src1 += c->chrSrcOffset;
466 src2 += c->chrSrcOffset;
469 c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
470 src1= formatConvBuffer;
471 src2= formatConvBuffer+VOFW;
474 if (!c->hcscale_fast) {
475 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
476 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
477 } else { // fast bilinear upscale / crap downscale
478 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
481 if (c->chrConvertRange)
482 c->chrConvertRange(dst, dstWidth);
/* Compile-time debug switch: when DEBUG_SWSCALE_BUFFERS is 1, DEBUG_BUFFERS
 * logs ring-buffer state via av_log; at 0 the calls compile away. */
485 #define DEBUG_SWSCALE_BUFFERS 0
486 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/* Main C scaling entry point for one input slice.
 * Per output line: horizontally scale the needed source lines into the
 * luma/chroma/alpha ring buffers (lumPixBuf/chrPixBuf/alpPixBuf), then run the
 * appropriate vertical output function (planar YUV, NV12/21, 16/9/10-bit, or
 * packed RGB paths). Returns the number of output lines written
 * (dstY - lastDstY). Slices may arrive incrementally; lines are buffered
 * until enough input is available ("enough_lines").
 * NOTE(review): numerous statements (declarations of dstY/lastDstY/
 * enough_lines, several braces and else-branches) are not visible in this
 * sampled chunk; comments below describe only what is visible. */
488 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
489 int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
491 /* load a few things into local vars to make the code more readable? and faster */
492 const int srcW= c->srcW;
493 const int dstW= c->dstW;
494 const int dstH= c->dstH;
495 const int chrDstW= c->chrDstW;
496 const int chrSrcW= c->chrSrcW;
497 const int lumXInc= c->lumXInc;
498 const int chrXInc= c->chrXInc;
499 const enum PixelFormat dstFormat= c->dstFormat;
500 const int flags= c->flags;
501 int16_t *vLumFilterPos= c->vLumFilterPos;
502 int16_t *vChrFilterPos= c->vChrFilterPos;
503 int16_t *hLumFilterPos= c->hLumFilterPos;
504 int16_t *hChrFilterPos= c->hChrFilterPos;
505 int16_t *vLumFilter= c->vLumFilter;
506 int16_t *vChrFilter= c->vChrFilter;
507 int16_t *hLumFilter= c->hLumFilter;
508 int16_t *hChrFilter= c->hChrFilter;
509 int32_t *lumMmxFilter= c->lumMmxFilter;
510 int32_t *chrMmxFilter= c->chrMmxFilter;
511 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
512 const int vLumFilterSize= c->vLumFilterSize;
513 const int vChrFilterSize= c->vChrFilterSize;
514 const int hLumFilterSize= c->hLumFilterSize;
515 const int hChrFilterSize= c->hChrFilterSize;
516 int16_t **lumPixBuf= c->lumPixBuf;
517 int16_t **chrPixBuf= c->chrPixBuf;
518 int16_t **alpPixBuf= c->alpPixBuf;
519 const int vLumBufSize= c->vLumBufSize;
520 const int vChrBufSize= c->vChrBufSize;
521 uint8_t *formatConvBuffer= c->formatConvBuffer;
522 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
/* ceiling division: chroma slice height for a vertically subsampled source */
523 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
525 uint32_t *pal=c->pal_yuv;
527 /* vars which will change and which we need to store back in the context */
529 int lumBufIndex= c->lumBufIndex;
530 int chrBufIndex= c->chrBufIndex;
531 int lastInLumBuf= c->lastInLumBuf;
532 int lastInChrBuf= c->lastInChrBuf;
/* packed input: replicate plane 0 pointer/stride for the other planes
 * (assignments for src[]/srcStride[] 1..3 are partly sampled out here) */
534 if (isPacked(c->srcFormat)) {
542 srcStride[3]= srcStride[0];
544 srcStride[1]<<= c->vChrDrop;
545 srcStride[2]<<= c->vChrDrop;
547 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
548 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
549 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
550 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
551 srcSliceY, srcSliceH, dstY, dstH);
552 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
553 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* warn once if output strides break the 8-byte alignment the SIMD paths want */
555 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
556 static int warnedAlready=0; //FIXME move this into the context perhaps
557 if (flags & SWS_PRINT_INFO && !warnedAlready) {
558 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
559 " ->cannot do aligned memory accesses anymore\n");
564 /* Note the user might start scaling the picture in the middle so this
565 will not get executed. This is not really intended but works
566 currently, so people might do it. */
/* main loop: produce one destination line per iteration */
577 for (;dstY < dstH; dstY++) {
578 unsigned char *dest =dst[0]+dstStride[0]*dstY;
579 const int chrDstY= dstY>>c->chrDstVSubSample;
580 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
581 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
582 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
584 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
585 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
586 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
587 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
588 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
589 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
592 //handle holes (FAST_BILINEAR & weird filters)
593 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
594 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
595 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
596 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
598 DEBUG_BUFFERS("dstY: %d\n", dstY);
599 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
600 firstLumSrcY, lastLumSrcY, lastInLumBuf);
601 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
602 firstChrSrcY, lastChrSrcY, lastInChrBuf);
604 // Do we have enough lines in this slice to output the dstY line
605 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* not enough input yet: clamp to what this slice provides and buffer it */
608 lastLumSrcY = srcSliceY + srcSliceH - 1;
609 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
610 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
611 lastLumSrcY, lastChrSrcY);
614 //Do horizontal scaling
615 while(lastInLumBuf < lastLumSrcY) {
616 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
617 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
619 assert(lumBufIndex < 2*vLumBufSize);
620 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
621 assert(lastInLumBuf + 1 - srcSliceY >= 0);
622 hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
623 hLumFilter, hLumFilterPos, hLumFilterSize,
626 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
627 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
628 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
632 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
633 lumBufIndex, lastInLumBuf);
635 while(lastInChrBuf < lastChrSrcY) {
636 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
637 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
639 assert(chrBufIndex < 2*vChrBufSize);
640 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
641 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
642 //FIXME replace parameters through context struct (some at least)
644 if (c->needs_hcscale)
645 hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
646 hChrFilter, hChrFilterPos, hChrFilterSize,
650 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
651 chrBufIndex, lastInChrBuf);
653 //wrap buf index around to stay inside the ring buffer
654 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
655 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
657 break; //we can't output a dstY line so let's try with the next slice
/* vertical pass (MMX-safe region): build tap pointer arrays into the ring
 * buffers and dispatch on destination format */
660 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
661 const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
662 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
663 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
664 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
665 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
667 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
668 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
669 dest, uDest, dstW, chrDstW, dstFormat);
670 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
671 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
672 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
673 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
675 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
676 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
677 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
679 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
680 const int16_t *lumBuf = lumSrcPtr[0];
681 const int16_t *chrBuf= chrSrcPtr[0];
682 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
683 c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
684 } else { //General YV12
686 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
687 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
688 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
691 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
692 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
693 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
694 int chrAlpha= vChrFilter[2*dstY+1];
695 if(flags & SWS_FULL_CHR_H_INT) {
696 yuv2rgbXinC_full(c, //FIXME write a packed1_full function
697 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
698 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
699 alpSrcPtr, dest, dstW, dstY);
701 c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
702 alpPixBuf ? *alpSrcPtr : NULL,
703 dest, dstW, chrAlpha, dstFormat, flags, dstY);
705 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
706 int lumAlpha= vLumFilter[2*dstY+1];
707 int chrAlpha= vChrFilter[2*dstY+1];
/* replicate 16-bit coefficients into 32-bit for the MMX filter tables */
709 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
711 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
712 if(flags & SWS_FULL_CHR_H_INT) {
713 yuv2rgbXinC_full(c, //FIXME write a packed2_full function
714 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
715 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
716 alpSrcPtr, dest, dstW, dstY);
718 c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
719 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
720 dest, dstW, lumAlpha, chrAlpha, dstY);
722 } else { //general RGB
723 if(flags & SWS_FULL_CHR_H_INT) {
725 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
726 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
727 alpSrcPtr, dest, dstW, dstY);
730 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
731 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
732 alpSrcPtr, dest, dstW, dstY);
736 } else { // hmm looks like we can't use MMX here without overwriting this array's tail
/* same dispatch as above but restricted to plain C output functions */
737 const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
738 const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
739 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
740 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
741 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
742 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
744 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
745 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
746 dest, uDest, dstW, chrDstW, dstFormat);
747 } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
748 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
749 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
750 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
752 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
753 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
754 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
758 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
759 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
760 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
763 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
764 assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
765 if(flags & SWS_FULL_CHR_H_INT) {
767 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
768 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
769 alpSrcPtr, dest, dstW, dstY);
772 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
773 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
774 alpSrcPtr, dest, dstW, dstY);
/* YUVA output without source alpha: fill alpha plane with opaque (255) */
780 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
781 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
783 /* store changed local vars back in the context */
785 c->lumBufIndex= lumBufIndex;
786 c->chrBufIndex= chrBufIndex;
787 c->lastInLumBuf= lastInLumBuf;
788 c->lastInChrBuf= lastInChrBuf;
790 return dstY - lastDstY;
/* Populate the SwsContext function-pointer table with the plain-C
 * implementations: vertical output functions, horizontal scaler, optional
 * fast-bilinear paths, per-source-format input converters (chrToYV12 /
 * lumToYV12 / alpToYV12), range-conversion hooks, and the needs_hcscale flag.
 * Arch-specific init code may later override these pointers.
 * NOTE(review): several switch headers, default cases and closing braces are
 * not visible in this sampled chunk. */
795 static void sws_init_swScale_c(SwsContext *c)
795 enum PixelFormat srcFormat = c->srcFormat;
/* vertical (output) stage */
797 c->yuv2nv12X = yuv2nv12X_c;
798 c->yuv2yuv1 = yuv2yuv1_c;
799 c->yuv2yuvX = yuv2yuvX_c;
800 c->yuv2packed1 = yuv2packed1_c;
801 c->yuv2packed2 = yuv2packed2_c;
802 c->yuv2packedX = yuv2packedX_c;
/* horizontal (input) stage */
804 c->hScale = hScale_c;
806 if (c->flags & SWS_FAST_BILINEAR)
808 c->hyscale_fast = hyscale_fast_c;
809 c->hcscale_fast = hcscale_fast_c;
/* chroma input converters, keyed on source pixel format */
814 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
815 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
816 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
817 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
821 case PIX_FMT_BGR4_BYTE:
822 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
823 case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
824 case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
825 case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
826 case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
827 case PIX_FMT_YUV420P16BE:
828 case PIX_FMT_YUV422P16BE:
829 case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
830 case PIX_FMT_YUV420P16LE:
831 case PIX_FMT_YUV422P16LE:
832 case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
/* RGB sources: *_half variants average pixel pairs when chroma is
 * horizontally subsampled */
834 if (c->chrSrcHSubSample) {
836 case PIX_FMT_RGB48BE:
837 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
838 case PIX_FMT_BGR48BE:
839 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
840 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
841 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
842 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
843 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
844 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
845 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
846 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
847 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
848 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
849 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
853 case PIX_FMT_RGB48BE:
854 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
855 case PIX_FMT_BGR48BE:
856 case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
857 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
858 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
859 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
860 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
861 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
862 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
863 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
864 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
865 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
866 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
/* luma input converters */
873 case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
874 case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
875 case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
876 case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
877 case PIX_FMT_YUYV422 :
878 case PIX_FMT_YUV420P16BE:
879 case PIX_FMT_YUV422P16BE:
880 case PIX_FMT_YUV444P16BE:
882 case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
883 case PIX_FMT_UYVY422 :
884 case PIX_FMT_YUV420P16LE:
885 case PIX_FMT_YUV422P16LE:
886 case PIX_FMT_YUV444P16LE:
887 case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
888 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
889 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
890 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
891 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
892 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
893 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
897 case PIX_FMT_BGR4_BYTE:
898 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
899 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
900 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
901 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
902 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
903 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
904 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
905 case PIX_FMT_RGB48BE:
906 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
907 case PIX_FMT_BGR48BE:
908 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
/* alpha input converters (for formats carrying an alpha channel) */
913 case PIX_FMT_RGB32_1:
915 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
916 case PIX_FMT_Y400A : c->alpToYV12 = yuy2ToY_c; break;
928 case PIX_FMT_RGB48LE:
929 case PIX_FMT_BGR48LE:
/* range conversion only applies when src/dst ranges differ and the output
 * is not RGB (RGB conversion handles range itself) */
936 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
938 c->lumConvertRange = lumRangeFromJpeg_c;
939 c->chrConvertRange = chrRangeFromJpeg_c;
941 c->lumConvertRange = lumRangeToJpeg_c;
942 c->chrConvertRange = chrRangeToJpeg_c;
/* gray/mono sources have no chroma to scale horizontally */
946 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
947 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
948 c->needs_hcscale = 1;