/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "swscale_internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
#include "libavutil/pixdesc.h"
37 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
38 { 36, 68, 60, 92, 34, 66, 58, 90,},
39 { 100, 4,124, 28, 98, 2,122, 26,},
40 { 52, 84, 44, 76, 50, 82, 42, 74,},
41 { 116, 20,108, 12,114, 18,106, 10,},
42 { 32, 64, 56, 88, 38, 70, 62, 94,},
43 { 96, 0,120, 24,102, 6,126, 30,},
44 { 48, 80, 40, 72, 54, 86, 46, 78,},
45 { 112, 16,104, 8,118, 22,110, 14,},
47 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
48 { 64, 64, 64, 64, 64, 64, 64, 64 };
50 static av_always_inline void fillPlane(uint8_t* plane, int stride,
51 int width, int height,
55 uint8_t *ptr = plane + stride*y;
56 for (i=0; i<height; i++) {
57 memset(ptr, val, width);
62 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
63 const int16_t *filter,
64 const int32_t *filterPos, int filterSize)
67 int32_t *dst = (int32_t *) _dst;
68 const uint16_t *src = (const uint16_t *) _src;
69 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
72 for (i = 0; i < dstW; i++) {
74 int srcPos = filterPos[i];
77 for (j = 0; j < filterSize; j++) {
78 val += src[srcPos + j] * filter[filterSize * i + j];
80 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
81 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
85 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
86 const int16_t *filter,
87 const int32_t *filterPos, int filterSize)
90 const uint16_t *src = (const uint16_t *) _src;
91 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
93 for (i = 0; i < dstW; i++) {
95 int srcPos = filterPos[i];
98 for (j = 0; j < filterSize; j++) {
99 val += src[srcPos + j] * filter[filterSize * i + j];
101 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
102 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
106 // bilinear / bicubic scaling
107 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
108 const int16_t *filter, const int32_t *filterPos,
112 for (i=0; i<dstW; i++) {
114 int srcPos= filterPos[i];
116 for (j=0; j<filterSize; j++) {
117 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
119 //filter += hFilterSize;
120 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
125 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
126 const int16_t *filter, const int32_t *filterPos,
130 int32_t *dst = (int32_t *) _dst;
131 for (i=0; i<dstW; i++) {
133 int srcPos= filterPos[i];
135 for (j=0; j<filterSize; j++) {
136 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
138 //filter += hFilterSize;
139 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place linear remap of two 16-bit chroma lines:
 * v -> (min(v, 30775) * 4663 - 9289992) >> 12.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        /* Clamp first so the result stays within 15 bits. */
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u*4663 - 9289992)>>12; //-264
        dstV[i] = (v*4663 - 9289992)>>12; //-264
    }
}
/**
 * In-place linear remap of two 16-bit chroma lines:
 * v -> (v * 1799 + 4081085) >> 11.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * In-place linear remap of one 16-bit luma line:
 * v -> (min(v, 30189) * 19077 - 39057361) >> 14.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        /* Clamp first so the result stays within 15 bits. */
        int y = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (y*19077 - 39057361)>>14;
    }
}
/**
 * In-place linear remap of one 16-bit luma line:
 * v -> (v * 14071 + 33561947) >> 14.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/**
 * In-place linear remap of two chroma lines stored as 32-bit samples:
 * v -> (min(v, 30775 << 4) * 4663 - (9289992 << 4)) >> 12.
 *
 * @param _dstU first chroma line; accessed as int32_t despite the pointer type
 * @param _dstV second chroma line; accessed as int32_t despite the pointer type
 * @param width number of samples in each line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        /* (30775 << 4) * 4663 exceeds INT32_MAX, so the product is formed in
         * 64 bits; the final value fits in 32 bits again. */
        int64_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int64_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);
        dstU[i] = (int32_t)((u * 4663 - (9289992 << 4)) >> 12); //-264
        dstV[i] = (int32_t)((v * 4663 - (9289992 << 4)) >> 12); //-264
    }
}
/**
 * In-place linear remap of two chroma lines stored as 32-bit samples:
 * v -> (v * 1799 + (4081085 << 4)) >> 11.
 *
 * @param _dstU first chroma line; accessed as int32_t despite the pointer type
 * @param _dstV second chroma line; accessed as int32_t despite the pointer type
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/**
 * In-place linear remap of one luma line stored as 32-bit samples:
 * v -> (min(v, 30189 << 4) * 4769 - (39057361 << 2)) >> 12.
 *
 * @param _dst  luma line; accessed as int32_t despite the pointer type
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        /* (30189 << 4) * 4769 exceeds INT32_MAX, so the product is formed in
         * 64 bits; the final value fits in 32 bits again. */
        int64_t y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);
        dst[i] = (int32_t)((y * 4769 - (39057361 << 2)) >> 12);
    }
}
/**
 * In-place linear remap of one luma line stored as 32-bit samples:
 * v -> (v * 14071 + (33561947 << 4)) >> 14.
 *
 * @param _dst  luma line; accessed as int32_t despite the pointer type
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        /* v * 14071 no longer fits in 32 bits once v exceeds 152617, which a
         * 19-bit sample easily does, so widen before multiplying. */
        dst[i] = (int32_t)(((int64_t)dst[i] * 14071 + (33561947 << 4)) >> 14);
    }
}
210 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
211 const uint8_t *src, int srcW, int xInc)
215 for (i=0;i<dstWidth;i++) {
216 register unsigned int xx=xpos>>16;
217 register unsigned int xalpha=(xpos&0xFFFF)>>9;
218 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
223 // *** horizontal scale Y line to temp buffer
224 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
225 const uint8_t *src_in[4], int srcW, int xInc,
226 const int16_t *hLumFilter,
227 const int32_t *hLumFilterPos, int hLumFilterSize,
228 uint8_t *formatConvBuffer,
229 uint32_t *pal, int isAlpha)
231 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
232 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
233 const uint8_t *src = src_in[isAlpha ? 3 : 0];
236 toYV12(formatConvBuffer, src, srcW, pal);
237 src= formatConvBuffer;
238 } else if (c->readLumPlanar && !isAlpha) {
239 c->readLumPlanar(formatConvBuffer, src_in, srcW);
240 src = formatConvBuffer;
243 if (!c->hyscale_fast) {
244 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
245 } else { // fast bilinear upscale / crap downscale
246 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
250 convertRange(dst, dstWidth);
253 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
254 int dstWidth, const uint8_t *src1,
255 const uint8_t *src2, int srcW, int xInc)
259 for (i=0;i<dstWidth;i++) {
260 register unsigned int xx=xpos>>16;
261 register unsigned int xalpha=(xpos&0xFFFF)>>9;
262 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
263 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
268 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
269 const uint8_t *src_in[4],
270 int srcW, int xInc, const int16_t *hChrFilter,
271 const int32_t *hChrFilterPos, int hChrFilterSize,
272 uint8_t *formatConvBuffer, uint32_t *pal)
274 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
276 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
277 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
278 src1= formatConvBuffer;
280 } else if (c->readChrPlanar) {
281 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
282 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
283 src1= formatConvBuffer;
287 if (!c->hcscale_fast) {
288 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
289 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
290 } else { // fast bilinear upscale / crap downscale
291 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
294 if (c->chrConvertRange)
295 c->chrConvertRange(dst1, dst2, dstWidth);
/* Set to 1 to compile in the DEBUG_BUFFERS() trace output. */
#define DEBUG_SWSCALE_BUFFERS 0
/* Logs through av_log() at AV_LOG_DEBUG using the identifier `c` from the
 * calling scope as context. Wrapped in do/while(0) so the macro is a single
 * statement and cannot capture an `else` that follows it. */
#define DEBUG_BUFFERS(...)                              \
    do {                                                \
        if (DEBUG_SWSCALE_BUFFERS)                      \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);       \
    } while (0)
301 static int swScale(SwsContext *c, const uint8_t* src[],
302 int srcStride[], int srcSliceY,
303 int srcSliceH, uint8_t* dst[], int dstStride[])
305 /* load a few things into local vars to make the code more readable? and faster */
306 const int srcW= c->srcW;
307 const int dstW= c->dstW;
308 const int dstH= c->dstH;
309 const int chrDstW= c->chrDstW;
310 const int chrSrcW= c->chrSrcW;
311 const int lumXInc= c->lumXInc;
312 const int chrXInc= c->chrXInc;
313 const enum PixelFormat dstFormat= c->dstFormat;
314 const int flags= c->flags;
315 int32_t *vLumFilterPos= c->vLumFilterPos;
316 int32_t *vChrFilterPos= c->vChrFilterPos;
317 int32_t *hLumFilterPos= c->hLumFilterPos;
318 int32_t *hChrFilterPos= c->hChrFilterPos;
319 int16_t *vLumFilter= c->vLumFilter;
320 int16_t *vChrFilter= c->vChrFilter;
321 int16_t *hLumFilter= c->hLumFilter;
322 int16_t *hChrFilter= c->hChrFilter;
323 int32_t *lumMmxFilter= c->lumMmxFilter;
324 int32_t *chrMmxFilter= c->chrMmxFilter;
325 const int vLumFilterSize= c->vLumFilterSize;
326 const int vChrFilterSize= c->vChrFilterSize;
327 const int hLumFilterSize= c->hLumFilterSize;
328 const int hChrFilterSize= c->hChrFilterSize;
329 int16_t **lumPixBuf= c->lumPixBuf;
330 int16_t **chrUPixBuf= c->chrUPixBuf;
331 int16_t **chrVPixBuf= c->chrVPixBuf;
332 int16_t **alpPixBuf= c->alpPixBuf;
333 const int vLumBufSize= c->vLumBufSize;
334 const int vChrBufSize= c->vChrBufSize;
335 uint8_t *formatConvBuffer= c->formatConvBuffer;
336 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
337 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
339 uint32_t *pal=c->pal_yuv;
340 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
341 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
342 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
343 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
344 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
345 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
346 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
348 /* vars which will change and which we need to store back in the context */
350 int lumBufIndex= c->lumBufIndex;
351 int chrBufIndex= c->chrBufIndex;
352 int lastInLumBuf= c->lastInLumBuf;
353 int lastInChrBuf= c->lastInChrBuf;
355 if (isPacked(c->srcFormat)) {
363 srcStride[3]= srcStride[0];
365 srcStride[1]<<= c->vChrDrop;
366 srcStride[2]<<= c->vChrDrop;
368 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
369 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
370 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
371 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
372 srcSliceY, srcSliceH, dstY, dstH);
373 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
374 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
376 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
377 static int warnedAlready=0; //FIXME move this into the context perhaps
378 if (flags & SWS_PRINT_INFO && !warnedAlready) {
379 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
380 " ->cannot do aligned memory accesses anymore\n");
385 /* Note the user might start scaling the picture in the middle so this
386 will not get executed. This is not really intended but works
387 currently, so people might do it. */
396 if (!should_dither) {
397 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
401 for (;dstY < dstH; dstY++) {
402 const int chrDstY= dstY>>c->chrDstVSubSample;
404 dst[0] + dstStride[0] * dstY,
405 dst[1] + dstStride[1] * chrDstY,
406 dst[2] + dstStride[2] * chrDstY,
407 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
410 const int firstLumSrcY= FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); //First line needed as input
411 const int firstLumSrcY2= FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]);
412 const int firstChrSrcY= FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); //First line needed as input
414 // Last line needed as input
415 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
416 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
417 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
420 //handle holes (FAST_BILINEAR & weird filters)
421 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
422 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
423 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
424 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
426 DEBUG_BUFFERS("dstY: %d\n", dstY);
427 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
428 firstLumSrcY, lastLumSrcY, lastInLumBuf);
429 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
430 firstChrSrcY, lastChrSrcY, lastInChrBuf);
432 // Do we have enough lines in this slice to output the dstY line
433 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
436 lastLumSrcY = srcSliceY + srcSliceH - 1;
437 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
438 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
439 lastLumSrcY, lastChrSrcY);
442 //Do horizontal scaling
443 while(lastInLumBuf < lastLumSrcY) {
444 const uint8_t *src1[4] = {
445 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
446 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
447 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
448 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
451 assert(lumBufIndex < 2*vLumBufSize);
452 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
453 assert(lastInLumBuf + 1 - srcSliceY >= 0);
454 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
455 hLumFilter, hLumFilterPos, hLumFilterSize,
458 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
459 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
460 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
464 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
465 lumBufIndex, lastInLumBuf);
467 while(lastInChrBuf < lastChrSrcY) {
468 const uint8_t *src1[4] = {
469 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
470 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
471 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
472 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
475 assert(chrBufIndex < 2*vChrBufSize);
476 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
477 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
478 //FIXME replace parameters through context struct (some at least)
480 if (c->needs_hcscale)
481 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
482 chrDstW, src1, chrSrcW, chrXInc,
483 hChrFilter, hChrFilterPos, hChrFilterSize,
484 formatConvBuffer, pal);
486 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
487 chrBufIndex, lastInChrBuf);
489 //wrap buf index around to stay inside the ring buffer
490 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
491 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
493 break; //we can't output a dstY line so let's try with the next slice
496 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
499 c->chrDither8 = dither_8x8_128[chrDstY & 7];
500 c->lumDither8 = dither_8x8_128[dstY & 7];
502 if (dstY >= dstH-2) {
503 // hmm looks like we can't use MMX here without overwriting this array's tail
504 ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
505 &yuv2packed1, &yuv2packed2, &yuv2packedX);
509 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
510 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
511 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
512 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
514 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
515 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
516 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
517 for (i = 0; i < neg; i++)
518 tmpY[i] = lumSrcPtr[neg];
519 for ( ; i < end; i++)
520 tmpY[i] = lumSrcPtr[i];
521 for ( ; i < vLumFilterSize; i++)
526 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
527 for (i = 0; i < neg; i++)
528 tmpA[i] = alpSrcPtr[neg];
529 for ( ; i < end; i++)
530 tmpA[i] = alpSrcPtr[i];
531 for ( ; i < vLumFilterSize; i++)
532 tmpA[i] = tmpA[i - 1];
536 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
537 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
538 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
539 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
540 for (i = 0; i < neg; i++) {
541 tmpU[i] = chrUSrcPtr[neg];
542 tmpV[i] = chrVSrcPtr[neg];
544 for ( ; i < end; i++) {
545 tmpU[i] = chrUSrcPtr[i];
546 tmpV[i] = chrVSrcPtr[i];
548 for ( ; i < vChrFilterSize; i++) {
549 tmpU[i] = tmpU[i - 1];
550 tmpV[i] = tmpV[i - 1];
556 if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
557 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
559 if (vLumFilterSize == 1) {
560 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
562 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
563 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
566 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
568 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
569 } else if (vChrFilterSize == 1) {
570 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
571 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
573 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
574 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
575 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
576 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
580 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
581 if (vLumFilterSize == 1) {
582 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
584 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
585 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
589 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
590 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
591 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB
592 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
593 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
594 alpPixBuf ? *alpSrcPtr : NULL,
595 dest[0], dstW, chrAlpha, dstY);
596 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
597 int lumAlpha = vLumFilter[2 * dstY + 1];
598 int chrAlpha = vChrFilter[2 * dstY + 1];
600 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
602 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
603 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
604 alpPixBuf ? alpSrcPtr : NULL,
605 dest[0], dstW, lumAlpha, chrAlpha, dstY);
606 } else { //general RGB
607 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
608 lumSrcPtr, vLumFilterSize,
609 vChrFilter + dstY * vChrFilterSize,
610 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
611 alpSrcPtr, dest[0], dstW, dstY);
617 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
618 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
621 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
622 __asm__ volatile("sfence":::"memory");
626 /* store changed local vars back in the context */
628 c->lumBufIndex= lumBufIndex;
629 c->chrBufIndex= chrBufIndex;
630 c->lastInLumBuf= lastInLumBuf;
631 c->lastInChrBuf= lastInChrBuf;
633 return dstY - lastDstY;
636 static av_cold void sws_init_swScale_c(SwsContext *c)
638 enum PixelFormat srcFormat = c->srcFormat;
640 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
641 &c->yuv2nv12cX, &c->yuv2packed1,
642 &c->yuv2packed2, &c->yuv2packedX);
644 ff_sws_init_input_funcs(c);
646 if (c->srcBpc == 8) {
647 if (c->dstBpc <= 10) {
648 c->hyScale = c->hcScale = hScale8To15_c;
649 if (c->flags & SWS_FAST_BILINEAR) {
650 c->hyscale_fast = hyscale_fast_c;
651 c->hcscale_fast = hcscale_fast_c;
654 c->hyScale = c->hcScale = hScale8To19_c;
657 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
660 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
661 if (c->dstBpc <= 10) {
663 c->lumConvertRange = lumRangeFromJpeg_c;
664 c->chrConvertRange = chrRangeFromJpeg_c;
666 c->lumConvertRange = lumRangeToJpeg_c;
667 c->chrConvertRange = chrRangeToJpeg_c;
671 c->lumConvertRange = lumRangeFromJpeg16_c;
672 c->chrConvertRange = chrRangeFromJpeg16_c;
674 c->lumConvertRange = lumRangeToJpeg16_c;
675 c->chrConvertRange = chrRangeToJpeg16_c;
680 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
681 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
682 c->needs_hcscale = 1;
685 SwsFunc ff_getSwsFunc(SwsContext *c)
687 sws_init_swScale_c(c);
690 ff_sws_init_swScale_mmx(c);
692 ff_sws_init_swScale_altivec(c);