2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
37 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
38 { 36, 68, 60, 92, 34, 66, 58, 90,},
39 { 100, 4,124, 28, 98, 2,122, 26,},
40 { 52, 84, 44, 76, 50, 82, 42, 74,},
41 { 116, 20,108, 12,114, 18,106, 10,},
42 { 32, 64, 56, 88, 38, 70, 62, 94,},
43 { 96, 0,120, 24,102, 6,126, 30,},
44 { 48, 80, 40, 72, 54, 86, 46, 78,},
45 { 112, 16,104, 8,118, 22,110, 14,},
47 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
48 { 64, 64, 64, 64, 64, 64, 64, 64 };
50 static av_always_inline void fillPlane(uint8_t* plane, int stride,
51 int width, int height,
55 uint8_t *ptr = plane + stride*y;
56 for (i=0; i<height; i++) {
57 memset(ptr, val, width);
62 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
63 const int16_t *filter,
64 const int16_t *filterPos, int filterSize)
67 int32_t *dst = (int32_t *) _dst;
68 const uint16_t *src = (const uint16_t *) _src;
69 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
72 for (i = 0; i < dstW; i++) {
74 int srcPos = filterPos[i];
77 for (j = 0; j < filterSize; j++) {
78 val += src[srcPos + j] * filter[filterSize * i + j];
80 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
81 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
85 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
86 const int16_t *filter,
87 const int16_t *filterPos, int filterSize)
90 const uint16_t *src = (const uint16_t *) _src;
91 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
93 for (i = 0; i < dstW; i++) {
95 int srcPos = filterPos[i];
98 for (j = 0; j < filterSize; j++) {
99 val += src[srcPos + j] * filter[filterSize * i + j];
101 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
102 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
106 // bilinear / bicubic scaling
107 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
108 const int16_t *filter, const int16_t *filterPos,
112 for (i=0; i<dstW; i++) {
114 int srcPos= filterPos[i];
116 for (j=0; j<filterSize; j++) {
117 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
119 //filter += hFilterSize;
120 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
125 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
126 const int16_t *filter, const int16_t *filterPos,
130 int32_t *dst = (int32_t *) _dst;
131 for (i=0; i<dstW; i++) {
133 int srcPos= filterPos[i];
135 for (j=0; j<filterSize; j++) {
136 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
138 //filter += hFilterSize;
139 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Expand the range of two lines of 15-bit chroma intermediates in place
 * (gain 4663/4096 with a negative offset).
 *
 * Inputs are clamped to 30775 first; that is the largest value whose result
 * (32767) still fits in int16_t.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/**
 * Compress the range of two lines of 15-bit chroma intermediates in place
 * (gain 1799/2048 with a positive offset). The mid value 16384 maps to itself.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * Expand the range of one line of 15-bit luma intermediates in place
 * (gain 19077/16384 with a negative offset).
 *
 * Inputs are clamped to 30189 first; that is the largest value whose result
 * (32767) still fits in int16_t.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples in the line
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/**
 * Compress the range of one line of 15-bit luma intermediates in place
 * (gain 14071/16384 with a positive offset).
 *
 * @param dst   luma line, modified in place
 * @param width number of samples in the line
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/**
 * Expand the range of two lines of 19-bit chroma intermediates in place.
 * Same transform as chrRangeToJpeg_c() with the constants scaled by 16.
 *
 * The product can reach 492400 * 4663, which exceeds INT_MAX, so it is
 * evaluated in 64 bits; the final result always fits in 32 bits.
 *
 * @param _dstU first chroma line, actually an int32_t array, modified in place
 * @param _dstV second chroma line, actually an int32_t array, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (int32_t)(((int64_t)FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12); //-264
        dstV[i] = (int32_t)(((int64_t)FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12); //-264
    }
}
/**
 * Compress the range of two lines of 19-bit chroma intermediates in place.
 * Same transform as chrRangeFromJpeg_c() with the offset scaled by 16.
 *
 * @param _dstU first chroma line, actually an int32_t array, modified in place
 * @param _dstV second chroma line, actually an int32_t array, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/**
 * Expand the range of one line of 19-bit luma intermediates in place
 * (gain 4769/4096 with a negative offset, inputs clamped to 30189 << 4).
 *
 * The product can reach 483024 * 4769, which exceeds INT_MAX, so it is
 * evaluated in 64 bits; the final result always fits in 32 bits.
 *
 * @param _dst  luma line, actually an int32_t array, modified in place
 * @param width number of samples in the line
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (int32_t)(((int64_t)FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12);
}
/**
 * Compress the range of one line of 19-bit luma intermediates in place.
 * Same transform as lumRangeFromJpeg_c() with the offset scaled by 16.
 *
 * The product dst[i] * 14071 exceeds INT_MAX for inputs above roughly
 * 152600, well inside the 19-bit range, so it is evaluated in 64 bits.
 * The final result always fits in 32 bits.
 *
 * @param _dst  luma line, actually an int32_t array, modified in place
 * @param width number of samples in the line
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (int32_t)(((int64_t)dst[i]*14071 + (33561947<<4))>>14);
}
210 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
211 const uint8_t *src, int srcW, int xInc)
215 for (i=0;i<dstWidth;i++) {
216 register unsigned int xx=xpos>>16;
217 register unsigned int xalpha=(xpos&0xFFFF)>>9;
218 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
223 // *** horizontal scale Y line to temp buffer
224 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
225 const uint8_t *src_in[4], int srcW, int xInc,
226 const int16_t *hLumFilter,
227 const int16_t *hLumFilterPos, int hLumFilterSize,
228 uint8_t *formatConvBuffer,
229 uint32_t *pal, int isAlpha)
231 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
232 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
233 const uint8_t *src = src_in[isAlpha ? 3 : 0];
236 toYV12(formatConvBuffer, src, srcW, pal);
237 src= formatConvBuffer;
238 } else if (c->readLumPlanar && !isAlpha) {
239 c->readLumPlanar(formatConvBuffer, src_in, srcW);
240 src = formatConvBuffer;
243 if (!c->hyscale_fast) {
244 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
245 } else { // fast bilinear upscale / crap downscale
246 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
250 convertRange(dst, dstWidth);
253 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
254 int dstWidth, const uint8_t *src1,
255 const uint8_t *src2, int srcW, int xInc)
259 for (i=0;i<dstWidth;i++) {
260 register unsigned int xx=xpos>>16;
261 register unsigned int xalpha=(xpos&0xFFFF)>>9;
262 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
263 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
268 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
269 const uint8_t *src_in[4],
270 int srcW, int xInc, const int16_t *hChrFilter,
271 const int16_t *hChrFilterPos, int hChrFilterSize,
272 uint8_t *formatConvBuffer, uint32_t *pal)
274 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
276 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
277 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
278 src1= formatConvBuffer;
280 } else if (c->readChrPlanar) {
281 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
282 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
283 src1= formatConvBuffer;
287 if (!c->hcscale_fast) {
288 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
289 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
290 } else { // fast bilinear upscale / crap downscale
291 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
294 if (c->chrConvertRange)
295 c->chrConvertRange(dst1, dst2, dstWidth);
/* Set to 1 to trace ring-buffer bookkeeping in swScale(). */
#define DEBUG_SWSCALE_BUFFERS 0
/* Logs through av_log() using a variable named `c` from the calling scope.
 * Wrapped in do/while(0) so the macro behaves as a single statement and
 * cannot capture a following `else`. */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
/*
 * Scale one horizontal slice of the source picture into the destination.
 *
 * For every destination line that can be produced from the source lines seen
 * so far, the needed source lines are horizontally scaled into per-plane
 * buffers (lumPixBuf / chrUPixBuf / chrVPixBuf / alpPixBuf) and then
 * vertically filtered and written out through the yuv2* output callbacks.
 * Buffer indices and the last buffered source lines are stored back into the
 * context at the end so the next slice can continue. The return value is
 * dstY - lastDstY.
 *
 * NOTE(review): this listing has lost lines. dstY, lastDstY, enough_lines and
 * dest are used below but their declarations are not visible, and several
 * branch headers and closing braces are missing. Restore them from the
 * upstream file before relying on this function.
 */
301 static int swScale(SwsContext *c, const uint8_t* src[],
302 int srcStride[], int srcSliceY,
303 int srcSliceH, uint8_t* dst[], int dstStride[])
305 /* load a few things into local vars to make the code more readable? and faster */
306 const int srcW= c->srcW;
307 const int dstW= c->dstW;
308 const int dstH= c->dstH;
309 const int chrDstW= c->chrDstW;
310 const int chrSrcW= c->chrSrcW;
311 const int lumXInc= c->lumXInc;
312 const int chrXInc= c->chrXInc;
313 const enum PixelFormat dstFormat= c->dstFormat;
314 const int flags= c->flags;
315 int16_t *vLumFilterPos= c->vLumFilterPos;
316 int16_t *vChrFilterPos= c->vChrFilterPos;
317 int16_t *hLumFilterPos= c->hLumFilterPos;
318 int16_t *hChrFilterPos= c->hChrFilterPos;
319 int16_t *vLumFilter= c->vLumFilter;
320 int16_t *vChrFilter= c->vChrFilter;
321 int16_t *hLumFilter= c->hLumFilter;
322 int16_t *hChrFilter= c->hChrFilter;
323 int32_t *lumMmxFilter= c->lumMmxFilter;
324 int32_t *chrMmxFilter= c->chrMmxFilter;
325 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
326 const int vLumFilterSize= c->vLumFilterSize;
327 const int vChrFilterSize= c->vChrFilterSize;
328 const int hLumFilterSize= c->hLumFilterSize;
329 const int hChrFilterSize= c->hChrFilterSize;
330 int16_t **lumPixBuf= c->lumPixBuf;
331 int16_t **chrUPixBuf= c->chrUPixBuf;
332 int16_t **chrVPixBuf= c->chrVPixBuf;
333 int16_t **alpPixBuf= c->alpPixBuf;
334 const int vLumBufSize= c->vLumBufSize;
335 const int vChrBufSize= c->vChrBufSize;
336 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* Slice position and height in chroma lines; the double negation makes the
 * height round up instead of down. */
337 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
338 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
340 uint32_t *pal=c->pal_yuv;
/* Local copies of the output callbacks; they are re-initialised near the
 * bottom of the picture (see the dstY >= dstH-2 check below). */
341 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
342 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
343 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
344 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
345 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
346 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
/* Dither only when the source has 9, 10 or 16 bits per sample. */
347 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
349 /* vars which will change and which we need to store back in the context */
351 int lumBufIndex= c->lumBufIndex;
352 int chrBufIndex= c->chrBufIndex;
353 int lastInLumBuf= c->lastInLumBuf;
354 int lastInChrBuf= c->lastInChrBuf;
356 if (isPacked(c->srcFormat)) {
364 srcStride[3]= srcStride[0];
366 srcStride[1]<<= c->vChrDrop;
367 srcStride[2]<<= c->vChrDrop;
369 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
370 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
371 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
372 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
373 srcSliceY, srcSliceH, dstY, dstH);
374 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
375 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
/* Warn if any destination stride is not a multiple of 8. The warning is
 * gated on SWS_PRINT_INFO and on a function-static flag. */
377 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
378 static int warnedAlready=0; //FIXME move this into the context perhaps
379 if (flags & SWS_PRINT_INFO && !warnedAlready) {
380 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
381 " ->cannot do aligned memory accesses anymore\n");
386 /* Note the user might start scaling the picture in the middle so this
387 will not get executed. This is not really intended but works
388 currently, so people might do it. */
/* Without dithering both dither rows point at the constant row of 64s. */
397 if (!should_dither) {
398 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* Main loop: one iteration per destination line. */
402 for (;dstY < dstH; dstY++) {
403 const int chrDstY= dstY>>c->chrDstVSubSample;
405 dst[0] + dstStride[0] * dstY,
406 dst[1] + dstStride[1] * chrDstY,
407 dst[2] + dstStride[2] * chrDstY,
408 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
411 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
412 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
413 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
415 // Last line needed as input
416 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
417 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
418 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
421 //handle holes (FAST_BILINEAR & weird filters)
422 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
423 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
424 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
425 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
427 DEBUG_BUFFERS("dstY: %d\n", dstY);
428 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
429 firstLumSrcY, lastLumSrcY, lastInLumBuf);
430 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
431 firstChrSrcY, lastChrSrcY, lastInChrBuf);
433 // Do we have enough lines in this slice to output the dstY line
434 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
/* NOTE(review): the two assignments below clamp to the end of the slice;
 * the condition guarding them is not visible here, presumably !enough_lines. */
437 lastLumSrcY = srcSliceY + srcSliceH - 1;
438 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
439 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
440 lastLumSrcY, lastChrSrcY);
443 //Do horizontal scaling
444 while(lastInLumBuf < lastLumSrcY) {
445 const uint8_t *src1[4] = {
446 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
447 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
448 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
449 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
452 assert(lumBufIndex < 2*vLumBufSize);
453 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
454 assert(lastInLumBuf + 1 - srcSliceY >= 0);
455 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
456 hLumFilter, hLumFilterPos, hLumFilterSize,
459 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
460 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
461 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
465 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
466 lumBufIndex, lastInLumBuf);
468 while(lastInChrBuf < lastChrSrcY) {
469 const uint8_t *src1[4] = {
470 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
471 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
472 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
473 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
476 assert(chrBufIndex < 2*vChrBufSize);
477 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
478 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
479 //FIXME replace parameters through context struct (some at least)
481 if (c->needs_hcscale)
482 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
483 chrDstW, src1, chrSrcW, chrXInc,
484 hChrFilter, hChrFilterPos, hChrFilterSize,
485 formatConvBuffer, pal);
487 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
488 chrBufIndex, lastInChrBuf);
490 //wrap buf index around to stay inside the ring buffer
491 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
492 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
494 break; //we can't output a dstY line so let's try with the next slice
497 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
/* Pick the dither rows for this output line.
 * NOTE(review): the guard around these two assignments is not visible;
 * presumably should_dither. */
500 c->chrDither8 = dither_8x8_128[chrDstY & 7];
501 c->lumDither8 = dither_8x8_128[dstY & 7];
503 if (dstY >= dstH-2) {
504 // hmm looks like we can't use MMX here without overwriting this array's tail
505 ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
506 &yuv2packed1, &yuv2packed2, &yuv2packedX);
/* Pointers to the first buffered line needed by the vertical filter for
 * each plane. */
510 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
511 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
512 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
513 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
/* Filter window reaches outside the picture: build a temporary line-pointer
 * list past the end of the buffer array, repeating the first valid line for
 * entries before the picture and the last one for entries after it. */
515 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
516 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
517 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
518 for (i = 0; i < neg; i++)
519 tmpY[i] = lumSrcPtr[neg];
520 for ( ; i < end; i++)
521 tmpY[i] = lumSrcPtr[i];
522 for ( ; i < vLumFilterSize; i++)
527 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
528 for (i = 0; i < neg; i++)
529 tmpA[i] = alpSrcPtr[neg];
530 for ( ; i < end; i++)
531 tmpA[i] = alpSrcPtr[i];
532 for ( ; i < vLumFilterSize; i++)
533 tmpA[i] = tmpA[i - 1];
/* Same edge handling for the two chroma planes. */
537 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
538 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
539 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
540 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
541 for (i = 0; i < neg; i++) {
542 tmpU[i] = chrUSrcPtr[neg];
543 tmpV[i] = chrVSrcPtr[neg];
545 for ( ; i < end; i++) {
546 tmpU[i] = chrUSrcPtr[i];
547 tmpV[i] = chrVSrcPtr[i];
549 for ( ; i < vChrFilterSize; i++) {
550 tmpU[i] = tmpU[i - 1];
551 tmpV[i] = tmpV[i - 1];
/* Planar YUV / gray output: each plane is written separately. Chroma is
 * written only on lines where (dstY & chrSkipMask) is zero and the
 * destination is not gray. */
557 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
558 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
560 if (vLumFilterSize == 1) {
561 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
563 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
564 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
567 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
569 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
570 } else if (vChrFilterSize == 1) {
571 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
572 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
574 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
575 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
576 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
577 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
581 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
582 if (vLumFilterSize == 1) {
583 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
585 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
586 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
/* Packed output: pick the cheapest writer the vertical filter sizes allow. */
590 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
591 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
592 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
593 int chrAlpha = vChrFilter[2 * dstY + 1];
594 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
595 alpPixBuf ? *alpSrcPtr : NULL,
596 dest[0], dstW, chrAlpha, dstY);
597 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
598 int lumAlpha = vLumFilter[2 * dstY + 1];
599 int chrAlpha = vChrFilter[2 * dstY + 1];
601 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
603 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
604 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
605 alpPixBuf ? alpSrcPtr : NULL,
606 dest[0], dstW, lumAlpha, chrAlpha, dstY);
607 } else { //general RGB
608 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
609 lumSrcPtr, vLumFilterSize,
610 vChrFilter + dstY * vChrFilterSize,
611 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
612 alpSrcPtr, dest[0], dstW, dstY);
/* Destination has a planar alpha plane but no alpha was buffered: fill the
 * lines produced by this call with 255. */
618 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
619 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
622 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
623 __asm__ volatile("sfence":::"memory");
627 /* store changed local vars back in the context */
629 c->lumBufIndex= lumBufIndex;
630 c->chrBufIndex= chrBufIndex;
631 c->lastInLumBuf= lastInLumBuf;
632 c->lastInChrBuf= lastInChrBuf;
/* Number of destination lines written by this call. */
634 return dstY - lastDstY;
637 static av_cold void sws_init_swScale_c(SwsContext *c)
639 enum PixelFormat srcFormat = c->srcFormat;
641 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
642 &c->yuv2nv12cX, &c->yuv2packed1,
643 &c->yuv2packed2, &c->yuv2packedX);
645 ff_sws_init_input_funcs(c);
647 if (c->srcBpc == 8) {
648 if (c->dstBpc <= 10) {
649 c->hyScale = c->hcScale = hScale8To15_c;
650 if (c->flags & SWS_FAST_BILINEAR) {
651 c->hyscale_fast = hyscale_fast_c;
652 c->hcscale_fast = hcscale_fast_c;
655 c->hyScale = c->hcScale = hScale8To19_c;
658 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
661 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
662 if (c->dstBpc <= 10) {
664 c->lumConvertRange = lumRangeFromJpeg_c;
665 c->chrConvertRange = chrRangeFromJpeg_c;
667 c->lumConvertRange = lumRangeToJpeg_c;
668 c->chrConvertRange = chrRangeToJpeg_c;
672 c->lumConvertRange = lumRangeFromJpeg16_c;
673 c->chrConvertRange = chrRangeFromJpeg16_c;
675 c->lumConvertRange = lumRangeToJpeg16_c;
676 c->chrConvertRange = chrRangeToJpeg16_c;
681 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
682 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
683 c->needs_hcscale = 1;
/*
 * Initialise the scaler callbacks of the context and hand back the scaling
 * entry point. The C implementations are installed first; the MMX and
 * AltiVec initialisers are called afterwards.
 *
 * NOTE(review): this listing has lost lines. Whatever guards the two
 * architecture-specific calls, and the return statement, are not visible
 * here; restore them from the upstream file.
 */
686 SwsFunc ff_getSwsFunc(SwsContext *c)
688 sws_init_swScale_c(c);
691 ff_sws_init_swScale_mmx(c);
693 ff_sws_init_swScale_altivec(c);