2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/avassert.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/cpu.h"
33 #include "libavutil/avutil.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/bswap.h"
36 #include "libavutil/pixdesc.h"
// 8x8 ordered-dither matrix (values 0..126, step 2). A row is selected per
// output line with (dstY & 7) / (chrDstY & 7) and fed to the output functions
// as the per-line dither source.
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100,  4,124, 28, 98,  2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96,  0,120, 24,102,  6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104,  8,118, 22,110, 14,},
};
// Constant row of 64s: installed as both dither tables when no dithering is
// wanted (see the !should_dither path in the scaler), i.e. a flat mid-value
// "no-op" dither line.
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
/**
 * Fill a rectangular region of one image plane with a constant value.
 *
 * @param plane  base pointer of the plane
 * @param stride distance in bytes between vertically adjacent lines
 * @param width  number of bytes to set per line
 * @param height number of lines to fill
 * @param y      first line to fill (offset from the plane base)
 * @param val    byte value written into the region
 */
static av_always_inline void fillPlane(uint8_t *plane, int stride,
                                       int width, int height, int y,
                                       uint8_t val)
{
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++) {
        memset(ptr, val, width);
        ptr += stride; // advance to the next line; without this every pass would overwrite line y
    }
}
64 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
65 const int16_t *filter,
66 const int32_t *filterPos, int filterSize)
69 int32_t *dst = (int32_t *) _dst;
70 const uint16_t *src = (const uint16_t *) _src;
71 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
74 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
77 for (i = 0; i < dstW; i++) {
79 int srcPos = filterPos[i];
82 for (j = 0; j < filterSize; j++) {
83 val += src[srcPos + j] * filter[filterSize * i + j];
85 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
86 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
90 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
91 const int16_t *filter,
92 const int32_t *filterPos, int filterSize)
95 const uint16_t *src = (const uint16_t *) _src;
96 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
99 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
101 for (i = 0; i < dstW; i++) {
103 int srcPos = filterPos[i];
106 for (j = 0; j < filterSize; j++) {
107 val += src[srcPos + j] * filter[filterSize * i + j];
109 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
110 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
114 // bilinear / bicubic scaling
/**
 * Horizontally scale one line of 8-bit samples to a 15-bit intermediate
 * using an arbitrary FIR filter (bilinear / bicubic scaling).
 *
 * @param c          scaler context (unused here; kept for the common hScale signature)
 * @param dst        output samples, clipped at the top to (1<<15)-1
 * @param dstW       number of output samples
 * @param src        8-bit input line
 * @param filter     14-bit coefficients, filterSize per output sample
 * @param filterPos  first input sample index used for each output sample
 * @param filterSize number of filter taps per output sample
 */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int32_t *filterPos,
                          int filterSize)
{
    int i;
    for (i=0; i<dstW; i++) {
        int j;
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        // 8-bit input * 14-bit filter = 22 bit; >>7 leaves 15 bits
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
    }
}
/**
 * Horizontally scale one line of 8-bit samples to a 19-bit intermediate.
 * _dst actually points to int32_t output (caller passes the buffer as int16_t*).
 *
 * @param filter     14-bit coefficients, filterSize per output sample
 * @param filterPos  first input sample index used for each output sample
 */
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int32_t *filterPos,
                          int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i=0; i<dstW; i++) {
        int j;
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        // 8-bit input * 14-bit filter = 22 bit; >>3 leaves 19 bits
        dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
    }
}
152 //FIXME all pal and rgb srcFormats could do this convertion as well
153 //FIXME all scalers more complex than bilinear could do half of this transform
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Convert 15-bit intermediate chroma from limited (MPEG) range to full
 * (JPEG) range in place. The FFMIN clamp bounds the fixed-point multiply.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/**
 * Convert 15-bit intermediate chroma from full (JPEG) range to limited
 * (MPEG) range in place (fixed-point affine transform).
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * Convert 15-bit intermediate luma from limited (MPEG) range to full
 * (JPEG) range in place. The FFMIN clamp bounds the fixed-point multiply.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/**
 * Convert 15-bit intermediate luma from full (JPEG) range to limited
 * (MPEG) range in place (fixed-point affine transform).
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
/**
 * 19-bit variant of chrRangeToJpeg_c: convert intermediate chroma from
 * limited to full range in place. _dstU/_dstV actually point to int32_t
 * data; constants are the 8-bit ones scaled by 16 (<<4).
 *
 * The multiply is done in unsigned arithmetic: FFMIN(...) can be up to
 * 30775<<4, and (30775<<4)*4663 overflows a signed 32-bit int (undefined
 * behavior). Unsigned arithmetic wraps mod 2^32 and the cast back to int
 * restores the intended signed value before the arithmetic shift.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = ((int)(FFMIN(dstU[i],30775<<4)*4663U - (9289992<<4)))>>12; //-264
        dstV[i] = ((int)(FFMIN(dstV[i],30775<<4)*4663U - (9289992<<4)))>>12; //-264
    }
}
/**
 * 19-bit variant of chrRangeFromJpeg_c: convert intermediate chroma from
 * full to limited range in place. _dstU/_dstV actually point to int32_t
 * data; the offset is the 8-bit one scaled by 16 (<<4).
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/**
 * 19-bit variant of lumRangeToJpeg_c: convert intermediate luma from
 * limited to full range in place. _dst actually points to int32_t data.
 *
 * The multiply is done in unsigned arithmetic: (30189<<4)*4769 overflows a
 * signed 32-bit int (undefined behavior); unsigned arithmetic wraps mod 2^32
 * and the cast back to int restores the intended signed value before the
 * arithmetic shift.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = ((int)(FFMIN(dst[i],30189<<4)*4769U - (39057361<<2)))>>12;
}
/**
 * 19-bit variant of lumRangeFromJpeg_c: convert intermediate luma from
 * full to limited range in place. _dst actually points to int32_t data;
 * constants are pre-divided by 4 so the intermediate stays within int32.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
/**
 * Fast bilinear horizontal scaling of one 8-bit luma line to a 15-bit
 * intermediate. Walks the source with a 16.16 fixed-point position (xInc per
 * output sample); xalpha is the 7-bit interpolation weight. The trailing loop
 * rewrites output samples whose right-hand source neighbour would fall past
 * the end of the line with the (scaled) last source sample.
 */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
{
    int i;
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
        xpos+=xInc;
    }
    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
        dst[i] = src[srcW-1]*128;
}
233 // *** horizontal scale Y line to temp buffer
234 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
235 const uint8_t *src_in[4], int srcW, int xInc,
236 const int16_t *hLumFilter,
237 const int32_t *hLumFilterPos, int hLumFilterSize,
238 uint8_t *formatConvBuffer,
239 uint32_t *pal, int isAlpha)
241 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
242 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
243 const uint8_t *src = src_in[isAlpha ? 3 : 0];
246 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
247 src= formatConvBuffer;
248 } else if (c->readLumPlanar && !isAlpha) {
249 c->readLumPlanar(formatConvBuffer, src_in, srcW);
250 src = formatConvBuffer;
253 if (!c->hyscale_fast) {
254 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
255 } else { // fast bilinear upscale / crap downscale
256 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
260 convertRange(dst, dstWidth);
/**
 * Fast bilinear horizontal scaling of one pair of 8-bit chroma lines (U and
 * V) to 15-bit intermediates. Same 16.16 fixed-point walk as the luma
 * variant; (xalpha^127) is the complementary left-sample weight. The trailing
 * loop patches output samples whose right-hand source neighbour would fall
 * past the end of the line.
 */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
{
    int i;
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
        xpos+=xInc;
    }
    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
        dst1[i] = src1[srcW-1]*128;
        dst2[i] = src2[srcW-1]*128;
    }
}
282 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
283 const uint8_t *src_in[4],
284 int srcW, int xInc, const int16_t *hChrFilter,
285 const int32_t *hChrFilterPos, int hChrFilterSize,
286 uint8_t *formatConvBuffer, uint32_t *pal)
288 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
290 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
291 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
292 src1= formatConvBuffer;
294 } else if (c->readChrPlanar) {
295 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
296 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
297 src1= formatConvBuffer;
301 if (!c->hcscale_fast) {
302 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
303 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
304 } else { // fast bilinear upscale / crap downscale
305 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
308 if (c->chrConvertRange)
309 c->chrConvertRange(dst1, dst2, dstWidth);
312 #define DEBUG_SWSCALE_BUFFERS 0
313 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
// NOTE(review): this function's text carries original-file line numbers fused
// into each line and has gaps (missing declarations such as dstY, dest[],
// enough_lines, several braces and #if blocks). Code is kept byte-identical
// here; only comments were added. Reconcile against upstream before use.
//
// swScale: top-level scaling loop. For each destination line it makes sure
// the required source lines have been horizontally scaled into the
// luma/chroma ring buffers (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf), then
// vertically filters the buffered lines into the destination, via the planar
// (YV12-like) or packed (RGB-like) output paths. Returns the number of
// destination lines produced (dstY - lastDstY).
315 static int swScale(SwsContext *c, const uint8_t* src[],
316 int srcStride[], int srcSliceY,
317 int srcSliceH, uint8_t* dst[], int dstStride[])
319 /* load a few things into local vars to make the code more readable? and faster */
320 const int srcW= c->srcW;
321 const int dstW= c->dstW;
322 const int dstH= c->dstH;
323 const int chrDstW= c->chrDstW;
324 const int chrSrcW= c->chrSrcW;
325 const int lumXInc= c->lumXInc;
326 const int chrXInc= c->chrXInc;
327 const enum PixelFormat dstFormat= c->dstFormat;
328 const int flags= c->flags;
329 int32_t *vLumFilterPos= c->vLumFilterPos;
330 int32_t *vChrFilterPos= c->vChrFilterPos;
331 int32_t *hLumFilterPos= c->hLumFilterPos;
332 int32_t *hChrFilterPos= c->hChrFilterPos;
333 int16_t *hLumFilter= c->hLumFilter;
334 int16_t *hChrFilter= c->hChrFilter;
335 int32_t *lumMmxFilter= c->lumMmxFilter;
336 int32_t *chrMmxFilter= c->chrMmxFilter;
337 const int vLumFilterSize= c->vLumFilterSize;
338 const int vChrFilterSize= c->vChrFilterSize;
339 const int hLumFilterSize= c->hLumFilterSize;
340 const int hChrFilterSize= c->hChrFilterSize;
341 int16_t **lumPixBuf= c->lumPixBuf;
342 int16_t **chrUPixBuf= c->chrUPixBuf;
343 int16_t **chrVPixBuf= c->chrVPixBuf;
344 int16_t **alpPixBuf= c->alpPixBuf;
345 const int vLumBufSize= c->vLumBufSize;
346 const int vChrBufSize= c->vChrBufSize;
347 uint8_t *formatConvBuffer= c->formatConvBuffer;
// chroma slice geometry in chroma-subsampled coordinates; the slice height
// is rounded UP via the -((-x)>>s) idiom
348 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
349 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
351 uint32_t *pal=c->pal_yuv;
352 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
354 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
355 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
356 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
357 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
358 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
359 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
361 /* vars which will change and which we need to store back in the context */
363 int lumBufIndex= c->lumBufIndex;
364 int chrBufIndex= c->chrBufIndex;
365 int lastInLumBuf= c->lastInLumBuf;
366 int lastInChrBuf= c->lastInChrBuf;
// packed input: a single plane carries everything, so the extra plane
// strides mirror plane 0 (assignments for planes 1/2 missing in this extract)
368 if (isPacked(c->srcFormat)) {
376 srcStride[3]= srcStride[0];
378 srcStride[1]<<= c->vChrDrop;
379 srcStride[2]<<= c->vChrDrop;
381 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
382 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
383 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
384 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
385 srcSliceY, srcSliceH, dstY, dstH);
386 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
387 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
// one-time warnings about unaligned buffers/strides (hurts SIMD paths)
389 if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
390 static int warnedAlready=0; //FIXME move this into the context perhaps
391 if (flags & SWS_PRINT_INFO && !warnedAlready) {
392 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
393 " ->cannot do aligned memory accesses anymore\n");
398 if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
399 || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
400 || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
402 static int warnedAlready=0;
403 int cpu_flags = av_get_cpu_flags();
404 if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
405 av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
410 /* Note the user might start scaling the picture in the middle so this
411 will not get executed. This is not really intended but works
412 currently, so people might do it. */
// no dithering wanted: point both dither-table pointers at the constant row
421 if (!should_dither) {
422 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
// main loop: one destination line per iteration
426 for (;dstY < dstH; dstY++) {
427 const int chrDstY= dstY>>c->chrDstVSubSample;
429 dst[0] + dstStride[0] * dstY,
430 dst[1] + dstStride[1] * chrDstY,
431 dst[2] + dstStride[2] * chrDstY,
432 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
434 int use_mmx_vfilter= c->use_mmx_vfilter;
436 const int firstLumSrcY= FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); //First line needed as input
437 const int firstLumSrcY2= FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]);
438 const int firstChrSrcY= FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); //First line needed as input
440 // Last line needed as input
441 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
442 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
443 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
446 //handle holes (FAST_BILINEAR & weird filters)
447 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
448 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
449 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
450 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
452 DEBUG_BUFFERS("dstY: %d\n", dstY);
453 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
454 firstLumSrcY, lastLumSrcY, lastInLumBuf);
455 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
456 firstChrSrcY, lastChrSrcY, lastInChrBuf);
458 // Do we have enough lines in this slice to output the dstY line
459 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
// not enough input: just buffer everything this slice provides
462 lastLumSrcY = srcSliceY + srcSliceH - 1;
463 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
464 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
465 lastLumSrcY, lastChrSrcY);
468 //Do horizontal scaling
469 while(lastInLumBuf < lastLumSrcY) {
470 const uint8_t *src1[4] = {
471 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
472 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
473 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
474 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
477 assert(lumBufIndex < 2*vLumBufSize);
478 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
479 assert(lastInLumBuf + 1 - srcSliceY >= 0);
480 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
481 hLumFilter, hLumFilterPos, hLumFilterSize,
484 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
485 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
486 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
490 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
491 lumBufIndex, lastInLumBuf);
493 while(lastInChrBuf < lastChrSrcY) {
494 const uint8_t *src1[4] = {
495 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
496 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
497 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
498 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
501 assert(chrBufIndex < 2*vChrBufSize);
502 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
503 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
504 //FIXME replace parameters through context struct (some at least)
506 if (c->needs_hcscale)
507 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
508 chrDstW, src1, chrSrcW, chrXInc,
509 hChrFilter, hChrFilterPos, hChrFilterSize,
510 formatConvBuffer, pal);
512 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
513 chrBufIndex, lastInChrBuf);
515 //wrap buf index around to stay inside the ring buffer
516 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
517 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
519 break; //we can't output a dstY line so let's try with the next slice
// NOTE(review): presumably inside an #if HAVE_MMX-style block in the full
// source — confirm before relying on unconditional execution here
522 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
// pick the dither row for this output line
525 c->chrDither8 = dither_8x8_128[chrDstY & 7];
526 c->lumDither8 = dither_8x8_128[dstY & 7];
528 if (dstY >= dstH-2) {
529 // hmm looks like we can't use MMX here without overwriting this array's tail
530 ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
531 &yuv2packed1, &yuv2packed2, &yuv2packedX);
// pointers into the ring buffers positioned for this output line's taps
536 const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
537 const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
538 const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
539 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
540 int16_t *vLumFilter= c->vLumFilter;
541 int16_t *vChrFilter= c->vChrFilter;
// planar YUV (or gray) output path
543 if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
544 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
546 vLumFilter += dstY * vLumFilterSize;
547 vChrFilter += chrDstY * vChrFilterSize;
549 // av_assert0(use_mmx_vfilter != (
550 // yuv2planeX == yuv2planeX_10BE_c
551 // || yuv2planeX == yuv2planeX_10LE_c
552 // || yuv2planeX == yuv2planeX_9BE_c
553 // || yuv2planeX == yuv2planeX_9LE_c
554 // || yuv2planeX == yuv2planeX_16BE_c
555 // || yuv2planeX == yuv2planeX_16LE_c
556 // || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
// MMX vertical filter uses its own coefficient layout
559 vLumFilter= c->lumMmxFilter;
560 vChrFilter= c->chrMmxFilter;
563 if (vLumFilterSize == 1) {
564 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
566 yuv2planeX(vLumFilter, vLumFilterSize,
567 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
// chroma only on lines not skipped by vertical subsampling
570 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
572 yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
573 } else if (vChrFilterSize == 1) {
574 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
575 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
577 yuv2planeX(vChrFilter, vChrFilterSize,
578 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
579 yuv2planeX(vChrFilter, vChrFilterSize,
580 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
584 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
586 vLumFilter= c->alpMmxFilter;
588 if (vLumFilterSize == 1) {
589 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
591 yuv2planeX(vLumFilter, vLumFilterSize,
592 alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
// packed (e.g. RGB) output paths: 1-tap, 2-tap bilinear, or general X taps
596 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
597 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
598 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB
599 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
600 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
601 alpPixBuf ? *alpSrcPtr : NULL,
602 dest[0], dstW, chrAlpha, dstY);
603 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
604 int lumAlpha = vLumFilter[2 * dstY + 1];
605 int chrAlpha = vChrFilter[2 * dstY + 1];
607 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
609 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
610 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
611 alpPixBuf ? alpSrcPtr : NULL,
612 dest[0], dstW, lumAlpha, chrAlpha, dstY);
613 } else { //general RGB
614 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
615 lumSrcPtr, vLumFilterSize,
616 vChrFilter + dstY * vChrFilterSize,
617 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
618 alpSrcPtr, dest[0], dstW, dstY);
// destination wants alpha but the source has none: fill it fully opaque
624 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
625 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
// NOTE(review): sfence presumably pairs with non-temporal stores in the
// MMX2 code paths — confirm against the full source
628 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
629 __asm__ volatile("sfence":::"memory");
633 /* store changed local vars back in the context */
635 c->lumBufIndex= lumBufIndex;
636 c->chrBufIndex= chrBufIndex;
637 c->lastInLumBuf= lastInLumBuf;
638 c->lastInChrBuf= lastInChrBuf;
640 return dstY - lastDstY;
643 static av_cold void sws_init_swScale_c(SwsContext *c)
645 enum PixelFormat srcFormat = c->srcFormat;
647 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
648 &c->yuv2nv12cX, &c->yuv2packed1,
649 &c->yuv2packed2, &c->yuv2packedX);
651 ff_sws_init_input_funcs(c);
654 if (c->srcBpc == 8) {
655 if (c->dstBpc <= 10) {
656 c->hyScale = c->hcScale = hScale8To15_c;
657 if (c->flags & SWS_FAST_BILINEAR) {
658 c->hyscale_fast = hyscale_fast_c;
659 c->hcscale_fast = hcscale_fast_c;
662 c->hyScale = c->hcScale = hScale8To19_c;
665 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
668 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
669 if (c->dstBpc <= 10) {
671 c->lumConvertRange = lumRangeFromJpeg_c;
672 c->chrConvertRange = chrRangeFromJpeg_c;
674 c->lumConvertRange = lumRangeToJpeg_c;
675 c->chrConvertRange = chrRangeToJpeg_c;
679 c->lumConvertRange = lumRangeFromJpeg16_c;
680 c->chrConvertRange = chrRangeFromJpeg16_c;
682 c->lumConvertRange = lumRangeToJpeg16_c;
683 c->chrConvertRange = chrRangeToJpeg16_c;
688 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
689 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
690 c->needs_hcscale = 1;
693 SwsFunc ff_getSwsFunc(SwsContext *c)
695 sws_init_swScale_c(c);
698 ff_sws_init_swScale_mmx(c);
700 ff_sws_init_swScale_altivec(c);