2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "swscale_internal.h"
30 #include "libavutil/avassert.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/cpu.h"
33 #include "libavutil/avutil.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/bswap.h"
36 #include "libavutil/pixdesc.h"
38 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
39 { 36, 68, 60, 92, 34, 66, 58, 90,},
40 { 100, 4,124, 28, 98, 2,122, 26,},
41 { 52, 84, 44, 76, 50, 82, 42, 74,},
42 { 116, 20,108, 12,114, 18,106, 10,},
43 { 32, 64, 56, 88, 38, 70, 62, 94,},
44 { 96, 0,120, 24,102, 6,126, 30,},
45 { 48, 80, 40, 72, 54, 86, 46, 78,},
46 { 112, 16,104, 8,118, 22,110, 14,},
/* Eight copies of the value 64: a "flat" dither row installed as both
 * c->lumDither8 and c->chrDither8 when no real dithering is wanted
 * (see swScale(): the !should_dither case). */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
52 static av_always_inline void fillPlane(uint8_t* plane, int stride,
53 int width, int height,
57 uint8_t *ptr = plane + stride*y;
58 for (i=0; i<height; i++) {
59 memset(ptr, val, width);
/*
 * Horizontally scale one line of 16-bit input samples into 19-bit
 * intermediate samples (stored as 32-bit ints) with the given FIR filter:
 * for each output pixel, filterSize consecutive input samples starting at
 * filterPos[i] are multiplied by the filter coefficients and summed.
 * NOTE(review): this excerpt is missing several lines (opening brace,
 * loop-variable declarations and the computation of 'sh'); code kept verbatim.
 */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    int32_t *dst = (int32_t *) _dst;               /* output buffer actually holds 32-bit values */
    const uint16_t *src = (const uint16_t *) _src; /* input actually is 16-bit samples */
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    /* RGB / paletted sources with fewer than 16 significant bits use a different shift */
    if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];   /* first input sample contributing to output pixel i */
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/*
 * Horizontally scale one line of 16-bit input samples into 15-bit
 * intermediate samples (clamped to (1<<15)-1), same filtering scheme as
 * hScale16To19_c above.
 * NOTE(review): this excerpt is missing lines (opening brace, 'i/j/val'
 * declarations and the conditional guarding the 'sh' reassignment);
 * code kept verbatim.
 */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int16_t *filterPos, int filterSize)
    const uint16_t *src = (const uint16_t *) _src;  /* input actually is 16-bit samples */
    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    /* RGB / paletted sources use a fixed shift of 13 instead of the stored depth */
    sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
    for (i = 0; i < dstW; i++) {
        int srcPos = filterPos[i];   /* first input sample contributing to output pixel i */
        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
// bilinear / bicubic scaling
/*
 * Horizontally scale one line of 8-bit input into 15-bit intermediate
 * samples: 14-bit filter coefficients on 8-bit input give up to 22 bits,
 * >>7 brings the result down to 15 bits (clamped, since bicubic filters
 * can overshoot).
 * NOTE(review): the excerpt is truncated — the final 'int filterSize'
 * parameter, opening brace and 'i/j/val' declarations are elided;
 * code kept verbatim.
 */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int16_t *filterPos,
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];   /* first input sample contributing to output pixel i */
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/*
 * Horizontally scale one line of 8-bit input into 19-bit intermediate
 * samples stored as 32-bit ints (>>3 instead of >>7, clamp at (1<<19)-1);
 * otherwise identical to hScale8To15_c above.
 * NOTE(review): the excerpt is truncated — the final 'int filterSize'
 * parameter, opening brace and 'i/j/val' declarations are elided;
 * code kept verbatim.
 */
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
                          const int16_t *filter, const int16_t *filterPos,
    int32_t *dst = (int32_t *) _dst;   /* output buffer actually holds 32-bit values */
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Convert one line of intermediate chroma samples from limited (MPEG)
 * range to full (JPEG) range, in place.
 *
 * @param dstU  U samples, rewritten in place
 * @param dstV  V samples, rewritten in place
 * @param width number of samples in each plane
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.  FFMIN is open-coded so the function only
 * depends on the standard library.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        /* clamp first so the 4663 multiply stays within int range */
        int U = dstU[i] < 30775 ? dstU[i] : 30775;
        int V = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (U*4663 - 9289992)>>12; //-264
        dstV[i] = (V*4663 - 9289992)>>12; //-264
    }
}
/**
 * Convert one line of intermediate chroma samples from full (JPEG) range
 * to limited (MPEG) range, in place (inverse of chrRangeToJpeg_c).
 *
 * @param dstU  U samples, rewritten in place
 * @param dstV  V samples, rewritten in place
 * @param width number of samples in each plane
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * Convert one line of intermediate luma samples from limited (MPEG)
 * range to full (JPEG) range, in place.
 *
 * @param dst   luma samples, rewritten in place
 * @param width number of samples
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.  FFMIN is open-coded so the function only
 * depends on the standard library.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        /* clamp first so the 19077 multiply stays within int range */
        int v = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (v*19077 - 39057361)>>14;
    }
}
176 static void lumRangeFromJpeg_c(int16_t *dst, int width)
179 for (i = 0; i < width; i++)
180 dst[i] = (dst[i]*14071 + 33561947)>>14;
/**
 * High-bit-depth variant of chrRangeToJpeg_c: the int16_t* buffers really
 * hold 32-bit samples (4 extra fractional bits), hence the casts and the
 * <<4 scaled constants.
 *
 * @param _dstU U samples (int32_t storage), rewritten in place
 * @param _dstV V samples (int32_t storage), rewritten in place
 * @param width number of samples in each plane
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.  FFMIN is open-coded.
 * NOTE(review): at the upper clamp (30775<<4) the multiply by 4663 exceeds
 * INT32_MAX — looks like a pre-existing overflow hazard; confirm upstream.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int32_t U = dstU[i] < (30775<<4) ? dstU[i] : (30775<<4);
        int32_t V = dstV[i] < (30775<<4) ? dstV[i] : (30775<<4);
        dstU[i] = (U*4663 - (9289992<<4))>>12; //-264
        dstV[i] = (V*4663 - (9289992<<4))>>12; //-264
    }
}
/**
 * High-bit-depth variant of chrRangeFromJpeg_c: the int16_t* buffers
 * really hold 32-bit samples, hence the casts and the <<4 scaled constant.
 *
 * @param _dstU U samples (int32_t storage), rewritten in place
 * @param _dstV V samples (int32_t storage), rewritten in place
 * @param width number of samples in each plane
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
    }
}
/**
 * High-bit-depth variant of lumRangeToJpeg_c: the int16_t* buffer really
 * holds 32-bit samples; coefficient 4769 with >>12 replaces 19077 with
 * >>14 to keep the intermediate product inside 32 bits.
 *
 * @param _dst  luma samples (int32_t storage), rewritten in place
 * @param width number of samples
 *
 * NOTE(review): the excerpt was truncated (braces and the loop counter
 * were elided) — restored.  FFMIN is open-coded.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int32_t v = dst[i] < (30189<<4) ? dst[i] : (30189<<4);
        dst[i] = (v*4769 - (39057361<<2))>>12;
    }
}
/**
 * High-bit-depth variant of lumRangeFromJpeg_c: the int16_t* buffer really
 * holds 32-bit samples; the 14071/4 coefficient with >>12 replaces 14071
 * with >>14 so the intermediate product fits in 32 bits.
 *
 * @param _dst  luma samples (int32_t storage), rewritten in place
 * @param width number of samples
 *
 * NOTE(review): the excerpt was truncated (opening/closing braces and the
 * loop counter were elided) — restored.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
218 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
219 const uint8_t *src, int srcW, int xInc)
223 for (i=0;i<dstWidth;i++) {
224 register unsigned int xx=xpos>>16;
225 register unsigned int xalpha=(xpos&0xFFFF)>>9;
226 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
229 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
230 dst[i] = src[srcW-1]*128;
// *** horizontal scale Y line to temp buffer
/*
 * Horizontally scale one luma (isAlpha==0) or alpha (isAlpha==1) input
 * line into 'dst':
 *   1. unpack/convert the input to planar 8-bit via toYV12 or
 *      readLumPlanar into formatConvBuffer when such a converter is set,
 *   2. run either the generic FIR scaler (hyScale) or the fast-bilinear
 *      path (hyscale_fast),
 *   3. apply luma range conversion — never for alpha (convertRange is
 *      forced to NULL in that case).
 * NOTE(review): several control-flow lines (the 'if (toYV12)' opener, a
 * closing brace, the 'if (convertRange)' guard) are elided in this
 * excerpt; code kept verbatim.
 */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src_in[4], int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int16_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
    void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    const uint8_t *src = src_in[isAlpha ? 3 : 0];   /* plane 3 is alpha, plane 0 luma */

    toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
    src= formatConvBuffer;   /* scale from the converted copy */
    } else if (c->readLumPlanar && !isAlpha) {
        c->readLumPlanar(formatConvBuffer, src_in, srcW);
        src = formatConvBuffer;

    if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);

    convertRange(dst, dstWidth);
263 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
264 int dstWidth, const uint8_t *src1,
265 const uint8_t *src2, int srcW, int xInc)
269 for (i=0;i<dstWidth;i++) {
270 register unsigned int xx=xpos>>16;
271 register unsigned int xalpha=(xpos&0xFFFF)>>9;
272 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
273 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
276 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
277 dst1[i] = src1[srcW-1]*128;
278 dst2[i] = src2[srcW-1]*128;
/*
 * Horizontally scale one pair of chroma input lines (U into dst1, V into
 * dst2): optionally unpack/convert via chrToYV12 or readChrPlanar into
 * formatConvBuffer (U at the start, V at buf2 — an aligned offset past
 * the U data), then run the generic FIR scaler or the fast-bilinear
 * path, and finally apply chroma range conversion when installed.
 * NOTE(review): several lines are elided in this excerpt (the
 * 'if (c->chrToYV12)' opener and the 'src2 = buf2;' assignments among
 * them); code kept verbatim.
 */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int16_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
    /* V data goes after the U data, 16-byte aligned */
    uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
    c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
    src1= formatConvBuffer;

    } else if (c->readChrPlanar) {
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
        src1= formatConvBuffer;

    if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
/* Compile-time switch for verbose ring-buffer tracing in swScale();
 * DEBUG_BUFFERS expects a variable 'c' (the SwsContext) in scope at
 * every use site.  With the switch at 0 the av_log call is compiled out. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main scaling loop: consumes one horizontal slice of the source picture
 * and produces as many destination lines as the slice allows.  Input
 * lines are horizontally scaled into the lum/chr ring buffers, then the
 * vertical filter + output writer produce each destination line.
 * Returns the number of destination lines written (dstY - lastDstY).
 * NOTE(review): many lines of the original are elided in this excerpt
 * (function braces, the dstY/lastDstY/enough_lines declarations, several
 * if/else openers and closers); the remaining code is kept verbatim with
 * comments added.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;      /* ring buffers of horizontally scaled lines */
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);  /* rounds up */
    uint32_t *pal=c->pal_yuv;
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;   /* last source line already in the ring buffer */
    int lastInChrBuf= c->lastInChrBuf;

    /* packed input: all plane pointers/strides alias plane 0 */
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;   /* vertical chroma drop: skip chroma lines */
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    /* warn once about misaligned destination strides */
    if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   "         ->cannot do aligned memory accesses anymore\n");

    /* warn once about any misaligned pointer/stride when SSE2 is in use */
    if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
        || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
        || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
        static int warnedAlready=0;
        int cpu_flags = av_get_cpu_flags();
        if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */

    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;   /* flat "no dither" row */

    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
        dst[0] + dstStride[0] * dstY,
        dst[1] + dstStride[1] * chrDstY,
        dst[2] + dstStride[2] * chrDstY,
        (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        int use_mmx_vfilter= c->use_mmx_vfilter;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
        /* not enough input: only buffer what the slice provides */
        lastLumSrcY = srcSliceY + srcSliceH - 1;
        lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
        DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                      lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)

            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
        break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);

        /* per-line dither rows for the output stage */
        c->chrDither8 = dither_8x8_128[chrDstY & 7];
        c->lumDither8 = dither_8x8_128[dstY & 7];
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                     &yuv2packed1, &yuv2packed2, &yuv2packedX);

        /* ring-buffer windows the vertical filter will read from */
        const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
        const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
        const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
        int16_t *vLumFilter= c->vLumFilter;
        int16_t *vChrFilter= c->vChrFilter;

        if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
            const int chrSkipMask= (1<<c->chrDstVSubSample)-1;

            vLumFilter += dstY * vLumFilterSize;
            vChrFilter += chrDstY * vChrFilterSize;

//            av_assert0(use_mmx_vfilter != (
//                       yuv2planeX == yuv2planeX_10BE_c
//                    || yuv2planeX == yuv2planeX_10LE_c
//                    || yuv2planeX == yuv2planeX_9BE_c
//                    || yuv2planeX == yuv2planeX_9LE_c
//                    || yuv2planeX == yuv2planeX_16BE_c
//                    || yuv2planeX == yuv2planeX_16LE_c
//                    || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);

                vLumFilter= c->lumMmxFilter;
                vChrFilter= c->chrMmxFilter;

            if (vLumFilterSize == 1) {
                yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                yuv2planeX(vLumFilter, vLumFilterSize,
                           lumSrcPtr, dest[0], dstW, c->lumDither8, 0);

            if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                } else if (vChrFilterSize == 1) {
                    yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                    yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                    yuv2planeX(vChrFilter, vChrFilterSize,
                               chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                    yuv2planeX(vChrFilter, vChrFilterSize,
                               chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);

            if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                vLumFilter= c->alpMmxFilter;
                if (vLumFilterSize == 1) {
                    yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                    yuv2planeX(vLumFilter, vLumFilterSize,
                               alpSrcPtr, dest[3], dstW, c->lumDither8, 0);

            assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
            assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
            if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                int chrAlpha = vChrFilter[2 * dstY + 1];
                yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                            alpPixBuf ? *alpSrcPtr : NULL,
                            dest[0], dstW, chrAlpha, dstY);
            } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                int lumAlpha = vLumFilter[2 * dstY + 1];
                int chrAlpha = vChrFilter[2 * dstY + 1];
                lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
                chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                            alpPixBuf ? alpSrcPtr : NULL,
                            dest[0], dstW, lumAlpha, chrAlpha, dstY);
            } else { //general RGB
                yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                            lumSrcPtr, vLumFilterSize,
                            vChrFilter + dstY * vChrFilterSize,
                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                            alpSrcPtr, dest[0], dstW, dstY);

    /* dst has alpha but src provided none: fill it with opaque 255 */
    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
/*
 * Install the plain-C function pointers in the context: output writers,
 * input unpackers, the horizontal scaler matching the src/dst bit depths
 * (plus the fast-bilinear path when requested), the limited<->full range
 * converters, and the needs_hcscale flag (chroma scaling is skipped for
 * gray and monochrome sources/destinations).
 * NOTE(review): several 'else'/brace lines are elided in this excerpt;
 * code kept verbatim.
 */
static av_cold void sws_init_swScale_c(SwsContext *c)
    enum PixelFormat srcFormat = c->srcFormat;

    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX);

    ff_sws_init_input_funcs(c);

    /* horizontal scaler selection by input/output bit depth */
    if (c->srcBpc == 8) {
        if (c->dstBpc <= 10) {
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = hyscale_fast_c;
                c->hcscale_fast = hcscale_fast_c;
            c->hyScale = c->hcScale = hScale8To19_c;
        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;

    /* range converters: only when src/dst ranges differ and dst is not RGB */
    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
        if (c->dstBpc <= 10) {
            c->lumConvertRange = lumRangeFromJpeg_c;
            c->chrConvertRange = chrRangeFromJpeg_c;
            c->lumConvertRange = lumRangeToJpeg_c;
            c->chrConvertRange = chrRangeToJpeg_c;
            c->lumConvertRange = lumRangeFromJpeg16_c;
            c->chrConvertRange = chrRangeFromJpeg16_c;
            c->lumConvertRange = lumRangeToJpeg16_c;
            c->chrConvertRange = chrRangeToJpeg16_c;

    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
692 SwsFunc ff_getSwsFunc(SwsContext *c)
694 sws_init_swScale_c(c);
697 ff_sws_init_swScale_mmx(c);
699 ff_sws_init_swScale_altivec(c);