/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
28 #include "swscale_internal.h"
30 #include "libavutil/avassert.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/cpu.h"
33 #include "libavutil/avutil.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/bswap.h"
36 #include "libavutil/pixdesc.h"
38 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
39 { 36, 68, 60, 92, 34, 66, 58, 90,},
40 { 100, 4,124, 28, 98, 2,122, 26,},
41 { 52, 84, 44, 76, 50, 82, 42, 74,},
42 { 116, 20,108, 12,114, 18,106, 10,},
43 { 32, 64, 56, 88, 38, 70, 62, 94,},
44 { 96, 0,120, 24,102, 6,126, 30,},
45 { 48, 80, 40, 72, 54, 86, 46, 78,},
46 { 112, 16,104, 8,118, 22,110, 14,},
// Flat vector of 64s: used as the "no dithering" dither row when the source
// has enough bit depth (see the !should_dither branch in swScale()).
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
52 static av_always_inline void fillPlane(uint8_t* plane, int stride,
53 int width, int height,
57 uint8_t *ptr = plane + stride*y;
58 for (i=0; i<height; i++) {
59 memset(ptr, val, width);
64 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
65 const int16_t *filter,
66 const int16_t *filterPos, int filterSize)
69 int32_t *dst = (int32_t *) _dst;
70 const uint16_t *src = (const uint16_t *) _src;
71 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
74 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
77 for (i = 0; i < dstW; i++) {
79 int srcPos = filterPos[i];
82 for (j = 0; j < filterSize; j++) {
83 val += src[srcPos + j] * filter[filterSize * i + j];
85 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
86 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
90 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
91 const int16_t *filter,
92 const int16_t *filterPos, int filterSize)
95 const uint16_t *src = (const uint16_t *) _src;
96 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
99 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
101 for (i = 0; i < dstW; i++) {
103 int srcPos = filterPos[i];
106 for (j = 0; j < filterSize; j++) {
107 val += src[srcPos + j] * filter[filterSize * i + j];
109 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
110 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
114 // bilinear / bicubic scaling
115 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
116 const int16_t *filter, const int16_t *filterPos,
120 for (i=0; i<dstW; i++) {
122 int srcPos= filterPos[i];
124 for (j=0; j<filterSize; j++) {
125 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
127 //filter += hFilterSize;
128 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
133 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
134 const int16_t *filter, const int16_t *filterPos,
138 int32_t *dst = (int32_t *) _dst;
139 for (i=0; i<dstW; i++) {
141 int srcPos= filterPos[i];
143 for (j=0; j<filterSize; j++) {
144 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
146 //filter += hFilterSize;
147 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Expand 15-bit chroma from limited (MPEG) to full (JPEG) range in place.
 * The input is clamped to 30775 so the 32-bit intermediate product
 * cannot exceed the representable range (same effect as the FFMIN the
 * rest of the file uses, written out explicitly).
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int u = dstU[i] < 30775 ? dstU[i] : 30775;
        int v = dstV[i] < 30775 ? dstV[i] : 30775;
        dstU[i] = (u * 4663 - 9289992) >> 12; //-264
        dstV[i] = (v * 4663 - 9289992) >> 12; //-264
    }
}
/**
 * Compress 15-bit chroma from full (JPEG) to limited (MPEG) range in place.
 * Inverse of chrRangeToJpeg_c.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; //1469
        dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; //1469
    }
}
/**
 * Expand 15-bit luma from limited (MPEG) to full (JPEG) range in place.
 * Input is clamped to 30189 before scaling (explicit form of the FFMIN
 * used elsewhere in this file) to bound the intermediate product.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        int v = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (v * 19077 - 39057361) >> 14;
    }
}
/**
 * Compress 15-bit luma from full (JPEG) to limited (MPEG) range in place.
 * Inverse of lumRangeToJpeg_c.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i] * 14071 + 33561947) >> 14;
}
/**
 * 19-bit variant of chrRangeToJpeg_c: the buffers really hold int32_t
 * samples (the int16_t* signature matches the common function pointer).
 * Constants are the 15-bit ones scaled by 16 (<<4).
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4); // explicit FFMIN clamp
        int32_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);
        dstU[i] = (u * 4663 - (9289992 << 4)) >> 12; //-264
        dstV[i] = (v * 4663 - (9289992 << 4)) >> 12; //-264
    }
}
/**
 * 19-bit variant of chrRangeFromJpeg_c: buffers really hold int32_t
 * samples; the offset constant is the 15-bit one scaled by 16 (<<4).
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; //1469
        dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; //1469
    }
}
/**
 * 19-bit variant of lumRangeToJpeg_c: the buffer really holds int32_t
 * samples. Coefficient 4769 = 19077/4 and shift 12 = 14-2 keep the
 * intermediate product inside 32 bits.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int32_t v = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4); // explicit FFMIN clamp
        dst[i] = (v * 4769 - (39057361 << 2)) >> 12;
    }
}
/**
 * 19-bit variant of lumRangeFromJpeg_c: the buffer really holds int32_t
 * samples. Coefficient and offset are divided by 4 (with shift 12 = 14-2)
 * to avoid 32-bit overflow of the intermediate product.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i] * (14071 / 4) + (33561947 << 4) / 4) >> 12;
}
218 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
219 const uint8_t *src, int srcW, int xInc)
223 for (i=0;i<dstWidth;i++) {
224 register unsigned int xx=xpos>>16;
225 register unsigned int xalpha=(xpos&0xFFFF)>>9;
226 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
229 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
230 dst[i] = src[srcW-1]*128;
233 // *** horizontal scale Y line to temp buffer
234 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
235 const uint8_t *src_in[4], int srcW, int xInc,
236 const int16_t *hLumFilter,
237 const int16_t *hLumFilterPos, int hLumFilterSize,
238 uint8_t *formatConvBuffer,
239 uint32_t *pal, int isAlpha)
241 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
242 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
243 const uint8_t *src = src_in[isAlpha ? 3 : 0];
246 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
247 src= formatConvBuffer;
248 } else if (c->readLumPlanar && !isAlpha) {
249 c->readLumPlanar(formatConvBuffer, src_in, srcW);
250 src = formatConvBuffer;
253 if (!c->hyscale_fast) {
254 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
255 } else { // fast bilinear upscale / crap downscale
256 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
260 convertRange(dst, dstWidth);
263 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
264 int dstWidth, const uint8_t *src1,
265 const uint8_t *src2, int srcW, int xInc)
269 for (i=0;i<dstWidth;i++) {
270 register unsigned int xx=xpos>>16;
271 register unsigned int xalpha=(xpos&0xFFFF)>>9;
272 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
273 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
276 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
277 dst1[i] = src1[srcW-1]*128;
278 dst2[i] = src2[srcW-1]*128;
282 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
283 const uint8_t *src_in[4],
284 int srcW, int xInc, const int16_t *hChrFilter,
285 const int16_t *hChrFilterPos, int hChrFilterSize,
286 uint8_t *formatConvBuffer, uint32_t *pal)
288 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
290 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
291 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
292 src1= formatConvBuffer;
294 } else if (c->readChrPlanar) {
295 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
296 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
297 src1= formatConvBuffer;
301 if (!c->hcscale_fast) {
302 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
303 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
304 } else { // fast bilinear upscale / crap downscale
305 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
308 if (c->chrConvertRange)
309 c->chrConvertRange(dst1, dst2, dstWidth);
#define DEBUG_SWSCALE_BUFFERS 0
// Compile-time-switched debug logging. Wrapped in do { } while (0) so the
// macro is a single statement and cannot capture a following `else`
// (the previous bare `if` form had a dangling-else hazard).
// Note: expands a reference to a local `c` (the SwsContext) at the call site.
#define DEBUG_BUFFERS(...)                                  \
    do {                                                    \
        if (DEBUG_SWSCALE_BUFFERS)                          \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);           \
    } while (0)
/**
 * Main scaling loop: converts and scales the input slice
 * [srcSliceY, srcSliceY + srcSliceH) into the destination picture,
 * returning the number of destination lines produced (dstY - lastDstY).
 * Horizontally scaled lines are kept in per-plane ring buffers
 * (lumPixBuf/chrUPixBuf/chrVPixBuf/alpPixBuf) so vertical filtering can
 * reuse them across slices.
 * NOTE(review): this copy of the source is truncated — declarations such
 * as dstY/lastDstY/enough_lines/dest[] and many braces are missing from
 * the visible text; the comments below annotate only what is present.
 */
static int swScale(SwsContext *c, const uint8_t* src[],
                   int srcStride[], int srcSliceY,
                   int srcSliceH, uint8_t* dst[], int dstStride[])
    /* load a few things into local vars to make the code more readable? and faster */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    // precomputed filter coefficient/position tables from init
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    // ring buffers of horizontally scaled lines awaiting vertical filtering
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrUPixBuf= c->chrUPixBuf;
    int16_t **chrVPixBuf= c->chrVPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    // chroma slice bounds; the negated shift rounds the height upward
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    uint32_t *pal=c->pal_yuv;
    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;

    /* vars which will change and which we need to store back in the context */
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    // Packed input: all planes alias plane 0; vChrDrop skips chroma lines.
    if (isPacked(c->srcFormat)) {
        srcStride[3]= srcStride[0];
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    // One-time warning when output strides defeat aligned SIMD stores.
    if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
        static int warnedAlready=0; //FIXME move this into the context perhaps
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");

    // One-time warning when pointers or strides are misaligned on SSE2 hosts.
    if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
        || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
        || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
        static int warnedAlready=0;
        int cpu_flags = av_get_cpu_flags();
        if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");

    /* Note the user might start scaling the picture in the middle so this
       will not get executed. This is not really intended but works
       currently, so people might do it. */

    // Deep sources need real dithering; otherwise use the flat 64 row.
    if (!should_dither) {
        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;

    // Main loop: one destination line per iteration.
    for (;dstY < dstH; dstY++) {
        const int chrDstY= dstY>>c->chrDstVSubSample;
            dst[0] + dstStride[0] * dstY,
            dst[1] + dstStride[1] * chrDstY,
            dst[2] + dstStride[2] * chrDstY,
            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
        int use_mmx_vfilter= c->use_mmx_vfilter;

        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

        //handle holes (FAST_BILINEAR & weird filters)
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        // Do we have enough lines in this slice to output the dstY line
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
            // Not enough input: buffer what this slice can provide.
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);

        //Do horizontal scaling
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                    hLumFilter, hLumFilterPos, hLumFilterSize,
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1[4] = {
                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
            //FIXME replace parameters through context struct (some at least)
            if (c->needs_hcscale)
                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                        chrDstW, src1, chrSrcW, chrXInc,
                        hChrFilter, hChrFilterPos, hChrFilterSize,
                        formatConvBuffer, pal);
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        //wrap buf index around to stay inside the ring buffer
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
            break; //we can't output a dstY line so let's try with the next slice

        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);

        // Per-line dither rows, rotated through the 8x8 matrix.
        c->chrDither8 = dither_8x8_128[chrDstY & 7];
        c->lumDither8 = dither_8x8_128[dstY & 7];
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                     &yuv2packed1, &yuv2packed2, &yuv2packedX);

            // Pointers into the ring buffers positioned for this output line.
            const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            int16_t *vLumFilter= c->vLumFilter;
            int16_t *vChrFilter= c->vChrFilter;

            if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;

                vLumFilter += dstY * vLumFilterSize;
                vChrFilter += chrDstY * vChrFilterSize;

//                 av_assert0(use_mmx_vfilter != (
//                 yuv2planeX == yuv2planeX_10BE_c
//                 || yuv2planeX == yuv2planeX_10LE_c
//                 || yuv2planeX == yuv2planeX_9BE_c
//                 || yuv2planeX == yuv2planeX_9LE_c
//                 || yuv2planeX == yuv2planeX_16BE_c
//                 || yuv2planeX == yuv2planeX_16LE_c
//                 || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);

                    // MMX vertical filter uses its own coefficient layout.
                    vLumFilter= c->lumMmxFilter;
                    vChrFilter= c->chrMmxFilter;

                // Luma output: single-tap fast path vs. full vertical filter.
                if (vLumFilterSize == 1) {
                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                    yuv2planeX(vLumFilter, vLumFilterSize,
                               lumSrcPtr, dest[0], dstW, c->lumDither8, 0);

                // Chroma output, skipped on subsampled lines and gray output.
                if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                        yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                    } else if (vChrFilterSize == 1) {
                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                        yuv2planeX(vChrFilter, vChrFilterSize,
                                   chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                        yuv2planeX(vChrFilter, vChrFilterSize,
                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);

                // Alpha plane output, mirrors the luma path.
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                        vLumFilter= c->alpMmxFilter;
                    if (vLumFilterSize == 1) {
                        yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                        yuv2planeX(vLumFilter, vLumFilterSize,
                                   alpSrcPtr, dest[3], dstW, c->lumDither8, 0);

                // Packed (RGB-like) output paths.
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB
                    int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? *alpSrcPtr : NULL,
                                dest[0], dstW, chrAlpha, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                    int lumAlpha = vLumFilter[2 * dstY + 1];
                    int chrAlpha = vChrFilter[2 * dstY + 1];
                    lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
                                alpPixBuf ? alpSrcPtr : NULL,
                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                } else { //general RGB
                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
                                lumSrcPtr, vLumFilterSize,
                                vChrFilter + dstY * vChrFilterSize,
                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest[0], dstW, dstY);

    // Opaque alpha for planar outputs with alpha but no alpha source.
    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

    // Flush non-temporal stores issued by the MMX2 code paths.
    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
        __asm__ volatile("sfence":::"memory");

    /* store changed local vars back in the context */
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
641 static av_cold void sws_init_swScale_c(SwsContext *c)
643 enum PixelFormat srcFormat = c->srcFormat;
645 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
646 &c->yuv2nv12cX, &c->yuv2packed1,
647 &c->yuv2packed2, &c->yuv2packedX);
649 ff_sws_init_input_funcs(c);
652 if (c->srcBpc == 8) {
653 if (c->dstBpc <= 10) {
654 c->hyScale = c->hcScale = hScale8To15_c;
655 if (c->flags & SWS_FAST_BILINEAR) {
656 c->hyscale_fast = hyscale_fast_c;
657 c->hcscale_fast = hcscale_fast_c;
660 c->hyScale = c->hcScale = hScale8To19_c;
663 c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
666 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
667 if (c->dstBpc <= 10) {
669 c->lumConvertRange = lumRangeFromJpeg_c;
670 c->chrConvertRange = chrRangeFromJpeg_c;
672 c->lumConvertRange = lumRangeToJpeg_c;
673 c->chrConvertRange = chrRangeToJpeg_c;
677 c->lumConvertRange = lumRangeFromJpeg16_c;
678 c->chrConvertRange = chrRangeFromJpeg16_c;
680 c->lumConvertRange = lumRangeToJpeg16_c;
681 c->chrConvertRange = chrRangeToJpeg16_c;
686 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
687 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
688 c->needs_hcscale = 1;
691 SwsFunc ff_getSwsFunc(SwsContext *c)
693 sws_init_swScale_c(c);
696 ff_sws_init_swScale_mmx(c);
698 ff_sws_init_swScale_altivec(c);