static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
- uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+ uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
{
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
* YV12 to RGB without scaling or interpolating
*/
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
- const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;
#endif
}
+static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
+ const uint8_t *src, long width)
+{ /*
+ * De-interleave a row of byte pairs: for each i in [0, width) the first
+ * byte of pair i (src[2*i+0]) goes to dst1[i] and the second byte
+ * (src[2*i+1]) goes to dst2[i]. src therefore has to provide 2*width bytes.
+ *
+ * Two implementations are selected at compile time: an MMX loop that
+ * handles 8 pairs per iteration, and a plain C loop.
+ *
+ * NOTE(review): the MMX loop advances in steps of 8 and only stops once the
+ * counter is non-negative, so a width that is not a multiple of 8 presumably
+ * relies on padded buffers -- confirm with the callers.
+ * NOTE(review): the asm writes memory and uses mm0-mm4 but only lists the
+ * counter register as clobbered -- confirm this is intentional.
+ */
#if COMPILE_TEMPLATE_MMX
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
+ __asm__ volatile(
+ "movq "MANGLE(bm01010101)", %%mm4 \n\t" /* mm4 = byte-select mask (value defined elsewhere; presumably keeps the low byte of each 16-bit word) */
+ "mov %0, %%"REG_a" \n\t" /* counter starts at -width and runs up towards 0 */
+ "1: \n\t"
+ "movq (%1, %%"REG_a",2), %%mm0 \n\t" /* load 16 source bytes = 8 pairs */
+ "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t" /* keep copies for the second byte of each pair */
+ "movq %%mm1, %%mm3 \n\t"
+ "pand %%mm4, %%mm0 \n\t" /* masked copy -> bytes destined for dst1 */
+ "pand %%mm4, %%mm1 \n\t"
+ "psrlw $8, %%mm2 \n\t" /* high byte of every word moved down -> bytes destined for dst2 */
+ "psrlw $8, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t" /* 8 words + 8 words -> 8 bytes each */
+ "packuswb %%mm3, %%mm2 \n\t"
+ "movq %%mm0, (%2, %%"REG_a") \n\t" /* store 8 bytes to dst1 */
+ "movq %%mm2, (%3, %%"REG_a") \n\t" /* store 8 bytes to dst2 */
+ "add $8, %%"REG_a" \n\t"
+ " js 1b \n\t" /* loop while the counter is still negative */
+ : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width) /* pointers are biased to the row end so the negative counter indexes from the start */
+ : "%"REG_a
+ );
+#else /* portable fallback: one pair per iteration */
+ int i;
+ for (i = 0; i < width; i++) {
+ dst1[i] = src[2*i+0];
+ dst2[i] = src[2*i+1];
+ }
+#endif
+}
+
+static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2,
+ long width, uint32_t *unused)
+{ /*
+ * Split the interleaved chroma row in src1 into separate planes by
+ * forwarding to nvXXtoUV: the first byte of every pair lands in dstU, the
+ * second in dstV. width is the number of pairs.
+ * src2 and unused are not read here.
+ */
+ RENAME(nvXXtoUV)(dstU, dstV, src1, width);
+}
+
+static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src1, const uint8_t *src2,
+ long width, uint32_t *unused)
+{ /*
+ * Same as the NV12 variant but with the destinations swapped in the call
+ * to nvXXtoUV: the first byte of every pair in src1 lands in dstV, the
+ * second in dstU. width is the number of pairs.
+ * src2 and unused are not read here.
+ */
+ RENAME(nvXXtoUV)(dstV, dstU, src1, width);
+}
+
+#if COMPILE_TEMPLATE_MMX
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
if(srcFormat == PIX_FMT_BGR24) {
);
}
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, int srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
__asm__ volatile(
"movq 24+%4, %%mm6 \n\t"
static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
int flags, const int16_t *hLumFilter,
const int16_t *hLumFilterPos, int hLumFilterSize,
- int srcFormat, uint8_t *formatConvBuffer,
+ enum PixelFormat srcFormat, uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
int32_t av_unused *mmx2FilterPos = c->lumMmx2FilterPos;
inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc, int flags, const int16_t *hChrFilter,
const int16_t *hChrFilterPos, int hChrFilterSize,
- int srcFormat, uint8_t *formatConvBuffer,
+ enum PixelFormat srcFormat, uint8_t *formatConvBuffer,
uint32_t *pal)
{
int32_t av_unused *mmx2FilterPos = c->chrMmx2FilterPos;
/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
which is needed to support GCC 4.0. */
-#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
:: "m" (src1), "m" (dst), "g" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#else
:: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
}
}
+#define DEBUG_SWSCALE_BUFFERS 0
+#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
+
static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
{
const int chrSrcW= c->chrSrcW;
const int lumXInc= c->lumXInc;
const int chrXInc= c->chrXInc;
- const int dstFormat= c->dstFormat;
- const int srcFormat= c->srcFormat;
+ const enum PixelFormat dstFormat= c->dstFormat;
+ const enum PixelFormat srcFormat= c->srcFormat;
const int flags= c->flags;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
srcStride[1]<<= c->vChrDrop;
srcStride[2]<<= c->vChrDrop;
- //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
- // (int)dst[0], (int)dst[1], (int)dst[2]);
-
- //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
- //dstStride[0],dstStride[1],dstStride[2]);
+ DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
+ src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
+ dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
+ DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
+ srcSliceY, srcSliceH, dstY, dstH);
+ DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
+ vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
static int warnedAlready=0; //FIXME move this into the context perhaps
will not get executed. This is not really intended but works
currently, so people might do it. */
if (srcSliceY ==0) {
- lumBufIndex=0;
- chrBufIndex=0;
+ lumBufIndex=-1;
+ chrBufIndex=-1;
dstY=0;
lastInLumBuf= -1;
lastInChrBuf= -1;
int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
int enough_lines;
- //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
- // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
//handle holes (FAST_BILINEAR & weird filters)
if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
- //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
}
- /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
- firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
- lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
- vChrBufSize, vLumBufSize);*/
+ DEBUG_BUFFERS("dstY: %d\n", dstY);
+ DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
+ firstLumSrcY, lastLumSrcY, lastInLumBuf);
+ DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
+ firstChrSrcY, lastChrSrcY, lastInChrBuf);
//Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) {
uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++;
- //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
+ DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
+ lumBufIndex, lastInLumBuf);
assert(lumBufIndex < 2*vLumBufSize);
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
- //printf("%d %d\n", lumBufIndex, vLumBufSize);
RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
flags, hLumFilter, hLumFilterPos, hLumFilterSize,
c->srcFormat, formatConvBuffer,
uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++;
+ DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
+ chrBufIndex, lastInChrBuf);
assert(chrBufIndex < 2*vChrBufSize);
assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
- int16_t *lumBuf = lumPixBuf[0];
- int16_t *chrBuf= chrPixBuf[0];
- int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
+ int16_t *lumBuf = lumSrcPtr[0];
+ int16_t *chrBuf= chrSrcPtr[0];
+ int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} else { //General YV12
c->yuv2yuvX(c,
switch(srcFormat) {
case PIX_FMT_YUYV422 : c->hcscale_internal = RENAME(yuy2ToUV); break;
case PIX_FMT_UYVY422 : c->hcscale_internal = RENAME(uyvyToUV); break;
+ case PIX_FMT_NV12 : c->hcscale_internal = RENAME(nv12ToUV); break;
+ case PIX_FMT_NV21 : c->hcscale_internal = RENAME(nv21ToUV); break;
case PIX_FMT_RGB8 :
case PIX_FMT_BGR8 :
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->hcscale_internal = palToUV; break;
- case PIX_FMT_YUV420PBE:
- case PIX_FMT_YUV422PBE:
- case PIX_FMT_YUV444PBE: c->hcscale_internal = RENAME(BEToUV); break;
- case PIX_FMT_YUV420PLE:
- case PIX_FMT_YUV422PLE:
- case PIX_FMT_YUV444PLE: c->hcscale_internal = RENAME(LEToUV); break;
+ case PIX_FMT_YUV420P16BE:
+ case PIX_FMT_YUV422P16BE:
+ case PIX_FMT_YUV444P16BE: c->hcscale_internal = RENAME(BEToUV); break;
+ case PIX_FMT_YUV420P16LE:
+ case PIX_FMT_YUV422P16LE:
+ case PIX_FMT_YUV444P16LE: c->hcscale_internal = RENAME(LEToUV); break;
}
if (c->chrSrcHSubSample) {
switch(srcFormat) {
c->hascale_internal = NULL;
switch (srcFormat) {
case PIX_FMT_YUYV422 :
- case PIX_FMT_YUV420PBE:
- case PIX_FMT_YUV422PBE:
- case PIX_FMT_YUV444PBE:
+ case PIX_FMT_YUV420P16BE:
+ case PIX_FMT_YUV422P16BE:
+ case PIX_FMT_YUV444P16BE:
case PIX_FMT_GRAY16BE : c->hyscale_internal = RENAME(yuy2ToY); break;
case PIX_FMT_UYVY422 :
- case PIX_FMT_YUV420PLE:
- case PIX_FMT_YUV422PLE:
- case PIX_FMT_YUV444PLE:
+ case PIX_FMT_YUV420P16LE:
+ case PIX_FMT_YUV422P16LE:
+ case PIX_FMT_YUV444P16LE:
case PIX_FMT_GRAY16LE : c->hyscale_internal = RENAME(uyvyToY); break;
case PIX_FMT_BGR24 : c->hyscale_internal = RENAME(bgr24ToY); break;
case PIX_FMT_BGR565 : c->hyscale_internal = bgr16ToY; break;