uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
#ifdef HAVE_MMX
- if (c->flags & SWS_ACCURATE_RND){
- if (uDest){
- YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
- YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
- }
+ if(!(c->flags & SWS_BITEXACT)){
+ if (c->flags & SWS_ACCURATE_RND){
+ if (uDest){
+ YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+ YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+ }
- YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
- }else{
- if (uDest){
- YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
- YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
- }
+ YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+ }else{
+ if (uDest){
+ YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+ YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+ }
- YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+ YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+ }
+ return;
}
-#else
+#endif
#ifdef HAVE_ALTIVEC
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, vDest, dstW, chrDstW);
#endif //!HAVE_ALTIVEC
-#endif /* HAVE_MMX */
}
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
+ int i;
#ifdef HAVE_MMX
- long p= uDest ? 3 : 1;
- uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
- uint8_t *dst[3]= {dest, uDest, vDest};
- long counter[3] = {dstW, chrDstW, chrDstW};
-
- if (c->flags & SWS_ACCURATE_RND){
- while(p--){
- asm volatile(
- YSCALEYUV2YV121_ACCURATE
- :: "r" (src[p]), "r" (dst[p] + counter[p]),
- "g" (-counter[p])
- : "%"REG_a
- );
- }
- }else{
- while(p--){
- asm volatile(
- YSCALEYUV2YV121
- :: "r" (src[p]), "r" (dst[p] + counter[p]),
- "g" (-counter[p])
- : "%"REG_a
- );
+ if(!(c->flags & SWS_BITEXACT)){
+ long p= uDest ? 3 : 1;
+ uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
+ uint8_t *dst[3]= {dest, uDest, vDest};
+ long counter[3] = {dstW, chrDstW, chrDstW};
+
+ if (c->flags & SWS_ACCURATE_RND){
+ while(p--){
+ asm volatile(
+ YSCALEYUV2YV121_ACCURATE
+ :: "r" (src[p]), "r" (dst[p] + counter[p]),
+ "g" (-counter[p])
+ : "%"REG_a
+ );
+ }
+ }else{
+ while(p--){
+ asm volatile(
+ YSCALEYUV2YV121
+ :: "r" (src[p]), "r" (dst[p] + counter[p]),
+ "g" (-counter[p])
+ : "%"REG_a
+ );
+ }
}
+ return;
}
-
-#else
- int i;
+#endif
for (i=0; i<dstW; i++)
{
int val= (lumSrc[i]+64)>>7;
uDest[i]= u;
vDest[i]= v;
}
-#endif
}
{
#ifdef HAVE_MMX
long dummy=0;
- if (c->flags & SWS_ACCURATE_RND){
- switch(c->dstFormat){
- case PIX_FMT_RGB32:
- YSCALEYUV2PACKEDX_ACCURATE
- YSCALEYUV2RGBX
- WRITEBGR32(%4, %5, %%REGa)
+ if(!(c->flags & SWS_BITEXACT)){
+ if (c->flags & SWS_ACCURATE_RND){
+ switch(c->dstFormat){
+ case PIX_FMT_RGB32:
+ YSCALEYUV2PACKEDX_ACCURATE
+ YSCALEYUV2RGBX
+ WRITEBGR32(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_BGR24:
- YSCALEYUV2PACKEDX_ACCURATE
- YSCALEYUV2RGBX
- "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
- "add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_BGR24:
+ YSCALEYUV2PACKEDX_ACCURATE
+ YSCALEYUV2RGBX
+ "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
+ "add %4, %%"REG_c" \n\t"
+ WRITEBGR24(%%REGc, %5, %%REGa)
- :: "r" (&c->redDither),
- "m" (dummy), "m" (dummy), "m" (dummy),
- "r" (dest), "m" (dstW)
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
- );
- return;
- case PIX_FMT_RGB555:
- YSCALEYUV2PACKEDX_ACCURATE
- YSCALEYUV2RGBX
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ :: "r" (&c->redDither),
+ "m" (dummy), "m" (dummy), "m" (dummy),
+ "r" (dest), "m" (dstW)
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+ );
+ return;
+ case PIX_FMT_RGB555:
+ YSCALEYUV2PACKEDX_ACCURATE
+ YSCALEYUV2RGBX
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
- "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
+ "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_RGB565:
- YSCALEYUV2PACKEDX_ACCURATE
- YSCALEYUV2RGBX
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ WRITERGB15(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_RGB565:
+ YSCALEYUV2PACKEDX_ACCURATE
+ YSCALEYUV2RGBX
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
- "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
+ "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_YUYV422:
- YSCALEYUV2PACKEDX_ACCURATE
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
- "psraw $3, %%mm3 \n\t"
- "psraw $3, %%mm4 \n\t"
- "psraw $3, %%mm1 \n\t"
- "psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- }
- }else{
- switch(c->dstFormat)
- {
- case PIX_FMT_RGB32:
- YSCALEYUV2PACKEDX
- YSCALEYUV2RGBX
- WRITEBGR32(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_BGR24:
- YSCALEYUV2PACKEDX
- YSCALEYUV2RGBX
- "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
- "add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
-
- :: "r" (&c->redDither),
- "m" (dummy), "m" (dummy), "m" (dummy),
- "r" (dest), "m" (dstW)
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
- );
- return;
- case PIX_FMT_RGB555:
- YSCALEYUV2PACKEDX
- YSCALEYUV2RGBX
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ WRITERGB16(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_YUYV422:
+ YSCALEYUV2PACKEDX_ACCURATE
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+
+ "psraw $3, %%mm3 \n\t"
+ "psraw $3, %%mm4 \n\t"
+ "psraw $3, %%mm1 \n\t"
+ "psraw $3, %%mm7 \n\t"
+ WRITEYUY2(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ }
+ }else{
+ switch(c->dstFormat)
+ {
+ case PIX_FMT_RGB32:
+ YSCALEYUV2PACKEDX
+ YSCALEYUV2RGBX
+ WRITEBGR32(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_BGR24:
+ YSCALEYUV2PACKEDX
+ YSCALEYUV2RGBX
+ "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
+ "add %4, %%"REG_c" \n\t"
+ WRITEBGR24(%%REGc, %5, %%REGa)
+
+ :: "r" (&c->redDither),
+ "m" (dummy), "m" (dummy), "m" (dummy),
+ "r" (dest), "m" (dstW)
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+ );
+ return;
+ case PIX_FMT_RGB555:
+ YSCALEYUV2PACKEDX
+ YSCALEYUV2RGBX
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_RGB565:
- YSCALEYUV2PACKEDX
- YSCALEYUV2RGBX
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ WRITERGB15(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_RGB565:
+ YSCALEYUV2PACKEDX
+ YSCALEYUV2RGBX
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_YUYV422:
- YSCALEYUV2PACKEDX
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
- "psraw $3, %%mm3 \n\t"
- "psraw $3, %%mm4 \n\t"
- "psraw $3, %%mm1 \n\t"
- "psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
- YSCALEYUV2PACKEDX_END
- return;
+ WRITERGB16(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ case PIX_FMT_YUYV422:
+ YSCALEYUV2PACKEDX
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+
+ "psraw $3, %%mm3 \n\t"
+ "psraw $3, %%mm4 \n\t"
+ "psraw $3, %%mm1 \n\t"
+ "psraw $3, %%mm7 \n\t"
+ WRITEYUY2(%4, %5, %%REGa)
+ YSCALEYUV2PACKEDX_END
+ return;
+ }
}
}
#endif /* HAVE_MMX */
{
#endif // if 0
#ifdef HAVE_MMX
+ if(!(c->flags & SWS_BITEXACT)){
switch(c->dstFormat)
{
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
return;
default: break;
}
+ }
#endif //HAVE_MMX
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
}
}
#ifdef HAVE_MMX
- if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- {
- switch(dstFormat)
+ if(!(flags & SWS_BITEXACT)){
+ if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
{
- case PIX_FMT_RGB32:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_BGR24:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_RGB555:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ switch(dstFormat)
+ {
+ case PIX_FMT_RGB32:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1(%%REGBP, %5)
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_BGR24:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1(%%REGBP, %5)
+ WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_RGB555:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1(%%REGBP, %5)
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_RGB565:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_RGB565:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1(%%REGBP, %5)
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_YUYV422:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2PACKED1(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_YUYV422:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2PACKED1(%%REGBP, %5)
+ WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ }
}
- }
- else
- {
- switch(dstFormat)
+ else
{
- case PIX_FMT_RGB32:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_BGR24:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_RGB555:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ switch(dstFormat)
+ {
+ case PIX_FMT_RGB32:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_BGR24:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_RGB555:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g5Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_RGB565:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_RGB565:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
- "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
- "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
- "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
+ "paddusb "MANGLE(b5Dither)", %%mm2 \n\t"
+ "paddusb "MANGLE(g6Dither)", %%mm4 \n\t"
+ "paddusb "MANGLE(r5Dither)", %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ case PIX_FMT_YUYV422:
+ asm volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2PACKED1b(%%REGBP, %5)
+ WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
- case PIX_FMT_YUYV422:
- asm volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2PACKED1b(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
-
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- return;
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ return;
+ }
}
}
#endif /* HAVE_MMX */
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
- "movq "MANGLE(w02)", %%mm6 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
ASMALIGN(4)
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
- "psrad $8, %%mm0 \n\t"
- "psrad $8, %%mm3 \n\t"
- "packssdw %%mm3, %%mm0 \n\t"
- "pmaddwd %%mm6, %%mm0 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+ "punpckldq %%mm3, %%mm0 \n\t"
+ "punpckhdq %%mm3, %%mm4 \n\t"
+ "paddd %%mm4, %%mm0 \n\t"
+ "psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
- "movq "MANGLE(w02)", %%mm6 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
ASMALIGN(4)
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"
-
- "psrad $8, %%mm0 \n\t"
- "psrad $8, %%mm3 \n\t"
- "packssdw %%mm3, %%mm0 \n\t"
- "pmaddwd %%mm6, %%mm0 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+ "punpckldq %%mm3, %%mm0 \n\t"
+ "punpckhdq %%mm3, %%mm4 \n\t"
+ "paddd %%mm4, %%mm0 \n\t"
+ "psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
dst-= counter/2;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
- "movq "MANGLE(w02)", %%mm6 \n\t"
ASMALIGN(4)
"1: \n\t"
"mov %2, %%"REG_c" \n\t"
"cmp %4, %%"REG_c" \n\t"
" jb 2b \n\t"
"add %6, %1 \n\t"
- "psrad $8, %%mm4 \n\t"
- "psrad $8, %%mm5 \n\t"
- "packssdw %%mm5, %%mm4 \n\t"
- "pmaddwd %%mm6, %%mm4 \n\t"
+ "movq %%mm4, %%mm0 \n\t"
+ "punpckldq %%mm5, %%mm4 \n\t"
+ "punpckhdq %%mm5, %%mm0 \n\t"
+ "paddd %%mm0, %%mm4 \n\t"
+ "psrad $7, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"mov %3, %%"REG_a" \n\t"
"movd %%mm4, (%%"REG_a", %0) \n\t"
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
//filter += hFilterSize;
- dst[i] = av_clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ...
+ dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//dst[i] = val>>7;
}
#endif /* HAVE_ALTIVEC */
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dstY);
}else{
- RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
- dest, dstW, chrAlpha, dstFormat, flags, dstY);
+ RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+ dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
}
else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dstY);
}else{
- RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
- dest, dstW, lumAlpha, chrAlpha, dstY);
+ RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+ dest, dstW, lumAlpha, chrAlpha, dstY);
}
}
else //general RGB
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dstY);
}else{
- RENAME(yuv2packedX)(c,
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
- dest, dstW, dstY);
+ RENAME(yuv2packedX)(c,
+ vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+ vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+ dest, dstW, dstY);
}
}
}
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dstY);
}else{
- yuv2packedXinC(c,
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
- dest, dstW, dstY);
+ yuv2packedXinC(c,
+ vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+ vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+ dest, dstW, dstY);
}
}
}