X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswscale%2Fx86%2Fswscale_template.c;h=97d8cae6134a9eaa6fef82d0ad1af167a035606e;hb=20aec597d05f1930dbd4e4c5ab8ee837dea5b5f3;hp=7c304706798eb23297252e2a71e51ab2ddc083de;hpb=94346ab593f3fc0822f11d3e3f378f9944ad2d40;p=ffmpeg diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 7c304706798..97d8cae6134 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -38,88 +38,6 @@ #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) -#if !COMPILE_TEMPLATE_MMXEXT -static av_always_inline void -dither_8to16(const uint8_t *srcDither, int rot) -{ - if (rot) { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "psrlq $24, %%mm3\n\t" - "psllq $40, %%mm4\n\t" - "por %%mm4, %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "punpcklbw %%mm0, %%mm3\n\t" - "punpckhbw %%mm0, %%mm4\n\t" - :: "r"(srcDither) - ); - } else { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "punpcklbw %%mm0, %%mm3\n\t" - "punpckhbw %%mm0, %%mm4\n\t" - :: "r"(srcDither) - ); - } -} -#endif - -static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset) -{ - dither_8to16(dither, offset); - filterSize--; - __asm__ volatile( - "movd %0, %%mm1\n\t" - "punpcklwd %%mm1, %%mm1\n\t" - "punpckldq %%mm1, %%mm1\n\t" - "psllw $3, %%mm1\n\t" - "paddw %%mm1, %%mm3\n\t" - "paddw %%mm1, %%mm4\n\t" - "psraw $4, %%mm3\n\t" - "psraw $4, %%mm4\n\t" - ::"m"(filterSize) - ); - - __asm__ volatile(\ - "movq %%mm3, %%mm6\n\t" - "movq %%mm4, %%mm7\n\t" - "movl %3, %%ecx\n\t" - "mov %0, %%"FF_REG_d" \n\t"\ - "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\ - "movq 8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\ - "add $16, %%"FF_REG_d" \n\t"\ - "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ - "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - " jnz 1b \n\t"\ - "psraw $3, %%mm3 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "packuswb %%mm4, %%mm3 \n\t" - MOVNTQ2 " %%mm3, (%1, %%"FF_REG_c")\n\t" - "add $8, %%"FF_REG_c" \n\t"\ - "cmp %2, %%"FF_REG_c" \n\t"\ - "movq %%mm6, %%mm3\n\t" - "movq %%mm7, %%mm4\n\t" - "mov %0, %%"FF_REG_d" \n\t"\ - "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "g" (filter), - "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) - : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c - ); -} - #define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\ @@ -1499,8 +1417,8 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c) enum AVPixelFormat dstFormat = c->dstFormat; c->use_mmx_vfilter= 0; - if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12 - && dstFormat != AV_PIX_FMT_NV21 && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE + if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) + && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE && !(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { if (!(c->flags & SWS_FULL_CHR_H_INT)) { @@ -1517,7 +1435,6 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c) } } else { c->use_mmx_vfilter= 1; - c->yuv2planeX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;