From 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Tue, 9 Aug 2016 20:20:00 +0200
Subject: [PATCH] audiodsp/x86: yasmify vector_clipf_sse

---
 libavcodec/x86/Makefile        |  1 -
 libavcodec/x86/audiodsp.asm    | 43 +++++++++++++++++++++++++
 libavcodec/x86/audiodsp_init.c |  2 +-
 libavcodec/x86/audiodsp_mmx.c  | 58 ----------------------------------
 4 files changed, 44 insertions(+), 60 deletions(-)
 delete mode 100644 libavcodec/x86/audiodsp_mmx.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 204c8563407..872b7faddb1 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
 
 # GCC inline assembly optimizations
 # subsystems
-MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
 MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
 MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
                               x86/hpeldsp_mmx.o
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index d7e63eb0cbc..1bc7e32a689 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+; void ff_vector_clipf_sse(float *dst, const float *src,
+;                          int len, float min, float max)
+INIT_XMM sse
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+    VBROADCASTSS m0, minm ; m0 = min in every lane
+    VBROADCASTSS m1, maxm ; m1 = max in every lane
+%elif WIN64
+    VBROADCASTSS m0, m3 ; on this target min is read from m3, max from memory
+    VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+    VBROADCASTSS m0, m0 ; on this target min/max are read from m0/m1
+    VBROADCASTSS m1, m1
+%endif
+
+    movsxdifnidn lenq, lend
+
+.loop: ; walks both buffers from the end towards the start
+    mova m2, [srcq + 4 * lenq - 4 * mmsize]
+    mova m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    maxps m2, m0 ; raise anything below min up to min
+    maxps m3, m0
+    maxps m4, m0
+    maxps m5, m0
+
+    minps m2, m1 ; lower anything above max down to max
+    minps m3, m1
+    minps m4, m1
+    minps m5, m1
+
+    mova [dstq + 4 * lenq - 4 * mmsize], m2
+    mova [dstq + 4 * lenq - 3 * mmsize], m3
+    mova [dstq + 4 * lenq - 2 * mmsize], m4
+    mova [dstq + 4 * lenq - 1 * mmsize], m5
+
+    sub lenq, mmsize ; 4 regs * mmsize bytes = mmsize floats per pass
+    jg .loop
+
+    RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8eb2e56bdda..23731158e56 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
     if (EXTERNAL_MMXEXT(cpu_flags))
         c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
 
-    if (INLINE_SSE(cpu_flags))
+    if (EXTERNAL_SSE(cpu_flags))
         c->vector_clipf = ff_vector_clipf_sse;
 
     if (EXTERNAL_SSE2(cpu_flags)) {
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
deleted file mode 100644
index 04cbb90706b..00000000000
--- a/libavcodec/x86/audiodsp_mmx.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/x86/asm.h"
-#include "audiodsp.h"
-
-#if HAVE_INLINE_ASM
-
-void ff_vector_clipf_sse(float *dst, const float *src,
-                         int len, float min, float max)
-{
-    x86_reg i = (len - 16) * 4;
-    __asm__ volatile (
-        "movss %3, %%xmm4 \n\t"
-        "movss %4, %%xmm5 \n\t"
-        "shufps $0, %%xmm4, %%xmm4 \n\t"
-        "shufps $0, %%xmm5, %%xmm5 \n\t"
-        "1: \n\t"
-        "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
-        "movaps 16(%2, %0), %%xmm1 \n\t"
-        "movaps 32(%2, %0), %%xmm2 \n\t"
-        "movaps 48(%2, %0), %%xmm3 \n\t"
-        "maxps %%xmm4, %%xmm0 \n\t"
-        "maxps %%xmm4, %%xmm1 \n\t"
-        "maxps %%xmm4, %%xmm2 \n\t"
-        "maxps %%xmm4, %%xmm3 \n\t"
-        "minps %%xmm5, %%xmm0 \n\t"
-        "minps %%xmm5, %%xmm1 \n\t"
-        "minps %%xmm5, %%xmm2 \n\t"
-        "minps %%xmm5, %%xmm3 \n\t"
-        "movaps %%xmm0, (%1, %0) \n\t"
-        "movaps %%xmm1, 16(%1, %0) \n\t"
-        "movaps %%xmm2, 32(%1, %0) \n\t"
-        "movaps %%xmm3, 48(%1, %0) \n\t"
-        "sub $64, %0 \n\t"
-        "jge 1b \n\t"
-        : "+&r" (i)
-        : "r" (dst), "r" (src), "m" (min), "m" (max)
-        : "memory");
-}
-
-#endif /* HAVE_INLINE_ASM */
-- 
2.39.5