From: Ronald S. Bultje Date: Fri, 26 Dec 2014 20:15:50 +0000 (-0500) Subject: vp9/x86: make filter_48/84/88_h work on 32-bit. X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=b26bc3520f9ae0f025d2b3787a3fa33febae24af;p=ffmpeg vp9/x86: make filter_48/84/88_h work on 32-bit. --- diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 37b5e0fc2e5..7bb31ff742d 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -348,17 +348,11 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ - if (ARCH_X86_64) { \ - dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ - } \ + dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ - if (ARCH_X86_64) { \ - dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ - } \ + dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ - if (ARCH_X86_64) { \ - dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ - } \ + dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ } while (0) diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm index deec5302656..838b78530ef 100644 --- a/libavcodec/x86/vp9lpf.asm +++ b/libavcodec/x86/vp9lpf.asm @@ -937,9 +937,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride, mova m3, [P0] mova m4, [Q0] mova m5, [Q1] +%if ARCH_X86_64 mova m6, [Q2] +%endif mova m7, [Q3] DEFINE_REAL_P7_TO_Q7 +%if ARCH_X86_64 SBUTTERFLY bw, 0, 1, 8 SBUTTERFLY bw, 2, 3, 8 SBUTTERFLY bw, 4, 5, 8 @@ -952,22 +955,47 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride, SBUTTERFLY dq, 1, 5, 8 SBUTTERFLY dq, 2, 6, 8 SBUTTERFLY dq, 3, 7, 8 - movh [P7], m0 - movhps [P6], m0 - movh [Q0], m1 - movhps [Q1], m1 - movh [P3], m2 - movhps [P2], m2 - movh [Q4], m3 - movhps [Q5], m3 - movh [P5], m4 - movhps [P4], m4 - movh [Q2], m5 - movhps [Q3], m5 - movh [P1], m6 - movhps [P0], m6 - movh [Q6], m7 - movhps [Q7], m7 +%else + SBUTTERFLY bw, 0, 1, 6 + mova [rsp+64], m1 + mova m6, [rsp+96] + SBUTTERFLY bw, 2, 3, 1 + SBUTTERFLY bw, 4, 5, 1 + SBUTTERFLY bw, 6, 7, 1 + SBUTTERFLY wd, 0, 2, 1 + mova [rsp+96], m2 + mova m1, [rsp+64] + SBUTTERFLY wd, 1, 3, 2 + SBUTTERFLY wd, 4, 6, 2 + SBUTTERFLY wd, 5, 7, 2 + SBUTTERFLY dq, 0, 4, 2 + SBUTTERFLY dq, 1, 5, 2 + movh [Q0], m1 + movhps [Q1], m1 + mova m2, [rsp+96] + SBUTTERFLY dq, 2, 6, 1 + SBUTTERFLY dq, 3, 7, 1 +%endif + SWAP 3, 6 + SWAP 1, 4 + movh [P7], m0 + movhps [P6], m0 + movh [P5], m1 + movhps [P4], m1 + movh [P3], m2 + movhps [P2], m2 + movh [P1], m3 + movhps [P0], m3 +%if ARCH_X86_64 + movh [Q0], m4 + movhps [Q1], m4 +%endif + movh [Q2], m5 + movhps [Q3], m5 + movh [Q4], m6 + movhps [Q5], m6 + movh [Q6], m7 + movhps [Q7], m7 %endif %endif @@ -977,7 +1005,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride, %macro LPF_16_VH 5 INIT_XMM %5 LOOPFILTER v, %1, %2, 0, %4 -%if ARCH_X86_64 || %1 == 44 +%if ARCH_X86_64 || %1 != 16 LOOPFILTER h, %1, %2, %3, %4 %endif %endmacro