From 81943a10b5007825892bac4582659fa7f74c4025 Mon Sep 17 00:00:00 2001
From: Christophe Gisquet
Date: Fri, 25 Jul 2014 15:08:49 +0200
Subject: [PATCH] x86: hevc_mc: load less data in epel filters

Before:
5679 decicycles in epel_bi, 2059976 runs, 37176 skips
3468 decicycles in epel_uni, 1040886 runs, 7690 skips

After:
5323 decicycles in epel_bi, 2059493 runs, 37659 skips
3262 decicycles in epel_uni, 1040871 runs, 7705 skips

Signed-off-by: Michael Niedermayer
---
 libavcodec/x86/hevc_mc.asm | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 545f5560793..9cfebb89c01 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -176,15 +176,23 @@ QPEL_TABLE 12, 4, w, sse4
 %else
 %define rfilterq %2
 %endif
- movdqu m0, [rfilterq ] ;load 128bit of x
+%if (%1 == 8 && %4 <= 4)
+%define %%load movd
+%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4)
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+
+ %%load m0, [rfilterq ]
 %ifnum %3
- movdqu m1, [rfilterq+ %3] ;load 128bit of x+stride
- movdqu m2, [rfilterq+2*%3] ;load 128bit of x+2*stride
- movdqu m3, [rfilterq+3*%3] ;load 128bit of x+3*stride
+ %%load m1, [rfilterq+ %3]
+ %%load m2, [rfilterq+2*%3]
+ %%load m3, [rfilterq+3*%3]
 %else
- movdqu m1, [rfilterq+ %3q] ;load 128bit of x+stride
- movdqu m2, [rfilterq+2*%3q] ;load 128bit of x+2*stride
- movdqu m3, [rfilterq+r3srcq] ;load 128bit of x+2*stride
+ %%load m1, [rfilterq+ %3q]
+ %%load m2, [rfilterq+2*%3q]
+ %%load m3, [rfilterq+r3srcq]
 %endif
 
 %if %1 == 8
-- 
2.39.2