From 156ea66c91b1986a87916f187216978d686725f6 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sun, 27 Jan 2019 11:06:34 +0100 Subject: [PATCH] h264/x86: sign extend int stride in deblock functions Fixes checkasm errors after adding the h264 deblock tests. --- libavcodec/x86/h264_deblock.asm | 8 ++++++++ libavcodec/x86/h264_deblock_10bit.asm | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 33fd5a9dd7f..4b9cf85d16c 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -288,6 +288,7 @@ cextern pb_3 ;----------------------------------------------------------------------------- %macro DEBLOCK_LUMA 0 cglobal deblock_v_luma_8, 5,5,10 + movsxdifnidn r1, r1d movd m8, [r4] ; tc0 lea r4, [r1*3] dec r2d ; alpha-1 @@ -335,6 +336,7 @@ cglobal deblock_v_luma_8, 5,5,10 INIT_MMX cpuname cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64 movsxd r7, r1d + movsxdifnidn r1, r1d lea r8, [r7+r7*2] lea r6, [r0-4] lea r5, [r0-4+r8] @@ -395,6 +397,7 @@ DEBLOCK_LUMA ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_%1_luma_8, 5,5,8,2*%2 + movsxdifnidn r1, r1d lea r4, [r1*3] dec r2 ; alpha-1 neg r4 @@ -445,6 +448,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2 ;----------------------------------------------------------------------------- INIT_MMX cpuname cglobal deblock_h_luma_8, 0,5,8,0x60+12 + movsxdifnidn r1, r1d mov r0, r0mp mov r3, r1m lea r4, [r3*3] @@ -646,6 +650,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,0x10 %else cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50 %endif + movsxdifnidn r1, r1d lea r4, [r1*4] lea r5, [r1*3] ; 3*stride dec r2d ; alpha-1 @@ -703,6 +708,7 @@ INIT_MMX cpuname ;----------------------------------------------------------------------------- cglobal deblock_h_luma_intra_8, 4,9,0,0x80 movsxd r7, r1d + movsxdifnidn r1, r1d lea r8, [r7*3] lea r6, [r0-4] lea r5, [r0-4+r8] @@ -782,6 +788,7 @@ DEBLOCK_LUMA_INTRA v8 INIT_MMX mmxext %macro CHROMA_V_START 0 + movsxdifnidn r1, r1d dec r2d ; alpha-1 dec r3d ; beta-1 mov t5, r0 @@ -790,6 +797,7 @@ INIT_MMX mmxext %endmacro %macro CHROMA_H_START 0 + movsxdifnidn r1, r1d dec r2d dec r3d sub r0, 2 diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index d049c62bf2f..1a424b7f435 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -162,6 +162,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) %define ms2 [rsp+mmsize*2] %define am [rsp+mmsize*3] %define bm [rsp+mmsize*4] + movsxdifnidn r1, r1d SUB rsp, pad shl r2d, 2 shl r3d, 2 @@ -219,6 +220,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %define p2m [rsp+mmsize*4] %define am [rsp+mmsize*5] %define bm [rsp+mmsize*6] + movsxdifnidn r1, r1d SUB rsp, pad shl r2d, 2 shl r3d, 2 @@ -349,6 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15 %define mask0 m7 %define mask1 m10 %define mask2 m11 + movsxdifnidn r1, r1d shl r2d, 2 shl r3d, 2 LOAD_AB m12, m13, r2d, r3d @@ -377,6 +380,7 @@ cglobal deblock_v_luma_10, 5,5,15 REP_RET cglobal deblock_h_luma_10, 5,7,15 + movsxdifnidn r1, r1d shl r2d, 2 shl r3d, 2 LOAD_AB m12, m13, r2d, r3d @@ -492,6 +496,7 @@ DEBLOCK_LUMA_64 CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] %assign i i+1 %endrep + movsxdifnidn r1, r1d SUB rsp, pad %endmacro @@ -615,6 +620,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 %define q2 m13 %define aa m5 %define bb m14 + movsxdifnidn r1, r1d lea r4, [r1*4] lea r5, [r1*3] ; 3*stride neg r4 @@ -668,6 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16 %define p3 m4 %define spill [rsp] %assign pad 24-(stack_offset&15) + movsxdifnidn r1, r1d SUB rsp, pad lea r4, [r1*4] lea r5, [r1*3] ; 3*stride @@ -852,6 +859,7 @@ DEBLOCK_LUMA_INTRA ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) + movsxdifnidn r1, r1d mov r5, r0 sub r0, r1 sub r0, r1 @@ -887,6 +895,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) ; int beta) ;----------------------------------------------------------------------------- cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) + movsxdifnidn r1, r1d mov r4, r0 sub r0, r1 sub r0, r1 -- 2.39.5