From d02d04a18f300ebe97319ca6e91fc943cb14f58b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Wed, 22 Apr 2015 14:53:01 -0400
Subject: [PATCH] vp9: remove one optimization branch in iadst16 which causes overflows.

See sample vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm from the vp9 test
vector set which reproduces the issue. This probably costs a few cycles,
but I don't think there's an easy way to work around that.

Signed-off-by: Michael Niedermayer
---
 libavcodec/x86/vp9itxfm.asm | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index bfe427fa703..a9d45a4b46f 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -1699,7 +1699,9 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     SUMSUB_BA w, 5, 7, 4
     PSIGNW m5, [pw_m1] ; m12=out15[w], m8=t3[w]
 
-%if cpuflag(ssse3)
+    ; unfortunately, the code below overflows in some cases, e.g.
+    ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+%if 0 ; cpuflag(ssse3)
     SUMSUB_BA w, 7, 6, 4
     pmulhrsw m7, [pw_m11585x2] ; m8=out7[w]
     pmulhrsw m6, [pw_11585x2] ; m1=out8[w]
-- 
2.39.2