DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL };
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
extern const uint64_t ff_pw_255;
+extern const xmm_reg ff_pw_256;
+extern const xmm_reg ff_pw_512;
+extern const xmm_reg ff_pw_m1;
extern const xmm_reg ff_pb_1;
extern const xmm_reg ff_pb_3;
SECTION_RODATA
+cextern pw_512
cextern pw_16
cextern pw_8
cextern pw_4
pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
pw_m3: times 8 dw -3
pw_pixel_max: times 8 dw ((1 << 10)-1)
-pw_512: times 8 dw 512
pd_17: times 4 dd 17
pd_16: times 4 dd 16
SECTION_RODATA
pw_pixel_max: times 8 dw ((1 << 10)-1)
-pw_m1: times 8 dw -1
pw_m2: times 8 dw -2
pd_1 : times 4 dd 1
cextern pw_4
cextern pw_8
+cextern pw_m1
SECTION .text
INIT_XMM sse2
filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
-pw_256: times 8 dw 256
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734
cextern pw_3
cextern pw_4
cextern pw_64
+cextern pw_256
SECTION .text
SECTION_RODATA
-; FIXME share with vp8dsp.asm
-pw_256: times 8 dw 256
+cextern pw_256
%macro F8_TAPS 8
times 8 db %1, %2