]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/x86/snowdsp.c
Merge commit '42dc214323637464759354912e18b2bee1884dd1'
[ffmpeg] / libavcodec / x86 / snowdsp.c
index e2ad511d0a416a7143df685408e4edd79027fdd6..218e6864db6f7efa4fdeeef87829c006cb558167 100644 (file)
@@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
 
 #if HAVE_7REGS
 #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
-        ""op" ("r",%%"REG_d"), %%"t0"      \n\t"\
-        ""op" 16("r",%%"REG_d"), %%"t1"    \n\t"\
-        ""op" 32("r",%%"REG_d"), %%"t2"    \n\t"\
-        ""op" 48("r",%%"REG_d"), %%"t3"    \n\t"
+        ""op" ("r",%%"FF_REG_d"), %%"t0"      \n\t"\
+        ""op" 16("r",%%"FF_REG_d"), %%"t1"    \n\t"\
+        ""op" 32("r",%%"FF_REG_d"), %%"t2"    \n\t"\
+        ""op" 48("r",%%"FF_REG_d"), %%"t3"    \n\t"
 
 #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
         snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
@@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
         "psubw %%"s3", %%"t3" \n\t"
 
 #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
-        "movdqa %%"s0", ("w",%%"REG_d")      \n\t"\
-        "movdqa %%"s1", 16("w",%%"REG_d")    \n\t"\
-        "movdqa %%"s2", 32("w",%%"REG_d")    \n\t"\
-        "movdqa %%"s3", 48("w",%%"REG_d")    \n\t"
+        "movdqa %%"s0", ("w",%%"FF_REG_d")    \n\t"\
+        "movdqa %%"s1", 16("w",%%"FF_REG_d")  \n\t"\
+        "movdqa %%"s2", 32("w",%%"FF_REG_d")  \n\t"\
+        "movdqa %%"s3", 48("w",%%"FF_REG_d")  \n\t"
 
 #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
         "psraw $"n", %%"t0" \n\t"\
@@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         "psrlw $13, %%xmm5                           \n\t"
         "paddw %%xmm7, %%xmm5                        \n\t"
         snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
-        "movq   (%2,%%"REG_d"), %%xmm1        \n\t"
-        "movq  8(%2,%%"REG_d"), %%xmm3        \n\t"
+        "movq   (%2,%%"FF_REG_d"), %%xmm1            \n\t"
+        "movq  8(%2,%%"FF_REG_d"), %%xmm3            \n\t"
         "paddw %%xmm7, %%xmm1                        \n\t"
         "paddw %%xmm7, %%xmm3                        \n\t"
         "pavgw %%xmm1, %%xmm0                        \n\t"
         "pavgw %%xmm3, %%xmm2                        \n\t"
-        "movq 16(%2,%%"REG_d"), %%xmm1        \n\t"
-        "movq 24(%2,%%"REG_d"), %%xmm3        \n\t"
+        "movq 16(%2,%%"FF_REG_d"), %%xmm1            \n\t"
+        "movq 24(%2,%%"FF_REG_d"), %%xmm3            \n\t"
         "paddw %%xmm7, %%xmm1                        \n\t"
         "paddw %%xmm7, %%xmm3                        \n\t"
         "pavgw %%xmm1, %%xmm4                        \n\t"
@@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6")
 
         "2:                                          \n\t"
-        "sub $64, %%"REG_d"                          \n\t"
+        "sub $64, %%"FF_REG_d"                       \n\t"
         "jge 1b                                      \n\t"
         :"+d"(i)
         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
 }
 
 #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
-        ""op" ("r",%%"REG_d"), %%"t0"   \n\t"\
-        ""op" 8("r",%%"REG_d"), %%"t1"  \n\t"\
-        ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
-        ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
+        ""op" ("r",%%"FF_REG_d"), %%"t0"   \n\t"\
+        ""op" 8("r",%%"FF_REG_d"), %%"t1"  \n\t"\
+        ""op" 16("r",%%"FF_REG_d"), %%"t2" \n\t"\
+        ""op" 24("r",%%"FF_REG_d"), %%"t3" \n\t"
 
 #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
         snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
@@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
 
 #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
-        "movq %%"s0", ("w",%%"REG_d")   \n\t"\
-        "movq %%"s1", 8("w",%%"REG_d")  \n\t"\
-        "movq %%"s2", 16("w",%%"REG_d") \n\t"\
-        "movq %%"s3", 24("w",%%"REG_d") \n\t"
+        "movq %%"s0", ("w",%%"FF_REG_d")   \n\t"\
+        "movq %%"s1", 8("w",%%"FF_REG_d")  \n\t"\
+        "movq %%"s2", 16("w",%%"FF_REG_d") \n\t"\
+        "movq %%"s3", 24("w",%%"FF_REG_d") \n\t"
 
 #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
         "movq %%"s0", %%"t0" \n\t"\
@@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
         "psrlw $13, %%mm5                            \n\t"
         "paddw %%mm7, %%mm5                          \n\t"
         snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
-        "movq   (%2,%%"REG_d"), %%mm1         \n\t"
-        "movq  8(%2,%%"REG_d"), %%mm3         \n\t"
+        "movq   (%2,%%"FF_REG_d"), %%mm1             \n\t"
+        "movq  8(%2,%%"FF_REG_d"), %%mm3             \n\t"
         "paddw %%mm7, %%mm1                          \n\t"
         "paddw %%mm7, %%mm3                          \n\t"
         "pavgw %%mm1, %%mm0                          \n\t"
         "pavgw %%mm3, %%mm2                          \n\t"
-        "movq 16(%2,%%"REG_d"), %%mm1         \n\t"
-        "movq 24(%2,%%"REG_d"), %%mm3         \n\t"
+        "movq 16(%2,%%"FF_REG_d"), %%mm1             \n\t"
+        "movq 24(%2,%%"FF_REG_d"), %%mm3             \n\t"
         "paddw %%mm7, %%mm1                          \n\t"
         "paddw %%mm7, %%mm3                          \n\t"
         "pavgw %%mm1, %%mm4                          \n\t"
@@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
         snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6")
 
         "2:                                          \n\t"
-        "sub $32, %%"REG_d"                          \n\t"
+        "sub $32, %%"FF_REG_d"                       \n\t"
         "jge 1b                                      \n\t"
         :"+d"(i)
         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
@@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
     IDWTELEM * * dst_array = sb->line + src_y;\
     x86_reg tmp;\
     __asm__ volatile(\
-             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %7, %%"FF_REG_c"          \n\t"\
              "mov  %6, %2                    \n\t"\
-             "mov  %4, %%"REG_S"             \n\t"\
+             "mov  %4, %%"FF_REG_S"          \n\t"\
              "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
              "pcmpeqd %%xmm3, %%xmm3         \n\t"\
              "psllw $15, %%xmm3              \n\t"\
              "psrlw $12, %%xmm3              \n\t" /* FRAC_BITS >> 1 */\
              "1:                             \n\t"\
-             "mov %1, %%"REG_D"              \n\t"\
-             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"\
+             "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+             "add %3, %%"FF_REG_D"           \n\t"
 
 #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-             "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\
+             "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2"             \n\t"\
              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-             "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+             "movq "s_offset"(%%"FF_REG_S"), %%xmm0    \n\t"\
+             "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\
              "punpcklbw %%xmm7, %%xmm0       \n\t"\
              "punpcklbw %%xmm7, %%xmm4       \n\t"\
              "pmullw %%xmm0, %%"out_reg1"    \n\t"\
              "pmullw %%xmm4, %%"out_reg2"    \n\t"
 
 #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-             "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\
+             "movq 8(%%"FF_REG_d"), %%"out_reg2"                          \n\t"\
              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-             "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+             "movq "s_offset"(%%"FF_REG_S"), %%xmm0   \n\t"\
+             "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\
              "punpcklbw %%xmm7, %%xmm0       \n\t"\
              "punpcklbw %%xmm7, %%xmm4       \n\t"\
              "pmullw %%xmm0, %%"out_reg1"    \n\t"\
@@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
              "paddusw %%xmm6, %%xmm5         \n\t"
 
 #define snow_inner_add_yblock_sse2_end_common1\
-             "add $32, %%"REG_S"             \n\t"\
-             "add %%"REG_c", %0              \n\t"\
-             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
-             "add %%"REG_c", (%%"REG_a")     \n\t"
+             "add $32, %%"FF_REG_S"                            \n\t"\
+             "add %%"FF_REG_c", %0                             \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t"
 
 #define snow_inner_add_yblock_sse2_end_common2\
              "jnz 1b                         \n\t"\
@@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
              :\
              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
              XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\
-             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+             "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");
 
 #define snow_inner_add_yblock_sse2_end_8\
-             "sal $1, %%"REG_c"              \n\t"\
-             "add"OPSIZE" $"PTR_SIZE"*2, %1  \n\t"\
+             "sal $1, %%"FF_REG_c"                \n\t"\
+             "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\
              snow_inner_add_yblock_sse2_end_common1\
-             "sar $1, %%"REG_c"              \n\t"\
+             "sar $1, %%"FF_REG_c"           \n\t"\
              "sub $2, %2                     \n\t"\
              snow_inner_add_yblock_sse2_end_common2
 
 #define snow_inner_add_yblock_sse2_end_16\
-             "add"OPSIZE" $"PTR_SIZE"*1, %1  \n\t"\
+             "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 \n\t"\
              snow_inner_add_yblock_sse2_end_common1\
              "dec %2                         \n\t"\
              snow_inner_add_yblock_sse2_end_common2
@@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8")
 snow_inner_add_yblock_sse2_accum_8("1", "128")
 snow_inner_add_yblock_sse2_accum_8("0", "136")
 
-             "mov %0, %%"REG_d"              \n\t"
-             "movdqa (%%"REG_D"), %%xmm0     \n\t"
+             "mov %0, %%"FF_REG_d"           \n\t"
+             "movdqa (%%"FF_REG_D"), %%xmm0  \n\t"
              "movdqa %%xmm1, %%xmm2          \n\t"
 
              "punpckhwd %%xmm7, %%xmm1       \n\t"
              "punpcklwd %%xmm7, %%xmm2       \n\t"
              "paddd %%xmm2, %%xmm0           \n\t"
-             "movdqa 16(%%"REG_D"), %%xmm2   \n\t"
+             "movdqa 16(%%"FF_REG_D"), %%xmm2\n\t"
              "paddd %%xmm1, %%xmm2           \n\t"
              "paddd %%xmm3, %%xmm0           \n\t"
              "paddd %%xmm3, %%xmm2           \n\t"
 
-             "mov %1, %%"REG_D"              \n\t"
-             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"
+             "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t"
+             "add %3, %%"FF_REG_D"           \n\t"
 
-             "movdqa (%%"REG_D"), %%xmm4     \n\t"
+             "movdqa (%%"FF_REG_D"), %%xmm4  \n\t"
              "movdqa %%xmm5, %%xmm6          \n\t"
              "punpckhwd %%xmm7, %%xmm5       \n\t"
              "punpcklwd %%xmm7, %%xmm6       \n\t"
              "paddd %%xmm6, %%xmm4           \n\t"
-             "movdqa 16(%%"REG_D"), %%xmm6   \n\t"
+             "movdqa 16(%%"FF_REG_D"), %%xmm6\n\t"
              "paddd %%xmm5, %%xmm6           \n\t"
              "paddd %%xmm3, %%xmm4           \n\t"
              "paddd %%xmm3, %%xmm6           \n\t"
@@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
              "psrad $8, %%xmm2               \n\t" /* FRAC_BITS. */
              "packssdw %%xmm2, %%xmm0        \n\t"
              "packuswb %%xmm7, %%xmm0        \n\t"
-             "movq %%xmm0, (%%"REG_d")       \n\t"
+             "movq %%xmm0, (%%"FF_REG_d")    \n\t"
 
              "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */
              "psrad $8, %%xmm6               \n\t" /* FRAC_BITS. */
              "packssdw %%xmm6, %%xmm4        \n\t"
              "packuswb %%xmm7, %%xmm4        \n\t"
-             "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
+             "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t"
 snow_inner_add_yblock_sse2_end_8
 }
 
@@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16")
 snow_inner_add_yblock_sse2_accum_16("1", "512")
 snow_inner_add_yblock_sse2_accum_16("0", "528")
 
-             "mov %0, %%"REG_d"              \n\t"
+             "mov %0, %%"FF_REG_d"           \n\t"
              "psrlw $4, %%xmm1               \n\t"
              "psrlw $4, %%xmm5               \n\t"
-             "paddw   (%%"REG_D"), %%xmm1    \n\t"
-             "paddw 16(%%"REG_D"), %%xmm5    \n\t"
+             "paddw   (%%"FF_REG_D"), %%xmm1 \n\t"
+             "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t"
              "paddw %%xmm3, %%xmm1           \n\t"
              "paddw %%xmm3, %%xmm5           \n\t"
              "psraw $4, %%xmm1               \n\t" /* FRAC_BITS. */
              "psraw $4, %%xmm5               \n\t" /* FRAC_BITS. */
              "packuswb %%xmm5, %%xmm1        \n\t"
 
-             "movdqu %%xmm1, (%%"REG_d")       \n\t"
+             "movdqu %%xmm1, (%%"FF_REG_d")  \n\t"
 
 snow_inner_add_yblock_sse2_end_16
 }
@@ -764,30 +764,30 @@ snow_inner_add_yblock_sse2_end_16
     IDWTELEM * * dst_array = sb->line + src_y;\
     x86_reg tmp;\
     __asm__ volatile(\
-             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %7, %%"FF_REG_c"          \n\t"\
              "mov  %6, %2                    \n\t"\
-             "mov  %4, %%"REG_S"             \n\t"\
+             "mov  %4, %%"FF_REG_S"          \n\t"\
              "pxor %%mm7, %%mm7              \n\t" /* 0 */\
              "pcmpeqd %%mm3, %%mm3           \n\t"\
              "psllw $15, %%mm3               \n\t"\
              "psrlw $12, %%mm3               \n\t" /* FRAC_BITS >> 1 */\
              "1:                             \n\t"\
-             "mov %1, %%"REG_D"              \n\t"\
-             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"\
+             "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+             "add %3, %%"FF_REG_D"           \n\t"
 
 #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
-             "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1"                 \n\t"\
+             "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2"               \n\t"\
              "punpcklbw %%mm7, %%"out_reg1" \n\t"\
              "punpcklbw %%mm7, %%"out_reg2" \n\t"\
-             "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
-             "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
+             "movd "s_offset"(%%"FF_REG_S"), %%mm0   \n\t"\
+             "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\
              "punpcklbw %%mm7, %%mm0       \n\t"\
              "punpcklbw %%mm7, %%mm4       \n\t"\
-             "pmullw %%mm0, %%"out_reg1"    \n\t"\
-             "pmullw %%mm4, %%"out_reg2"    \n\t"
+             "pmullw %%mm0, %%"out_reg1"   \n\t"\
+             "pmullw %%mm4, %%"out_reg2"   \n\t"
 
 #define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
              snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
@@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16
              "paddusw %%mm6, %%mm5         \n\t"
 
 #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
-             "mov %0, %%"REG_d"              \n\t"\
+             "mov %0, %%"FF_REG_d"           \n\t"\
              "psrlw $4, %%mm1                \n\t"\
              "psrlw $4, %%mm5                \n\t"\
-             "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
-             "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
+             "paddw "read_offset"(%%"FF_REG_D"), %%mm1   \n\t"\
+             "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\
              "paddw %%mm3, %%mm1             \n\t"\
              "paddw %%mm3, %%mm5             \n\t"\
              "psraw $4, %%mm1                \n\t"\
              "psraw $4, %%mm5                \n\t"\
              "packuswb %%mm5, %%mm1          \n\t"\
-             "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
+             "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t"
 
 #define snow_inner_add_yblock_mmx_end(s_step)\
-             "add $"s_step", %%"REG_S"             \n\t"\
-             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
-             "add %%"REG_c", (%%"REG_a")     \n\t"\
-             "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\
-             "add %%"REG_c", %0              \n\t"\
+             "add $"s_step", %%"FF_REG_S"                      \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t"\
+             "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1             \n\t"\
+             "add %%"FF_REG_c", %0                             \n\t"\
              "dec %2                         \n\t"\
              "jnz 1b                         \n\t"\
              :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
              :\
              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
-             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+             "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");
 
 static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
                       int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){