git.sesse.net Git - ffmpeg/blob - libavcodec/mips/h264chroma_mmi.c
Merge commit 'f126365cdad6327e6def8ceb918eaf538fbb97b3'
[ffmpeg] / libavcodec / mips / h264chroma_mmi.c
1 /*
2  * Loongson SIMD optimized h264chroma
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 #include "h264chroma_mips.h"
26 #include "constants.h"
27 #include "libavutil/mips/asmdefs.h"
28
/*
 * Write ("put") an 8-byte-wide, h-row chroma block to dst, computed from src
 * with the four bilinear weights derived from the fractional offsets x and y:
 *
 *     A = (8 - x) * (8 - y)    B = x * (8 - y)
 *     C = (8 - x) * y          D = x * y
 *
 * Three code paths are selected from the weights:
 *   - D != 0: four-tap blend of src[i], src[i + 1], src[i + stride] and
 *             src[i + stride + 1] with weights A, B, C, D;
 *   - D == 0, E = B + C != 0: two-tap blend of src[i] (weight A) and
 *             src[i + step] (weight E), step being stride when C != 0, else 1;
 *   - otherwise: single tap, src[i] * A.
 * In every path the 16-bit sum has ff_pw_32 added, is shifted right by 6 and
 * is packed back to bytes with packushb before being stored.
 *
 * dst    - destination; each row is written with one 8-byte sdc1 store
 *          (NOTE(review): presumably requires 8-byte alignment - confirm)
 * src    - source; read with unaligned 8-byte load pairs (gsldlc1/gsldrc1)
 * stride - byte distance between rows, applied to both src and dst
 * h      - row count; every loop tests h only after processing, so h must be
 *          non-zero on entry
 * x, y   - fractional offsets used to build the weights above
 *          (assumed to be in 0..7 - TODO confirm against callers)
 *
 * NOTE(review): ff_pw_32 is defined outside this file; presumably 32 in each
 * 16-bit lane (the rounding term for the >> 6) - confirm in constants.h.
 * NOTE(review): the asm overwrites %[A]..%[E] with pshufh although they are
 * declared as input-only operands; GCC's extended-asm rules say inputs must
 * not be modified - confirm this is safe with the compilers in use.
 */
29 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
30         int h, int x, int y)
31 {
32     const int A = (8 - x) * (8 - y);
33     const int B = x * (8 - y);
34     const int C = (8 - x) * y;
35     const int D = x * y;
36     const int E = B + C;
37     double ftmp[10];
38     uint64_t tmp[1];
39     mips_reg addr[1];
40
    /* Four-tap path: both x and y are non-zero. */
41     if (D) {
42         __asm__ volatile (
            /* ftmp0 = 0 (zero source for unpacking and pshufh selector);
             * ftmp9 = 6 (shift count); weights are spread over the lanes
             * (pshufh with a zero selector presumably replicates lane 0). */
43             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
44             "dli        %[tmp0],    0x06                                \n\t"
45             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
46             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
47             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
48             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
49             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
50             "1:                                                         \n\t"
            /* addr0 = next row; load src[0..7], src[1..8] of both rows. */
51             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
52             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
53             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
54             "gsldlc1    %[ftmp2],   0x08(%[src])                        \n\t"
55             "gsldrc1    %[ftmp2],   0x01(%[src])                        \n\t"
56             "gsldlc1    %[ftmp3],   0x07(%[addr0])                      \n\t"
57             "gsldrc1    %[ftmp3],   0x00(%[addr0])                      \n\t"
58             "gsldlc1    %[ftmp4],   0x08(%[addr0])                      \n\t"
59             "gsldrc1    %[ftmp4],   0x01(%[addr0])                      \n\t"
60
            /* Current row: ftmp1/ftmp2 = A * src[i] + B * src[i + 1]
             * for the low/high four pixels. */
61             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
62             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
63             "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
64             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
65             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
66             "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
67             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
68             "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
69             "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
70             "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
71
            /* Next row: ftmp3/ftmp4 = C * src[i + stride]
             *                       + D * src[i + stride + 1]. */
72             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
73             "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
74             "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
75             "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
76             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
77             "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
78             "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
79             "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
80             "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
81             "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
82
            /* Sum both rows, add ff_pw_32, shift right by 6, pack to bytes,
             * store the row and advance to the next one. */
83             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
84             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
85             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
86             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
87             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
88             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
89             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
90             "addi       %[h],       %[h],           -0x01               \n\t"
91             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
92             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
93             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
94             "bnez       %[h],       1b                                  \n\t"
95             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
96               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
97               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
98               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
99               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
100               [tmp0]"=&r"(tmp[0]),
101               [addr0]"=&r"(addr[0]),
102               [dst]"+&r"(dst),              [src]"+&r"(src),
103               [h]"+&r"(h)
104             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
105               [A]"f"(A),                    [B]"f"(B),
106               [C]"f"(C),                    [D]"f"(D)
107             : "memory"
108         );
    /* Two-tap path: D == 0, so at most one of B and C is non-zero and E is
     * that weight. */
109     } else if (E) {
        /* Second tap is one row down (C != 0) or one byte right (B != 0). */
110         const int step = C ? stride : 1;
111
112         __asm__ volatile (
            /* ftmp0 = 0, ftmp7 = 6 (shift count); spread A and E. */
113             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
114             "dli        %[tmp0],    0x06                                \n\t"
115             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
116             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
117             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
118             "1:                                                         \n\t"
            /* Load 8 bytes at src and 8 bytes at src + step. */
119             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
120             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
121             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
122             "gsldlc1    %[ftmp2],   0x07(%[addr0])                      \n\t"
123             "gsldrc1    %[ftmp2],   0x00(%[addr0])                      \n\t"
124
            /* ftmp1/ftmp2 = A * src[i] + E * src[i + step] (low/high half). */
125             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
126             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
127             "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
128             "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
129             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
130             "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
131             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
132             "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
133             "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
134             "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
135
            /* Add ff_pw_32, shift right by 6, pack, store, advance one row. */
136             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
137             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
138             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
139             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
140             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
141             "addi       %[h],       %[h],           -0x01               \n\t"
142             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
143             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
144             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
145             "bnez       %[h],       1b                                  \n\t"
146             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
147               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
148               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
149               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
150               [tmp0]"=&r"(tmp[0]),
151               [addr0]"=&r"(addr[0]),
152               [dst]"+&r"(dst),              [src]"+&r"(src),
153               [h]"+&r"(h)
154             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
155               [ff_pw_32]"f"(ff_pw_32),
156               [A]"f"(A),                    [E]"f"(E)
157             : "memory"
158         );
    /* Single-tap path: B, C and D are all zero, only weight A remains. */
159     } else {
160         __asm__ volatile (
            /* ftmp0 = 0, ftmp4 = 6 (shift count); spread A.
             * The loop body is unrolled to handle two rows per iteration and
             * subtracts 2 from h.
             * NOTE(review): the loop only ends when h hits exactly 0, so h is
             * assumed to be even - confirm with callers. */
161             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
162             "dli        %[tmp0],    0x06                                \n\t"
163             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
164             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
165             "1:                                                         \n\t"
            /* First row: (A * src[i] + ff_pw_32) >> 6, packed and stored. */
166             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
167             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
168             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
169             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
170             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
171             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
172             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
173             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
174             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
175             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
176             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
177             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
178             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
179
            /* Second row: same computation on the following line. */
180             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
181             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
182             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
183             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
184             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
185             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
186             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
187             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
188             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
189             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
190             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
191             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
192             "addi       %[h],       %[h],           -0x02               \n\t"
193             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
194
195             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
196             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
197             "bnez       %[h],       1b                                  \n\t"
198             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
199               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
200               [ftmp4]"=&f"(ftmp[4]),
201               [tmp0]"=&r"(tmp[0]),
202               [dst]"+&r"(dst),              [src]"+&r"(src),
203               [h]"+&r"(h)
204             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
205               [A]"f"(A)
206             : "memory"
207         );
208     }
209 }
210
/*
 * Averaging ("avg") variant of the 8-byte-wide chroma interpolation: the
 * filtered row is computed from src with the bilinear weights
 *
 *     A = (8 - x) * (8 - y)    B = x * (8 - y)
 *     C = (8 - x) * y          D = x * y
 *
 * and is then combined with the 8 bytes already present in dst using pavgb
 * before being stored back to dst.
 *
 * Three code paths are selected from the weights:
 *   - D != 0: four-tap blend of src[i], src[i + 1], src[i + stride] and
 *             src[i + stride + 1] with weights A, B, C, D;
 *   - D == 0, E = B + C != 0: two-tap blend of src[i] (weight A) and
 *             src[i + step] (weight E), step being stride when C != 0, else 1;
 *   - otherwise: single tap, src[i] * A.
 * In every path the 16-bit sum has ff_pw_32 added, is shifted right by 6 and
 * is packed back to bytes with packushb before the pavgb step.
 *
 * dst    - destination; each row is read with ldc1 and written with sdc1
 *          (NOTE(review): presumably requires 8-byte alignment - confirm)
 * src    - source; read with unaligned 8-byte load pairs (gsldlc1/gsldrc1)
 * stride - byte distance between rows, applied to both src and dst
 * h      - row count; every loop tests h only after processing, so h must be
 *          non-zero on entry
 * x, y   - fractional offsets used to build the weights above
 *          (assumed to be in 0..7 - TODO confirm against callers)
 *
 * NOTE(review): ff_pw_32 is defined outside this file; presumably 32 in each
 * 16-bit lane (the rounding term for the >> 6) - confirm in constants.h.
 * NOTE(review): the asm overwrites %[A]..%[E] with pshufh although they are
 * declared as input-only operands; GCC's extended-asm rules say inputs must
 * not be modified - confirm this is safe with the compilers in use.
 */
211 void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
212         int h, int x, int y)
213 {
214     const int A = (8 - x) * (8 - y);
215     const int B = x * (8 - y);
216     const int C = (8 - x) * y;
217     const int D = x * y;
218     const int E = B + C;
219     double ftmp[10];
220     uint64_t tmp[1];
221     mips_reg addr[1];
222
    /* Four-tap path: both x and y are non-zero. */
223     if (D) {
224         __asm__ volatile (
            /* ftmp0 = 0 (zero source for unpacking and pshufh selector);
             * ftmp9 = 6 (shift count); weights are spread over the lanes
             * (pshufh with a zero selector presumably replicates lane 0). */
225             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
226             "dli        %[tmp0],    0x06                                \n\t"
227             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
228             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
229             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
230             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
231             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
232             "1:                                                         \n\t"
            /* addr0 = next row; load src[0..7], src[1..8] of both rows. */
233             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
234             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
235             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
236             "gsldlc1    %[ftmp2],   0x08(%[src])                        \n\t"
237             "gsldrc1    %[ftmp2],   0x01(%[src])                        \n\t"
238             "gsldlc1    %[ftmp3],   0x07(%[addr0])                      \n\t"
239             "gsldrc1    %[ftmp3],   0x00(%[addr0])                      \n\t"
240             "gsldlc1    %[ftmp4],   0x08(%[addr0])                      \n\t"
241             "gsldrc1    %[ftmp4],   0x01(%[addr0])                      \n\t"
242
            /* Current row: ftmp1/ftmp2 = A * src[i] + B * src[i + 1]
             * for the low/high four pixels. */
243             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
244             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
245             "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
246             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
247             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
248             "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
249             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
250             "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
251             "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
252             "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
253
            /* Next row: ftmp3/ftmp4 = C * src[i + stride]
             *                       + D * src[i + stride + 1]. */
254             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
255             "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
256             "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
257             "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
258             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
259             "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
260             "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
261             "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
262             "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
263             "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
264
            /* Sum both rows, add ff_pw_32, shift right by 6, pack to bytes,
             * then pavgb with the existing dst row and store the result. */
265             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
266             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
267             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
268             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
269             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
270             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
271             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
272             "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
273             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
274             "addi       %[h],       %[h],           -0x01               \n\t"
275             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
276             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
277             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
278             "bnez       %[h],       1b                                  \n\t"
279             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
280               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
281               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
282               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
283               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
284               [tmp0]"=&r"(tmp[0]),
285               [addr0]"=&r"(addr[0]),
286               [dst]"+&r"(dst),              [src]"+&r"(src),
287               [h]"+&r"(h)
288             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
289               [A]"f"(A),                    [B]"f"(B),
290               [C]"f"(C),                    [D]"f"(D)
291             : "memory"
292         );
    /* Two-tap path: D == 0, so at most one of B and C is non-zero and E is
     * that weight. */
293     } else if (E) {
        /* Second tap is one row down (C != 0) or one byte right (B != 0). */
294         const int step = C ? stride : 1;
295
296         __asm__ volatile (
            /* ftmp0 = 0, ftmp7 = 6 (shift count); spread A and E. */
297             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
298             "dli        %[tmp0],    0x06                                \n\t"
299             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
300             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
301             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
302             "1:                                                         \n\t"
            /* Load 8 bytes at src and 8 bytes at src + step. */
303             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
304             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
305             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
306             "gsldlc1    %[ftmp2],   0x07(%[addr0])                      \n\t"
307             "gsldrc1    %[ftmp2],   0x00(%[addr0])                      \n\t"
308
            /* ftmp1/ftmp2 = A * src[i] + E * src[i + step] (low/high half). */
309             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
310             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
311             "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
312             "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
313             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
314             "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
315             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
316             "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
317             "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
318             "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
319
            /* Add ff_pw_32, shift right by 6, pack, pavgb with the existing
             * dst row, store, advance one row. */
320             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
321             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
322             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
323             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
324             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
325             "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
326             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
327             "addi       %[h],       %[h],           -0x01               \n\t"
328             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
329             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
330             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
331             "bnez       %[h],       1b                                  \n\t"
332             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
333               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
334               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
335               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
336               [tmp0]"=&r"(tmp[0]),
337               [addr0]"=&r"(addr[0]),
338               [dst]"+&r"(dst),              [src]"+&r"(src),
339               [h]"+&r"(h)
340             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
341               [ff_pw_32]"f"(ff_pw_32),
342               [A]"f"(A),                    [E]"f"(E)
343             : "memory"
344         );
    /* Single-tap path: B, C and D are all zero, only weight A remains. */
345     } else {
346         __asm__ volatile (
            /* ftmp0 = 0, ftmp4 = 6 (shift count); spread A.
             * The loop body is unrolled to handle two rows per iteration and
             * subtracts 2 from h.
             * NOTE(review): the loop only ends when h hits exactly 0, so h is
             * assumed to be even - confirm with callers. */
347             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
348             "dli        %[tmp0],    0x06                                \n\t"
349             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
350             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
351             "1:                                                         \n\t"
            /* First row: (A * src[i] + ff_pw_32) >> 6, packed, then pavgb
             * with the existing dst row and stored. */
352             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
353             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
354             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
355             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
356             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
357             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
358             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
359             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
360             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
361             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
362             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
363             "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
364             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
365             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
366             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
367             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
368
            /* Second row: same computation on the following line. */
369             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
370             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
371             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
372             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
373             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
374             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
375             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
376             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
377             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
378             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
379             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
380             "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
381             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
382             "addi       %[h],       %[h],           -0x02               \n\t"
383             "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
384
385             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
386             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
387             "bnez       %[h],       1b                                  \n\t"
388             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
389               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
390               [ftmp4]"=&f"(ftmp[4]),
391               [tmp0]"=&r"(tmp[0]),
392               [dst]"+&r"(dst),              [src]"+&r"(src),
393               [h]"+&r"(h)
394             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
395               [A]"f"(A)
396             : "memory"
397         );
398     }
399 }
400
401 void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
402         int h, int x, int y)
403 {
404     const int A = (8 - x) * (8 - y);
405     const int B = x * (8 - y);
406     const int C = (8 - x) *  y;
407     const int D = x *  y;
408     const int E = B + C;
409     double ftmp[8];
410     uint64_t tmp[1];
411     mips_reg addr[1];
412     uint64_t low32;
413
414     if (D) {
415         __asm__ volatile (
416             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
417             "dli        %[tmp0],    0x06                                \n\t"
418             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
419             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
420             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
421             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
422             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
423             "1:                                                         \n\t"
424             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
425             "uld        %[low32],   0x00(%[src])                        \n\t"
426             "mtc1       %[low32],   %[ftmp1]                            \n\t"
427             "uld        %[low32],   0x01(%[src])                        \n\t"
428             "mtc1       %[low32],   %[ftmp2]                            \n\t"
429             "uld        %[low32],   0x00(%[addr0])                      \n\t"
430             "mtc1       %[low32],   %[ftmp3]                            \n\t"
431             "uld        %[low32],   0x01(%[addr0])                      \n\t"
432             "mtc1       %[low32],   %[ftmp4]                            \n\t"
433
434             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
435             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
436             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
437             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
438             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
439
440             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
441             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
442             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
443             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
444             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
445
446             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
447             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
448             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
449             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
450             "addi       %[h],       %[h],           -0x01               \n\t"
451             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
452             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
453             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
454             "bnez       %[h],       1b                                  \n\t"
455             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
456               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
457               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
458               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
459               [tmp0]"=&r"(tmp[0]),
460               [addr0]"=&r"(addr[0]),
461               [dst]"+&r"(dst),              [src]"+&r"(src),
462               [h]"+&r"(h),
463               [low32]"=&r"(low32)
464             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
465               [A]"f"(A),                    [B]"f"(B),
466               [C]"f"(C),                    [D]"f"(D)
467             : "memory"
468         );
469     } else if (E) {
470         const int step = C ? stride : 1;
471
472         __asm__ volatile (
473             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
474             "dli        %[tmp0],    0x06                                \n\t"
475             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
476             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
477             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
478             "1:                                                         \n\t"
479             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
480             "uld        %[low32],   0x00(%[src])                        \n\t"
481             "mtc1       %[low32],   %[ftmp1]                            \n\t"
482             "uld        %[low32],   0x00(%[addr0])                      \n\t"
483             "mtc1       %[low32],   %[ftmp2]                            \n\t"
484
485             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
486             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
487             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
488             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
489             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
490
491             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
492             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
493             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
494             "addi       %[h],       %[h],           -0x01               \n\t"
495             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
496             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
497             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
498             "bnez       %[h],       1b                                  \n\t"
499             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
500               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
501               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
502               [tmp0]"=&r"(tmp[0]),
503               [addr0]"=&r"(addr[0]),
504               [dst]"+&r"(dst),              [src]"+&r"(src),
505               [h]"+&r"(h),
506               [low32]"=&r"(low32)
507             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
508               [ff_pw_32]"f"(ff_pw_32),
509               [A]"f"(A),                    [E]"f"(E)
510             : "memory"
511         );
512     } else {
513         __asm__ volatile (
514             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
515             "dli        %[tmp0],    0x06                                \n\t"
516             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
517             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
518             "1:                                                         \n\t"
519             "uld        %[low32],   0x00(%[src])                        \n\t"
520             "mtc1       %[low32],   %[ftmp1]                            \n\t"
521             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
522             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
523             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
524             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
525             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
526             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
527             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
528             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
529
530             "uld        %[low32],   0x00(%[src])                        \n\t"
531             "mtc1       %[low32],   %[ftmp1]                            \n\t"
532             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
533             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
534             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
535             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
536             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
537             "addi       %[h],       %[h],           -0x02               \n\t"
538             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
539
540             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
541             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
542             "bnez       %[h],       1b                                  \n\t"
543             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
544               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
545               [tmp0]"=&r"(tmp[0]),
546               [dst]"+&r"(dst),              [src]"+&r"(src),
547               [h]"+&r"(h),
548               [low32]"=&r"(low32)
549             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
550               [A]"f"(A)
551             : "memory"
552         );
553     }
554 }
555
556 void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
557         int h, int x, int y)
558 {
559     const int A = (8 - x) *(8 - y);
560     const int B = x * (8 - y);
561     const int C = (8 - x) * y;
562     const int D = x * y;
563     const int E = B + C;
564     double ftmp[8];
565     uint64_t tmp[1];
566     mips_reg addr[1];
567     uint64_t low32;
568
569     if (D) {
570         __asm__ volatile (
571             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
572             "dli        %[tmp0],    0x06                                \n\t"
573             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
574             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
575             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
576             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
577             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
578             "1:                                                         \n\t"
579             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
580             "uld        %[low32],   0x00(%[src])                        \n\t"
581             "mtc1       %[low32],   %[ftmp1]                            \n\t"
582             "uld        %[low32],   0x01(%[src])                        \n\t"
583             "mtc1       %[low32],   %[ftmp2]                            \n\t"
584             "uld        %[low32],   0x00(%[addr0])                      \n\t"
585             "mtc1       %[low32],   %[ftmp3]                            \n\t"
586             "uld        %[low32],   0x01(%[addr0])                      \n\t"
587             "mtc1       %[low32],   %[ftmp4]                            \n\t"
588
589             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
590             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
591             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
592             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
593             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
594
595             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
596             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
597             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
598             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
599             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
600
601             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
602             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
603             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
604             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
605             "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
606             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
607             "addi       %[h],       %[h],           -0x01               \n\t"
608             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
609             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
610             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
611             "bnez       %[h],       1b                                  \n\t"
612             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
613               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
614               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
615               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
616               [tmp0]"=&r"(tmp[0]),
617               [addr0]"=&r"(addr[0]),
618               [dst]"+&r"(dst),              [src]"+&r"(src),
619               [h]"+&r"(h),
620               [low32]"=&r"(low32)
621             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
622               [A]"f"(A),                    [B]"f"(B),
623               [C]"f"(C),                    [D]"f"(D)
624             : "memory"
625         );
626     } else if (E) {
627         const int step = C ? stride : 1;
628
629         __asm__ volatile (
630             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
631             "dli        %[tmp0],    0x06                                \n\t"
632             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
633             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
634             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
635             "1:                                                         \n\t"
636             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
637             "uld        %[low32],   0x00(%[src])                        \n\t"
638             "mtc1       %[low32],   %[ftmp1]                            \n\t"
639             "uld        %[low32],   0x00(%[addr0])                      \n\t"
640             "mtc1       %[low32],   %[ftmp2]                            \n\t"
641
642             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
643             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
644             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
645             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
646             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
647
648             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
649             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
650             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
651             "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
652             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
653             "addi       %[h],       %[h],           -0x01               \n\t"
654             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
655             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
656             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
657             "bnez       %[h],       1b                                  \n\t"
658             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
659               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
660               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
661               [tmp0]"=&r"(tmp[0]),
662               [addr0]"=&r"(addr[0]),
663               [dst]"+&r"(dst),              [src]"+&r"(src),
664               [h]"+&r"(h),
665               [low32]"=&r"(low32)
666             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
667               [ff_pw_32]"f"(ff_pw_32),
668               [A]"f"(A),                    [E]"f"(E)
669             : "memory"
670         );
671     } else {
672         __asm__ volatile (
673             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
674             "dli        %[tmp0],    0x06                                \n\t"
675             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
676             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
677             "1:                                                         \n\t"
678             "uld        %[low32],   0x00(%[src])                        \n\t"
679             "mtc1       %[low32],   %[ftmp1]                            \n\t"
680             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
681             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
682             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
683             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
684             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
685             "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
686             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
687             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
688             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
689             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
690
691             "uld        %[low32],   0x00(%[src])                        \n\t"
692             "mtc1       %[low32],   %[ftmp1]                            \n\t"
693             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
694             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
695             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
696             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
697             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
698             "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
699             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
700             "addi       %[h],       %[h],           -0x02               \n\t"
701             "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
702
703             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
704             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
705             "bnez       %[h],       1b                                  \n\t"
706             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
707               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
708               [tmp0]"=&r"(tmp[0]),
709               [dst]"+&r"(dst),              [src]"+&r"(src),
710               [h]"+&r"(h),
711               [low32]"=&r"(low32)
712             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
713               [A]"f"(A)
714             : "memory"
715         );
716     }
717 }