]> git.sesse.net Git - ffmpeg/blob - libavcodec/mips/h264dsp_mmi.c
Merge commit 'e2399e0c1aeb110456405d23e211066fab6cb041'
[ffmpeg] / libavcodec / mips / h264dsp_mmi.c
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *                    Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavcodec/bit_depth_template.c"
27 #include "h264dsp_mips.h"
28 #include "libavutil/mips/mmiutils.h"
29
/**
 * Add a 4x4 block of 16-bit values to a 4x4 area of 8-bit pixels, then
 * clear the block.
 *
 * Each of the four dst rows (dst, dst+stride, dst+2*stride, dst+3*stride)
 * is read as 4 bytes, widened to halfwords, summed with the matching
 * 8-byte group of src, narrowed back to bytes with packushb and stored.
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param src    16 int16_t values (32 bytes); zeroed before returning
 * @param stride distance between consecutive dst rows, in bytes
 *
 * NOTE(review): the MMI_* load/store macros and the DECLARE_VAR_* /
 * RESTRICT_ASM_* helpers are not defined in this file; presumably they come
 * from libavutil/mips/mmiutils.h -- confirm there before changing operands.
 */
30 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
31 {
32     double ftmp[9];
33     DECLARE_VAR_LOW32;
34     DECLARE_VAR_ALL64;
35
36     __asm__ volatile (
           /* ftmp0 = 0: the zero operand for the unpack/pack steps below */
37         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
           /* ftmp1..4 = the four 8-byte groups of src (offsets 0x00..0x18) */
38         MMI_LDC1(%[ftmp1], %[src], 0x00)
39         MMI_LDC1(%[ftmp2], %[src], 0x08)
40         MMI_LDC1(%[ftmp3], %[src], 0x10)
41         MMI_LDC1(%[ftmp4], %[src], 0x18)
           /* ftmp5..8 = 4 bytes from each of the four dst rows */
42         MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
43         MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
44         MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
45         MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
           /* interleave the dst bytes with zero bytes -> four 16-bit lanes */
46         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
47         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
48         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
49         "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
           /* per-halfword sum: src group + widened dst row */
50         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
51         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
52         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
53         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
           /* narrow halfwords back to bytes (packushb: unsigned saturation) */
54         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
55         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
56         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
57         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
           /* write the 4 result bytes back to each dst row */
58         MMI_SWC1(%[ftmp1], %[dst0], 0x00)
59         MMI_SWC1(%[ftmp2], %[dst1], 0x00)
60         MMI_SWC1(%[ftmp3], %[dst2], 0x00)
61         MMI_SWC1(%[ftmp4], %[dst3], 0x00)
           /* all FP temporaries are early-clobber outputs; the row addresses
            * are computed in C and passed as read-only inputs */
62         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
63           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
64           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
65           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
66           RESTRICT_ASM_LOW32
67           RESTRICT_ASM_ALL64
68           [ftmp8]"=&f"(ftmp[8])
69         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
70           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
71           [src]"r"(src)
72         : "memory"
73     );
74
       /* clear the 32 bytes of src that were just consumed */
75     memset(src, 0, 32);
76 }
77
/**
 * 4x4 inverse transform of block, added to the 8-bit pixels at dst.
 *
 * The asm runs two butterfly passes over the four 8-byte coefficient groups,
 * separated by a 4x4 halfword transpose. ff_pw_32 is added to one group
 * between the passes (rounding term; presumably 32 in every halfword --
 * confirm against its definition), and each result is arithmetically shifted
 * right by 6 before being added to the matching dst row. Sums are narrowed
 * with packushb and stored as 4 bytes per row. The 32 bytes of block are
 * zeroed (once inside the asm, once more by the trailing memset).
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param block  16 int16_t coefficients (32 bytes); zeroed before returning
 * @param stride distance between consecutive dst rows, in bytes
 *
 * NOTE(review): MMI_*, PTR_ADDU, DECLARE_VAR_* and RESTRICT_ASM_* are not
 * defined in this file; presumably they come from libavutil/mips/mmiutils.h.
 */
78 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
79 {
80     double ftmp[12];
81     uint64_t tmp[1];
82     DECLARE_VAR_LOW32;
83     DECLARE_VAR_ALL64;
84     DECLARE_VAR_ADDRT;
85
86     __asm__ volatile (
           /* ftmp8 = shift count 1, ftmp9 = shift count 6; the constant
            * set-up is interleaved with the loads of the four groups */
87         "dli        %[tmp0],    0x01                                    \n\t"
88         MMI_LDC1(%[ftmp0], %[block], 0x00)
89         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
90         MMI_LDC1(%[ftmp1], %[block], 0x08)
91         "dli        %[tmp0],    0x06                                    \n\t"
92         MMI_LDC1(%[ftmp2], %[block], 0x10)
93         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
           /* first pass: ftmp4 = (g1 >> 1) - g3, ftmp5 = (g3 >> 1) + g1,
            * ftmp10 = g0 + g2, ftmp0 = g0 - g2, then the output butterflies */
94         "psrah      %[ftmp4],   %[ftmp1],       %[ftmp8]                \n\t"
95         MMI_LDC1(%[ftmp3], %[block], 0x18)
96         "psrah      %[ftmp5],   %[ftmp3],       %[ftmp8]                \n\t"
97         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
98         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
99         "paddh      %[ftmp10],  %[ftmp2],       %[ftmp0]                \n\t"
100         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
101         "paddh      %[ftmp11],  %[ftmp5],       %[ftmp10]               \n\t"
102         "psubh      %[ftmp2],   %[ftmp10],      %[ftmp5]                \n\t"
103         "paddh      %[ftmp10],  %[ftmp4],       %[ftmp0]                \n\t"
104         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
           /* 4x4 halfword transpose; results land in ftmp5, ftmp2,
            * ftmp10 and ftmp0 */
105         "punpckhhw  %[ftmp1],   %[ftmp11],      %[ftmp10]               \n\t"
106         "punpcklhw  %[ftmp5],   %[ftmp11],      %[ftmp10]               \n\t"
107         "punpckhhw  %[ftmp4],   %[ftmp0],       %[ftmp2]                \n\t"
108         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
109         "punpckhwd  %[ftmp2],   %[ftmp5],       %[ftmp0]                \n\t"
110         "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
111         "punpcklwd  %[ftmp10],  %[ftmp1],       %[ftmp4]                \n\t"
112         "punpckhwd  %[ftmp0],   %[ftmp1],       %[ftmp4]                \n\t"
           /* add the rounding term once; it propagates to all four outputs
            * of the second pass through the even butterfly */
113         "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_32]             \n\t"
           /* second pass, same butterfly structure as the first */
114         "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
115         "psrah      %[ftmp3],   %[ftmp0],       %[ftmp8]                \n\t"
116         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
117         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
118         "paddh      %[ftmp1],   %[ftmp10],      %[ftmp5]                \n\t"
119         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
120         "paddh      %[ftmp10],  %[ftmp3],       %[ftmp1]                \n\t"
121         "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
122         "paddh      %[ftmp11],  %[ftmp4],       %[ftmp5]                \n\t"
           /* ftmp7 = 0: used to clear block and as the zero operand for
            * the unpack/pack steps */
123         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
124         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
125         MMI_SDC1(%[ftmp7], %[block], 0x00)
126         MMI_SDC1(%[ftmp7], %[block], 0x08)
127         MMI_SDC1(%[ftmp7], %[block], 0x10)
128         MMI_SDC1(%[ftmp7], %[block], 0x18)
           /* rows 0 and 1: (result >> 6) + widened pixels, pack, store */
129         MMI_ULWC1(%[ftmp2], %[dst], 0x00)
130         "psrah      %[ftmp3],   %[ftmp10],      %[ftmp9]                \n\t"
131         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
132         "psrah      %[ftmp4],   %[ftmp11],      %[ftmp9]                \n\t"
133         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
134         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
135         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
136         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
137         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
138         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
139         MMI_SWC1(%[ftmp2], %[dst], 0x00)
140         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
           /* dst += 2 * stride; this write is why dst must be a read-write
            * operand and not an input */
141         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
142         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
           /* rows 2 and 3: same sequence as rows 0 and 1 */
143         MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
145         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
147         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
148         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
149         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
150         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
151         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
152         MMI_SWC1(%[ftmp2], %[dst], 0x00)
153         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
154         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
156           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
157           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
158           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
159           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
160           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
161           RESTRICT_ASM_LOW32
162           RESTRICT_ASM_ALL64
163           RESTRICT_ASM_ADDRT
           /* dst is advanced by the PTR_ADDU pair above, so it is declared
            * as an early-clobber read-write operand; the compiler assumes
            * plain inputs are left unmodified by the asm */
164           [tmp0]"=&r"(tmp[0]),              [dst]"+&r"(dst)
165         : [block]"r"(block),                [stride]"r"((mips_reg)stride),
166           [ff_pw_32]"f"(ff_pw_32)
167         : "memory"
168     );
169
       /* block was already zeroed inside the asm; kept as in the original */
170     memset(block, 0, 32);
171 }
172
173 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
174 {
175     double ftmp[16];
176     uint64_t tmp[7];
177     mips_reg addr[1];
178     DECLARE_VAR_LOW32;
179     DECLARE_VAR_ALL64;
180     DECLARE_VAR_ADDRT;
181
182     __asm__ volatile (
183         "lhu        %[tmp0],    0x00(%[block])                          \n\t"
184         PTR_ADDI   "$29,        $29,            -0x20                   \n\t"
185         PTR_ADDIU  "%[tmp0],    %[tmp0],        0x20                    \n\t"
186         MMI_LDC1(%[ftmp1], %[block], 0x10)
187         "sh         %[tmp0],    0x00(%[block])                          \n\t"
188         MMI_LDC1(%[ftmp2], %[block], 0x20)
189         "dli        %[tmp0],    0x01                                    \n\t"
190         MMI_LDC1(%[ftmp3], %[block], 0x30)
191         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
192         MMI_LDC1(%[ftmp5], %[block], 0x50)
193         MMI_LDC1(%[ftmp6], %[block], 0x60)
194         MMI_LDC1(%[ftmp7], %[block], 0x70)
195         "mov.d      %[ftmp0],   %[ftmp1]                                \n\t"
196         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
197         "psrah      %[ftmp4],   %[ftmp5],       %[ftmp8]                \n\t"
198         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
199         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
200         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
201         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
202         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
203         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
204         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
205         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
206         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
207         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
208         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
209         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
210         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
211         "dli        %[tmp0],    0x02                                    \n\t"
212         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
213         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
214         "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
215         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
216         "psrah      %[ftmp3],   %[ftmp4],       %[ftmp9]                \n\t"
217         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
218         "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
219         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
220         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
221         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
222         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
223         "mov.d      %[ftmp5],   %[ftmp6]                                \n\t"
224         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
225         "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
226         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
227         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
228         MMI_LDC1(%[ftmp2], %[block], 0x00)
229         MMI_LDC1(%[ftmp5], %[block], 0x40)
230         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
231         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
232         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
233         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
234         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
235         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
236         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
237         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
238         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
239         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
240         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
241         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
242         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
243         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
244         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
245         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
246         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
247         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
248         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
249         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
250         MMI_SDC1(%[ftmp6], %[block], 0x00)
251         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
252         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp0]                \n\t"
253         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
254         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp1]                \n\t"
255         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
256         "punpckhwd  %[ftmp1],   %[ftmp7],       %[ftmp3]                \n\t"
257         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
258         "punpckhwd  %[ftmp3],   %[ftmp6],       %[ftmp0]                \n\t"
259         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
260         MMI_LDC1(%[ftmp0], %[block], 0x00)
261         MMI_SDC1(%[ftmp7], $29, 0x00)
262         MMI_SDC1(%[ftmp1], $29, 0x10)
263         "dmfc1      %[tmp1],    %[ftmp6]                                \n\t"
264         "dmfc1      %[tmp3],    %[ftmp3]                                \n\t"
265         "punpckhhw  %[ftmp3],   %[ftmp5],       %[ftmp2]                \n\t"
266         "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
267         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp0]                \n\t"
268         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
269         "punpckhwd  %[ftmp0],   %[ftmp5],       %[ftmp4]                \n\t"
270         "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
271         "punpckhwd  %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
272         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
273         MMI_SDC1(%[ftmp5], $29, 0x08)
274         MMI_SDC1(%[ftmp0], $29, 0x18)
275         "dmfc1      %[tmp2],    %[ftmp3]                                \n\t"
276         "dmfc1      %[tmp4],    %[ftmp4]                                \n\t"
277         MMI_LDC1(%[ftmp1], %[block], 0x18)
278         MMI_LDC1(%[ftmp6], %[block], 0x28)
279         MMI_LDC1(%[ftmp2], %[block], 0x38)
280         MMI_LDC1(%[ftmp0], %[block], 0x58)
281         MMI_LDC1(%[ftmp3], %[block], 0x68)
282         MMI_LDC1(%[ftmp4], %[block], 0x78)
283         "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
284         "psrah      %[ftmp5],   %[ftmp0],       %[ftmp8]                \n\t"
285         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
286         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
287         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
288         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
289         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
290         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
291         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
292         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
293         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
294         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
295         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
296         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
297         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
298         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
299         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
300         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
301         "psrah      %[ftmp2],   %[ftmp5],       %[ftmp9]                \n\t"
302         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
303         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
304         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
305         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
306         "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
307         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
308         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
309         "mov.d      %[ftmp0],   %[ftmp3]                                \n\t"
310         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
311         "psrah      %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
312         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
313         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
314         MMI_LDC1(%[ftmp6], %[block], 0x08)
315         MMI_LDC1(%[ftmp0], %[block], 0x48)
316         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
317         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
318         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
319         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
320         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
321         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
322         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
323         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
324         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
325         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
326         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
327         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
328         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
329         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
330         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
331         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
332         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
333         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
334         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
335         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
336         MMI_SDC1(%[ftmp3], %[block], 0x08)
337         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
338         "punpckhhw  %[ftmp3],   %[ftmp4],       %[ftmp7]                \n\t"
339         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
340         "punpckhhw  %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
341         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
342         "punpckhwd  %[ftmp1],   %[ftmp4],       %[ftmp2]                \n\t"
343         "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
344         "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp7]                \n\t"
345         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
346         MMI_LDC1(%[ftmp7], %[block], 0x08)
347         "dmfc1      %[tmp5],    %[ftmp4]                                \n\t"
348         "mov.d      %[ftmp10],  %[ftmp1]                                \n\t"
349         "mov.d      %[ftmp12],  %[ftmp3]                                \n\t"
350         "mov.d      %[ftmp14],  %[ftmp2]                                \n\t"
351         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp6]                \n\t"
352         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
353         "punpckhhw  %[ftmp6],   %[ftmp5],       %[ftmp7]                \n\t"
354         "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
355         "punpckhwd  %[ftmp7],   %[ftmp0],       %[ftmp5]                \n\t"
356         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
357         "punpckhwd  %[ftmp5],   %[ftmp2],       %[ftmp6]                \n\t"
358         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
359         "dmfc1      %[tmp6],    %[ftmp0]                                \n\t"
360         "mov.d      %[ftmp11],  %[ftmp7]                                \n\t"
361         "mov.d      %[ftmp13],  %[ftmp2]                                \n\t"
362         "mov.d      %[ftmp15],  %[ftmp5]                                \n\t"
363         PTR_ADDIU  "%[addr0],   %[dst],         0x04                    \n\t"
364         "mov.d      %[ftmp7],   %[ftmp10]                               \n\t"
365         "dmtc1      %[tmp3],    %[ftmp6]                                \n\t"
366         MMI_LDC1(%[ftmp1], $29, 0x10)
367         "dmtc1      %[tmp1],    %[ftmp3]                                \n\t"
368         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
369         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
370         "psrah      %[ftmp0],   %[ftmp7],       %[ftmp8]                \n\t"
371         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
372         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
373         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
374         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp14]               \n\t"
375         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
376         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
377         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
378         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
379         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
380         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp14]               \n\t"
381         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp14]               \n\t"
382         "psrah      %[ftmp5],   %[ftmp14],      %[ftmp8]                \n\t"
383         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
384         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
385         "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
386         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
387         "psrah      %[ftmp6],   %[ftmp0],       %[ftmp9]                \n\t"
388         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
389         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
390         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
391         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
392         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
393         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
394         "mov.d      %[ftmp7],   %[ftmp12]                               \n\t"
395         "psrah      %[ftmp2],   %[ftmp12],      %[ftmp8]                \n\t"
396         "psrah      %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
397         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
398         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
399         MMI_LDC1(%[ftmp3], $29, 0x00)
400         "dmtc1      %[tmp5],    %[ftmp7]                                \n\t"
401         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
402         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
403         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
404         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
405         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
406         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
407         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
408         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
409         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
410         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
411         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
412         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
413         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
414         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
415         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
416         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
417         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
418         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
419         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
420         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
421         MMI_SDC1(%[ftmp3], $29, 0x00)
422         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
423         MMI_SDC1(%[ftmp0], $29, 0x10)
424         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
425         "xor        %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
426         MMI_SDC1(%[ftmp2], %[block], 0x00)
427         MMI_SDC1(%[ftmp2], %[block], 0x08)
428         MMI_SDC1(%[ftmp2], %[block], 0x10)
429         MMI_SDC1(%[ftmp2], %[block], 0x18)
430         MMI_SDC1(%[ftmp2], %[block], 0x20)
431         MMI_SDC1(%[ftmp2], %[block], 0x28)
432         MMI_SDC1(%[ftmp2], %[block], 0x30)
433         MMI_SDC1(%[ftmp2], %[block], 0x38)
434         MMI_SDC1(%[ftmp2], %[block], 0x40)
435         MMI_SDC1(%[ftmp2], %[block], 0x48)
436         MMI_SDC1(%[ftmp2], %[block], 0x50)
437         MMI_SDC1(%[ftmp2], %[block], 0x58)
438         MMI_SDC1(%[ftmp2], %[block], 0x60)
439         MMI_SDC1(%[ftmp2], %[block], 0x68)
440         MMI_SDC1(%[ftmp2], %[block], 0x70)
441         MMI_SDC1(%[ftmp2], %[block], 0x78)
442         "dli        %[tmp3],    0x06                                    \n\t"
443         "mtc1       %[tmp3],    %[ftmp10]                               \n\t"
444         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
445         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
446         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
447         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
448         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
449         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
450         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
451         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
452         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
453         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
454         MMI_SWC1(%[ftmp3], %[dst], 0x00)
455         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
456         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
457         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
458         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
459         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
460         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
461         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
462         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
463         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
464         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
465         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
466         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
467         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
468         MMI_SWC1(%[ftmp3], %[dst], 0x00)
469         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
470         MMI_LDC1(%[ftmp5], $29, 0x00)
471         MMI_LDC1(%[ftmp4], $29, 0x10)
472         "dmtc1      %[tmp1],    %[ftmp6]                                \n\t"
473         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
474         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
475         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
476         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
477         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
478         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
479         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
480         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
481         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
482         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
483         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
484         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
485         MMI_SWC1(%[ftmp3], %[dst], 0x00)
486         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
487         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
488         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
489         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
490         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
491         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
492         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
493         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
494         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
495         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
496         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
497         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
498         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
499         MMI_SWC1(%[ftmp3], %[dst], 0x00)
500         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
501         "dmtc1      %[tmp4],    %[ftmp1]                                \n\t"
502         "dmtc1      %[tmp2],    %[ftmp6]                                \n\t"
503         MMI_LDC1(%[ftmp4], $29, 0x18)
504         "mov.d      %[ftmp5],   %[ftmp4]                                \n\t"
505         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
506         "psrah      %[ftmp7],   %[ftmp11],      %[ftmp8]                \n\t"
507         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
508         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
509         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
510         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp11]               \n\t"
511         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
512         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
513         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
514         "psubh      %[ftmp3],   %[ftmp11],      %[ftmp1]                \n\t"
515         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
516         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp15]               \n\t"
517         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp15]               \n\t"
518         "psrah      %[ftmp2],   %[ftmp15],      %[ftmp8]                \n\t"
519         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
520         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
521         "mov.d      %[ftmp2],   %[ftmp4]                                \n\t"
522         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
523         "psrah      %[ftmp1],   %[ftmp7],       %[ftmp9]                \n\t"
524         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
525         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
526         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
527         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
528         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
529         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
530         "mov.d      %[ftmp3],   %[ftmp13]                               \n\t"
531         "psrah      %[ftmp0],   %[ftmp13],      %[ftmp8]                \n\t"
532         "psrah      %[ftmp7],   %[ftmp6],       %[ftmp8]                \n\t"
533         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
534         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
535         MMI_LDC1(%[ftmp6], $29, 0x08)
536         "dmtc1      %[tmp6],    %[ftmp3]                                \n\t"
537         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
538         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
539         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
540         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
541         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
542         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
543         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
544         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
545         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
546         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
547         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
548         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
549         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
550         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
551         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
552         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
553         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
554         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
555         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
556         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
557         MMI_SDC1(%[ftmp6], $29, 0x08)
558         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
559         MMI_SDC1(%[ftmp7], $29, 0x18)
560         "dmfc1      %[tmp2],    %[ftmp0]                                \n\t"
561         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
562         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
563         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
564         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
565         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
566         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
567         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
568         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
569         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
570         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
571         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
572         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
573         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
574         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
575         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
576         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
577         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
578         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
579         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
580         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
581         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
582         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
583         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
584         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
585         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
586         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
587         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
588         MMI_LDC1(%[ftmp2], $29, 0x08)
589         MMI_LDC1(%[ftmp5], $29, 0x18)
590         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
591         "dmtc1      %[tmp2],    %[ftmp1]                                \n\t"
592         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
593         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
594         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
595         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp10]               \n\t"
596         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
597         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
598         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
599         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
600         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
601         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
602         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
603         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
604         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
605         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
606         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
607         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
608         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
609         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
610         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
611         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
612         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
613         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
614         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
615         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
616         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
617         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
618         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
619         PTR_ADDIU  "$29,        $29,            0x20                    \n\t"
620         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
621           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
622           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
623           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
624           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
625           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
626           [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
627           [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
628           [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
629           [tmp2]"=&r"(tmp[2]),              [tmp3]"=&r"(tmp[3]),
630           [tmp4]"=&r"(tmp[4]),              [tmp5]"=&r"(tmp[5]),
631           [tmp6]"=&r"(tmp[6]),
632           RESTRICT_ASM_LOW32
633           RESTRICT_ASM_ALL64
634           RESTRICT_ASM_ADDRT
635           [addr0]"=&r"(addr[0])
636         : [dst]"r"(dst),                    [block]"r"(block),
637           [stride]"r"((mips_reg)stride)
638         : "$29","memory"
639     );
640
641     memset(block, 0, 128);
642 }
643
/*
 * DC-only reconstruction of one 4x4 block, added to the destination in place.
 *
 * The single value dc = (block[0] + 32) >> 6 is computed in C, block[0] is
 * reset to 0, and the inline assembly then adds dc to the 32-bit word found at
 * each of dst, dst + stride, dst + 2*stride and dst + 3*stride.
 *
 * dst    first of the four destination rows (read and written)
 * block  coefficient block; only block[0] is read here, and it is cleared
 * stride distance between consecutive destination rows, in bytes
 */
644 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
645 {
646     int dc = (block[0] + 32) >> 6;
647     double ftmp[6];
648     DECLARE_VAR_LOW32;
649
650     block[0] = 0;
651
652     __asm__ volatile (
            /* ftmp5 <- dc, ftmp0 <- 0. pshufh is then driven by the all-zero
             * selector in ftmp0; presumably this replicates the low halfword
             * of ftmp5 (dc) into every 16-bit lane -- confirm against the
             * Loongson MMI manual. */
653         "mtc1       %[dc],      %[ftmp5]                                \n\t"
654         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
655         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
            /* Fetch one word from each of the four rows. MMI_ULWC1 is a macro
             * from mmiutils.h (not visible here); by its name an unaligned
             * 32-bit load into an FP register. */
656         MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
657         MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
658         MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
659         MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
            /* Interleave the pixel bytes with the zero register so that each
             * byte becomes a 16-bit lane. */
660         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
661         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
662         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
663         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* Add the broadcast dc to every lane (paddsh: saturating signed
             * halfword add). */
664         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
665         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
666         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
667         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
            /* Narrow the halfwords back to bytes (packushb: pack with unsigned
             * saturation); the second operand is the zero register. */
668         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
669         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
670         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
671         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* Write the four rows back. MMI_SWC1 is likewise a macro from
             * mmiutils.h; by its name a 32-bit store. */
672         MMI_SWC1(%[ftmp1], %[dst0], 0x00)
673         MMI_SWC1(%[ftmp2], %[dst1], 0x00)
674         MMI_SWC1(%[ftmp3], %[dst2], 0x00)
675         MMI_SWC1(%[ftmp4], %[dst3], 0x00)
            /* Outputs: six early-clobber FP scratch registers backed by
             * ftmp[0..5]. RESTRICT_ASM_LOW32 pairs with DECLARE_VAR_LOW32
             * above; both are defined outside this file section. */
676         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
677           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
678           [ftmp4]"=&f"(ftmp[4]),
679           RESTRICT_ASM_LOW32
680           [ftmp5]"=&f"(ftmp[5])
            /* Inputs: the four row addresses are precomputed in C. */
681         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
682           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
683           [dc]"r"(dc)
            /* The asm writes through the dst pointers, hence the "memory"
             * clobber. */
684         : "memory"
685     );
686 }
687
/*
 * DC-only reconstruction of one 8x8 block, added to the destination in place.
 *
 * The single value dc = (block[0] + 32) >> 6 is computed in C, block[0] is
 * reset to 0, and the inline assembly then adds dc to the 64-bit word found at
 * each of the eight rows dst + k*stride, k = 0..7. Rows 0-3 are handled first,
 * then rows 4-7 with the same instruction sequence.
 *
 * dst    first of the eight destination rows (read and written)
 * block  coefficient block; only block[0] is read here, and it is cleared
 * stride distance between consecutive destination rows, in bytes
 */
688 void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
689 {
690     int dc = (block[0] + 32) >> 6;
691     double ftmp[10];
692     DECLARE_VAR_ALL64;
693
694     block[0] = 0;
695
696     __asm__ volatile (
            /* ftmp5 <- dc, ftmp0 <- 0. pshufh is then driven by the all-zero
             * selector in ftmp0; presumably this replicates the low halfword
             * of ftmp5 (dc) into every 16-bit lane -- confirm against the
             * Loongson MMI manual. */
697         "mtc1       %[dc],      %[ftmp5]                                \n\t"
698         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
699         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
            /* Rows 0-3: load one 64-bit word per row. MMI_LDC1 is a macro from
             * mmiutils.h (not visible here); by its name a 64-bit load into an
             * FP register. */
700         MMI_LDC1(%[ftmp1], %[dst0], 0x00)
701         MMI_LDC1(%[ftmp2], %[dst1], 0x00)
702         MMI_LDC1(%[ftmp3], %[dst2], 0x00)
703         MMI_LDC1(%[ftmp4], %[dst3], 0x00)
            /* Split every row into two registers of 16-bit lanes by
             * interleaving with the zero register: the high bytes go to
             * ftmp6..ftmp9, the low bytes stay in ftmp1..ftmp4. */
704         "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
705         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
706         "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
707         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
708         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
709         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
710         "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
711         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* Add the broadcast dc to both halves of every row (paddsh:
             * saturating signed halfword add). */
712         "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
713         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
714         "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
715         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
716         "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
717         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
718         "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
719         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
            /* Recombine the low and high halves of each row into bytes
             * (packushb: pack with unsigned saturation). */
720         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
721         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
722         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
723         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
            /* Store rows 0-3 (MMI_SDC1: by its name a 64-bit store). */
724         MMI_SDC1(%[ftmp1], %[dst0], 0x00)
725         MMI_SDC1(%[ftmp2], %[dst1], 0x00)
726         MMI_SDC1(%[ftmp3], %[dst2], 0x00)
727         MMI_SDC1(%[ftmp4], %[dst3], 0x00)
728
            /* Rows 4-7: identical sequence, reusing the same registers; the
             * zero in ftmp0 and the broadcast dc in ftmp5 are still live. */
729         MMI_LDC1(%[ftmp1], %[dst4], 0x00)
730         MMI_LDC1(%[ftmp2], %[dst5], 0x00)
731         MMI_LDC1(%[ftmp3], %[dst6], 0x00)
732         MMI_LDC1(%[ftmp4], %[dst7], 0x00)
733         "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
734         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
735         "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
736         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
737         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
738         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
739         "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
740         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
741         "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
742         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
743         "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
744         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
745         "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
746         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
747         "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
748         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
749         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
750         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
751         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
752         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
753         MMI_SDC1(%[ftmp1], %[dst4], 0x00)
754         MMI_SDC1(%[ftmp2], %[dst5], 0x00)
755         MMI_SDC1(%[ftmp3], %[dst6], 0x00)
756         MMI_SDC1(%[ftmp4], %[dst7], 0x00)
            /* Outputs: ten early-clobber FP scratch registers backed by
             * ftmp[0..9]. RESTRICT_ASM_ALL64 pairs with DECLARE_VAR_ALL64
             * above; both are defined outside this file section. */
757         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
758           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
759           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
760           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
761           [ftmp8]"=&f"(ftmp[8]),
762           RESTRICT_ASM_ALL64
763           [ftmp9]"=&f"(ftmp[9])
            /* Inputs: the eight row addresses are precomputed in C. */
764         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
765           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
766           [dst4]"r"(dst+4*stride),          [dst5]"r"(dst+5*stride),
767           [dst6]"r"(dst+6*stride),          [dst7]"r"(dst+7*stride),
768           [dc]"r"(dc)
            /* The asm writes through the dst pointers, hence the "memory"
             * clobber. */
769         : "memory"
770     );
771 }
772
773 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
774         int16_t *block, int stride, const uint8_t nnzc[15*8])
775 {
776     int i;
777     for(i=0; i<16; i++){
778         int nnz = nnzc[ scan8[i] ];
779         if(nnz){
780             if(nnz==1 && ((int16_t*)block)[i*16])
781                 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
782                         stride);
783             else
784                 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
785                         stride);
786         }
787     }
788 }
789
790 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
791         int16_t *block, int stride, const uint8_t nnzc[15*8])
792 {
793     int i;
794     for(i=0; i<16; i++){
795         if(nnzc[ scan8[i] ])
796             ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
797         else if(((int16_t*)block)[i*16])
798             ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
799                     stride);
800     }
801 }
802
803 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
804         int16_t *block, int stride, const uint8_t nnzc[15*8])
805 {
806     int i;
807     for(i=0; i<16; i+=4){
808         int nnz = nnzc[ scan8[i] ];
809         if(nnz){
810             if(nnz==1 && ((int16_t*)block)[i*16])
811                 ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
812                         block + i*16, stride);
813             else
814                 ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
815                         stride);
816         }
817     }
818 }
819
820 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
821         int16_t *block, int stride, const uint8_t nnzc[15*8])
822 {
823     int i, j;
824     for(j=1; j<3; j++){
825         for(i=j*16; i<j*16+4; i++){
826             if(nnzc[ scan8[i] ])
827                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
828                         block + i*16, stride);
829             else if(((int16_t*)block)[i*16])
830                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
831                         block + i*16, stride);
832         }
833     }
834 }
835
836 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
837         int16_t *block, int stride, const uint8_t nnzc[15*8])
838 {
839     int i, j;
840
841     for(j=1; j<3; j++){
842         for(i=j*16; i<j*16+4; i++){
843             if(nnzc[ scan8[i] ])
844                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
845                         block + i*16, stride);
846             else if(((int16_t*)block)[i*16])
847                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
848                         block + i*16, stride);
849         }
850     }
851
852     for(j=1; j<3; j++){
853         for(i=j*16+4; i<j*16+8; i++){
854             if(nnzc[ scan8[i+4] ])
855                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
856                         block + i*16, stride);
857             else if(((int16_t*)block)[i*16])
858                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
859                         block + i*16, stride);
860         }
861     }
862 }
863
864 void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
865         int qmul)
866 {
867     double ftmp[10];
868     uint64_t tmp[2];
869     DECLARE_VAR_ALL64;
870
871     __asm__ volatile (
872         ".set       noreorder                                           \n\t"
873         "dli        %[tmp0],    0x08                                    \n\t"
874         MMI_LDC1(%[ftmp3], %[input], 0x18)
875         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
876         MMI_LDC1(%[ftmp2], %[input], 0x10)
877         "dli        %[tmp0],    0x20                                    \n\t"
878         MMI_LDC1(%[ftmp1], %[input], 0x08)
879         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
880         MMI_LDC1(%[ftmp0], %[input], 0x00)
881         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
882         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
883         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
884         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
885         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
886         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
887         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
888         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
889         "psubh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
890         "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
891         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
892         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
893         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
894         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
895         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
896         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
897         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
898         "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp0]                \n\t"
899         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
900         "mov.d      %[ftmp0],   %[ftmp4]                                \n\t"
901         "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
902         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
903         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
904         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
905         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
906         "mov.d      %[ftmp1],   %[ftmp2]                                \n\t"
907         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
908         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
909         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
910         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
911         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
912         "mov.d      %[ftmp1],   %[ftmp4]                                \n\t"
913         "daddi      %[tmp0],    %[qmul],        -0x7fff                 \n\t"
914         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
915         "bgtz       %[tmp0],    1f                                      \n\t"
916         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
917         "ori        %[tmp0],    $0,             0x80                    \n\t"
918         "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
919         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
920         "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
921         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
922         "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
923         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
924         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
925         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
926         "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
927         "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
928         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
929         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
930         "psraw      %[ftmp0],   %[ftmp0],       %[ftmp8]                \n\t"
931         "psraw      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
932         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
933         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
934         "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
935         "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
936         "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
937         "dsrl       %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
938         "mfc1       %[input],   %[ftmp0]                                \n\t"
939         "sh         %[tmp1],    0x00(%[output])                         \n\t"
940         "sh         %[input],   0x80(%[output])                         \n\t"
941         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
942         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
943         "sh         %[tmp1],    0x20(%[output])                         \n\t"
944         "sh         %[input],   0xa0(%[output])                         \n\t"
945         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
946         "dsrl       %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
947         "mfc1       %[input],   %[ftmp2]                                \n\t"
948         "sh         %[tmp1],    0x40(%[output])                         \n\t"
949         "sh         %[input],   0xc0(%[output])                         \n\t"
950         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
951         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
952         "sh         %[tmp1],    0x60(%[output])                         \n\t"
953         "sh         %[input],   0xe0(%[output])                         \n\t"
954         "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
955         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
956         "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
957         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
958         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
959         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
960         "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
961         "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
962         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
963         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
964         "psraw      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
965         "psraw      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
966         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
967         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
968         "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
969         "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
970         "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
971         "dsrl       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
972         "mfc1       %[input],   %[ftmp3]                                \n\t"
973         "sh         %[tmp1],    0x100(%[output])                        \n\t"
974         "sh         %[input],   0x180(%[output])                        \n\t"
975         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
976         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
977         "sh         %[tmp1],    0x120(%[output])                        \n\t"
978         "sh         %[input],   0x1a0(%[output])                        \n\t"
979         "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
980         "dsrl       %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
981         "mfc1       %[input],   %[ftmp4]                                \n\t"
982         "sh         %[tmp1],    0x140(%[output])                        \n\t"
983         "sh         %[input],   0x1c0(%[output])                        \n\t"
984         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
985         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
986         "sh         %[tmp1],    0x160(%[output])                        \n\t"
987         "j          2f                                                  \n\t"
988         "sh         %[input],   0x1e0(%[output])                        \n\t"
989         "1:                                                             \n\t"
990         "ori        %[tmp0],    $0,             0x1f                    \n\t"
991 #if HAVE_LOONGSON3
992         "clz        %[tmp1],    %[qmul]                                 \n\t"
993 #elif HAVE_LOONGSON2
994 #endif
995         "ori        %[input],   $0,             0x07                    \n\t"
996         "dsubu      %[tmp1],    %[tmp0],        %[tmp1]                 \n\t"
997         "ori        %[tmp0],    $0,             0x80                    \n\t"
998         "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
999         "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
1000         "dsubu      %[tmp0],    %[tmp1],        %[input]                \n\t"
1001         "movn       %[tmp1],    %[input],       %[tmp0]                 \n\t"
1002         PTR_ADDIU  "%[input],   %[input],       0x01                    \n\t"
1003         "andi       %[tmp0],    %[tmp1],        0xff                    \n\t"
1004         "srlv       %[qmul],    %[qmul],        %[tmp0]                 \n\t"
1005         PTR_SUBU   "%[input],   %[input],       %[tmp1]                 \n\t"
1006         "mtc1       %[input],   %[ftmp6]                                \n\t"
1007         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
1008         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
1009         "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
1010         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
1011         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1012         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1013         "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
1014         "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1015         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1016         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1017         "psraw      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
1018         "psraw      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1019         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1020         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1021         "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
1022         "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1023         "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
1024         "dsrl       %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
1025         "sh         %[tmp1],    0x00(%[output])                         \n\t"
1026         "mfc1       %[input],   %[ftmp0]                                \n\t"
1027         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1028         "sh         %[input],   0x80(%[output])                         \n\t"
1029         "sh         %[tmp1],    0x20(%[output])                         \n\t"
1030         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1031         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
1032         "sh         %[input],   0xa0(%[output])                         \n\t"
1033         "dsrl       %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
1034         "sh         %[tmp1],    0x40(%[output])                         \n\t"
1035         "mfc1       %[input],   %[ftmp2]                                \n\t"
1036         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1037         "sh         %[input],   0xc0(%[output])                         \n\t"
1038         "sh         %[tmp1],    0x60(%[output])                         \n\t"
1039         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1040         "sh         %[input],   0xe0(%[output])                         \n\t"
1041         "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
1042         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
1043         "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
1044         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
1045         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1046         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1047         "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1048         "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1049         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1050         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1051         "psraw      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
1052         "psraw      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1053         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1054         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1055         "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1056         "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1057         "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
1058         "dsrl       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
1059         "mfc1       %[input],   %[ftmp3]                                \n\t"
1060         "sh         %[tmp1],    0x100(%[output])                        \n\t"
1061         "sh         %[input],   0x180(%[output])                        \n\t"
1062         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1063         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1064         "sh         %[tmp1],    0x120(%[output])                        \n\t"
1065         "sh         %[input],   0x1a0(%[output])                        \n\t"
1066         "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
1067         "dsrl       %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
1068         "mfc1       %[input],   %[ftmp4]                                \n\t"
1069         "sh         %[tmp1],    0x140(%[output])                        \n\t"
1070         "sh         %[input],   0x1c0(%[output])                        \n\t"
1071         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1072         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1073         "sh         %[tmp1],    0x160(%[output])                        \n\t"
1074         "sh         %[input],   0x1e0(%[output])                        \n\t"
1075         "2:                                                             \n\t"
1076         ".set       reorder                                             \n\t"
1077         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1078           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1079           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1080           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1081           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1082           [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
1083           RESTRICT_ASM_ALL64
1084           [output]"+&r"(output),            [input]"+&r"(input),
1085           [qmul]"+&r"(qmul)
1086         : [ff_pw_1]"f"(ff_pw_1)
1087         : "memory"
1088     );
1089 }
1090
/**
 * In-place inverse transform and dequantization of the eight chroma DC
 * coefficients of a 4:2:2 block (plain C body behind the MMI entry point).
 *
 * The DC terms live at block[0], block[16], ..., block[112]: four rows spaced
 * 32 coefficients apart with two entries per row spaced 16 apart. Each row is
 * first reduced to the sum and the difference of its two entries. The four
 * sums, and separately the four differences, then pass through a 4-point
 * butterfly. Every result is scaled as (value * qmul + 128) >> 8 and written
 * back: sum-derived outputs to block[0/32/64/96], difference-derived outputs
 * to block[16/48/80/112].
 *
 * @param block coefficient buffer; only the eight positions named above are
 *              read and written
 * @param qmul  dequantization multiplier
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    enum { COL_STEP = 16, ROW_STEP = 32 };
    int pair[2][4];     /* pair[0][r]: row sum, pair[1][r]: row difference */
    int r, c;

    /* All inputs are consumed here, before anything is written back. */
    for (r = 0; r < 4; r++) {
        const int lhs = block[ROW_STEP * r];
        const int rhs = block[ROW_STEP * r + COL_STEP];

        pair[0][r] = lhs + rhs;
        pair[1][r] = lhs - rhs;
    }

    for (c = 0; c < 2; c++) {
        int16_t *out = block + COL_STEP * c;
        const int z0 = pair[c][0] + pair[c][2];
        const int z1 = pair[c][0] - pair[c][2];
        const int z2 = pair[c][1] - pair[c][3];
        const int z3 = pair[c][1] + pair[c][3];

        /*
         * The product is formed in unsigned arithmetic: a large qmul would
         * otherwise overflow a signed int, which is undefined behaviour.
         * For every product that fits in an int the result is unchanged.
         */
        out[ROW_STEP * 0] = (int)((unsigned)(z0 + z3) * qmul + 128) >> 8;
        out[ROW_STEP * 1] = (int)((unsigned)(z1 + z2) * qmul + 128) >> 8;
        out[ROW_STEP * 2] = (int)((unsigned)(z1 - z2) * qmul + 128) >> 8;
        out[ROW_STEP * 3] = (int)((unsigned)(z0 - z3) * qmul + 128) >> 8;
    }
}
1123
/**
 * In-place 2x2 inverse transform and dequantization of the four chroma DC
 * coefficients stored at block[0], block[16], block[32] and block[48].
 *
 * Each pair (block[0], block[16]) and (block[32], block[48]) is reduced to
 * its sum and difference; the sums and the differences are then combined,
 * multiplied by qmul and shifted right by 7 (no rounding term is added).
 *
 * @param block coefficient buffer; only the four positions named above are
 *              read and written
 * @param qmul  dequantization multiplier
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    const int top_sum     = block[0]  + block[16];
    const int top_diff    = block[0]  - block[16];
    const int bottom_sum  = block[32] + block[48];
    const int bottom_diff = block[32] - block[48];

    /*
     * Multiply in unsigned arithmetic so that a large qmul cannot trigger
     * signed-overflow undefined behaviour; results are unchanged whenever
     * the product fits in an int.
     */
    block[ 0] = (int)((unsigned)(top_sum  + bottom_sum)  * qmul) >> 7;
    block[16] = (int)((unsigned)(top_diff + bottom_diff) * qmul) >> 7;
    block[32] = (int)((unsigned)(top_sum  - bottom_sum)  * qmul) >> 7;
    block[48] = (int)((unsigned)(top_diff - bottom_diff) * qmul) >> 7;
}
1137
1138 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1139         int log2_denom, int weight, int offset)
1140 {
1141     int y;
1142     double ftmp[8];
1143     DECLARE_VAR_ALL64;
1144
1145     offset <<= log2_denom;
1146
1147     if (log2_denom)
1148         offset += 1 << (log2_denom - 1);
1149
1150     for (y=0; y<height; y++, block+=stride) {
1151         __asm__ volatile (
1152             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1153             MMI_LDC1(%[ftmp1], %[block0], 0x00)
1154             MMI_LDC1(%[ftmp2], %[block1], 0x00)
1155             "mtc1       %[weight],  %[ftmp3]                            \n\t"
1156             "mtc1       %[offset],  %[ftmp4]                            \n\t"
1157             "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1158             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1159             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1160             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
1161             "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
1162             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1163             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1164             "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1165             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1166             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1167             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t"
1168             "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1169             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp4]            \n\t"
1170             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1171             "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1172             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1173             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1174             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1175             "psrah      %[ftmp2],   %[ftmp2],       %[ftmp5]            \n\t"
1176             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1177             "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1178             MMI_SDC1(%[ftmp1], %[block0], 0x00)
1179             MMI_SDC1(%[ftmp2], %[block1], 0x00)
1180             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1181               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1182               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1183               [ftmp6]"=&f"(ftmp[6]),
1184               RESTRICT_ASM_ALL64
1185               [ftmp7]"=&f"(ftmp[7])
1186             : [block0]"r"(block),           [block1]"r"(block+8),
1187               [weight]"r"(weight),          [offset]"r"(offset),
1188               [log2_denom]"r"(log2_denom)
1189             : "memory"
1190         );
1191     }
1192 }
1193
1194 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1195         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1196         int offset)
1197 {
1198     int y;
1199     double ftmp[9];
1200     DECLARE_VAR_ALL64;
1201
1202     offset = ((offset + 1) | 1) << log2_denom;
1203
1204     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1205         __asm__ volatile (
1206             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1207             MMI_LDC1(%[ftmp1], %[src0], 0x00)
1208             MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1209             "mtc1       %[weights], %[ftmp3]                            \n\t"
1210             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1211             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1212             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1213             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1214             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1215             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1216             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1217             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1218             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1219             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1220             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1221             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1222             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1223             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1224             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1225             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1226             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1227             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1228             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1229             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1230             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1231             MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1232             MMI_LDC1(%[ftmp1], %[src1], 0x00)
1233             MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1234             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1235             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1236             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1237             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1238             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1239             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1240             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1241             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1242             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1243             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1244             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1245             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1246             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1247             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1248             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1249             MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1250             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1251               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1252               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1253               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1254               RESTRICT_ASM_ALL64
1255               [ftmp8]"=&f"(ftmp[8])
1256             : [dst0]"r"(dst),               [dst1]"r"(dst+8),
1257               [src0]"r"(src),               [src1]"r"(src+8),
1258               [weights]"r"(weights),        [weightd]"r"(weightd),
1259               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1260             : "memory"
1261         );
1262     }
1263 }
1264
1265 void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1266         int log2_denom, int weight, int offset)
1267 {
1268     int y;
1269     double ftmp[6];
1270     DECLARE_VAR_ALL64;
1271
1272     offset <<= log2_denom;
1273
1274     if (log2_denom)
1275         offset += 1 << (log2_denom - 1);
1276
1277     for (y=0; y<height; y++, block+=stride) {
1278         __asm__ volatile (
1279             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1280             MMI_LDC1(%[ftmp1], %[block], 0x00)
1281             "mtc1       %[weight],  %[ftmp2]                            \n\t"
1282             "mtc1       %[offset],  %[ftmp3]                            \n\t"
1283             "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1284             "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1285             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1286             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
1287             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1288             "pmullh     %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"
1289             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1290             "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1291             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1292             "psrah      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"
1293             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1294             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1295             MMI_SDC1(%[ftmp1], %[block], 0x00)
1296             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1297               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1298               [ftmp4]"=&f"(ftmp[4]),
1299               RESTRICT_ASM_ALL64
1300               [ftmp5]"=&f"(ftmp[5])
1301             : [block]"r"(block),            [weight]"r"(weight),
1302               [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1303             : "memory"
1304         );
1305     }
1306 }
1307
1308 void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1309         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1310         int offset)
1311 {
1312     int y;
1313     double ftmp[9];
1314     DECLARE_VAR_ALL64;
1315
1316     offset = ((offset + 1) | 1) << log2_denom;
1317
1318     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1319         __asm__ volatile (
1320             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1321             MMI_LDC1(%[ftmp1], %[src], 0x00)
1322             MMI_LDC1(%[ftmp2], %[dst], 0x00)
1323             "mtc1       %[weights], %[ftmp3]                            \n\t"
1324             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1325             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1326             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1327             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1328             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1329             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1330             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1331             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1332             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1333             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1334             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1335             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1336             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1337             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1338             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1339             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1340             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1341             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1342             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1343             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1344             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1345             MMI_SDC1(%[ftmp1], %[dst], 0x00)
1346             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1347               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1348               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1349               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1350               RESTRICT_ASM_ALL64
1351               [ftmp8]"=&f"(ftmp[8])
1352             : [dst]"r"(dst),                [src]"r"(src),
1353               [weights]"r"(weights),        [weightd]"r"(weightd),
1354               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1355             : "memory"
1356         );
1357     }
1358 }
1359
1360 void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1361         int log2_denom, int weight, int offset)
1362 {
1363     int y;
1364     double ftmp[5];
1365     DECLARE_VAR_LOW32;
1366
1367     offset <<= log2_denom;
1368
1369     if (log2_denom)
1370         offset += 1 << (log2_denom - 1);
1371
1372     for (y=0; y<height; y++, block+=stride) {
1373         __asm__ volatile (
1374             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1375             MMI_ULWC1(%[ftmp1], %[block], 0x00)
1376             "mtc1       %[weight],  %[ftmp2]                            \n\t"
1377             "mtc1       %[offset],  %[ftmp3]                            \n\t"
1378             "mtc1       %[log2_denom],              %[ftmp4]            \n\t"
1379             "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1380             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1381             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1382             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1383             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1384             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1385             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1386             MMI_SWC1(%[ftmp1], %[block], 0x00)
1387             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1388               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1389               RESTRICT_ASM_LOW32
1390               [ftmp4]"=&f"(ftmp[4])
1391             : [block]"r"(block),            [weight]"r"(weight),
1392               [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1393             : "memory"
1394         );
1395     }
1396 }
1397
1398 void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1399         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1400         int offset)
1401 {
1402     int y;
1403     double ftmp[7];
1404     DECLARE_VAR_LOW32;
1405
1406     offset = ((offset + 1) | 1) << log2_denom;
1407
1408     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1409         __asm__ volatile (
1410             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1411             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1412             MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1413             "mtc1       %[weight],  %[ftmp3]                            \n\t"
1414             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1415             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1416             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1417             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1418             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1419             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1420             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1421             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1422             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1423             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1424             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1425             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1426             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1427             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1428             MMI_SWC1(%[ftmp1], %[dst], 0x00)
1429             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1430               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1431               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1432               RESTRICT_ASM_LOW32
1433               [ftmp6]"=&f"(ftmp[6])
1434             : [dst]"r"(dst),                [src]"r"(src),
1435               [weight]"r"(weights),         [weightd]"r"(weightd),
1436               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1437             : "memory"
1438         );
1439     }
1440 }
1441
1442 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1443         int8_t *tc0)
1444 {
1445     double ftmp[12];
1446     mips_reg addr[2];
1447     DECLARE_VAR_LOW32;
1448     DECLARE_VAR_ALL64;
1449     DECLARE_VAR_ADDRT;
1450
1451     __asm__ volatile (
1452         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1453         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1454         PTR_ADDU   "%[addr1],   %[stride],      %[addr0]                \n\t"
1455         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1456         PTR_SUBU   "%[addr1],   $0,             %[addr1]                \n\t"
1457         "addi       %[beta],    %[beta],        -0x01                   \n\t"
1458         PTR_ADDU   "%[addr1],   %[addr1],       %[pix]                  \n\t"
1459         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1460         MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1461         MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1462         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1463         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1464         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1465         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1466         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1467         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1468         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1469         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1470         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1471         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1472         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1473         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1474         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1475         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1476         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1477         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1478         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1479         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1480         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1481         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1482         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1483         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1484         "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
1485         MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1486         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1487         "punpcklbh  %[ftmp9],   %[ftmp5],       %[ftmp5]                \n\t"
1488         "pcmpgtb    %[ftmp5],   %[ftmp9],       %[ftmp4]                \n\t"
1489         MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1490         "and        %[ftmp10],  %[ftmp5],       %[ftmp8]                \n\t"
1491         "psubusb    %[ftmp8],   %[ftmp4],       %[ftmp2]                \n\t"
1492         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1493         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1494         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1495         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1496         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1497         "and        %[ftmp5],   %[ftmp10],      %[ftmp9]                \n\t"
1498         "psubb      %[ftmp8],   %[ftmp5],       %[ftmp7]                \n\t"
1499         "and        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1500         "pavgb      %[ftmp5],   %[ftmp2],       %[ftmp3]                \n\t"
1501         MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1502         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1503         "xor        %[ftmp5],   %[ftmp5],       %[ftmp11]               \n\t"
1504         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1505         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1506         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp7]                \n\t"
1507         "paddusb    %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1508         "pmaxub     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1509         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1510         MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
1511         MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1512         "psubusb    %[ftmp4],   %[ftmp5],       %[ftmp3]                \n\t"
1513         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp5]                \n\t"
1514         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1515         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1516         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
1517         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1518         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1519         "and        %[ftmp6],   %[ftmp9],       %[ftmp7]                \n\t"
1520         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1521         "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1522         MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1523         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1524         "xor        %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1525         "and        %[ftmp7],   %[ftmp7],       %[ff_pb_1]              \n\t"
1526         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1527         "psubusb    %[ftmp7],   %[ftmp4],       %[ftmp6]                \n\t"
1528         "paddusb    %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1529         "pmaxub     %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1530         "pminub     %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1531         MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1532         "xor        %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1533         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1534         "and        %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1535         "xor        %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1536         "xor        %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1537         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1538         "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1539         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1540         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1541         "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1542         "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1543         "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1544         "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1545         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
1546         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1547         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1548         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1549         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1550         MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1551         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1552         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1553           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1554           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1555           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1556           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1557           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1558           RESTRICT_ASM_LOW32
1559           RESTRICT_ASM_ALL64
1560           RESTRICT_ASM_ADDRT
1561           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1])
1562         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1563           [alpha]"r"((mips_reg)alpha),      [beta]"r"((mips_reg)beta),
1564           [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1),
1565           [ff_pb_3]"f"(ff_pb_3),            [ff_pb_A1]"f"(ff_pb_A1)
1566         : "memory"
1567     );
1568 }
1569
/**
 * Intra luma deblocking of one horizontal edge, 8 pixels wide (Loongson MMI).
 *
 * The asm reads the eight rows pix - 4 * stride .. pix + 3 * stride (8 bytes
 * each) and rewrites the six rows pix - 3 * stride .. pix + 2 * stride.
 * In the comments below the rows above the edge are called p3, p2, p1, p0
 * (p0 = pix - stride) and the rows from pix downwards q0, q1, q2, q3.
 *
 * Nothing is read or written when alpha - 1 or beta - 1 is negative: both
 * cases branch straight to the local label 1 at the end of the asm.
 *
 * NOTE(review): the PTR_* and MMI_* macros are defined outside this chunk;
 * the comments assume PTR_SLL/PTR_ADDU/PTR_SUBU/PTR_ADDIU are pointer-width
 * shift/add/subtract and MMI_LDC1/MMI_SDC1/MMI_LDXC1/MMI_SDXC1 are 64-bit
 * loads/stores (the X forms taking base + index) - confirm in mmiutils.h.
 *
 * @param pix    first byte of row q0
 * @param stride distance in bytes between two consecutive rows
 * @param alpha  threshold applied to |p0 - q0|
 * @param beta   threshold applied to |p1 - p0|, |q1 - q0|, |p2 - p0|, |q2 - q0|
 */
1570 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
1571         int beta)
1572 {
         /* Scratch area for the asm, which stores to and reloads from offsets
          * 0x00, 0x10, 0x20, 0x30 and 0x40. It is written through %[stack], so
          * it must not be const-qualified. */
1573     DECLARE_ALIGNED(8, uint64_t, stack[0x0a]);
1574     double ftmp[16];
1575     uint64_t tmp[1];
1576     mips_reg addr[3];
1577     DECLARE_VAR_ALL64;
1578     DECLARE_VAR_ADDRT;
1579
1580     __asm__ volatile (
             /* Constants: tmp0 = 1, ftmp0 = 0, ftmp9 = 1 (shift count). */
1581         "ori        %[tmp0],    $0,             0x01                    \n\t"
1582         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1583         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
             /* addr0 = 4 * stride, addr2 = 2 * stride. */
1584         PTR_SLL    "%[addr0],   %[stride],      0x02                    \n\t"
1585         PTR_ADDU   "%[addr2],   %[stride],      %[stride]               \n\t"
             /* alpha -= 1 and leave if the result is negative. In between,
              * ftmp11 = ftmp9 shifted left by ftmp9, presumably 2; it is the
              * second shift count used with psrlh further down. */
1586         PTR_ADDIU  "%[alpha],   %[alpha],       -0x01                   \n\t"
1587         PTR_SLL    "%[ftmp11],  %[ftmp9],       %[ftmp9]                \n\t"
1588         "bltz       %[alpha],   1f                                      \n\t"
             /* addr1 = 3 * stride. */
1589         PTR_ADDU   "%[addr1],   %[addr2],       %[stride]               \n\t"
             /* beta -= 1 and leave if the result is negative. */
1590         PTR_ADDIU  "%[beta],    %[beta],        -0x01                   \n\t"
1591         "bltz       %[beta],    1f                                      \n\t"
             /* addr0 = pix - 4 * stride (row p3). */
1592         PTR_SUBU   "%[addr0],   $0,             %[addr0]                \n\t"
1593         PTR_ADDU   "%[addr0],   %[addr0],       %[pix]                  \n\t"
             /* ftmp3 = q0, ftmp1 = p1, ftmp2 = p0, ftmp4 = q1. */
1594         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1595         MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1596         MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1597         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
             /* Replicate alpha - 1 into every byte of ftmp5 and beta - 1 into
              * every byte of ftmp6 (the packushb for ftmp6 is a few lines down). */
1598         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1599         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1600         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1601         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1602         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
             /* ftmp8 = |q0 - p0| (OR of the two saturating differences) reduced,
              * with saturation, by alpha - 1. The alpha - 1 vector is parked at
              * stack + 0x10 because ftmp5 is reused right away. */
1603         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1604         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1605         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1606         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1607         MMI_SDC1(%[ftmp5], %[stack], 0x10)
1608         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
             /* OR in |p1 - p0| reduced by beta - 1 ... */
1609         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1610         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1611         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1612         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1613         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
             /* ... and |q1 - q0| reduced by beta - 1. */
1614         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1615         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1616         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1617         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1618         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
             /* stack + 0x20 = base mask: 0xff in each byte where all three
              * reduced differences are zero, i.e. |p0 - q0| <= alpha - 1,
              * |p1 - p0| <= beta - 1 and |q1 - q0| <= beta - 1.
              * Also reload alpha - 1 into ftmp5 and load ff_pb_1 into ftmp10. */
1619         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1620         MMI_LDC1(%[ftmp5], %[stack], 0x10)
1621         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1622         "ldc1       %[ftmp10],  %[ff_pb_1]                              \n\t"
1623         MMI_SDC1(%[ftmp8], %[stack], 0x20)
             /* ftmp5 = pavgb(pavgb(alpha - 1, 0), ff_pb_1): a tighter bound
              * for |p0 - q0|. The two saturating differences, each reduced by
              * that bound, compare equal only when both are zero, so ftmp7
              * marks the bytes within the bound; it is then ANDed with the
              * base mask. */
1624         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1625         "psubusb    %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1626         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
1627         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1628         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1629         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1630         MMI_LDC1(%[ftmp15], %[stack], 0x20)
1631         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1632         "and        %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
             /* ftmp15 = p2 (pix - 3 * stride).
              * stack + 0x30 = p-side mask: ftmp7 restricted to the bytes with
              * |p2 - p0| <= beta - 1 (same equality trick as above). */
1633         MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1634         "psubusb    %[ftmp8],   %[ftmp15],      %[ftmp2]                \n\t"
1635         "psubusb    %[ftmp5],   %[ftmp2],       %[ftmp15]               \n\t"
1636         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1637         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1638         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1639         "and        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
             /* ftmp14 = q2 (pix + 2 * stride).
              * stack + 0x40 = q-side mask: ftmp7 restricted to the bytes with
              * |q2 - q0| <= beta - 1. */
1640         MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1641         MMI_SDC1(%[ftmp5], %[stack], 0x30)
1642         "psubusb    %[ftmp8],   %[ftmp14],      %[ftmp3]                \n\t"
1643         "psubusb    %[ftmp5],   %[ftmp3],       %[ftmp14]               \n\t"
1644         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1645         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1646         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1647         "and        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1648         MMI_SDC1(%[ftmp5], %[stack], 0x40)
             /* ---- p side ----
              * ftmp5 = candidate for row p1: pavgb(pavgb(p2, p1), pavgb(p0, q0))
              * minus a one-bit adjustment derived from the byte-wise sum
              * p2 + p1 + p0 + q0 (presumably a rounding correction).
              * pavgb(p0, q0) is saved at stack + 0x10, the sum at stack + 0x00
              * and a copy of the sum stays in ftmp8. */
1649         "pavgb      %[ftmp5],   %[ftmp15],      %[ftmp1]                \n\t"
1650         "pavgb      %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1651         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1652         MMI_SDC1(%[ftmp6], %[stack], 0x10)
1653         "paddb      %[ftmp7],   %[ftmp15],      %[ftmp1]                \n\t"
1654         "paddb      %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1655         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1656         "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1657         MMI_SDC1(%[ftmp7], %[stack], 0x00)
1658         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1659         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1660         "xor        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1661         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1662         "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
             /* ftmp6 = first candidate for row p0, built from p2, q1, p1,
              * pavgb(p0, q0) (reloaded from stack + 0x10) and the sum in ftmp8. */
1663         "pavgb      %[ftmp6],   %[ftmp15],      %[ftmp4]                \n\t"
1664         "psubb      %[ftmp7],   %[ftmp15],      %[ftmp4]                \n\t"
1665         "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1666         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1667         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1668         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1669         MMI_LDC1(%[ftmp13], %[stack], 0x10)
1670         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
1671         "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1672         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1673         "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1674         "xor        %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1675         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1676         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
             /* ftmp7 = second candidate for row p0, built from p0, q1 and p1
              * only: the average of p0 and q1 with its rounding bit removed,
              * then pavgb with p1. The masks from stack + 0x30 (ftmp13) and
              * stack + 0x20 (ftmp12) are loaded on the way. */
1677         "xor        %[ftmp8],   %[ftmp2],       %[ftmp4]                \n\t"
1678         "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1679         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1680         "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1681         MMI_LDC1(%[ftmp13], %[stack], 0x30)
1682         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1683         MMI_LDC1(%[ftmp12], %[stack], 0x20)
             /* Bitwise per-byte select for row p0: the first candidate where
              * the p-side mask is set, otherwise the second candidate where the
              * base mask is set, otherwise the unchanged p0. The result is
              * stored to addr0 + addr1 = pix - stride. */
1684         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1685         "xor        %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
1686         "and        %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1687         "and        %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1688         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1689         "xor        %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
1690         MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
             /* ftmp6 = p3 (pix - 4 * stride); turn it into the candidate for
              * row p2 using p3, p2, the p1 candidate in ftmp5 and the sum saved
              * at stack + 0x00. ftmp12 is then reloaded with the p-side mask. */
1691         MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1692         "paddb      %[ftmp7],   %[ftmp15],      %[ftmp6]                \n\t"
1693         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1694         MMI_LDC1(%[ftmp12], %[stack], 0x00)
1695         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1696         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1697         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1698         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1699         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1700         "xor        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1701         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1702         MMI_LDC1(%[ftmp12], %[stack], 0x30)
1703         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
             /* Where the p-side mask is set take the candidates, elsewhere keep
              * the original bytes; store row p1 (addr0 + 2 * stride) and row p2
              * (addr0 + stride). */
1704         "xor        %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1705         "xor        %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1706         "and        %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1707         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1708         "xor        %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1709         "xor        %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1710         MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1711         MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
             /* ---- q side ----
              * Same sequence with the roles mirrored across the edge
              * (q2, q1, q0 against p0, p1) and the q-side mask from
              * stack + 0x40 instead of stack + 0x30. The q side uses the
              * original p0 and p1 still held in ftmp2 / ftmp1, not the values
              * just stored.
              * ftmp5 = candidate for row q1; pavgb(q0, p0) goes to stack + 0x10
              * and the sum q2 + q1 + q0 + p0 to stack + 0x00. */
1712         "pavgb      %[ftmp5],   %[ftmp14],      %[ftmp4]                \n\t"
1713         "pavgb      %[ftmp6],   %[ftmp3],       %[ftmp2]                \n\t"
1714         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1715         MMI_SDC1(%[ftmp6], %[stack], 0x10)
1716         "paddb      %[ftmp7],   %[ftmp14],      %[ftmp4]                \n\t"
1717         "paddb      %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1718         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1719         "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1720         MMI_SDC1(%[ftmp7], %[stack], 0x00)
1721         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1722         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1723         "xor        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1724         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1725         "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
             /* ftmp6 = first candidate for row q0, built from q2, p1, q1,
              * pavgb(q0, p0) and the sum in ftmp8. */
1726         "pavgb      %[ftmp6],   %[ftmp14],      %[ftmp1]                \n\t"
1727         "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1728         "psubb      %[ftmp7],   %[ftmp14],      %[ftmp1]                \n\t"
1729         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1730         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1731         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1732         MMI_LDC1(%[ftmp12], %[stack], 0x10)
1733         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1734         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1735         "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1736         "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1737         "xor        %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1738         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1739         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
             /* ftmp7 = second candidate for row q0, built from q0, p1 and q1
              * only. The masks from stack + 0x40 (ftmp12) and stack + 0x20
              * (ftmp13) are loaded on the way. */
1740         "xor        %[ftmp8],   %[ftmp3],       %[ftmp1]                \n\t"
1741         "pavgb      %[ftmp7],   %[ftmp3],       %[ftmp1]                \n\t"
1742         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1743         MMI_LDC1(%[ftmp12], %[stack], 0x40)
1744         "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1745         MMI_LDC1(%[ftmp13], %[stack], 0x20)
1746         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
             /* Per-byte select for row q0 (first candidate under the q-side
              * mask, else second candidate under the base mask, else q0) and
              * store to pix. */
1747         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1748         "xor        %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
1749         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1750         "and        %[ftmp7],   %[ftmp7],       %[ftmp13]               \n\t"
1751         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1752         "xor        %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
1753         MMI_SDC1(%[ftmp6], %[pix], 0x00)
             /* ftmp6 = q3 (pix + 3 * stride); turn it into the candidate for
              * row q2. ftmp12 is then reloaded with the q-side mask. */
1754         MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1755         "paddb      %[ftmp7],   %[ftmp14],      %[ftmp6]                \n\t"
1756         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1757         MMI_LDC1(%[ftmp12], %[stack], 0x00)
1758         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1759         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1760         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1761         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1762         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1763         "xor        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1764         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1765         MMI_LDC1(%[ftmp12], %[stack], 0x40)
1766         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
             /* Where the q-side mask is set take the candidates, elsewhere keep
              * the original bytes; store row q1 (pix + stride) and row q2
              * (pix + 2 * stride). */
1767         "xor        %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1768         "xor        %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1769         "and        %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1770         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1771         "xor        %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1772         "xor        %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1773         MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1774         MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
             /* Early-exit target for alpha - 1 < 0 or beta - 1 < 0. */
1775         "1:                                                             \n\t"
1776         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1777           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1778           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1779           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1780           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1781           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1782           [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
1783           [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
1784           [tmp0]"=&r"(tmp[0]),
1785           RESTRICT_ASM_ALL64
1786           RESTRICT_ASM_ADDRT
1787           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1788           [addr2]"=&r"(addr[2]),
               /* alpha and beta are decremented in the asm, hence read-write. */
1789           [alpha]"+&r"(alpha),              [beta]"+&r"(beta)
1790         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1791           [stack]"r"(stack),                [ff_pb_1]"m"(ff_pb_1)
1792         : "memory"
1793     );
1794 }
1795
/**
 * Deblock one horizontal chroma edge, 8 pixels wide (Loongson MMI).
 *
 * The asm reads the four rows pix - 2 * stride .. pix + stride (8 bytes each,
 * called p1, p0, q0, q1 below, with q0 = pix) and rewrites the two rows next
 * to the edge, p0 (pix - stride) and q0 (pix).
 *
 * @param pix    first byte of row q0
 * @param stride distance in bytes between two consecutive rows
 * @param alpha  a byte is filtered only where |p0 - q0| <= alpha - 1
 * @param beta   ... and |p1 - p0| <= beta - 1 and |q1 - q0| <= beta - 1
 * @param tc0    four bytes are read; each one is duplicated to cover two
 *               adjacent pixels and, compared as an unsigned byte, limits how
 *               far p0 and q0 may move
 */
1796 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1797         int beta, int8_t *tc0)
1798 {
1799     double ftmp[9];
1800     mips_reg addr[1];
1801     DECLARE_VAR_LOW32;
1802     DECLARE_VAR_ALL64;
1803     DECLARE_VAR_ADDRT;
1804
1805     __asm__ volatile (
             /* alpha -= 1, beta -= 1 (both are read-write operands). */
1806         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1807         "addi       %[beta],    %[beta],        -0x01                   \n\t"
             /* addr0 = pix - 2 * stride (row p1). */
1808         "or         %[addr0],   $0,             %[pix]                  \n\t"
1809         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1810         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
             /* ftmp1 = p1, ftmp2 = p0, ftmp3 = q0, ftmp4 = q1. */
1811         MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1812         MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1813         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1814         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1815
             /* Replicate alpha - 1 into every byte of ftmp5 and beta - 1 into
              * every byte of ftmp6 (ftmp0 = 0 is the pshufh selector). */
1816         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1817         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1818         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1819         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1820         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1821         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1822         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
             /* ftmp8 accumulates (OR) the amounts by which |p0 - q0| exceeds
              * alpha - 1 and |p1 - p0|, |q1 - q0| exceed beta - 1; each absolute
              * difference is the OR of the two saturating subtractions. */
1823         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1824         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1825         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1826         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1827         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1828         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1829         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1830         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1831         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1832         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1833         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1834         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1835         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1836         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
             /* ftmp8 = 0xff in each byte where all three tests passed. */
1837         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1838         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
             /* Load 4 tc0 bytes, duplicate each one (8 bytes in total) and
              * AND with the mask: ftmp8 = per-byte limit, 0 where filtering
              * is disabled. */
1839         MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1840         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1841         "and        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
             /* Derive the correction from p1, p0, q0, q1 with byte averages;
              * the intermediate value in ftmp4 is biased around ff_pb_A1. */
1842         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1843         "xor        %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1844         "xor        %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1845         "and        %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1846         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1847         "xor        %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1848         "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1849         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1850         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1851         "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
             /* Split into two unsigned magnitudes, of which at most one is
              * non-zero: ftmp7 = ff_pb_A1 - x and ftmp4 = x - ff_pb_A1 (both
              * saturating); clamp each to the limit in ftmp8. */
1852         "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1853         "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1854         "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1855         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
             /* p0 = p0 - ftmp7 + ftmp4, q0 = q0 - ftmp4 + ftmp7 (saturating). */
1856         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1857         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1858         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1859         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1860
             /* Store p0 to pix - stride and q0 to pix. */
1861         MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1862         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1863         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1864           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1865           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1866           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1867           [ftmp8]"=&f"(ftmp[8]),
1868           RESTRICT_ASM_LOW32
1869           RESTRICT_ASM_ALL64
1870           RESTRICT_ASM_ADDRT
1871           [addr0]"=&r"(addr[0]),
               /* The asm decrements alpha and beta, so they must be read-write
                * operands: the compiler assumes input-only operands are left
                * unchanged. */
1872           [alpha]"+&r"(alpha),              [beta]"+&r"(beta)
1873         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1874           [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1),
1875           [ff_pb_3]"f"(ff_pb_3),            [ff_pb_A1]"f"(ff_pb_A1)
1876         : "memory"
1877     );
1878 }
1879
/**
 * Intra deblocking of one horizontal chroma edge, 8 pixels wide
 * (Loongson MMI).
 *
 * The asm reads the four rows pix - 2 * stride .. pix + stride (8 bytes each,
 * called p1, p0, q0, q1 below, with q0 = pix) and rewrites the two rows next
 * to the edge, p0 (pix - stride) and q0 (pix).
 *
 * @param pix    first byte of row q0
 * @param stride distance in bytes between two consecutive rows
 * @param alpha  a byte is filtered only where |p0 - q0| <= alpha - 1
 * @param beta   ... and |p1 - p0| <= beta - 1 and |q1 - q0| <= beta - 1
 */
1880 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
1881         int beta)
1882 {
1883     double ftmp[9];
1884     mips_reg addr[1];
1885     DECLARE_VAR_ALL64;
1886     DECLARE_VAR_ADDRT;
1887
1888     __asm__ volatile (
             /* alpha -= 1, beta -= 1 (both are read-write operands). */
1889         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1890         "addi       %[beta],    %[beta],        -0x01                   \n\t"
             /* addr0 = pix - 2 * stride (row p1). */
1891         "or         %[addr0],   $0,             %[pix]                  \n\t"
1892         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1893         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
             /* ftmp1 = p1, ftmp2 = p0, ftmp3 = q0, ftmp4 = q1. */
1894         MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1895         MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1896         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1897         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1898
             /* Replicate alpha - 1 into every byte of ftmp5 and beta - 1 into
              * every byte of ftmp6 (ftmp0 = 0 is the pshufh selector). */
1899         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1900         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1901         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1902         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1903         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1904         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1905         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
             /* ftmp8 accumulates (OR) the amounts by which |p0 - q0| exceeds
              * alpha - 1 and |p1 - p0|, |q1 - q0| exceed beta - 1. */
1906         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1907         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1908         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1909         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1910         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1911         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1912         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1913         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1914         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1915         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1916         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1917         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1918         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1919         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
             /* ftmp8 = 0xff in each byte where all three tests passed. */
1920         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1921         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
             /* Keep the original p0 / q0 in ftmp6 / ftmp7 for the final blend. */
1922         "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
1923         "mov.d      %[ftmp7],   %[ftmp3]                                \n\t"
             /* New p0: average of p0 and q1 with its rounding bit removed,
              * then pavgb with p1. */
1924         "xor        %[ftmp5],   %[ftmp2],       %[ftmp4]                \n\t"
1925         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1926         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1927         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1928         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
             /* New q0: average of q0 and p1 with its rounding bit removed,
              * then pavgb with q1. */
1929         "xor        %[ftmp5],   %[ftmp3],       %[ftmp1]                \n\t"
1930         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1931         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1932         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
1933         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
             /* Blend: result = old + ((new - old) & mask), i.e. the new value
              * where the mask is set and the original byte elsewhere. */
1934         "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1935         "psubb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1936         "and        %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
1937         "and        %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
1938         "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1939         "paddb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1940
             /* Store p0 to pix - stride and q0 to pix. */
1941         MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1942         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1943         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1944           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1945           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1946           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1947           [ftmp8]"=&f"(ftmp[8]),
1948           RESTRICT_ASM_ALL64
1949           RESTRICT_ASM_ADDRT
1950           [addr0]"=&r"(addr[0]),
               /* The asm decrements alpha and beta, so they must be read-write
                * operands: the compiler assumes input-only operands are left
                * unchanged. */
1951           [alpha]"+&r"(alpha),              [beta]"+&r"(beta)
1952         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1953           [ff_pb_1]"f"(ff_pb_1)
1954         : "memory"
1955     );
1956 }
1957
/**
 * Loongson MMI chroma deblocking filter across the column boundary at pix,
 * operating on 8 consecutive rows of 8-bit samples.
 *
 * Four bytes are read from each of the 8 rows starting at pix - 2 and
 * transposed so that each of ftmp0..ftmp3 holds one column (pix[-2],
 * pix[-1], pix[0], pix[1]) for all 8 rows. The two inner columns are
 * adjusted by a correction that is clipped per row by tc0 and gated by the
 * alpha/beta mask; the result is transposed back and written to the same
 * 8 x 4 byte area.
 *
 * @param pix    points at the column just right of the boundary in row 0
 * @param stride distance in bytes between consecutive rows
 * @param alpha  a row is filtered only if |pix[0] - pix[-1]| <= alpha - 1
 * @param beta   ... and |pix[-2] - pix[-1]| <= beta - 1
 *               and |pix[1] - pix[0]| <= beta - 1
 * @param tc0    4 clipping bytes; each one is duplicated so it covers 2 rows
 *
 * alpha and beta are decremented by the asm itself, so they are declared as
 * read-write (earlyclobber) operands: GCC requires input-only operands to be
 * left unmodified by the asm statement.
 */
1958 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1959         int8_t *tc0)
1960 {
1961     double ftmp[11];
1962     mips_reg addr[6];
1963     DECLARE_VAR_LOW32;
1964
1965     __asm__ volatile (
             /* Thresholds are tested with saturating subtraction against
              * alpha - 1 and beta - 1. */
1966         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1967         "addi       %[beta],    %[beta],        -0x01                   \n\t"
             /* addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride;
              * addr5 = pix - 2 (row 0); pix is advanced to row 3. */
1968         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1969         PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
1970         PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
1971         PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
1972         "or         %[addr5],   $0,             %[pix]                  \n\t"
1973         PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
             /* Load rows 0..3 and start the byte transpose. */
1974         MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1975         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
1976         MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1977         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
1978         MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1979         MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1980         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
1981         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1982         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
1983         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
1984         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
             /* Load rows 4..7 (pix + stride .. pix + 4*stride). */
1985         MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1986         PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
1987         MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1988         PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
1989         MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1990         PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
1991         MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1992         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1993         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1994         "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
1995         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1996         "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1997         "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
1998         "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
1999         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2000         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
             /* ftmp0..ftmp3 now hold columns pix[-2], pix[-1], pix[0], pix[1].
              * Keep copies of the two outer columns for the store-back. */
2001         "mov.d      %[ftmp9],   %[ftmp0]                                \n\t"
2002         "mov.d      %[ftmp10],  %[ftmp3]                                \n\t"
2003
             /* Broadcast alpha - 1 (ftmp4) and beta - 1 (ftmp5) to all byte
              * lanes, then build the per-row mask in ftmp7: 0xff where all
              * three differences are within their threshold, else 0. */
2004         "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
2005         "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2006         "mtc1       %[beta],    %[ftmp5]                                \n\t"
2007         "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2008         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2009         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2010         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2011         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2012         "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2013         "or         %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2014         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2015         "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2016         "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2017         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2018         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2019         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2020         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2021         "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2022         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2023         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2024         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2025         "xor        %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2026         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
             /* Duplicate each of the 4 tc0 bytes (one per 2 rows) and gate
              * them with the mask; ftmp7 becomes the per-row clip limit. */
2027         MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2028         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2029         "and        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
             /* Derive the correction from the four columns, limit it to
              * ftmp7 (pminub) and apply it with saturation to the two inner
              * columns ftmp1 / ftmp2. */
2030         "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2031         "xor        %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
2032         "xor        %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2033         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
2034         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
2035         "xor        %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
2036         "pavgb      %[ftmp3],   %[ftmp3],       %[ff_pb_3]              \n\t"
2037         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
2038         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2039         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2040         "psubusb    %[ftmp6],   %[ff_pb_A1],    %[ftmp3]                \n\t"
2041         "psubusb    %[ftmp3],   %[ftmp3],       %[ff_pb_A1]             \n\t"
2042         "pminub     %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
2043         "pminub     %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
2044         "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
2045         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2046         "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2047         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2048
             /* Transpose columns (saved ftmp9, filtered ftmp1/ftmp2, saved
              * ftmp10) back into rows and store 4 bytes to each of the
              * 8 rows. */
2049         "punpckhwd  %[ftmp4],   %[ftmp9],       %[ftmp9]                \n\t"
2050         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2051         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2052         "punpcklbh  %[ftmp0],   %[ftmp9],       %[ftmp1]                \n\t"
2053         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
2054         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2055         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2056         MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2057         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2058         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2059         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2060         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2061         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2062         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2063         "punpckhwd  %[ftmp3],   %[ftmp10],      %[ftmp10]               \n\t"
2064         MMI_USWC1(%[ftmp0], %[pix], 0x00)
2065         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2066         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2067         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2068         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2069         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2070         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2071         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2072         PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2073         PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2074         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2075         MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2076         PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2077         "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2078         MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2079         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2080           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2081           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2082           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2083           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2084           [ftmp10]"=&f"(ftmp[10]),
2085           RESTRICT_ASM_LOW32
2086           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2087           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2088           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
               /* pix, alpha and beta are all written by the asm, so they
                * must be read-write operands rather than inputs. */
2089           [pix]"+&r"(pix),                  [alpha]"+&r"(alpha),
2090           [beta]"+&r"(beta)
2091         : [stride]"r"((mips_reg)stride),    [tc0]"r"(tc0),
2092           [ff_pb_1]"f"(ff_pb_1),            [ff_pb_3]"f"(ff_pb_3),
2093           [ff_pb_A1]"f"(ff_pb_A1)
2094         : "memory"
2095     );
2096 }
2097
/**
 * Loongson MMI intra chroma deblocking filter across the column boundary at
 * pix, operating on 8 consecutive rows of 8-bit samples.
 *
 * Four bytes are read from each of the 8 rows starting at pix - 2 and
 * transposed so that ftmp0..ftmp3 hold the columns pix[-2], pix[-1], pix[0],
 * pix[1]. For rows that pass the alpha/beta test the two inner columns are
 * replaced by
 *     pix[-1] = avg(floor((pix[-1] + pix[1])  / 2), pix[-2])
 *     pix[0]  = avg(floor((pix[0]  + pix[-2]) / 2), pix[1])
 * where avg() is the rounding byte average (pavgb); other rows keep their
 * original values. The result is transposed back and written to the same
 * 8 x 4 byte area.
 *
 * @param pix    points at the column just right of the boundary in row 0
 * @param stride distance in bytes between consecutive rows
 * @param alpha  a row is filtered only if |pix[0] - pix[-1]| <= alpha - 1
 * @param beta   ... and |pix[-2] - pix[-1]| <= beta - 1
 *               and |pix[1] - pix[0]| <= beta - 1
 *
 * alpha and beta are decremented by the asm itself, so they are declared as
 * read-write (earlyclobber) operands: GCC requires input-only operands to be
 * left unmodified by the asm statement.
 */
2098 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
2099         int beta)
2100 {
2101     double ftmp[11];
2102     mips_reg addr[6];
2103     DECLARE_VAR_LOW32;
2104
2105     __asm__ volatile (
             /* Thresholds are tested with saturating subtraction against
              * alpha - 1 and beta - 1. */
2106         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
2107         "addi       %[beta],    %[beta],        -0x01                   \n\t"
             /* addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride;
              * addr5 = pix - 2 (row 0); pix is advanced to row 3. */
2108         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2109         PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
2110         PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
2111         PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
2112         "or         %[addr5],   $0,             %[pix]                  \n\t"
2113         PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
             /* Load rows 0..3 and start the byte transpose. */
2114         MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2115         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2116         MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2117         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2118         MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2119         MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2120         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2121         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2122         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2123         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
2124         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
             /* Load rows 4..7 (pix + stride .. pix + 4*stride). */
2125         MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2126         PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
2127         MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2128         PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
2129         MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2130         PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
2131         MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2132         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2133         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
2134         "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
2135         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2136         "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
2137         "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
2138         "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
2139         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2140         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2141
             /* ftmp0..ftmp3 now hold columns pix[-2], pix[-1], pix[0], pix[1].
              * Broadcast alpha - 1 (ftmp4) and beta - 1 (ftmp5) to all byte
              * lanes, then build the per-row mask in ftmp7: 0xff where all
              * three differences are within their threshold, else 0. */
2142         "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
2143         "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2144         "mtc1       %[beta],    %[ftmp5]                                \n\t"
2145         "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2146         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2147         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2148         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2149         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2150         "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2151         "or         %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2152         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2153         "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2154         "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2155         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2156         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2157         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2158         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2159         "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2160         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2161         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2162         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2163         "xor        %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2164         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
             /* Keep the unfiltered inner columns (ftmp5, ftmp6), compute the
              * filtered ones in ftmp1 / ftmp2; subtracting ff_pb_1-masked
              * parity after pavgb turns the rounding average into a floor. */
2165         "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
2166         "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
2167         "xor        %[ftmp4],   %[ftmp1],       %[ftmp3]                \n\t"
2168         "and        %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2169         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2170         "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
2171         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
2172         "xor        %[ftmp4],   %[ftmp2],       %[ftmp0]                \n\t"
2173         "and        %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2174         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
2175         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
2176         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
             /* Masked select: result = old + ((new - old) & mask). */
2177         "psubb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2178         "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2179         "and        %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
2180         "and        %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
2181         "paddb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2182         "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2183
             /* Transpose the four columns back into rows and store 4 bytes
              * to each of the 8 rows. */
2184         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2185         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2186         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2187         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2188         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2189         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2190         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2191         MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2192         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2193         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2194         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2195         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2196         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2197         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2198         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2199         MMI_USWC1(%[ftmp0], %[pix], 0x00)
2200         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2201         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2202         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2203         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2204         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2205         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2206         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2207         PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2208         PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2209         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2210         PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2211         MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2212         "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2213         MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2214         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2215           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2216           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2217           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2218           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2219           [ftmp10]"=&f"(ftmp[10]),
2220           RESTRICT_ASM_LOW32
2221           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2222           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2223           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
               /* pix, alpha and beta are all written by the asm, so they
                * must be read-write operands rather than inputs. */
2224           [pix]"+&r"(pix),                  [alpha]"+&r"(alpha),
2225           [beta]"+&r"(beta)
2226         : [stride]"r"((mips_reg)stride),    [ff_pb_1]"f"(ff_pb_1)
2227         : "memory"
2228     );
2229 }
2230
/**
 * Luma deblocking wrapper that processes two halves located 8 bytes apart.
 *
 * Each half is passed to ff_deblock_v8_luma_8_mmi() with its own pair of
 * tc0 entries (tc0[0..1] for pix, tc0[2..3] for pix + 8). A half is skipped
 * when the bitwise AND of its two tc0 entries is negative, i.e. when both
 * entries have their sign bit set.
 *
 * @param pix    start of the first half; the second half starts at pix + 8
 * @param stride forwarded unchanged to ff_deblock_v8_luma_8_mmi()
 * @param alpha  forwarded unchanged
 * @param beta   forwarded unchanged
 * @param tc0    4 signed clipping values, two per half
 */
2231 void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
2232         int8_t *tc0)
2233 {
         /* First half: uses tc0[0] and tc0[1]. */
2234     if ((tc0[0] & tc0[1]) >= 0)
2235         ff_deblock_v8_luma_8_mmi(pix + 0, stride, alpha, beta, tc0);
         /* Second half: uses tc0[2] and tc0[3]. */
2236     if ((tc0[2] & tc0[3]) >= 0)
2237         ff_deblock_v8_luma_8_mmi(pix + 8, stride, alpha, beta, tc0 + 2);
2238 }
2239
/**
 * Intra luma deblocking wrapper: runs deblock_v8_luma_intra_8_mmi() twice,
 * once at pix and once at pix + 8, with identical stride, alpha and beta.
 *
 * @param pix    start of the first half; the second half starts at pix + 8
 * @param stride forwarded unchanged to deblock_v8_luma_intra_8_mmi()
 * @param alpha  forwarded unchanged
 * @param beta   forwarded unchanged
 */
2240 void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
2241         int beta)
2242 {
2243     deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta);
2244     deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta);
2245 }
2246
2247 void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
2248         int8_t *tc0)
2249 {
2250     DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2251     double ftmp[9];
2252     mips_reg addr[8];
2253     DECLARE_VAR_LOW32;
2254     DECLARE_VAR_ALL64;
2255
2256     __asm__ volatile (
2257         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2258         PTR_ADDI   "%[addr1],   %[pix],         -0x4                    \n\t"
2259         PTR_ADDU   "%[addr2],   %[stride],      %[addr0]                \n\t"
2260         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2261         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2262         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2263         MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2264         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2265         MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2266         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2267         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2268         MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2269         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2270         MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2271         PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2272         MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2273         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2274         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2275         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2276         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2277         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2278         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2279         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2280         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2281         MMI_SDC1(%[ftmp1], %[stack], 0x10)
2282         MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2283         PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2284         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2285         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2286         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2287         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2288         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2289         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2290         MMI_LDC1(%[ftmp8], %[stack], 0x10)
2291         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2292         MMI_SDC1(%[ftmp0], %[stack], 0x00)
2293         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2294         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2295         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2296         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2297         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2298         "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2299         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2300         "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2301         "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2302         MMI_SDC1(%[ftmp1], %[stack], 0x10)
2303         MMI_SDC1(%[ftmp3], %[stack], 0x20)
2304         MMI_SDC1(%[ftmp7], %[stack], 0x30)
2305         MMI_SDC1(%[ftmp5], %[stack], 0x40)
2306         MMI_SDC1(%[ftmp6], %[stack], 0x50)
2307         PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2308         PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2309         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2310         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2311         MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2312         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2313         MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2314         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2315         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2316         MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2317         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2318         MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2319         PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2320         MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2321         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2322         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2323         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2324         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2325         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2326         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2327         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2328         MMI_SDC1(%[ftmp1], %[stack], 0x18)
2329         MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2330         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2331         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2332         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2333         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2334         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2335         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2336         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2337         MMI_LDC1(%[ftmp8], %[stack], 0x18)
2338         MMI_SDC1(%[ftmp0], %[stack], 0x08)
2339         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2340         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2341         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2342         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2343         "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2344         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2345         "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2346         "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2347         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2348         MMI_SDC1(%[ftmp1], %[stack], 0x18)
2349         MMI_SDC1(%[ftmp3], %[stack], 0x28)
2350         MMI_SDC1(%[ftmp7], %[stack], 0x38)
2351         MMI_SDC1(%[ftmp5], %[stack], 0x48)
2352         MMI_SDC1(%[ftmp6], %[stack], 0x58)
2353         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2354           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2355           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2356           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2357           [ftmp8]"=&f"(ftmp[8]),
2358           RESTRICT_ASM_ALL64
2359           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2360           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2361           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2362           [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2363         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2364           [stack]"r"(stack)
2365         : "memory"
2366     );
2367
2368     ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2369
2370     __asm__ volatile (
2371         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2372         PTR_ADDI   "%[addr1],   %[pix],         -0x02                   \n\t"
2373         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2374         PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2375         PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2376         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2377         MMI_LDC1(%[ftmp0], %[stack], 0x10)
2378         MMI_LDC1(%[ftmp1], %[stack], 0x20)
2379         MMI_LDC1(%[ftmp2], %[stack], 0x30)
2380         MMI_LDC1(%[ftmp3], %[stack], 0x40)
2381         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2382         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2383         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2384         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2385         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2386         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2387         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2388         MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2389         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2390         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2391         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2392         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2393         MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2394         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2395         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2396         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2397         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2398         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2399         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2400         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2401         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2402         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2403         PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2404         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2405         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2406         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2407         MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2408         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2409         "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2410         PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2411         MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2412         PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2413         MMI_LDC1(%[ftmp0], %[stack], 0x18)
2414         MMI_LDC1(%[ftmp1], %[stack], 0x28)
2415         MMI_LDC1(%[ftmp2], %[stack], 0x38)
2416         MMI_LDC1(%[ftmp3], %[stack], 0x48)
2417         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2418         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2419         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2420         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2421         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2422         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2423         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2424         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2425         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2426         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2427         MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2428         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2429         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2430         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2431         MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2432         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2433         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2434         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2435         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2436         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2437         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2438         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2439         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2440         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2441         PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2442         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2443         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2444         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2445         MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2446         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2447         "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2448         MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2449         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2450           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2451           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2452           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2453           [ftmp8]"=&f"(ftmp[8]),
2454           RESTRICT_ASM_LOW32
2455           RESTRICT_ASM_ALL64
2456           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2457           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2458           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2459           [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2460         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2461           [stack]"r"(stack)
2462         : "memory"
2463     );
2464 }
2465
2466 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
2467         int beta)
2468 {
2469     DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
2470     DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2471     double ftmp[9];
2472     mips_reg addr[7];
2473     DECLARE_VAR_ALL64;
2474
2475     __asm__ volatile (
2476         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2477         PTR_ADDI   "%[addr1],   %[pix],         -0x04                   \n\t"
2478         PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2479         PTR_ADDU   "%[addr3],   %[addr0],       %[addr0]                \n\t"
2480         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2481         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2482         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2483         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2484         MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2485         MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2486         PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2487         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2488         PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2489         MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2490         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2491         MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2492         MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2493         PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
2494         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2495         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2496         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2497         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2498         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2499         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2500         MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2501         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2502         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2503         MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2504         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2505         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2506         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2507         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2508         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2509         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2510         MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2511         MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2512         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2513         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2514         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2515         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2516         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2517         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2518         MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2519         MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2520         MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2521         MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2522         MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2523         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2524         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2525         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2526         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2527         PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
2528         MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2529         MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2530         MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2531         MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
2532         PTR_ADDU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2533         PTR_ADDU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
2534         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2535         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2536         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2537         MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2538         MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2539         PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2540         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2541         PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2542         MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2543         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2544         MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2545         MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2546         PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
2547         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2548         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2549         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2550         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2551         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2552         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2553         MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2554         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2555         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2556         MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2557         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2558         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2559         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2560         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2561         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2562         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2563         MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2564         MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2565         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2566         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2567         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2568         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2569         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2570         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2571         MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2572         MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2573         MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2574         MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2575         MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2576         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2577         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2578         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2579         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2580         MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2581         MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2582         MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2583         MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
2584         PTR_S      "%[addr1],   0x00(%[pdat])                           \n\t"
2585         PTR_S      "%[addr2],   0x08(%[pdat])                           \n\t"
2586         PTR_S      "%[addr0],   0x10(%[pdat])                           \n\t"
2587         PTR_S      "%[addr3],   0x18(%[pdat])                           \n\t"
2588         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2589           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2590           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2591           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2592           [ftmp8]"=&f"(ftmp[8]),
2593           RESTRICT_ASM_ALL64
2594           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2595           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2596           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2597           [addr6]"=&r"(addr[6])
2598         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2599           [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2600         : "memory"
2601     );
2602
2603     ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2604
2605     __asm__ volatile (
2606         PTR_L      "%[addr1],   0x00(%[pdat])                           \n\t"
2607         PTR_L      "%[addr2],   0x08(%[pdat])                           \n\t"
2608         PTR_L      "%[addr0],   0x10(%[pdat])                           \n\t"
2609         PTR_L      "%[addr3],   0x18(%[pdat])                           \n\t"
2610         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2611         MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2612         MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2613         MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2614         MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2615         MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2616         MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2617         MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2618         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2619         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2620         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2621         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2622         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2623         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2624         MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2625         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2626         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2627         MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2628         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2629         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2630         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2631         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2632         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2633         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2634         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2635         MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2636         MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2637         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2638         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2639         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2640         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2641         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2642         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2643         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2644         MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2645         PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2646         MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2647         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2648         MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2649         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2650         MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2651         MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2652         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2653         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2654         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2655         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2656         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2657         MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2658         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2659         MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2660         PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2661         MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2662         PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
2663         MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2664         PTR_SUBU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2665         PTR_SUBU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
2666         MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2667         MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2668         MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2669         MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2670         MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2671         MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2672         MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2673         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2674         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2675         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2676         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2677         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2678         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2679         MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2680         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2681         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2682         MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2683         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2684         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2685         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2686         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2687         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2688         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2689         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2690         MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2691         MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2692         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2693         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2694         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2695         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2696         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2697         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2698         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2699         MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2700         PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2701         MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2702         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2703         MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2704         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2705         MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2706         MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2707         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2708         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2709         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2710         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2711         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2712         MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2713         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2714         MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2715         PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2716         MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2717         MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2718         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2719           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2720           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2721           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2722           [ftmp8]"=&f"(ftmp[8]),
2723           RESTRICT_ASM_ALL64
2724           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2725           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2726           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2727           [addr6]"=&r"(addr[6])
2728         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2729           [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2730         : "memory"
2731     );
2732 }