git.sesse.net Git - ffmpeg/blob - libavcodec/mips/h264dsp_mmi.c
173e191c7719794685ab14663d8344014b89c0a7
[ffmpeg] / libavcodec / mips / h264dsp_mmi.c
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *                    Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavcodec/bit_depth_template.c"
27 #include "h264dsp_mips.h"
28 #include "libavutil/mips/mmiutils.h"
29
30 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
31 {
32     double ftmp[9];
33     DECLARE_VAR_LOW32;
34
35     __asm__ volatile (
36         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
37         MMI_LDC1(%[ftmp1], %[src], 0x00)
38         MMI_LDC1(%[ftmp2], %[src], 0x08)
39         MMI_LDC1(%[ftmp3], %[src], 0x10)
40         MMI_LDC1(%[ftmp4], %[src], 0x18)
41         /* memset(src, 0, 32); */
42         "gssqc1     %[ftmp0],   %[ftmp0],       0x00(%[src])            \n\t"
43         "gssqc1     %[ftmp0],   %[ftmp0],       0x10(%[src])            \n\t"
44         MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
45         MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
46         MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
47         MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
48         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
49         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
50         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
51         "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
52         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
53         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
54         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
55         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
56         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
57         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
58         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
59         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
60         MMI_SWC1(%[ftmp1], %[dst0], 0x00)
61         MMI_SWC1(%[ftmp2], %[dst1], 0x00)
62         MMI_SWC1(%[ftmp3], %[dst2], 0x00)
63         MMI_SWC1(%[ftmp4], %[dst3], 0x00)
64         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
65           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
66           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
67           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
68           RESTRICT_ASM_LOW32
69           [ftmp8]"=&f"(ftmp[8])
70         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
71           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
72           [src]"r"(src)
73         : "memory"
74     );
75
76 }
77
78 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
79 {
80     double ftmp[12];
81     uint64_t tmp[1];
82     DECLARE_VAR_LOW32;
83     DECLARE_VAR_ADDRT;
84
85     __asm__ volatile (
86         MMI_LDC1(%[ftmp0], %[block], 0x00)
87         MMI_LDC1(%[ftmp1], %[block], 0x08)
88         MMI_LDC1(%[ftmp2], %[block], 0x10)
89         MMI_LDC1(%[ftmp3], %[block], 0x18)
90         /* memset(block, 0, 32) */
91         "xor        %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
92         "gssqc1     %[ftmp4],   %[ftmp4],       0x00(%[block])          \n\t"
93         "gssqc1     %[ftmp4],   %[ftmp4],       0x10(%[block])          \n\t"
94         "dli        %[tmp0],    0x01                                    \n\t"
95         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
96         "dli        %[tmp0],    0x06                                    \n\t"
97         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
98         "psrah      %[ftmp4],   %[ftmp1],       %[ftmp8]                \n\t"
99         "psrah      %[ftmp5],   %[ftmp3],       %[ftmp8]                \n\t"
100         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
101         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
102         "paddh      %[ftmp10],  %[ftmp2],       %[ftmp0]                \n\t"
103         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
104         "paddh      %[ftmp11],  %[ftmp5],       %[ftmp10]               \n\t"
105         "psubh      %[ftmp2],   %[ftmp10],      %[ftmp5]                \n\t"
106         "paddh      %[ftmp10],  %[ftmp4],       %[ftmp0]                \n\t"
107         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
108         "punpckhhw  %[ftmp1],   %[ftmp11],      %[ftmp10]               \n\t"
109         "punpcklhw  %[ftmp5],   %[ftmp11],      %[ftmp10]               \n\t"
110         "punpckhhw  %[ftmp4],   %[ftmp0],       %[ftmp2]                \n\t"
111         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
112         "punpckhwd  %[ftmp2],   %[ftmp5],       %[ftmp0]                \n\t"
113         "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
114         "punpcklwd  %[ftmp10],  %[ftmp1],       %[ftmp4]                \n\t"
115         "punpckhwd  %[ftmp0],   %[ftmp1],       %[ftmp4]                \n\t"
116         "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_32]             \n\t"
117         "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
118         "psrah      %[ftmp3],   %[ftmp0],       %[ftmp8]                \n\t"
119         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
120         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
121         "paddh      %[ftmp1],   %[ftmp10],      %[ftmp5]                \n\t"
122         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
123         "paddh      %[ftmp10],  %[ftmp3],       %[ftmp1]                \n\t"
124         "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
125         "paddh      %[ftmp11],  %[ftmp4],       %[ftmp5]                \n\t"
126         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
127         MMI_ULWC1(%[ftmp2], %[dst], 0x00)
128         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
129         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
130         "psrah      %[ftmp3],   %[ftmp10],      %[ftmp9]                \n\t"
131         "psrah      %[ftmp4],   %[ftmp11],      %[ftmp9]                \n\t"
132         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
133         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
134         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
135         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
136         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
137         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
138         MMI_SWC1(%[ftmp2], %[dst], 0x00)
139         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
140         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
141         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
142         MMI_ULWC1(%[ftmp2], %[dst], 0x00)
143         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
144         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
145         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
146         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
147         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
148         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
149         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
150         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
151         MMI_SWC1(%[ftmp2], %[dst], 0x00)
152         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
153         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
154         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
155           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
156           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
157           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
158           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
159           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
160           RESTRICT_ASM_LOW32
161           RESTRICT_ASM_ADDRT
162           [tmp0]"=&r"(tmp[0])
163         : [dst]"r"(dst),                    [block]"r"(block),
164           [stride]"r"((mips_reg)stride),    [ff_pw_32]"f"(ff_pw_32)
165         : "memory"
166     );
167
168 }
169
170 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
171 {
172     double ftmp[16];
173     uint64_t tmp[7];
174     mips_reg addr[1];
175     DECLARE_VAR_LOW32;
176     DECLARE_VAR_ADDRT;
177
178     __asm__ volatile (
179         "lhu        %[tmp0],    0x00(%[block])                          \n\t"
180         PTR_ADDI   "$sp,        $sp,            -0x20                   \n\t"
181         PTR_ADDIU  "%[tmp0],    %[tmp0],        0x20                    \n\t"
182         MMI_LDC1(%[ftmp1], %[block], 0x10)
183         "sh         %[tmp0],    0x00(%[block])                          \n\t"
184         MMI_LDC1(%[ftmp2], %[block], 0x20)
185         "dli        %[tmp0],    0x01                                    \n\t"
186         MMI_LDC1(%[ftmp3], %[block], 0x30)
187         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
188         MMI_LDC1(%[ftmp5], %[block], 0x50)
189         MMI_LDC1(%[ftmp6], %[block], 0x60)
190         MMI_LDC1(%[ftmp7], %[block], 0x70)
191         "mov.d      %[ftmp0],   %[ftmp1]                                \n\t"
192         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
193         "psrah      %[ftmp4],   %[ftmp5],       %[ftmp8]                \n\t"
194         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
195         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
196         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
197         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
198         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
199         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
200         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
201         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
202         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
203         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
204         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
205         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
206         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
207         "dli        %[tmp0],    0x02                                    \n\t"
208         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
209         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
210         "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
211         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
212         "psrah      %[ftmp3],   %[ftmp4],       %[ftmp9]                \n\t"
213         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
214         "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
215         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
216         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
217         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
218         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
219         "mov.d      %[ftmp5],   %[ftmp6]                                \n\t"
220         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
221         "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
222         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
223         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
224         MMI_LDC1(%[ftmp2], %[block], 0x00)
225         MMI_LDC1(%[ftmp5], %[block], 0x40)
226         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
227         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
228         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
229         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
230         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
231         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
232         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
233         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
234         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
235         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
236         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
237         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
238         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
239         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
240         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
241         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
242         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
243         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
244         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
245         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
246         MMI_SDC1(%[ftmp6], %[block], 0x00)
247         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
248         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp0]                \n\t"
249         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
250         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp1]                \n\t"
251         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
252         "punpckhwd  %[ftmp1],   %[ftmp7],       %[ftmp3]                \n\t"
253         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
254         "punpckhwd  %[ftmp3],   %[ftmp6],       %[ftmp0]                \n\t"
255         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
256         MMI_LDC1(%[ftmp0], %[block], 0x00)
257         MMI_SDC1(%[ftmp7], $sp, 0x00)
258         MMI_SDC1(%[ftmp1], $sp, 0x10)
259         "dmfc1      %[tmp1],    %[ftmp6]                                \n\t"
260         "dmfc1      %[tmp3],    %[ftmp3]                                \n\t"
261         "punpckhhw  %[ftmp3],   %[ftmp5],       %[ftmp2]                \n\t"
262         "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
263         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp0]                \n\t"
264         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
265         "punpckhwd  %[ftmp0],   %[ftmp5],       %[ftmp4]                \n\t"
266         "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
267         "punpckhwd  %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
268         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
269         MMI_SDC1(%[ftmp5], $sp, 0x08)
270         MMI_SDC1(%[ftmp0], $sp, 0x18)
271         "dmfc1      %[tmp2],    %[ftmp3]                                \n\t"
272         "dmfc1      %[tmp4],    %[ftmp4]                                \n\t"
273         MMI_LDC1(%[ftmp1], %[block], 0x18)
274         MMI_LDC1(%[ftmp6], %[block], 0x28)
275         MMI_LDC1(%[ftmp2], %[block], 0x38)
276         MMI_LDC1(%[ftmp0], %[block], 0x58)
277         MMI_LDC1(%[ftmp3], %[block], 0x68)
278         MMI_LDC1(%[ftmp4], %[block], 0x78)
279         "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
280         "psrah      %[ftmp5],   %[ftmp0],       %[ftmp8]                \n\t"
281         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
282         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
283         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
284         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
285         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
286         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
287         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
288         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
289         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
290         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
291         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
292         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
293         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
294         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
295         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
296         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
297         "psrah      %[ftmp2],   %[ftmp5],       %[ftmp9]                \n\t"
298         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
299         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
300         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
301         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
302         "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
303         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
304         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
305         "mov.d      %[ftmp0],   %[ftmp3]                                \n\t"
306         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
307         "psrah      %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
308         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
309         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
310         MMI_LDC1(%[ftmp6], %[block], 0x08)
311         MMI_LDC1(%[ftmp0], %[block], 0x48)
312         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
313         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
314         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
315         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
316         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
317         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
318         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
319         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
320         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
321         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
322         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
323         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
324         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
325         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
326         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
327         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
328         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
329         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
330         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
331         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
332         MMI_SDC1(%[ftmp3], %[block], 0x08)
333         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
334         "punpckhhw  %[ftmp3],   %[ftmp4],       %[ftmp7]                \n\t"
335         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
336         "punpckhhw  %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
337         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
338         "punpckhwd  %[ftmp1],   %[ftmp4],       %[ftmp2]                \n\t"
339         "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
340         "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp7]                \n\t"
341         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
342         MMI_LDC1(%[ftmp7], %[block], 0x08)
343         "dmfc1      %[tmp5],    %[ftmp4]                                \n\t"
344         "mov.d      %[ftmp10],  %[ftmp1]                                \n\t"
345         "mov.d      %[ftmp12],  %[ftmp3]                                \n\t"
346         "mov.d      %[ftmp14],  %[ftmp2]                                \n\t"
347         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp6]                \n\t"
348         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
349         "punpckhhw  %[ftmp6],   %[ftmp5],       %[ftmp7]                \n\t"
350         "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
351         "punpckhwd  %[ftmp7],   %[ftmp0],       %[ftmp5]                \n\t"
352         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
353         "punpckhwd  %[ftmp5],   %[ftmp2],       %[ftmp6]                \n\t"
354         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
355         "dmfc1      %[tmp6],    %[ftmp0]                                \n\t"
356         "mov.d      %[ftmp11],  %[ftmp7]                                \n\t"
357         "mov.d      %[ftmp13],  %[ftmp2]                                \n\t"
358         "mov.d      %[ftmp15],  %[ftmp5]                                \n\t"
359         PTR_ADDIU  "%[addr0],   %[dst],         0x04                    \n\t"
360         "mov.d      %[ftmp7],   %[ftmp10]                               \n\t"
361         "dmtc1      %[tmp3],    %[ftmp6]                                \n\t"
362         MMI_LDC1(%[ftmp1], $sp, 0x10)
363         "dmtc1      %[tmp1],    %[ftmp3]                                \n\t"
364         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
365         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
366         "psrah      %[ftmp0],   %[ftmp7],       %[ftmp8]                \n\t"
367         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
368         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
369         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
370         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp14]               \n\t"
371         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
372         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
373         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
374         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
375         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
376         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp14]               \n\t"
377         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp14]               \n\t"
378         "psrah      %[ftmp5],   %[ftmp14],      %[ftmp8]                \n\t"
379         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
380         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
381         "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
382         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
383         "psrah      %[ftmp6],   %[ftmp0],       %[ftmp9]                \n\t"
384         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
385         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
386         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
387         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
388         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
389         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
390         "mov.d      %[ftmp7],   %[ftmp12]                               \n\t"
391         "psrah      %[ftmp2],   %[ftmp12],      %[ftmp8]                \n\t"
392         "psrah      %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
393         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
394         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
395         MMI_LDC1(%[ftmp3], $sp, 0x00)
396         "dmtc1      %[tmp5],    %[ftmp7]                                \n\t"
397         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
398         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
399         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
400         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
401         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
402         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
403         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
404         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
405         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
406         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
407         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
408         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
409         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
410         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
411         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
412         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
413         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
414         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
415         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
416         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
417         MMI_SDC1(%[ftmp3], $sp, 0x00)
418         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
419         MMI_SDC1(%[ftmp0], $sp, 0x10)
420         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
421         "xor        %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
422         MMI_SDC1(%[ftmp2], %[block], 0x00)
423         MMI_SDC1(%[ftmp2], %[block], 0x08)
424         MMI_SDC1(%[ftmp2], %[block], 0x10)
425         MMI_SDC1(%[ftmp2], %[block], 0x18)
426         MMI_SDC1(%[ftmp2], %[block], 0x20)
427         MMI_SDC1(%[ftmp2], %[block], 0x28)
428         MMI_SDC1(%[ftmp2], %[block], 0x30)
429         MMI_SDC1(%[ftmp2], %[block], 0x38)
430         MMI_SDC1(%[ftmp2], %[block], 0x40)
431         MMI_SDC1(%[ftmp2], %[block], 0x48)
432         MMI_SDC1(%[ftmp2], %[block], 0x50)
433         MMI_SDC1(%[ftmp2], %[block], 0x58)
434         MMI_SDC1(%[ftmp2], %[block], 0x60)
435         MMI_SDC1(%[ftmp2], %[block], 0x68)
436         MMI_SDC1(%[ftmp2], %[block], 0x70)
437         MMI_SDC1(%[ftmp2], %[block], 0x78)
438         "dli        %[tmp3],    0x06                                    \n\t"
439         "mtc1       %[tmp3],    %[ftmp10]                               \n\t"
440         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
441         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
442         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
443         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
444         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
445         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
446         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
447         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
448         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
449         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
450         MMI_SWC1(%[ftmp3], %[dst], 0x00)
451         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
452         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
453         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
454         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
455         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
456         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
457         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
458         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
459         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
460         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
461         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
462         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
463         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
464         MMI_SWC1(%[ftmp3], %[dst], 0x00)
465         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
466         MMI_LDC1(%[ftmp5], $sp, 0x00)
467         MMI_LDC1(%[ftmp4], $sp, 0x10)
468         "dmtc1      %[tmp1],    %[ftmp6]                                \n\t"
469         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
470         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
471         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
472         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
473         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
474         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
475         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
476         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
477         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
478         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
479         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
480         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
481         MMI_SWC1(%[ftmp3], %[dst], 0x00)
482         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
483         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
484         PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
485         MMI_ULWC1(%[ftmp3], %[dst], 0x00)
486         MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
487         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
488         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
489         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
490         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
491         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
492         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
493         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
494         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
495         MMI_SWC1(%[ftmp3], %[dst], 0x00)
496         MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
497         "dmtc1      %[tmp4],    %[ftmp1]                                \n\t"
498         "dmtc1      %[tmp2],    %[ftmp6]                                \n\t"
499         MMI_LDC1(%[ftmp4], $sp, 0x18)
500         "mov.d      %[ftmp5],   %[ftmp4]                                \n\t"
501         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
502         "psrah      %[ftmp7],   %[ftmp11],      %[ftmp8]                \n\t"
503         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
504         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
505         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
506         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp11]               \n\t"
507         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
508         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
509         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
510         "psubh      %[ftmp3],   %[ftmp11],      %[ftmp1]                \n\t"
511         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
512         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp15]               \n\t"
513         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp15]               \n\t"
514         "psrah      %[ftmp2],   %[ftmp15],      %[ftmp8]                \n\t"
515         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
516         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
517         "mov.d      %[ftmp2],   %[ftmp4]                                \n\t"
518         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
519         "psrah      %[ftmp1],   %[ftmp7],       %[ftmp9]                \n\t"
520         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
521         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
522         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
523         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
524         "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
525         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
526         "mov.d      %[ftmp3],   %[ftmp13]                               \n\t"
527         "psrah      %[ftmp0],   %[ftmp13],      %[ftmp8]                \n\t"
528         "psrah      %[ftmp7],   %[ftmp6],       %[ftmp8]                \n\t"
529         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
530         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
531         MMI_LDC1(%[ftmp6], $sp, 0x08)
532         "dmtc1      %[tmp6],    %[ftmp3]                                \n\t"
533         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
534         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
535         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
536         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
537         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
538         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
539         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
540         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
541         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
542         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
543         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
544         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
545         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
546         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
547         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
548         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
549         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
550         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
551         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
552         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
553         MMI_SDC1(%[ftmp6], $sp, 0x08)
554         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
555         MMI_SDC1(%[ftmp7], $sp, 0x18)
556         "dmfc1      %[tmp2],    %[ftmp0]                                \n\t"
557         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
558         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
559         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
560         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
561         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
562         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
563         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
564         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
565         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
566         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
567         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
568         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
569         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
570         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
571         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
572         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
573         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
574         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
575         "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
576         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
577         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
578         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
579         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
580         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
581         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
582         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
583         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
584         MMI_LDC1(%[ftmp2], $sp, 0x08)
585         MMI_LDC1(%[ftmp5], $sp, 0x18)
586         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
587         "dmtc1      %[tmp2],    %[ftmp1]                                \n\t"
588         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
589         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
590         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
591         "psrah      %[ftmp3],   %[ftmp3],       %[ftmp10]               \n\t"
592         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
593         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
594         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
595         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
596         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
597         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
598         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
599         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
600         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
601         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
602         PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
603         MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
604         MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
605         "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
606         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
607         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
608         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
609         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
610         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
611         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
612         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
613         MMI_SWC1(%[ftmp6], %[addr0], 0x00)
614         MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
615         PTR_ADDIU  "$sp,        $sp,            0x20                    \n\t"
616         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
617           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
618           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
619           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
620           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
621           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
622           [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
623           [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
624           [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
625           [tmp2]"=&r"(tmp[2]),              [tmp3]"=&r"(tmp[3]),
626           [tmp4]"=&r"(tmp[4]),              [tmp5]"=&r"(tmp[5]),
627           [tmp6]"=&r"(tmp[6]),
628           RESTRICT_ASM_LOW32
629           RESTRICT_ASM_ADDRT
630           [addr0]"=&r"(addr[0])
631         : [dst]"r"(dst),                    [block]"r"(block),
632           [stride]"r"((mips_reg)stride)
633         : "memory"
634     );
635
636 }
637
/**
 * Add a DC-only residual to a 4x4 block of 8-bit pixels.
 *
 * The rounded DC term (block[0] + 32) >> 6 is added to four bytes on each of
 * four consecutive rows starting at dst, and the sums are packed back to
 * bytes with unsigned saturation. block[0] is reset to zero.
 *
 * @param dst    top-left pixel of the destination block
 * @param block  coefficient block; only block[0] is read, and it is cleared
 * @param stride distance in bytes between successive rows of dst
 */
638 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
639 {
        /* rounded, scaled-down DC value that is added to every pixel */
640     int dc = (block[0] + 32) >> 6;
641     double ftmp[6];
642     DECLARE_VAR_LOW32;
643
        /* the coefficient is consumed: clear it in the caller's buffer */
644     block[0] = 0;
645
646     __asm__ volatile (
            /* ftmp0 = 0; ftmp5 = dc copied into all four 16-bit lanes
             * (the shuffle selector ftmp0 is zero, so lane 0 is picked
             * for every output lane) */
647         "mtc1       %[dc],      %[ftmp5]                                \n\t"
648         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
649         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
            /* fetch four pixels from each of the four rows */
650         MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
651         MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
652         MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
653         MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
            /* widen the bytes to 16 bits by interleaving them with zero */
654         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
655         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
656         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
657         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* add dc to every lane (signed saturating 16-bit add) */
658         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
659         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
660         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
661         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
            /* narrow back to bytes with unsigned saturation */
662         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
663         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
664         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
665         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* write the four updated rows back */
666         MMI_SWC1(%[ftmp1], %[dst0], 0x00)
667         MMI_SWC1(%[ftmp2], %[dst1], 0x00)
668         MMI_SWC1(%[ftmp3], %[dst2], 0x00)
669         MMI_SWC1(%[ftmp4], %[dst3], 0x00)
670         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
671           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
672           [ftmp4]"=&f"(ftmp[4]),
673           RESTRICT_ASM_LOW32
674           [ftmp5]"=&f"(ftmp[5])
            /* one input register per destination row, plus the DC value */
675         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
676           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
677           [dc]"r"(dc)
678         : "memory"
679     );
680 }
681
/**
 * Add a DC-only residual to an 8x8 block of 8-bit pixels.
 *
 * The rounded DC term (block[0] + 32) >> 6 is added to eight bytes on each of
 * eight consecutive rows starting at dst, and the sums are packed back to
 * bytes with unsigned saturation. block[0] is reset to zero.
 *
 * NOTE(review): rows are accessed with MMI_LDC1/MMI_SDC1, which presumably
 * require 8-byte aligned addresses -- confirm against mmiutils.h.
 *
 * @param dst    top-left pixel of the destination block
 * @param block  coefficient block; only block[0] is read, and it is cleared
 * @param stride distance in bytes between successive rows of dst
 */
682 void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
683 {
        /* rounded, scaled-down DC value that is added to every pixel */
684     int dc = (block[0] + 32) >> 6;
685     double ftmp[10];
686     DECLARE_VAR_ALL64;
687
        /* the coefficient is consumed: clear it in the caller's buffer */
688     block[0] = 0;
689
690     __asm__ volatile (
            /* ftmp0 = 0; ftmp5 = dc copied into all four 16-bit lanes
             * (the shuffle selector ftmp0 is zero, so lane 0 is picked
             * for every output lane) */
691         "mtc1       %[dc],      %[ftmp5]                                \n\t"
692         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
693         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
            /* ---- rows 0..3: fetch eight pixels per row ---- */
694         MMI_LDC1(%[ftmp1], %[dst0], 0x00)
695         MMI_LDC1(%[ftmp2], %[dst1], 0x00)
696         MMI_LDC1(%[ftmp3], %[dst2], 0x00)
697         MMI_LDC1(%[ftmp4], %[dst3], 0x00)
            /* split each row into high (ftmp6..9) and low (ftmp1..4) halves,
             * widening the bytes to 16 bits by interleaving with zero */
698         "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
699         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
700         "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
701         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
702         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
703         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
704         "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
705         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
            /* add dc to every lane (signed saturating 16-bit add) */
706         "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
707         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
708         "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
709         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
710         "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
711         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
712         "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
713         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
            /* recombine low and high halves into eight bytes per row,
             * with unsigned saturation */
714         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
715         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
716         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
717         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
            /* write rows 0..3 back */
718         MMI_SDC1(%[ftmp1], %[dst0], 0x00)
719         MMI_SDC1(%[ftmp2], %[dst1], 0x00)
720         MMI_SDC1(%[ftmp3], %[dst2], 0x00)
721         MMI_SDC1(%[ftmp4], %[dst3], 0x00)
722
            /* ---- rows 4..7: same sequence as above ---- */
723         MMI_LDC1(%[ftmp1], %[dst4], 0x00)
724         MMI_LDC1(%[ftmp2], %[dst5], 0x00)
725         MMI_LDC1(%[ftmp3], %[dst6], 0x00)
726         MMI_LDC1(%[ftmp4], %[dst7], 0x00)
727         "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
728         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
729         "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
730         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
731         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
732         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
733         "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
734         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
735         "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
736         "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
737         "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
738         "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
739         "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
740         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
741         "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
742         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
743         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
744         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
745         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
746         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
            /* write rows 4..7 back */
747         MMI_SDC1(%[ftmp1], %[dst4], 0x00)
748         MMI_SDC1(%[ftmp2], %[dst5], 0x00)
749         MMI_SDC1(%[ftmp3], %[dst6], 0x00)
750         MMI_SDC1(%[ftmp4], %[dst7], 0x00)
751         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
752           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
753           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
754           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
755           [ftmp8]"=&f"(ftmp[8]),
756           RESTRICT_ASM_ALL64
757           [ftmp9]"=&f"(ftmp[9])
            /* one input register per destination row, plus the DC value */
758         : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
759           [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
760           [dst4]"r"(dst+4*stride),          [dst5]"r"(dst+5*stride),
761           [dst6]"r"(dst+6*stride),          [dst7]"r"(dst+7*stride),
762           [dc]"r"(dc)
763         : "memory"
764     );
765 }
766
767 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
768         int16_t *block, int stride, const uint8_t nnzc[15*8])
769 {
770     int i;
771     for(i=0; i<16; i++){
772         int nnz = nnzc[ scan8[i] ];
773         if(nnz){
774             if(nnz==1 && ((int16_t*)block)[i*16])
775                 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
776                         stride);
777             else
778                 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
779                         stride);
780         }
781     }
782 }
783
784 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
785         int16_t *block, int stride, const uint8_t nnzc[15*8])
786 {
787     int i;
788     for(i=0; i<16; i++){
789         if(nnzc[ scan8[i] ])
790             ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
791         else if(((int16_t*)block)[i*16])
792             ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
793                     stride);
794     }
795 }
796
797 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
798         int16_t *block, int stride, const uint8_t nnzc[15*8])
799 {
800     int i;
801     for(i=0; i<16; i+=4){
802         int nnz = nnzc[ scan8[i] ];
803         if(nnz){
804             if(nnz==1 && ((int16_t*)block)[i*16])
805                 ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
806                         block + i*16, stride);
807             else
808                 ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
809                         stride);
810         }
811     }
812 }
813
814 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
815         int16_t *block, int stride, const uint8_t nnzc[15*8])
816 {
817     int i, j;
818     for(j=1; j<3; j++){
819         for(i=j*16; i<j*16+4; i++){
820             if(nnzc[ scan8[i] ])
821                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
822                         block + i*16, stride);
823             else if(((int16_t*)block)[i*16])
824                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
825                         block + i*16, stride);
826         }
827     }
828 }
829
830 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
831         int16_t *block, int stride, const uint8_t nnzc[15*8])
832 {
833     int i, j;
834
835     for(j=1; j<3; j++){
836         for(i=j*16; i<j*16+4; i++){
837             if(nnzc[ scan8[i] ])
838                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
839                         block + i*16, stride);
840             else if(((int16_t*)block)[i*16])
841                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
842                         block + i*16, stride);
843         }
844     }
845
846     for(j=1; j<3; j++){
847         for(i=j*16+4; i<j*16+8; i++){
848             if(nnzc[ scan8[i+4] ])
849                 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
850                         block + i*16, stride);
851             else if(((int16_t*)block)[i*16])
852                 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
853                         block + i*16, stride);
854         }
855     }
856 }
857
858 void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
859         int qmul)
860 {
861     double ftmp[10];
862     uint64_t tmp[2];
863     DECLARE_VAR_ALL64;
864
865     __asm__ volatile (
866         ".set       noreorder                                           \n\t"
867         "dli        %[tmp0],    0x08                                    \n\t"
868         MMI_LDC1(%[ftmp3], %[input], 0x18)
869         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
870         MMI_LDC1(%[ftmp2], %[input], 0x10)
871         "dli        %[tmp0],    0x20                                    \n\t"
872         MMI_LDC1(%[ftmp1], %[input], 0x08)
873         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
874         MMI_LDC1(%[ftmp0], %[input], 0x00)
875         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
876         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
877         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
878         "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
879         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
880         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
881         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
882         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
883         "psubh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
884         "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
885         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
886         "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
887         "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
888         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
889         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
890         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
891         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
892         "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp0]                \n\t"
893         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
894         "mov.d      %[ftmp0],   %[ftmp4]                                \n\t"
895         "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
896         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
897         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
898         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
899         "psubh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
900         "mov.d      %[ftmp1],   %[ftmp2]                                \n\t"
901         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
902         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
903         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
904         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
905         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
906         "mov.d      %[ftmp1],   %[ftmp4]                                \n\t"
907         "daddi      %[tmp0],    %[qmul],        -0x7fff                 \n\t"
908         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
909         "bgtz       %[tmp0],    1f                                      \n\t"
910         "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
911         "ori        %[tmp0],    $0,             0x80                    \n\t"
912         "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
913         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
914         "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
915         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
916         "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
917         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
918         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
919         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
920         "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
921         "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
922         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
923         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
924         "psraw      %[ftmp0],   %[ftmp0],       %[ftmp8]                \n\t"
925         "psraw      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
926         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
927         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
928         "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
929         "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
930         "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
931         "dsrl       %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
932         "mfc1       %[input],   %[ftmp0]                                \n\t"
933         "sh         %[tmp1],    0x00(%[output])                         \n\t"
934         "sh         %[input],   0x80(%[output])                         \n\t"
935         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
936         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
937         "sh         %[tmp1],    0x20(%[output])                         \n\t"
938         "sh         %[input],   0xa0(%[output])                         \n\t"
939         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
940         "dsrl       %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
941         "mfc1       %[input],   %[ftmp2]                                \n\t"
942         "sh         %[tmp1],    0x40(%[output])                         \n\t"
943         "sh         %[input],   0xc0(%[output])                         \n\t"
944         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
945         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
946         "sh         %[tmp1],    0x60(%[output])                         \n\t"
947         "sh         %[input],   0xe0(%[output])                         \n\t"
948         "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
949         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
950         "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
951         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
952         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
953         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
954         "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
955         "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
956         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
957         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
958         "psraw      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
959         "psraw      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
960         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
961         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
962         "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
963         "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
964         "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
965         "dsrl       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
966         "mfc1       %[input],   %[ftmp3]                                \n\t"
967         "sh         %[tmp1],    0x100(%[output])                        \n\t"
968         "sh         %[input],   0x180(%[output])                        \n\t"
969         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
970         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
971         "sh         %[tmp1],    0x120(%[output])                        \n\t"
972         "sh         %[input],   0x1a0(%[output])                        \n\t"
973         "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
974         "dsrl       %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
975         "mfc1       %[input],   %[ftmp4]                                \n\t"
976         "sh         %[tmp1],    0x140(%[output])                        \n\t"
977         "sh         %[input],   0x1c0(%[output])                        \n\t"
978         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
979         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
980         "sh         %[tmp1],    0x160(%[output])                        \n\t"
981         "j          2f                                                  \n\t"
982         "sh         %[input],   0x1e0(%[output])                        \n\t"
983         "1:                                                             \n\t"
984         "ori        %[tmp0],    $0,             0x1f                    \n\t"
985 #if HAVE_LOONGSON3
986         "clz        %[tmp1],    %[qmul]                                 \n\t"
987 #elif HAVE_LOONGSON2
988 #endif
989         "ori        %[input],   $0,             0x07                    \n\t"
990         "dsubu      %[tmp1],    %[tmp0],        %[tmp1]                 \n\t"
991         "ori        %[tmp0],    $0,             0x80                    \n\t"
992         "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
993         "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
994         "dsubu      %[tmp0],    %[tmp1],        %[input]                \n\t"
995         "movn       %[tmp1],    %[input],       %[tmp0]                 \n\t"
996         PTR_ADDIU  "%[input],   %[input],       0x01                    \n\t"
997         "andi       %[tmp0],    %[tmp1],        0xff                    \n\t"
998         "srlv       %[qmul],    %[qmul],        %[tmp0]                 \n\t"
999         PTR_SUBU   "%[input],   %[input],       %[tmp1]                 \n\t"
1000         "mtc1       %[input],   %[ftmp6]                                \n\t"
1001         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
1002         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
1003         "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
1004         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
1005         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1006         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1007         "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
1008         "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1009         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1010         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1011         "psraw      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
1012         "psraw      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1013         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1014         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1015         "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
1016         "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1017         "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
1018         "dsrl       %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
1019         "sh         %[tmp1],    0x00(%[output])                         \n\t"
1020         "mfc1       %[input],   %[ftmp0]                                \n\t"
1021         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1022         "sh         %[input],   0x80(%[output])                         \n\t"
1023         "sh         %[tmp1],    0x20(%[output])                         \n\t"
1024         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1025         "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
1026         "sh         %[input],   0xa0(%[output])                         \n\t"
1027         "dsrl       %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
1028         "sh         %[tmp1],    0x40(%[output])                         \n\t"
1029         "mfc1       %[input],   %[ftmp2]                                \n\t"
1030         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1031         "sh         %[input],   0xc0(%[output])                         \n\t"
1032         "sh         %[tmp1],    0x60(%[output])                         \n\t"
1033         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1034         "sh         %[input],   0xe0(%[output])                         \n\t"
1035         "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
1036         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
1037         "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
1038         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
1039         "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1040         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1041         "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1042         "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1043         "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1044         "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1045         "psraw      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
1046         "psraw      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1047         "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1048         "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1049         "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1050         "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1051         "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
1052         "dsrl       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
1053         "mfc1       %[input],   %[ftmp3]                                \n\t"
1054         "sh         %[tmp1],    0x100(%[output])                        \n\t"
1055         "sh         %[input],   0x180(%[output])                        \n\t"
1056         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1057         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1058         "sh         %[tmp1],    0x120(%[output])                        \n\t"
1059         "sh         %[input],   0x1a0(%[output])                        \n\t"
1060         "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
1061         "dsrl       %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
1062         "mfc1       %[input],   %[ftmp4]                                \n\t"
1063         "sh         %[tmp1],    0x140(%[output])                        \n\t"
1064         "sh         %[input],   0x1c0(%[output])                        \n\t"
1065         "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1066         PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1067         "sh         %[tmp1],    0x160(%[output])                        \n\t"
1068         "sh         %[input],   0x1e0(%[output])                        \n\t"
1069         "2:                                                             \n\t"
1070         ".set       reorder                                             \n\t"
1071         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1072           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1073           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1074           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1075           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1076           [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
1077           RESTRICT_ASM_ALL64
1078           [output]"+&r"(output),            [input]"+&r"(input),
1079           [qmul]"+&r"(qmul)
1080         : [ff_pw_1]"f"(ff_pw_1)
1081         : "memory"
1082     );
1083 }
1084
/**
 * In-place inverse transform and dequantisation of eight DC coefficients
 * (the 4:2:2 chroma DC block, going by the function name).
 *
 * The coefficients are read from and written back to block[0], block[16],
 * block[32], ..., block[112]; no other element of the buffer is accessed.
 *
 * @param block coefficient buffer; indices 0..112 must be valid
 * @param qmul  dequantisation multiplier; every output is
 *              (butterfly_sum * qmul + 128) >> 8
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /*
     * All intermediate arithmetic is done in unsigned so that large
     * coefficients or a large qmul wrap modulo 2^32 instead of triggering
     * signed-overflow undefined behaviour.  The value is converted back to
     * int only for the final arithmetic right shift.
     */
    unsigned temp[8];
    unsigned t[8];

    /* Stage 1: sum and difference of each (k, k + 16) coefficient pair. */
    temp[0] = block[0]  + (unsigned)block[16];
    temp[1] = block[0]  - (unsigned)block[16];
    temp[2] = block[32] + (unsigned)block[48];
    temp[3] = block[32] - (unsigned)block[48];
    temp[4] = block[64] + (unsigned)block[80];
    temp[5] = block[64] - (unsigned)block[80];
    temp[6] = block[96] + (unsigned)block[112];
    temp[7] = block[96] - (unsigned)block[112];

    /* Stage 2: 4-point add/subtract butterfly over the four sums ... */
    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    /* ... and over the four differences. */
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    /* Dequantise with rounding (+128, >> 8) and store back in place. */
    block[  0] = (int)(t[0] * qmul + 128) >> 8;
    block[ 32] = (int)(t[1] * qmul + 128) >> 8;
    block[ 64] = (int)(t[2] * qmul + 128) >> 8;
    block[ 96] = (int)(t[3] * qmul + 128) >> 8;
    block[ 16] = (int)(t[4] * qmul + 128) >> 8;
    block[ 48] = (int)(t[5] * qmul + 128) >> 8;
    block[ 80] = (int)(t[6] * qmul + 128) >> 8;
    block[112] = (int)(t[7] * qmul + 128) >> 8;
}
1117
/**
 * In-place 2x2 add/subtract butterfly and dequantisation of the four DC
 * coefficients stored at block[0], block[16], block[32] and block[48].
 *
 * @param block coefficient buffer; indices 0..48 must be valid
 * @param qmul  dequantisation multiplier; every output is
 *              (butterfly_sum * qmul) >> 7 (no rounding term)
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /*
     * Unsigned intermediates: the product with qmul may exceed the int
     * range, which would be undefined behaviour in signed arithmetic.
     * Unsigned math wraps modulo 2^32; the result is converted back to int
     * only for the arithmetic right shift.
     */
    unsigned a, b, c, d;

    /* Sums and differences of the two (k, k + 16) pairs. */
    d = block[0]  - (unsigned)block[16];
    a = block[0]  + (unsigned)block[16];
    b = block[32] - (unsigned)block[48];
    c = block[32] + (unsigned)block[48];

    /* Second butterfly stage, scale, and write back. */
    block[0]  = (int)((a + c) * qmul) >> 7;
    block[16] = (int)((d + b) * qmul) >> 7;
    block[32] = (int)((a - c) * qmul) >> 7;
    block[48] = (int)((d - b) * qmul) >> 7;
}
1131
1132 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1133         int log2_denom, int weight, int offset)
1134 {
1135     int y;
1136     double ftmp[8];
1137     DECLARE_VAR_ALL64;
1138
1139     offset <<= log2_denom;
1140
1141     if (log2_denom)
1142         offset += 1 << (log2_denom - 1);
1143
1144     for (y=0; y<height; y++, block+=stride) {
1145         __asm__ volatile (
1146             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1147             MMI_LDC1(%[ftmp1], %[block0], 0x00)
1148             MMI_LDC1(%[ftmp2], %[block1], 0x00)
1149             "mtc1       %[weight],  %[ftmp3]                            \n\t"
1150             "mtc1       %[offset],  %[ftmp4]                            \n\t"
1151             "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1152             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1153             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1154             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
1155             "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
1156             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1157             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1158             "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1159             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1160             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1161             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t"
1162             "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1163             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp4]            \n\t"
1164             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1165             "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1166             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1167             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1168             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1169             "psrah      %[ftmp2],   %[ftmp2],       %[ftmp5]            \n\t"
1170             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1171             "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1172             MMI_SDC1(%[ftmp1], %[block0], 0x00)
1173             MMI_SDC1(%[ftmp2], %[block1], 0x00)
1174             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1175               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1176               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1177               [ftmp6]"=&f"(ftmp[6]),
1178               RESTRICT_ASM_ALL64
1179               [ftmp7]"=&f"(ftmp[7])
1180             : [block0]"r"(block),           [block1]"r"(block+8),
1181               [weight]"r"(weight),          [offset]"r"(offset),
1182               [log2_denom]"r"(log2_denom)
1183             : "memory"
1184         );
1185     }
1186 }
1187
1188 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1189         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1190         int offset)
1191 {
1192     int y;
1193     double ftmp[9];
1194     DECLARE_VAR_ALL64;
1195
1196     offset = ((offset + 1) | 1) << log2_denom;
1197
1198     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1199         __asm__ volatile (
1200             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1201             MMI_LDC1(%[ftmp1], %[src0], 0x00)
1202             MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1203             "mtc1       %[weights], %[ftmp3]                            \n\t"
1204             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1205             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1206             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1207             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1208             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1209             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1210             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1211             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1212             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1213             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1214             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1215             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1216             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1217             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1218             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1219             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1220             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1221             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1222             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1223             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1224             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1225             MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1226             MMI_LDC1(%[ftmp1], %[src1], 0x00)
1227             MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1228             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1229             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1230             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1231             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1232             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1233             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1234             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1235             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1236             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1237             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1238             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1239             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1240             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1241             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1242             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1243             MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1244             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1245               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1246               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1247               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1248               RESTRICT_ASM_ALL64
1249               [ftmp8]"=&f"(ftmp[8])
1250             : [dst0]"r"(dst),               [dst1]"r"(dst+8),
1251               [src0]"r"(src),               [src1]"r"(src+8),
1252               [weights]"r"(weights),        [weightd]"r"(weightd),
1253               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1254             : "memory"
1255         );
1256     }
1257 }
1258
1259 void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1260         int log2_denom, int weight, int offset)
1261 {
1262     int y;
1263     double ftmp[6];
1264     DECLARE_VAR_ALL64;
1265
1266     offset <<= log2_denom;
1267
1268     if (log2_denom)
1269         offset += 1 << (log2_denom - 1);
1270
1271     for (y=0; y<height; y++, block+=stride) {
1272         __asm__ volatile (
1273             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1274             MMI_LDC1(%[ftmp1], %[block], 0x00)
1275             "mtc1       %[weight],  %[ftmp2]                            \n\t"
1276             "mtc1       %[offset],  %[ftmp3]                            \n\t"
1277             "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1278             "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1279             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1280             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
1281             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1282             "pmullh     %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"
1283             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1284             "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1285             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1286             "psrah      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"
1287             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1288             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1289             MMI_SDC1(%[ftmp1], %[block], 0x00)
1290             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1291               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1292               [ftmp4]"=&f"(ftmp[4]),
1293               RESTRICT_ASM_ALL64
1294               [ftmp5]"=&f"(ftmp[5])
1295             : [block]"r"(block),            [weight]"r"(weight),
1296               [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1297             : "memory"
1298         );
1299     }
1300 }
1301
1302 void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1303         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1304         int offset)
1305 {
1306     int y;
1307     double ftmp[9];
1308     DECLARE_VAR_ALL64;
1309
1310     offset = ((offset + 1) | 1) << log2_denom;
1311
1312     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1313         __asm__ volatile (
1314             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1315             MMI_LDC1(%[ftmp1], %[src], 0x00)
1316             MMI_LDC1(%[ftmp2], %[dst], 0x00)
1317             "mtc1       %[weights], %[ftmp3]                            \n\t"
1318             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1319             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1320             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1321             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1322             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1323             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1324             "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1325             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1326             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1327             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1328             "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1329             "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1330             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1331             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1332             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1333             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1334             "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1335             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1336             "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1337             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1338             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1339             MMI_SDC1(%[ftmp1], %[dst], 0x00)
1340             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1341               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1342               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1343               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1344               RESTRICT_ASM_ALL64
1345               [ftmp8]"=&f"(ftmp[8])
1346             : [dst]"r"(dst),                [src]"r"(src),
1347               [weights]"r"(weights),        [weightd]"r"(weightd),
1348               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1349             : "memory"
1350         );
1351     }
1352 }
1353
1354 void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1355         int log2_denom, int weight, int offset)
1356 {
1357     int y;
1358     double ftmp[5];
1359     DECLARE_VAR_LOW32;
1360
1361     offset <<= log2_denom;
1362
1363     if (log2_denom)
1364         offset += 1 << (log2_denom - 1);
1365
1366     for (y=0; y<height; y++, block+=stride) {
1367         __asm__ volatile (
1368             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1369             MMI_ULWC1(%[ftmp1], %[block], 0x00)
1370             "mtc1       %[weight],  %[ftmp2]                            \n\t"
1371             "mtc1       %[offset],  %[ftmp3]                            \n\t"
1372             "mtc1       %[log2_denom],              %[ftmp4]            \n\t"
1373             "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1374             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1375             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1376             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1377             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1378             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1379             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1380             MMI_SWC1(%[ftmp1], %[block], 0x00)
1381             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1382               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1383               RESTRICT_ASM_LOW32
1384               [ftmp4]"=&f"(ftmp[4])
1385             : [block]"r"(block),            [weight]"r"(weight),
1386               [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1387             : "memory"
1388         );
1389     }
1390 }
1391
1392 void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1393         ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1394         int offset)
1395 {
1396     int y;
1397     double ftmp[7];
1398     DECLARE_VAR_LOW32;
1399
1400     offset = ((offset + 1) | 1) << log2_denom;
1401
1402     for (y=0; y<height; y++, dst+=stride, src+=stride) {
1403         __asm__ volatile (
1404             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1405             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1406             MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1407             "mtc1       %[weight],  %[ftmp3]                            \n\t"
1408             "mtc1       %[weightd], %[ftmp4]                            \n\t"
1409             "mtc1       %[offset],  %[ftmp5]                            \n\t"
1410             "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1411             "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1412             "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1413             "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1414             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1415             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1416             "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1417             "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1418             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1419             "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1420             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1421             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1422             MMI_SWC1(%[ftmp1], %[dst], 0x00)
1423             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1424               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1425               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1426               RESTRICT_ASM_LOW32
1427               [ftmp6]"=&f"(ftmp[6])
1428             : [dst]"r"(dst),                [src]"r"(src),
1429               [weight]"r"(weights),         [weightd]"r"(weightd),
1430               [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1431             : "memory"
1432         );
1433     }
1434 }
1435
1436 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1437         int8_t *tc0)
1438 {
1439     double ftmp[12];
1440     mips_reg addr[2];
1441     DECLARE_VAR_LOW32;
1442     DECLARE_VAR_ALL64;
1443     DECLARE_VAR_ADDRT;
1444
1445     __asm__ volatile (
1446         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1447         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1448         PTR_ADDU   "%[addr1],   %[stride],      %[addr0]                \n\t"
1449         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1450         PTR_SUBU   "%[addr1],   $0,             %[addr1]                \n\t"
1451         "addi       %[beta],    %[beta],        -0x01                   \n\t"
1452         PTR_ADDU   "%[addr1],   %[addr1],       %[pix]                  \n\t"
1453         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1454         MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1455         MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1456         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1457         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1458         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1459         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1460         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1461         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1462         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1463         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1464         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1465         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1466         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1467         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1468         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1469         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1470         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1471         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1472         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1473         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1474         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1475         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1476         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1477         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1478         "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
1479         MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1480         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1481         "punpcklbh  %[ftmp9],   %[ftmp5],       %[ftmp5]                \n\t"
1482         "pcmpgtb    %[ftmp5],   %[ftmp9],       %[ftmp4]                \n\t"
1483         MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1484         "and        %[ftmp10],  %[ftmp5],       %[ftmp8]                \n\t"
1485         "psubusb    %[ftmp8],   %[ftmp4],       %[ftmp2]                \n\t"
1486         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1487         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1488         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1489         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1490         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1491         "and        %[ftmp5],   %[ftmp10],      %[ftmp9]                \n\t"
1492         "psubb      %[ftmp8],   %[ftmp5],       %[ftmp7]                \n\t"
1493         "and        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1494         "pavgb      %[ftmp5],   %[ftmp2],       %[ftmp3]                \n\t"
1495         MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1496         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1497         "xor        %[ftmp5],   %[ftmp5],       %[ftmp11]               \n\t"
1498         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1499         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1500         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp7]                \n\t"
1501         "paddusb    %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1502         "pmaxub     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1503         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1504         MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
1505         MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1506         "psubusb    %[ftmp4],   %[ftmp5],       %[ftmp3]                \n\t"
1507         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp5]                \n\t"
1508         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1509         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1510         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
1511         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1512         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1513         "and        %[ftmp6],   %[ftmp9],       %[ftmp7]                \n\t"
1514         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1515         "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1516         MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1517         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1518         "xor        %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1519         "and        %[ftmp7],   %[ftmp7],       %[ff_pb_1]              \n\t"
1520         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1521         "psubusb    %[ftmp7],   %[ftmp4],       %[ftmp6]                \n\t"
1522         "paddusb    %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1523         "pmaxub     %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1524         "pminub     %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1525         MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1526         "xor        %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1527         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1528         "and        %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1529         "xor        %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1530         "xor        %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1531         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1532         "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1533         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1534         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1535         "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1536         "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1537         "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1538         "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1539         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
1540         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1541         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1542         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1543         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1544         MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1545         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1546         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1547           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1548           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1549           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1550           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1551           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1552           RESTRICT_ASM_LOW32
1553           RESTRICT_ASM_ALL64
1554           RESTRICT_ASM_ADDRT
1555           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1])
1556         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1557           [alpha]"r"((mips_reg)alpha),      [beta]"r"((mips_reg)beta),
1558           [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1),
1559           [ff_pb_3]"f"(ff_pb_3),            [ff_pb_A1]"f"(ff_pb_A1)
1560         : "memory"
1561     );
1562 }
1563
/*
 * Filter eight byte-wide columns across a horizontal edge, rewriting three
 * rows on either side of it (pix - 3*stride .. pix + 2*stride).  Rows
 * pix - 4*stride and pix + 3*stride are read but never written.
 *
 * pix    - address of the first row below the edge
 * stride - distance in bytes between consecutive rows
 * alpha  - edge threshold; decremented in the asm, and the whole filter is
 *          skipped when alpha - 1 is negative
 * beta   - neighbour threshold; handled the same way as alpha
 *
 * The asm keeps intermediate vectors in the local `stack` buffer:
 *   0x00  byte-wise sum of four rows (rounding correction)
 *   0x10  broadcast alpha - 1 first, later pavgb(row -1, row 0)
 *   0x20  base per-lane filter mask
 *   0x30  mask selecting the stronger filter above the edge
 *   0x40  mask selecting the stronger filter below the edge
 * Because the asm stores into `stack`, the array must not be const-qualified.
 */
1564 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1565         int beta)
1566 {
1567     DECLARE_ALIGNED(8, uint64_t, stack[0x0a]);
1568     double ftmp[16];
1569     uint64_t tmp[1];
1570     mips_reg addr[3];
1571     DECLARE_VAR_ALL64;
1572     DECLARE_VAR_ADDRT;
1573
1574     __asm__ volatile (
        /* ftmp0 = 0, ftmp9 = 1; addr0 = 4*stride, addr2 = 2*stride */
1575         "ori        %[tmp0],    $0,             0x01                    \n\t"
1576         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1577         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
1578         PTR_SLL    "%[addr0],   %[stride],      0x02                    \n\t"
1579         PTR_ADDU   "%[addr2],   %[stride],      %[stride]               \n\t"
1580         PTR_ADDIU  "%[alpha],   %[alpha],       -0x01                   \n\t"
        /* ftmp11 = ftmp9 << ftmp9, later used as a psrlh shift count.
         * NOTE(review): this applies the pointer-width shift mnemonic to FP
         * registers; confirm it assembles on every ABI PTR_SLL is defined for. */
1581         PTR_SLL    "%[ftmp11],  %[ftmp9],       %[ftmp9]                \n\t"
        /* Nothing to do when alpha - 1 < 0 or beta - 1 < 0 */
1582         "bltz       %[alpha],   1f                                      \n\t"
1583         PTR_ADDU   "%[addr1],   %[addr2],       %[stride]               \n\t"
1584         PTR_ADDIU  "%[beta],    %[beta],        -0x01                   \n\t"
1585         "bltz       %[beta],    1f                                      \n\t"
        /* addr0 = pix - 4*stride, addr1 = 3*stride */
1586         PTR_SUBU   "%[addr0],   $0,             %[addr0]                \n\t"
1587         PTR_ADDU   "%[addr0],   %[addr0],       %[pix]                  \n\t"
        /* ftmp1 = row -2, ftmp2 = row -1, ftmp3 = row 0, ftmp4 = row +1 */
1588         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1589         MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1590         MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1591         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
        /* Broadcast alpha - 1 into ftmp5 and beta - 1 into ftmp6 (bytes) */
1592         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1593         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1594         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1595         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1596         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
        /* Base mask: OR together the saturated excesses of
         * |row0 - row-1| over alpha-1 and of |row-2 - row-1|, |row+1 - row0|
         * over beta-1; a lane passes when the result is zero. */
1597         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1598         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1599         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1600         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1601         MMI_SDC1(%[ftmp5], %[stack], 0x10)
1602         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1603         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1604         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1605         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1606         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1607         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1608         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1609         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1610         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1611         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1612         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1613         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1614         MMI_LDC1(%[ftmp5], %[stack], 0x10)
1615         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
        /* ftmp10 = ff_pb_1, read from memory ("m" operand) */
1616         "ldc1       %[ftmp10],  %[ff_pb_1]                              \n\t"
1617         MMI_SDC1(%[ftmp8], %[stack], 0x20)
        /* Tighter |row0 - row-1| test against a threshold derived from
         * alpha - 1 by two pavgb steps; result ANDed with the base mask. */
1618         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1619         "psubusb    %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1620         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
1621         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1622         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1623         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1624         MMI_LDC1(%[ftmp15], %[stack], 0x20)
1625         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1626         "and        %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
        /* ftmp15 = row -3; upper-side mask (|row-3 - row-1| vs beta-1) -> 0x30 */
1627         MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1628         "psubusb    %[ftmp8],   %[ftmp15],      %[ftmp2]                \n\t"
1629         "psubusb    %[ftmp5],   %[ftmp2],       %[ftmp15]               \n\t"
1630         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1631         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1632         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1633         "and        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
        /* ftmp14 = row +2; lower-side mask (|row+2 - row0| vs beta-1) -> 0x40 */
1634         MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1635         MMI_SDC1(%[ftmp5], %[stack], 0x30)
1636         "psubusb    %[ftmp8],   %[ftmp14],      %[ftmp3]                \n\t"
1637         "psubusb    %[ftmp5],   %[ftmp3],       %[ftmp14]               \n\t"
1638         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1639         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1640         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1641         "and        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1642         MMI_SDC1(%[ftmp5], %[stack], 0x40)
        /* ---- Upper side: new values for rows -1, -2 and -3 ----
         * Built from pavgb chains over rows -3..+1; the xor/and-with-ff_pb_1/
         * psubb triplets adjust the rounding of the chained averages. */
1643         "pavgb      %[ftmp5],   %[ftmp15],      %[ftmp1]                \n\t"
1644         "pavgb      %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1645         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1646         MMI_SDC1(%[ftmp6], %[stack], 0x10)
1647         "paddb      %[ftmp7],   %[ftmp15],      %[ftmp1]                \n\t"
1648         "paddb      %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1649         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1650         "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1651         MMI_SDC1(%[ftmp7], %[stack], 0x00)
1652         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1653         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1654         "xor        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1655         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1656         "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1657         "pavgb      %[ftmp6],   %[ftmp15],      %[ftmp4]                \n\t"
1658         "psubb      %[ftmp7],   %[ftmp15],      %[ftmp4]                \n\t"
1659         "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1660         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1661         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1662         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1663         MMI_LDC1(%[ftmp13], %[stack], 0x10)
1664         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
1665         "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1666         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1667         "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1668         "xor        %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1669         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1670         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
1671         "xor        %[ftmp8],   %[ftmp2],       %[ftmp4]                \n\t"
1672         "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1673         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1674         "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1675         MMI_LDC1(%[ftmp13], %[stack], 0x30)
1676         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1677         MMI_LDC1(%[ftmp12], %[stack], 0x20)
        /* Blend by masks (0x30 and 0x20) using xor/and/xor selects,
         * then store the new row -1. */
1678         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1679         "xor        %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
1680         "and        %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1681         "and        %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1682         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1683         "xor        %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
1684         MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
        /* ftmp6 = row -4 (read only), used for the new row -3 */
1685         MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1686         "paddb      %[ftmp7],   %[ftmp15],      %[ftmp6]                \n\t"
1687         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1688         MMI_LDC1(%[ftmp12], %[stack], 0x00)
1689         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1690         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1691         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1692         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1693         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1694         "xor        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1695         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1696         MMI_LDC1(%[ftmp12], %[stack], 0x30)
1697         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
        /* Rows -2 and -3 only change in lanes selected by the 0x30 mask */
1698         "xor        %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1699         "xor        %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1700         "and        %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1701         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1702         "xor        %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1703         "xor        %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1704         MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1705         MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
        /* ---- Lower side: same sequence mirrored, for rows 0, +1 and +2 ----
         * Uses the original row values still held in ftmp1..ftmp4/ftmp14. */
1706         "pavgb      %[ftmp5],   %[ftmp14],      %[ftmp4]                \n\t"
1707         "pavgb      %[ftmp6],   %[ftmp3],       %[ftmp2]                \n\t"
1708         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1709         MMI_SDC1(%[ftmp6], %[stack], 0x10)
1710         "paddb      %[ftmp7],   %[ftmp14],      %[ftmp4]                \n\t"
1711         "paddb      %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1712         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1713         "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1714         MMI_SDC1(%[ftmp7], %[stack], 0x00)
1715         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1716         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1717         "xor        %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1718         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1719         "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1720         "pavgb      %[ftmp6],   %[ftmp14],      %[ftmp1]                \n\t"
1721         "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1722         "psubb      %[ftmp7],   %[ftmp14],      %[ftmp1]                \n\t"
1723         "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1724         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1725         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1726         MMI_LDC1(%[ftmp12], %[stack], 0x10)
1727         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1728         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1729         "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1730         "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1731         "xor        %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1732         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1733         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
1734         "xor        %[ftmp8],   %[ftmp3],       %[ftmp1]                \n\t"
1735         "pavgb      %[ftmp7],   %[ftmp3],       %[ftmp1]                \n\t"
1736         "and        %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1737         MMI_LDC1(%[ftmp12], %[stack], 0x40)
1738         "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1739         MMI_LDC1(%[ftmp13], %[stack], 0x20)
1740         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
        /* Blend by masks (0x40 and 0x20), then store the new row 0 */
1741         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1742         "xor        %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
1743         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1744         "and        %[ftmp7],   %[ftmp7],       %[ftmp13]               \n\t"
1745         "xor        %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1746         "xor        %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
1747         MMI_SDC1(%[ftmp6], %[pix], 0x00)
        /* ftmp6 = row +3 (read only), used for the new row +2 */
1748         MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1749         "paddb      %[ftmp7],   %[ftmp14],      %[ftmp6]                \n\t"
1750         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1751         MMI_LDC1(%[ftmp12], %[stack], 0x00)
1752         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1753         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1754         "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1755         "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1756         "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1757         "xor        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1758         "and        %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1759         MMI_LDC1(%[ftmp12], %[stack], 0x40)
1760         "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
        /* Rows +1 and +2 only change in lanes selected by the 0x40 mask */
1761         "xor        %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1762         "xor        %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1763         "and        %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1764         "and        %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1765         "xor        %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1766         "xor        %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1767         MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1768         MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
        /* Early-exit target for the alpha/beta checks above */
1769         "1:                                                             \n\t"
1770         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1771           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1772           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1773           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1774           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1775           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1776           [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
1777           [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
1778           [tmp0]"=&r"(tmp[0]),
1779           RESTRICT_ASM_ALL64
1780           RESTRICT_ASM_ADDRT
1781           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1782           [addr2]"=&r"(addr[2]),
1783           [alpha]"+&r"(alpha),              [beta]"+&r"(beta)
1784         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1785           [stack]"r"(stack),                [ff_pb_1]"m"(ff_pb_1)
1786         : "memory"
1787     );
1788 }
1789
/*
 * Filter eight byte-wide columns across a horizontal edge, rewriting only
 * the two rows adjacent to it (pix - stride and pix).  Rows pix - 2*stride
 * and pix + stride are read as context.
 *
 * pix    - address of the first row below the edge
 * stride - distance in bytes between consecutive rows
 * alpha  - edge threshold; the asm compares against alpha - 1 (inclusive)
 * beta   - neighbour threshold; the asm compares against beta - 1 (inclusive)
 * tc0    - four per-pair clip limits; each byte is applied to two adjacent
 *          columns
 *
 * alpha and beta are decremented inside the asm, so they are declared as
 * read-write ("+&r") operands; input-only operands must not be modified.
 */
1790 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1791         int beta, int8_t *tc0)
1792 {
1793     double ftmp[9];
1794     mips_reg addr[1];
1795     DECLARE_VAR_LOW32;
1796     DECLARE_VAR_ALL64;
1797     DECLARE_VAR_ADDRT;
1798
1799     __asm__ volatile (
1800         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1801         "addi       %[beta],    %[beta],        -0x01                   \n\t"
        /* addr0 = pix - 2*stride */
1802         "or         %[addr0],   $0,             %[pix]                  \n\t"
1803         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1804         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
        /* ftmp1 = row -2, ftmp2 = row -1, ftmp3 = row 0, ftmp4 = row +1 */
1805         MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1806         MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1807         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1808         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1809
        /* Broadcast alpha - 1 into ftmp5 and beta - 1 into ftmp6 (bytes) */
1810         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1811         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1812         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1813         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1814         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1815         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1816         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
        /* ftmp8 = 0xFF per lane where |row0 - row-1| <= alpha-1,
         * |row-2 - row-1| <= beta-1 and |row+1 - row0| <= beta-1 */
1817         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1818         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1819         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1820         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1821         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1822         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1823         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1824         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1825         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1826         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1827         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1828         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1829         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1830         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1831         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1832         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
        /* Duplicate each tc0 byte across two lanes and gate it with the mask;
         * ftmp8 becomes the per-lane clip limit (0 where filtering is off) */
1833         MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1834         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1835         "and        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
        /* Derive the correction from the four rows with pavgb chains, then
         * split it around the ff_pb_A1 bias into two unsigned halves:
         * ftmp7 (bias - x) and ftmp4 (x - bias), each clipped by ftmp8 */
1836         "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1837         "xor        %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1838         "xor        %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1839         "and        %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1840         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1841         "xor        %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1842         "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1843         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1844         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1845         "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1846         "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1847         "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1848         "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1849         "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
        /* row -1 = row -1 - ftmp7 + ftmp4; row 0 = row 0 - ftmp4 + ftmp7
         * (all steps saturating) */
1850         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1851         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1852         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1853         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1854
        /* Write back rows -1 and 0 */
1855         MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1856         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1857         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1858           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1859           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1860           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1861           [ftmp8]"=&f"(ftmp[8]),
1862           RESTRICT_ASM_LOW32
1863           RESTRICT_ASM_ALL64
1864           RESTRICT_ASM_ADDRT
1865           [addr0]"=&r"(addr[0]),            [alpha]"+&r"(alpha),
1866           [beta]"+&r"(beta)
1867         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1868           [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1),
1869           [ff_pb_3]"f"(ff_pb_3),            [ff_pb_A1]"f"(ff_pb_A1)
1870         : "memory"
1871     );
1872 }
1873
/*
 * Filter eight byte-wide columns across a horizontal edge without per-lane
 * clip limits, rewriting only the two rows adjacent to it (pix - stride and
 * pix).  Rows pix - 2*stride and pix + stride are read as context.
 *
 * pix    - address of the first row below the edge
 * stride - distance in bytes between consecutive rows
 * alpha  - edge threshold; the asm compares against alpha - 1 (inclusive)
 * beta   - neighbour threshold; the asm compares against beta - 1 (inclusive)
 *
 * alpha and beta are decremented inside the asm, so they are declared as
 * read-write ("+&r") operands; input-only operands must not be modified.
 */
1874 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1875         int beta)
1876 {
1877     double ftmp[9];
1878     mips_reg addr[1];
1879     DECLARE_VAR_ALL64;
1880     DECLARE_VAR_ADDRT;
1881
1882     __asm__ volatile (
1883         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1884         "addi       %[beta],    %[beta],        -0x01                   \n\t"
        /* addr0 = pix - 2*stride */
1885         "or         %[addr0],   $0,             %[pix]                  \n\t"
1886         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1887         PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
        /* ftmp1 = row -2, ftmp2 = row -1, ftmp3 = row 0, ftmp4 = row +1 */
1888         MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1889         MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1890         MMI_LDC1(%[ftmp3], %[pix], 0x00)
1891         MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1892
        /* Broadcast alpha - 1 into ftmp5 and beta - 1 into ftmp6 (bytes) */
1893         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1894         "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1895         "mtc1       %[beta],    %[ftmp6]                                \n\t"
1896         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1897         "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1898         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1899         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
        /* ftmp8 = 0xFF per lane where |row0 - row-1| <= alpha-1,
         * |row-2 - row-1| <= beta-1 and |row+1 - row0| <= beta-1 */
1900         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1901         "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1902         "or         %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1903         "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1904         "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1905         "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1906         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1907         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1908         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1909         "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1910         "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1911         "or         %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1912         "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1913         "or         %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1914         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1915         "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
        /* Keep the unfiltered rows -1 and 0 for the masked select below */
1916         "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
1917         "mov.d      %[ftmp7],   %[ftmp3]                                \n\t"
        /* New row -1: pavgb(row-1, row+1) with its round-up removed via the
         * xor/ff_pb_1 term, then pavgb with row -2 */
1918         "xor        %[ftmp5],   %[ftmp2],       %[ftmp4]                \n\t"
1919         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1920         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1921         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1922         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
        /* New row 0: same construction from rows 0, -2 and +1 */
1923         "xor        %[ftmp5],   %[ftmp3],       %[ftmp1]                \n\t"
1924         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1925         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1926         "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
1927         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
        /* Select: result = original + ((new - original) & mask) */
1928         "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1929         "psubb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1930         "and        %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
1931         "and        %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
1932         "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1933         "paddb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1934
        /* Write back rows -1 and 0 */
1935         MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1936         MMI_SDC1(%[ftmp3], %[pix], 0x00)
1937         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1938           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1939           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1940           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1941           [ftmp8]"=&f"(ftmp[8]),
1942           RESTRICT_ASM_ALL64
1943           RESTRICT_ASM_ADDRT
1944           [addr0]"=&r"(addr[0]),            [alpha]"+&r"(alpha),
1945           [beta]"+&r"(beta)
1946         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1947           [ff_pb_1]"f"(ff_pb_1)
1948         : "memory"
1949     );
1950 }
1951
1952 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1953         int8_t *tc0)
1954 {
1955     double ftmp[11];
1956     mips_reg addr[6];
1957     DECLARE_VAR_LOW32;
1958
1959     __asm__ volatile (
1960         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1961         "addi       %[beta],    %[beta],        -0x01                   \n\t"
1962         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1963         PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
1964         PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
1965         PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
1966         "or         %[addr5],   $0,             %[pix]                  \n\t"
1967         PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
1968         MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1969         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
1970         MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1971         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
1972         MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1973         MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1974         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
1975         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1976         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
1977         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
1978         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
1979         MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1980         PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
1981         MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1982         PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
1983         MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1984         PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
1985         MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1986         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1987         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1988         "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
1989         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1990         "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1991         "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
1992         "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
1993         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
1994         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1995         "mov.d      %[ftmp9],   %[ftmp0]                                \n\t"
1996         "mov.d      %[ftmp10],  %[ftmp3]                                \n\t"
1997
1998         "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1999         "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2000         "mtc1       %[beta],    %[ftmp5]                                \n\t"
2001         "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2002         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2003         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2004         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2005         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2006         "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2007         "or         %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2008         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2009         "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2010         "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2011         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2012         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2013         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2014         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2015         "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2016         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2017         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2018         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2019         "xor        %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2020         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2021         MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2022         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2023         "and        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2024         "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2025         "xor        %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
2026         "xor        %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2027         "and        %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
2028         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
2029         "xor        %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
2030         "pavgb      %[ftmp3],   %[ftmp3],       %[ff_pb_3]              \n\t"
2031         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
2032         "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2033         "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2034         "psubusb    %[ftmp6],   %[ff_pb_A1],    %[ftmp3]                \n\t"
2035         "psubusb    %[ftmp3],   %[ftmp3],       %[ff_pb_A1]             \n\t"
2036         "pminub     %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
2037         "pminub     %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
2038         "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
2039         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2040         "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2041         "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2042
2043         "punpckhwd  %[ftmp4],   %[ftmp9],       %[ftmp9]                \n\t"
2044         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2045         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2046         "punpcklbh  %[ftmp0],   %[ftmp9],       %[ftmp1]                \n\t"
2047         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
2048         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2049         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2050         MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2051         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2052         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2053         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2054         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2055         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2056         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2057         "punpckhwd  %[ftmp3],   %[ftmp10],      %[ftmp10]               \n\t"
2058         MMI_USWC1(%[ftmp0], %[pix], 0x00)
2059         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2060         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2061         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2062         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2063         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2064         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2065         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2066         PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2067         PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2068         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2069         MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2070         PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2071         "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2072         MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2073         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2074           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2075           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2076           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2077           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2078           [ftmp10]"=&f"(ftmp[10]),
2079           RESTRICT_ASM_LOW32
2080           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2081           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2082           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2083           [pix]"+&r"(pix)
2084         : [alpha]"r"(alpha),                [beta]"r"(beta),
2085           [stride]"r"((mips_reg)stride),    [tc0]"r"(tc0),
2086           [ff_pb_1]"f"(ff_pb_1),            [ff_pb_3]"f"(ff_pb_3),
2087           [ff_pb_A1]"f"(ff_pb_A1)
2088         : "memory"
2089     );
2090 }
2091
2092 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2093         int beta)
2094 {
2095     double ftmp[11];
2096     mips_reg addr[6];
2097     DECLARE_VAR_LOW32;
2098
2099     __asm__ volatile (
2100         "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
2101         "addi       %[beta],    %[beta],        -0x01                   \n\t"
2102         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2103         PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
2104         PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
2105         PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
2106         "or         %[addr5],   $0,             %[pix]                  \n\t"
2107         PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
2108         MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2109         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2110         MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2111         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2112         MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2113         MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2114         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2115         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2116         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2117         "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
2118         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2119         MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2120         PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
2121         MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2122         PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
2123         MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2124         PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
2125         MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2126         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2127         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
2128         "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
2129         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2130         "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
2131         "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
2132         "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
2133         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2134         "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2135
2136         "xor        %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
2137         "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2138         "mtc1       %[beta],    %[ftmp5]                                \n\t"
2139         "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2140         "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2141         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2142         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2143         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2144         "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2145         "or         %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2146         "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2147         "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2148         "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2149         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2150         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2151         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2152         "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2153         "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2154         "or         %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2155         "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2156         "or         %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2157         "xor        %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2158         "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2159         "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
2160         "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
2161         "xor        %[ftmp4],   %[ftmp1],       %[ftmp3]                \n\t"
2162         "and        %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2163         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2164         "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
2165         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
2166         "xor        %[ftmp4],   %[ftmp2],       %[ftmp0]                \n\t"
2167         "and        %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2168         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
2169         "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
2170         "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2171         "psubb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2172         "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2173         "and        %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
2174         "and        %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
2175         "paddb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2176         "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2177
2178         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2179         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2180         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2181         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2182         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2183         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2184         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2185         MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2186         PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2187         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2188         PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2189         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2190         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2191         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2192         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2193         MMI_USWC1(%[ftmp0], %[pix], 0x00)
2194         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2195         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2196         PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2197         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2198         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2199         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2200         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2201         PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2202         PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2203         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2204         PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2205         MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2206         "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2207         MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2208         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2209           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2210           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2211           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2212           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2213           [ftmp10]"=&f"(ftmp[10]),
2214           RESTRICT_ASM_LOW32
2215           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2216           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2217           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2218           [pix]"+&r"(pix)
2219         : [alpha]"r"(alpha),                [beta]"r"(beta),
2220           [stride]"r"((mips_reg)stride),    [ff_pb_1]"f"(ff_pb_1)
2221         : "memory"
2222     );
2223 }
2224
/*
 * Run ff_deblock_v8_luma_8_mmi() on two halves of an edge: byte offset 0 with
 * tc0[0..1], then byte offset 8 with tc0[2..3].
 *
 * A half is skipped when both of its tc0 entries are negative, which is
 * exactly when the bitwise AND of the pair has its sign bit set.
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
        int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2233
/*
 * Intra variant of the vertical luma deblock entry point: calls
 * deblock_v8_luma_intra_8_mmi() at byte offset 0 and again at byte offset 8,
 * passing stride, alpha and beta through unchanged.
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
        int beta)
{
    int offset;

    for (offset = 0; offset < 16; offset += 8)
        deblock_v8_luma_intra_8_mmi(pix + offset, stride, alpha, beta);
}
2240
2241 void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2242         int8_t *tc0)
2243 {
2244     DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2245     double ftmp[9];
2246     mips_reg addr[8];
2247     DECLARE_VAR_LOW32;
2248     DECLARE_VAR_ALL64;
2249
2250     __asm__ volatile (
2251         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2252         PTR_ADDI   "%[addr1],   %[pix],         -0x4                    \n\t"
2253         PTR_ADDU   "%[addr2],   %[stride],      %[addr0]                \n\t"
2254         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2255         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2256         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2257         MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2258         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2259         MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2260         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2261         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2262         MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2263         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2264         MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2265         PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2266         MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2267         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2268         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2269         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2270         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2271         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2272         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2273         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2274         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2275         MMI_SDC1(%[ftmp1], %[stack], 0x10)
2276         MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2277         PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2278         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2279         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2280         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2281         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2282         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2283         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2284         MMI_LDC1(%[ftmp8], %[stack], 0x10)
2285         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2286         MMI_SDC1(%[ftmp0], %[stack], 0x00)
2287         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2288         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2289         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2290         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2291         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2292         "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2293         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2294         "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2295         "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2296         MMI_SDC1(%[ftmp1], %[stack], 0x10)
2297         MMI_SDC1(%[ftmp3], %[stack], 0x20)
2298         MMI_SDC1(%[ftmp7], %[stack], 0x30)
2299         MMI_SDC1(%[ftmp5], %[stack], 0x40)
2300         MMI_SDC1(%[ftmp6], %[stack], 0x50)
2301         PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2302         PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2303         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2304         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2305         MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2306         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2307         MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2308         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2309         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2310         MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2311         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2312         MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2313         PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2314         MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2315         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2316         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2317         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2318         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2319         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2320         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2321         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2322         MMI_SDC1(%[ftmp1], %[stack], 0x18)
2323         MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2324         "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2325         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2326         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2327         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2328         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2329         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2330         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2331         MMI_LDC1(%[ftmp8], %[stack], 0x18)
2332         MMI_SDC1(%[ftmp0], %[stack], 0x08)
2333         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2334         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2335         "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2336         "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2337         "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2338         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2339         "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2340         "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2341         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2342         MMI_SDC1(%[ftmp1], %[stack], 0x18)
2343         MMI_SDC1(%[ftmp3], %[stack], 0x28)
2344         MMI_SDC1(%[ftmp7], %[stack], 0x38)
2345         MMI_SDC1(%[ftmp5], %[stack], 0x48)
2346         MMI_SDC1(%[ftmp6], %[stack], 0x58)
2347         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2348           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2349           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2350           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2351           [ftmp8]"=&f"(ftmp[8]),
2352           RESTRICT_ASM_ALL64
2353           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2354           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2355           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2356           [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2357         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2358           [stack]"r"(stack)
2359         : "memory"
2360     );
2361
2362     ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2363
2364     __asm__ volatile (
2365         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2366         PTR_ADDI   "%[addr1],   %[pix],         -0x02                   \n\t"
2367         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2368         PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2369         PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2370         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2371         MMI_LDC1(%[ftmp0], %[stack], 0x10)
2372         MMI_LDC1(%[ftmp1], %[stack], 0x20)
2373         MMI_LDC1(%[ftmp2], %[stack], 0x30)
2374         MMI_LDC1(%[ftmp3], %[stack], 0x40)
2375         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2376         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2377         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2378         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2379         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2380         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2381         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2382         MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2383         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2384         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2385         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2386         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2387         MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2388         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2389         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2390         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2391         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2392         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2393         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2394         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2395         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2396         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2397         PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2398         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2399         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2400         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2401         MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2402         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2403         "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2404         PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2405         MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2406         PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2407         MMI_LDC1(%[ftmp0], %[stack], 0x18)
2408         MMI_LDC1(%[ftmp1], %[stack], 0x28)
2409         MMI_LDC1(%[ftmp2], %[stack], 0x38)
2410         MMI_LDC1(%[ftmp3], %[stack], 0x48)
2411         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2412         "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2413         PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2414         "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2415         "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2416         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2417         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2418         PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2419         "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2420         "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2421         MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2422         "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2423         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2424         MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2425         MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2426         "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2427         "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2428         MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2429         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2430         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2431         PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2432         "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2433         "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2434         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2435         PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2436         "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2437         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2438         MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2439         MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2440         PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2441         "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2442         MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2443         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2444           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2445           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2446           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2447           [ftmp8]"=&f"(ftmp[8]),
2448           RESTRICT_ASM_LOW32
2449           RESTRICT_ASM_ALL64
2450           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2451           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2452           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2453           [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2454         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2455           [stack]"r"(stack)
2456         : "memory"
2457     );
2458 }
2459
2460 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2461         int beta)
2462 {
2463     DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
2464     DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2465     double ftmp[9];
2466     mips_reg addr[7];
2467     DECLARE_VAR_ALL64;
2468
2469     __asm__ volatile (
2470         PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2471         PTR_ADDI   "%[addr1],   %[pix],         -0x04                   \n\t"
2472         PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2473         PTR_ADDU   "%[addr3],   %[addr0],       %[addr0]                \n\t"
2474         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2475         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2476         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2477         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2478         MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2479         MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2480         PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2481         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2482         PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2483         MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2484         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2485         MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2486         MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2487         PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
2488         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2489         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2490         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2491         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2492         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2493         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2494         MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2495         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2496         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2497         MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2498         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2499         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2500         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2501         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2502         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2503         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2504         MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2505         MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2506         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2507         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2508         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2509         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2510         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2511         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2512         MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2513         MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2514         MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2515         MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2516         MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2517         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2518         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2519         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2520         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2521         PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
2522         MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2523         MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2524         MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2525         MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
2526         PTR_ADDU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2527         PTR_ADDU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
2528         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2529         MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2530         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2531         MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2532         MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2533         PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2534         MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2535         PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2536         MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2537         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2538         MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2539         MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2540         PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
2541         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2542         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2543         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2544         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2545         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2546         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2547         MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2548         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2549         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2550         MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2551         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2552         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2553         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2554         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2555         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2556         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2557         MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2558         MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2559         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2560         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2561         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2562         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2563         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2564         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2565         MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2566         MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2567         MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2568         MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2569         MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2570         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2571         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2572         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2573         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2574         MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2575         MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2576         MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2577         MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
2578         PTR_S      "%[addr1],   0x00(%[pdat])                           \n\t"
2579         PTR_S      "%[addr2],   0x08(%[pdat])                           \n\t"
2580         PTR_S      "%[addr0],   0x10(%[pdat])                           \n\t"
2581         PTR_S      "%[addr3],   0x18(%[pdat])                           \n\t"
2582         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2583           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2584           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2585           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2586           [ftmp8]"=&f"(ftmp[8]),
2587           RESTRICT_ASM_ALL64
2588           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2589           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2590           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2591           [addr6]"=&r"(addr[6])
2592         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2593           [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2594         : "memory"
2595     );
2596
2597     ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2598
2599     __asm__ volatile (
2600         PTR_L      "%[addr1],   0x00(%[pdat])                           \n\t"
2601         PTR_L      "%[addr2],   0x08(%[pdat])                           \n\t"
2602         PTR_L      "%[addr0],   0x10(%[pdat])                           \n\t"
2603         PTR_L      "%[addr3],   0x18(%[pdat])                           \n\t"
2604         PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2605         MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2606         MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2607         MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2608         MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2609         MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2610         MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2611         MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2612         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2613         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2614         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2615         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2616         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2617         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2618         MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2619         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2620         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2621         MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2622         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2623         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2624         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2625         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2626         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2627         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2628         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2629         MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2630         MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2631         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2632         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2633         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2634         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2635         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2636         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2637         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2638         MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2639         PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2640         MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2641         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2642         MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2643         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2644         MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2645         MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2646         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2647         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2648         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2649         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2650         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2651         MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2652         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2653         MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2654         PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2655         MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2656         PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
2657         MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2658         PTR_SUBU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2659         PTR_SUBU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
2660         MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2661         MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2662         MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2663         MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2664         MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2665         MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2666         MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2667         "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2668         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2669         "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2670         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2671         "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2672         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2673         MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2674         "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2675         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2676         MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2677         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2678         "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2679         "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2680         "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2681         "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2682         "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2683         "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2684         MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2685         MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2686         "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2687         "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2688         "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2689         "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2690         "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2691         "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2692         PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2693         MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2694         PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2695         MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2696         PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2697         MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2698         PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2699         MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2700         MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2701         PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2702         "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2703         "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2704         "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2705         "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2706         MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2707         PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2708         MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2709         PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2710         MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2711         MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2712         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2713           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2714           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2715           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2716           [ftmp8]"=&f"(ftmp[8]),
2717           RESTRICT_ASM_ALL64
2718           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2719           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2720           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2721           [addr6]"=&r"(addr[6])
2722         : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2723           [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2724         : "memory"
2725     );
2726 }