]> git.sesse.net Git - ffmpeg/blob - libavcodec/mips/h264chroma_mmi.c
Merge commit 'f8060865f3e1a16c62e0d337ef0979b6ee4ba457'
[ffmpeg] / libavcodec / mips / h264chroma_mmi.c
1 /*
2  * Loongson SIMD optimized h264chroma
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 #include "h264chroma_mips.h"
26 #include "constants.h"
27 #include "libavutil/mips/mmiutils.h"
28
/**
 * Write an 8-byte-wide block of h rows to dst, interpolated from src using
 * the fractional offsets x and y (Loongson MMI inline-asm implementation).
 *
 * Four separate asm loops are selected by which of x / y are non-zero:
 *  - x == 0, y == 0: each source byte is scaled by 64, rounded and scaled
 *    back, which leaves it unchanged; four rows are produced per iteration.
 *  - x != 0, y != 0: four-tap blend
 *        (A*s[0] + B*s[1] + C*s[stride] + D*s[stride + 1] + 32) >> 6
 *    with D = x*y, B = 8*x - D, C = 8*y - D, A = 64 - D - B - C;
 *    two rows are produced per iteration.
 *  - x != 0, y == 0: two-tap blend (A*s[0] + E*s[1] + 32) >> 6 with
 *    E = 8*x, A = 64 - E; one row per iteration.
 *  - x == 0, y != 0: two-tap blend (A*s[0] + E*s[stride] + 32) >> 6 with
 *    E = 8*y, A = 64 - E; one row per iteration.
 *
 * @param dst    destination; each row is written with MMI_SDC1 at offset 0
 *               (presumably an aligned 64-bit store -- confirm in mmiutils.h)
 * @param src    source; rows are read with MMI_ULDC1 at offsets 0 and, when
 *               x != 0, also 1. When y != 0 the row following the last
 *               output row is read as well.
 * @param stride byte step added to both src and dst to move to the next row
 * @param h      number of rows. Every loop ends with "bnez h" after
 *               subtracting its per-iteration row count, so h has to be a
 *               positive multiple of 4 (x == y == 0), of 2 (x && y), or
 *               simply positive (remaining cases) for the loop to stop.
 * @param x      horizontal fraction; the weights sum to 64, so this
 *               presumably ranges over 0..7 -- not checked here
 * @param y      vertical fraction; same assumption as x
 *
 * NOTE(review): ff_pw_32 is defined elsewhere; from its use it is assumed to
 * hold the rounding constant 32 in every 16-bit lane -- confirm.
 *
 * NOTE(review): the weights A..E are passed as input-only "f" operands, yet
 * the asm overwrites them in place with pshufh. GCC's extended-asm rules say
 * input-only operands must not be modified; consider whether these should be
 * read-write operands or be copied into scratch registers first.
 */
29 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
30         int h, int x, int y)
31 {
32     int A = 64, B, C, D, E;
33     double ftmp[10];
34     uint64_t tmp[1];
35
36     if (!(x || y)) {
37         /* x=0, y=0, A=64 */
        /* The shift-left-by-6 / add / shift-right-by-6 sequence below stands
         * in for multiplying by A = 64; the variable A itself is not an
         * operand of this asm statement. */
38         __asm__ volatile (
            /* ftmp0 = 0 (second operand of the byte unpacks),
             * ftmp4 = shift count 6 */
39             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
40             "dli        %[tmp0],    0x06                               \n\t"
41             "mtc1       %[tmp0],    %[ftmp4]                           \n\t"
42
            /* Loop body: load four consecutive source rows into
             * ftmp1/ftmp5/ftmp6/ftmp7, then process and store them in turn. */
43             "1:                                                        \n\t"
44             MMI_ULDC1(%[ftmp1], %[src], 0x00)
45             "addi       %[h],       %[h],           -0x04              \n\t"
46             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
47             MMI_ULDC1(%[ftmp5], %[src], 0x00)
48             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
49             MMI_ULDC1(%[ftmp6], %[src], 0x00)
50             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
51             MMI_ULDC1(%[ftmp7], %[src], 0x00)
52
            /* Row 0: unpack low/high bytes against zero, (v << 6) + 32 >> 6,
             * pack back to bytes and store. */
53             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]           \n\t"
54             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
55             "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
56             "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
57             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
58             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
59             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
60             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
61             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
62             MMI_SDC1(%[ftmp1], %[dst], 0x00)
63
            /* Row 1 (from ftmp5): same sequence, dst advanced before the store. */
64             "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]           \n\t"
65             "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]           \n\t"
66             "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
67             "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
68             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
69             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
70             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
71             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
72             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
73             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
74             MMI_SDC1(%[ftmp1], %[dst], 0x00)
75
            /* Row 2 (from ftmp6). */
76             "punpcklbh  %[ftmp2],   %[ftmp6],       %[ftmp0]           \n\t"
77             "punpckhbh  %[ftmp3],   %[ftmp6],       %[ftmp0]           \n\t"
78             "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
79             "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
80             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
81             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
82             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
83             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
84             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
85             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
86             MMI_SDC1(%[ftmp1], %[dst], 0x00)
87
            /* Row 3 (from ftmp7). */
88             "punpcklbh  %[ftmp2],   %[ftmp7],       %[ftmp0]           \n\t"
89             "punpckhbh  %[ftmp3],   %[ftmp7],       %[ftmp0]           \n\t"
90             "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
91             "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
92             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
93             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
94             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
95             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
96             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
97             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
98             MMI_SDC1(%[ftmp1], %[dst], 0x00)
99
            /* Step both pointers past the fourth row; repeat until h == 0. */
100             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
101             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
102             "bnez       %[h],       1b                                 \n\t"
103             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
104               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
105               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
106               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
107               [tmp0]"=&r"(tmp[0]),
108               [dst]"+&r"(dst),              [src]"+&r"(src),
109               [h]"+&r"(h)
110             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32)
111             : "memory"
112         );
113     } else {
114         if (x && y) {
115             /* x!=0, y!=0 */
            /* Four-tap weights; they always sum to 64. */
116             D = x * y;
117             B = (x << 3) - D;
118             C = (y << 3) - D;
119             A = 64 - D - B - C;
120
121             __asm__ volatile (
                /* ftmp0 = 0, ftmp9 = shift count 6. The pshufh with a zero
                 * selector is presumably what replicates each weight into
                 * all four 16-bit lanes (confirm against the MMI manual). */
122                 "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
123                 "dli        %[tmp0],    0x06                               \n\t"
124                 "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
125                 "pshufh     %[B],       %[B],           %[ftmp0]           \n\t"
126                 "mtc1       %[tmp0],    %[ftmp9]                           \n\t"
127                 "pshufh     %[C],       %[C],           %[ftmp0]           \n\t"
128                 "pshufh     %[D],       %[D],           %[ftmp0]           \n\t"
129
                /* First output row of the pair: ftmp1/ftmp2 = current row at
                 * offsets 0/1, ftmp3/ftmp4 = next row at offsets 0/1. */
130                 "1:                                                        \n\t"
131                 MMI_ULDC1(%[ftmp1], %[src], 0x00)
132                 MMI_ULDC1(%[ftmp2], %[src], 0x01)
133                 PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
134                 MMI_ULDC1(%[ftmp3], %[src], 0x00)
135                 MMI_ULDC1(%[ftmp4], %[src], 0x01)
136                 "addi       %[h],       %[h],           -0x02              \n\t"
137
                /* Current row: A * s[0] + B * s[1], low half in ftmp1 and
                 * high half in ftmp2. */
138                 "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
139                 "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
140                 "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
141                 "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
142                 "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
143                 "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
144                 "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
145                 "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
146                 "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
147                 "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
148
                /* Next row: C * s[stride] + D * s[stride + 1], low half in
                 * ftmp3 and high half in ftmp4. */
149                 "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
150                 "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
151                 "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
152                 "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
153                 "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
154                 "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
155                 "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
156                 "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
157                 "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
158                 "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
159
                /* Sum both rows, add the rounding term, shift right by 6,
                 * pack to bytes and store. */
160                 "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
161                 "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
162                 "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
163                 "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
164                 "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
165                 "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
166                 "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
167                 MMI_SDC1(%[ftmp1], %[dst], 0x00)
168                 PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
169
                /* Second output row of the pair: src already points at the
                 * row that was "next" above, so that row is loaded again as
                 * the current row and the one after it as the next row. */
170                 MMI_ULDC1(%[ftmp1], %[src], 0x00)
171                 MMI_ULDC1(%[ftmp2], %[src], 0x01)
172                 PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
173                 MMI_ULDC1(%[ftmp3], %[src], 0x00)
174                 MMI_ULDC1(%[ftmp4], %[src], 0x01)
175
176                 "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
177                 "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
178                 "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
179                 "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
180                 "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
181                 "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
182                 "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
183                 "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
184                 "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
185                 "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
186
187                 "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
188                 "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
189                 "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
190                 "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
191                 "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
192                 "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
193                 "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
194                 "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
195                 "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
196                 "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
197
198                 "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
199                 "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
200                 "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
201                 "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
202                 "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
203                 "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
204                 "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
205                 MMI_SDC1(%[ftmp1], %[dst], 0x00)
206                 PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
207
208                 "bnez       %[h],       1b                                 \n\t"
209                 : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
210                   [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
211                   [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
212                   [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
213                   [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
214                   [tmp0]"=&r"(tmp[0]),
215                   [dst]"+&r"(dst),              [src]"+&r"(src),
216                   [h]"+&r"(h)
217                 : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
218                   [A]"f"(A),                    [B]"f"(B),
219                   [C]"f"(C),                    [D]"f"(D)
220                 : "memory"
221             );
222         } else {
223             if (x) {
224                 /* x!=0, y==0 */
                /* Horizontal-only blend: weight E on s[1], A = 64 - E on s[0]. */
225                 E = x << 3;
226                 A = 64 - E;
227
228                 __asm__ volatile (
                    /* ftmp0 = 0, ftmp7 = shift count 6; A and E are
                     * prepared with pshufh as in the four-tap path. */
229                     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
230                     "dli        %[tmp0],    0x06                               \n\t"
231                     "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
232                     "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
233                     "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
234
                    /* One row per iteration: load the row at byte offsets
                     * 0 and 1, then step src to the next row. */
235                     "1:                                                        \n\t"
236                     MMI_ULDC1(%[ftmp1], %[src], 0x00)
237                     MMI_ULDC1(%[ftmp2], %[src], 0x01)
238                     "addi       %[h],       %[h],           -0x01              \n\t"
239                     PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
240
241                     "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
242                     "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
243                     "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
244                     "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
245                     "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
246                     "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
247                     "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
248                     "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
249                     "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
250                     "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
251
                    /* Round, shift right by 6, pack to bytes, store. */
252                     "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
253                     "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
254                     "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
255                     "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
256                     "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
257                     MMI_SDC1(%[ftmp1], %[dst], 0x00)
258                     PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
259                     "bnez       %[h],       1b                                 \n\t"
260                     : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
261                       [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
262                       [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
263                       [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
264                       [tmp0]"=&r"(tmp[0]),
265                       [dst]"+&r"(dst),              [src]"+&r"(src),
266                       [h]"+&r"(h)
267                     : [stride]"r"((mips_reg)stride),
268                       [ff_pw_32]"f"(ff_pw_32),
269                       [A]"f"(A),                    [E]"f"(E)
270                     : "memory"
271                 );
272             } else {
273                 /* x==0, y!=0 */
                /* Vertical-only blend: weight E on s[stride], A = 64 - E on s[0]. */
274                 E = y << 3;
275                 A = 64 - E;
276
277                 __asm__ volatile (
                    /* ftmp0 = 0, ftmp7 = shift count 6; A and E are
                     * prepared with pshufh as in the four-tap path. */
278                     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
279                     "dli        %[tmp0],    0x06                               \n\t"
280                     "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
281                     "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
282                     "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
283
                    /* One row per iteration: load the current row, advance
                     * src, load the row below. src is left on that lower
                     * row, so it is loaded again as the current row next
                     * time round. */
284                     "1:                                                        \n\t"
285                     MMI_ULDC1(%[ftmp1], %[src], 0x00)
286                     PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
287                     MMI_ULDC1(%[ftmp2], %[src], 0x00)
288                     "addi       %[h],       %[h],           -0x01              \n\t"
289
290                     "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
291                     "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
292                     "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
293                     "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
294                     "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
295                     "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
296                     "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
297                     "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
298                     "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
299                     "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
300
                    /* Round, shift right by 6, pack to bytes, store. */
301                     "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
302                     "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
303                     "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
304                     "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
305                     "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
306                     MMI_SDC1(%[ftmp1], %[dst], 0x00)
307
308                     PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
309                     "bnez       %[h],       1b                                 \n\t"
310                     : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
311                       [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
312                       [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
313                       [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
314                       [tmp0]"=&r"(tmp[0]),
315                       [dst]"+&r"(dst),              [src]"+&r"(src),
316                       [h]"+&r"(h)
317                     : [stride]"r"((mips_reg)stride),
318                       [ff_pw_32]"f"(ff_pw_32),
319                       [A]"f"(A),                    [E]"f"(E)
320                     : "memory"
321                 );
322             }
323         }
324     }
325 }
326
327 void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
328         int h, int x, int y)
329 {
330     int A = 64, B, C, D, E;
331     double ftmp[10];
332     uint64_t tmp[1];
333
334     if(!(x || y)){
335         /* x=0, y=0, A=64 */
336         __asm__ volatile (
337             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
338             "dli        %[tmp0],    0x06                                \n\t"
339             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
340             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
341
342             "1:                                                         \n\t"
343             MMI_ULDC1(%[ftmp1], %[src], 0x00)
344             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
345             MMI_ULDC1(%[ftmp5], %[src], 0x00)
346             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
347
348             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
349             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
350             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
351             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
352             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
353             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
354             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
355             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
356             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
357             MMI_LDC1(%[ftmp2], %[dst], 0x00)
358             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
359             MMI_SDC1(%[ftmp1], %[dst], 0x00)
360             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
361
362             "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]            \n\t"
363             "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]            \n\t"
364             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
365             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
366             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
367             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
368             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
369             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
370             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
371             MMI_LDC1(%[ftmp2], %[dst], 0x00)
372             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
373             MMI_SDC1(%[ftmp1], %[dst], 0x00)
374             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
375
376             "addi       %[h],       %[h],           -0x02               \n\t"
377             "bnez       %[h],       1b                                  \n\t"
378             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
379               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
380               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
381               [tmp0]"=&r"(tmp[0]),
382               [dst]"+&r"(dst),              [src]"+&r"(src),
383               [h]"+&r"(h)
384             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
385               [A]"f"(A)
386             : "memory"
387         );
388     } else {
389         if(x && y) {
390             /* x!=0, y!=0 */
391             D = x * y;
392             B = (x << 3) - D;
393             C = (y << 3) - D;
394             A = 64 - D - B - C;
395             __asm__ volatile (
396                 "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
397                 "dli        %[tmp0],    0x06                           \n\t"
398                 "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
399                 "pshufh     %[B],       %[B],           %[ftmp0]       \n\t"
400                 "mtc1       %[tmp0],    %[ftmp9]                       \n\t"
401                 "pshufh     %[C],       %[C],           %[ftmp0]       \n\t"
402                 "pshufh     %[D],       %[D],           %[ftmp0]       \n\t"
403
404                 "1:                                                    \n\t"
405                 MMI_ULDC1(%[ftmp1], %[src], 0x00)
406                 MMI_ULDC1(%[ftmp2], %[src], 0x01)
407                 PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
408                 MMI_ULDC1(%[ftmp3], %[src], 0x00)
409                 MMI_ULDC1(%[ftmp4], %[src], 0x01)
410                 "addi       %[h],       %[h],           -0x01          \n\t"
411
412                 "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]       \n\t"
413                 "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]       \n\t"
414                 "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]       \n\t"
415                 "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]       \n\t"
416                 "pmullh     %[ftmp5],   %[ftmp5],       %[A]           \n\t"
417                 "pmullh     %[ftmp7],   %[ftmp7],       %[B]           \n\t"
418                 "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]       \n\t"
419                 "pmullh     %[ftmp6],   %[ftmp6],       %[A]           \n\t"
420                 "pmullh     %[ftmp8],   %[ftmp8],       %[B]           \n\t"
421                 "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]       \n\t"
422
423                 "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]       \n\t"
424                 "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]       \n\t"
425                 "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]       \n\t"
426                 "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]       \n\t"
427                 "pmullh     %[ftmp5],   %[ftmp5],       %[C]           \n\t"
428                 "pmullh     %[ftmp7],   %[ftmp7],       %[D]           \n\t"
429                 "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]       \n\t"
430                 "pmullh     %[ftmp6],   %[ftmp6],       %[C]           \n\t"
431                 "pmullh     %[ftmp8],   %[ftmp8],       %[D]           \n\t"
432                 "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]       \n\t"
433
434                 "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]       \n\t"
435                 "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]       \n\t"
436                 "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
437                 "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
438                 "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]       \n\t"
439                 "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]       \n\t"
440                 "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
441                 MMI_LDC1(%[ftmp2], %[dst], 0x00)
442                 "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
443                 MMI_SDC1(%[ftmp1], %[dst], 0x00)
444                 PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
445                 "bnez       %[h],       1b                             \n\t"
446                 : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
447                   [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
448                   [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
449                   [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
450                   [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
451                   [tmp0]"=&r"(tmp[0]),
452                   [dst]"+&r"(dst),              [src]"+&r"(src),
453                   [h]"+&r"(h)
454                 : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
455                   [A]"f"(A),                    [B]"f"(B),
456                   [C]"f"(C),                    [D]"f"(D)
457                 : "memory"
458             );
459         } else {
460             if(x) {
461                 /* x!=0, y==0 */
462                 E = x << 3;
463                 A = 64 - E;
464                 __asm__ volatile (
465                     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
466                     "dli        %[tmp0],    0x06                           \n\t"
467                     "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
468                     "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
469                     "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
470
471                     "1:                                                    \n\t"
472                     MMI_ULDC1(%[ftmp1], %[src], 0x00)
473                     MMI_ULDC1(%[ftmp2], %[src], 0x01)
474                     PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
475                     "addi       %[h],       %[h],           -0x01          \n\t"
476
477                     "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
478                     "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
479                     "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
480                     "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
481                     "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
482                     "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
483                     "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
484                     "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
485                     "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
486                     "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
487
488                     "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
489                     "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
490                     "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
491                     "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
492                     "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
493                     MMI_LDC1(%[ftmp2], %[dst], 0x00)
494                     "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
495                     MMI_SDC1(%[ftmp1], %[dst], 0x00)
496                     PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
497                     "bnez       %[h],       1b                             \n\t"
498                     : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
499                       [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
500                       [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
501                       [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
502                       [tmp0]"=&r"(tmp[0]),
503                       [dst]"+&r"(dst),              [src]"+&r"(src),
504                       [h]"+&r"(h)
505                     : [stride]"r"((mips_reg)stride),
506                       [ff_pw_32]"f"(ff_pw_32),
507                       [A]"f"(A),                    [E]"f"(E)
508                     : "memory"
509                 );
510             } else {
511                 /* x==0, y!=0 */
512                 E = y << 3;
513                 A = 64 - E;
514                 __asm__ volatile (
515                     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
516                     "dli        %[tmp0],    0x06                           \n\t"
517                     "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
518                     "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
519                     "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
520
521                     "1:                                                    \n\t"
522                     MMI_ULDC1(%[ftmp1], %[src], 0x00)
523                     PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
524                     MMI_ULDC1(%[ftmp2], %[src], 0x00)
525                     "addi       %[h],       %[h],           -0x01          \n\t"
526
527                     "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
528                     "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
529                     "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
530                     "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
531                     "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
532                     "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
533                     "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
534                     "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
535                     "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
536                     "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
537
538                     "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
539                     "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
540                     "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
541                     "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
542                     "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
543                     MMI_LDC1(%[ftmp2], %[dst], 0x00)
544                     "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
545                     MMI_SDC1(%[ftmp1], %[dst], 0x00)
546                     PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
547                     "bnez       %[h],       1b                             \n\t"
548                     : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
549                       [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
550                       [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
551                       [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
552                       [tmp0]"=&r"(tmp[0]),
553                       [dst]"+&r"(dst),              [src]"+&r"(src),
554                       [h]"+&r"(h)
555                     : [stride]"r"((mips_reg)stride),
556                       [ff_pw_32]"f"(ff_pw_32),
557                       [A]"f"(A),                    [E]"f"(E)
558                     : "memory"
559                 );
560             }
561         }
562     }
563 }
564
565 void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
566         int h, int x, int y)
567 {
568     const int A = (8 - x) * (8 - y);
569     const int B = x * (8 - y);
570     const int C = (8 - x) *  y;
571     const int D = x *  y;
572     const int E = B + C;
573     double ftmp[8];
574     uint64_t tmp[1];
575     mips_reg addr[1];
576     DECLARE_VAR_LOW32;
577
578     if (D) {
579         __asm__ volatile (
580             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
581             "dli        %[tmp0],    0x06                                \n\t"
582             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
583             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
584             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
585             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
586             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
587
588             "1:                                                         \n\t"
589             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
590             MMI_ULWC1(%[ftmp1], %[src], 0x00)
591             MMI_ULWC1(%[ftmp2], %[src], 0x01)
592             MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
593             MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
594
595             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
596             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
597             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
598             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
599             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
600
601             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
602             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
603             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
604             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
605             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
606
607             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
608             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
609             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
610             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
611             "addi       %[h],       %[h],           -0x01               \n\t"
612             MMI_SWC1(%[ftmp1], %[dst], 0x00)
613             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
614             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
615             "bnez       %[h],       1b                                  \n\t"
616             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
617               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
618               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
619               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
620               [tmp0]"=&r"(tmp[0]),
621               RESTRICT_ASM_LOW32
622               [addr0]"=&r"(addr[0]),
623               [dst]"+&r"(dst),              [src]"+&r"(src),
624               [h]"+&r"(h)
625             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
626               [A]"f"(A),                    [B]"f"(B),
627               [C]"f"(C),                    [D]"f"(D)
628             : "memory"
629         );
630     } else if (E) {
631         const int step = C ? stride : 1;
632
633         __asm__ volatile (
634             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
635             "dli        %[tmp0],    0x06                                \n\t"
636             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
637             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
638             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
639
640             "1:                                                         \n\t"
641             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
642             MMI_ULWC1(%[ftmp1], %[src], 0x00)
643             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
644
645             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
646             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
647             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
648             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
649             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
650
651             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
652             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
653             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
654             "addi       %[h],       %[h],           -0x01               \n\t"
655             MMI_SWC1(%[ftmp1], %[dst], 0x00)
656             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
657             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
658             "bnez       %[h],       1b                                  \n\t"
659             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
660               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
661               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
662               [tmp0]"=&r"(tmp[0]),
663               RESTRICT_ASM_LOW32
664               [addr0]"=&r"(addr[0]),
665               [dst]"+&r"(dst),              [src]"+&r"(src),
666               [h]"+&r"(h)
667             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
668               [ff_pw_32]"f"(ff_pw_32),
669               [A]"f"(A),                    [E]"f"(E)
670             : "memory"
671         );
672     } else {
673         __asm__ volatile (
674             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
675             "dli        %[tmp0],    0x06                                \n\t"
676             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
677             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
678
679             "1:                                                         \n\t"
680             MMI_ULWC1(%[ftmp1], %[src], 0x00)
681             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
682             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
683             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
684             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
685             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
686             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
687             MMI_SWC1(%[ftmp1], %[dst], 0x00)
688             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
689
690             MMI_ULWC1(%[ftmp1], %[src], 0x00)
691             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
692             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
693             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
694             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
695             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
696             "addi       %[h],       %[h],           -0x02               \n\t"
697             MMI_SWC1(%[ftmp1], %[dst], 0x00)
698
699             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
700             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
701             "bnez       %[h],       1b                                  \n\t"
702             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
703               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
704               [tmp0]"=&r"(tmp[0]),
705               RESTRICT_ASM_LOW32
706               [dst]"+&r"(dst),              [src]"+&r"(src),
707               [h]"+&r"(h)
708             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
709               [A]"f"(A)
710             : "memory"
711         );
712     }
713 }
714
715 void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
716         int h, int x, int y)
717 {
718     const int A = (8 - x) *(8 - y);
719     const int B = x * (8 - y);
720     const int C = (8 - x) * y;
721     const int D = x * y;
722     const int E = B + C;
723     double ftmp[8];
724     uint64_t tmp[1];
725     mips_reg addr[1];
726     DECLARE_VAR_LOW32;
727
728     if (D) {
729         __asm__ volatile (
730             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
731             "dli        %[tmp0],    0x06                                \n\t"
732             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
733             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
734             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
735             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
736             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
737
738             "1:                                                         \n\t"
739             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
740             MMI_ULWC1(%[ftmp1], %[src], 0x00)
741             MMI_ULWC1(%[ftmp2], %[src], 0x01)
742             MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
743             MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
744
745             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
746             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
747             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
748             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
749             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
750
751             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
752             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
753             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
754             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
755             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
756
757             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
758             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
759             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
760             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
761             MMI_LWC1(%[ftmp2], %[dst], 0x00)
762             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
763             "addi       %[h],       %[h],           -0x01               \n\t"
764             MMI_SWC1(%[ftmp1], %[dst], 0x00)
765             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
766             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
767             "bnez       %[h],       1b                                  \n\t"
768             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
769               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
770               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
771               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
772               [tmp0]"=&r"(tmp[0]),
773               RESTRICT_ASM_LOW32
774               [addr0]"=&r"(addr[0]),
775               [dst]"+&r"(dst),              [src]"+&r"(src),
776               [h]"+&r"(h)
777             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
778               [A]"f"(A),                    [B]"f"(B),
779               [C]"f"(C),                    [D]"f"(D)
780             : "memory"
781         );
782     } else if (E) {
783         const int step = C ? stride : 1;
784
785         __asm__ volatile (
786             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
787             "dli        %[tmp0],    0x06                                \n\t"
788             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
789             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
790             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
791             "1:                                                         \n\t"
792             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
793             MMI_ULWC1(%[ftmp1], %[src], 0x00)
794             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
795
796             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
797             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
798             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
799             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
800             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
801
802             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
803             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
804             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
805             MMI_LWC1(%[ftmp2], %[dst], 0x00)
806             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
807             "addi       %[h],       %[h],           -0x01               \n\t"
808             MMI_SWC1(%[ftmp1], %[dst], 0x00)
809             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
810             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
811             "bnez       %[h],       1b                                  \n\t"
812             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
813               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
814               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
815               [tmp0]"=&r"(tmp[0]),
816               RESTRICT_ASM_LOW32
817               [addr0]"=&r"(addr[0]),
818               [dst]"+&r"(dst),              [src]"+&r"(src),
819               [h]"+&r"(h)
820             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
821               [ff_pw_32]"f"(ff_pw_32),
822               [A]"f"(A),                    [E]"f"(E)
823             : "memory"
824         );
825     } else {
826         __asm__ volatile (
827             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
828             "dli        %[tmp0],    0x06                                \n\t"
829             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
830             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
831
832             "1:                                                         \n\t"
833             MMI_ULWC1(%[ftmp1], %[src], 0x00)
834             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
835             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
836             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
837             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
838             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
839             MMI_LWC1(%[ftmp2], %[dst], 0x00)
840             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
841             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
842             MMI_SWC1(%[ftmp1], %[dst], 0x00)
843             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
844
845             MMI_ULWC1(%[ftmp1], %[src], 0x00)
846             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
847             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
848             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
849             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
850             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
851             MMI_LWC1(%[ftmp2], %[dst], 0x00)
852             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
853             "addi       %[h],       %[h],           -0x02               \n\t"
854             MMI_SWC1(%[ftmp1], %[dst], 0x00)
855
856             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
857             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
858             "bnez       %[h],       1b                                  \n\t"
859             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
860               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
861               [tmp0]"=&r"(tmp[0]),
862               RESTRICT_ASM_LOW32
863               [dst]"+&r"(dst),              [src]"+&r"(src),
864               [h]"+&r"(h)
865             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
866               [A]"f"(A)
867             : "memory"
868         );
869     }
870 }