]> git.sesse.net Git - ffmpeg/blob - libavcodec/mips/h264chroma_mmi.c
Merge commit '2beba58e0e4bda688bf96e12413231607ceafdd4'
[ffmpeg] / libavcodec / mips / h264chroma_mmi.c
1 /*
2  * Loongson SIMD optimized h264chroma
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 #include "h264chroma_mips.h"
26 #include "constants.h"
27 #include "libavutil/mips/mmiutils.h"
28
/*
 * H.264 chroma motion compensation, "put" variant, one 64-bit store per row,
 * implemented with Loongson MMI inline assembly.
 *
 * dst    - destination; advanced by stride after every row
 * src    - source; advanced by stride after every row
 * stride - byte distance between consecutive rows (used for both src and dst)
 * h      - number of rows to produce
 * x, y   - fractional offsets that determine the interpolation weights:
 *            A = (8 - x) * (8 - y)   B = x * (8 - y)
 *            C = (8 - x) * y         D = x * y          E = B + C
 *
 * Three paths are selected from the weights:
 *   D != 0 : four taps  (A*s[0] + B*s[1] + C*s[stride] + D*s[stride + 1]
 *                        + ff_pw_32) >> 6
 *   E != 0 : two taps   (A*s[0] + E*s[step] + ff_pw_32) >> 6
 *   else   : one tap    (A*s[0] + ff_pw_32) >> 6, two rows per loop iteration
 * ff_pw_32 comes from constants.h; by its name it is presumably 32 in every
 * 16-bit lane (rounding term for the >> 6) -- confirm there.
 * MMI_ULDC1 / MMI_SDC1 come from mmiutils.h; by name an unaligned 64-bit load
 * and a 64-bit store -- confirm there.
 *
 * The loops only test h for equality with zero after decrementing it, so h
 * must be positive on entry, and even when the one-tap path is taken.
 *
 * NOTE(review): the pshufh instructions below write to %[A], %[B], %[C],
 * %[D] and %[E], which are declared as input-only operands.  GCC's extended
 * asm rules do not allow input-only operands to be modified.
 */
29 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
30         int h, int x, int y)
31 {
32     const int A = (8 - x) * (8 - y);
33     const int B = x * (8 - y);
34     const int C = (8 - x) * y;
35     const int D = x * y;
36     const int E = B + C;
37     double ftmp[10];
38     uint64_t tmp[1];
39     mips_reg addr[1];
40     DECLARE_VAR_ALL64;
41
       /* Four-tap path: both x and y are non-zero. */
42     if (D) {
43         __asm__ volatile (
               /* ftmp0 = 0: zero source for the byte->halfword unpacks and
                * the pshufh control operand.  ftmp9 = 6: final shift count. */
44             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
45             "dli        %[tmp0],    0x06                                \n\t"
46             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
47             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
48             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
49             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
50             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
51
52             "1:                                                         \n\t"
               /* addr0 = src + stride (the row below).  Load the current and
                * next row at byte offsets 0 and 1. */
53             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
54             MMI_ULDC1(%[ftmp1], %[src], 0x00)
55             MMI_ULDC1(%[ftmp2], %[src], 0x01)
56             MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
57             MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
58
               /* Current row: ftmp1 (low half) / ftmp2 (high half)
                * = A * s[0] + B * s[1]. */
59             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
60             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
61             "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
62             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
63             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
64             "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
65             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
66             "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
67             "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
68             "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
69
               /* Next row: ftmp3 (low half) / ftmp4 (high half)
                * = C * s[stride] + D * s[stride + 1]. */
70             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
71             "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
72             "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
73             "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
74             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
75             "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
76             "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
77             "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
78             "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
79             "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
80
               /* Sum both rows, add ff_pw_32, shift right by 6, pack the two
                * halves back to bytes and store the row. */
81             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
82             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
83             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
84             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
85             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
86             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
87             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
88             "addi       %[h],       %[h],           -0x01               \n\t"
89             MMI_SDC1(%[ftmp1], %[dst], 0x00)
90             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
91             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
92             "bnez       %[h],       1b                                  \n\t"
93             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
94               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
95               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
96               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
97               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
98               [tmp0]"=&r"(tmp[0]),
99               RESTRICT_ASM_ALL64
100               [addr0]"=&r"(addr[0]),
101               [dst]"+&r"(dst),              [src]"+&r"(src),
102               [h]"+&r"(h)
103             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
104               [A]"f"(A),                    [B]"f"(B),
105               [C]"f"(C),                    [D]"f"(D)
106             : "memory"
107         );
       /* Two-tap path: D == 0 means x or y is zero, so one of B / C is zero
        * and E is the single remaining neighbour weight. */
108     } else if (E) {
           /* Neighbour offset: the next row when C is non-zero, otherwise the
            * next byte.
            * NOTE(review): stride is ptrdiff_t but step is int (narrowing). */
109         const int step = C ? stride : 1;
110
111         __asm__ volatile (
               /* ftmp0 = 0, ftmp7 = 6 (shift count). */
112             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
113             "dli        %[tmp0],    0x06                                \n\t"
114             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
115             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
116             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
117
118             "1:                                                         \n\t"
               /* addr0 = src + step; load the sample and its neighbour. */
119             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
120             MMI_ULDC1(%[ftmp1], %[src], 0x00)
121             MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
122
               /* ftmp1 (low half) / ftmp2 (high half) = A * s[0] + E * s[step]. */
123             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
124             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
125             "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
126             "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
127             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
128             "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
129             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
130             "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
131             "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
132             "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
133
               /* Add ff_pw_32, shift right by 6, pack, store, next row. */
134             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
135             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
136             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
137             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
138             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
139             "addi       %[h],       %[h],           -0x01               \n\t"
140             MMI_SDC1(%[ftmp1], %[dst], 0x00)
141             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
142             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
143             "bnez       %[h],       1b                                  \n\t"
144             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
145               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
146               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
147               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
148               [tmp0]"=&r"(tmp[0]),
149               RESTRICT_ASM_ALL64
150               [addr0]"=&r"(addr[0]),
151               [dst]"+&r"(dst),              [src]"+&r"(src),
152               [h]"+&r"(h)
153             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
154               [ff_pw_32]"f"(ff_pw_32),
155               [A]"f"(A),                    [E]"f"(E)
156             : "memory"
157         );
       /* One-tap path: D == 0 and E == 0 together imply x == 0 and y == 0,
        * so only A (= 64) contributes.  The loop body is unrolled to handle
        * two rows per iteration and decrements h by 2. */
158     } else {
159         __asm__ volatile (
               /* ftmp0 = 0, ftmp4 = 6 (shift count). */
160             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
161             "dli        %[tmp0],    0x06                                \n\t"
162             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
163             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
164
165             "1:                                                         \n\t"
               /* First row of the pair: (A * s[0] + ff_pw_32) >> 6. */
166             MMI_ULDC1(%[ftmp1], %[src], 0x00)
167             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
168             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
169             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
170             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
171             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
172             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
173             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
174             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
175             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
176             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
177             MMI_SDC1(%[ftmp1], %[dst], 0x00)
178
               /* Second row of the pair, same computation. */
179             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
180             MMI_ULDC1(%[ftmp1], %[src], 0x00)
181             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
182             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
183             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
184             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
185             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
186             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
187             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
188             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
189             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
190             "addi       %[h],       %[h],           -0x02               \n\t"
191             MMI_SDC1(%[ftmp1], %[dst], 0x00)
192
193             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
194             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
195             "bnez       %[h],       1b                                  \n\t"
196             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
197               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
198               [ftmp4]"=&f"(ftmp[4]),
199               [tmp0]"=&r"(tmp[0]),
200               RESTRICT_ASM_ALL64
201               [dst]"+&r"(dst),              [src]"+&r"(src),
202               [h]"+&r"(h)
203             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
204               [A]"f"(A)
205             : "memory"
206         );
207     }
208 }
209
/*
 * H.264 chroma motion compensation, "avg" variant, one 64-bit store per row,
 * implemented with Loongson MMI inline assembly.
 *
 * The interpolated row is computed with the weights
 *   A = (8 - x) * (8 - y)   B = x * (8 - y)
 *   C = (8 - x) * y         D = x * y          E = B + C
 * and is then combined with the 64 bits already at dst using pavgb before
 * being stored back to dst.
 *
 * dst    - destination; read and written, advanced by stride after every row
 * src    - source; advanced by stride after every row
 * stride - byte distance between consecutive rows (used for both src and dst)
 * h      - number of rows to produce
 * x, y   - fractional offsets that determine the weights above
 *
 * Three paths are selected from the weights:
 *   D != 0 : four taps  (A*s[0] + B*s[1] + C*s[stride] + D*s[stride + 1]
 *                        + ff_pw_32) >> 6
 *   E != 0 : two taps   (A*s[0] + E*s[step] + ff_pw_32) >> 6
 *   else   : one tap    (A*s[0] + ff_pw_32) >> 6, two rows per loop iteration
 * ff_pw_32 comes from constants.h; by its name it is presumably 32 in every
 * 16-bit lane -- confirm there.
 * MMI_ULDC1 / MMI_LDC1 / MMI_SDC1 come from mmiutils.h; by name an unaligned
 * 64-bit load, a 64-bit load and a 64-bit store -- confirm there.
 *
 * The loops only test h for equality with zero after decrementing it, so h
 * must be positive on entry, and even when the one-tap path is taken.
 *
 * NOTE(review): the pshufh instructions below write to %[A], %[B], %[C],
 * %[D] and %[E], which are declared as input-only operands.  GCC's extended
 * asm rules do not allow input-only operands to be modified.
 */
210 void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
211         int h, int x, int y)
212 {
213     const int A = (8 - x) * (8 - y);
214     const int B = x * (8 - y);
215     const int C = (8 - x) * y;
216     const int D = x * y;
217     const int E = B + C;
218     double ftmp[10];
219     uint64_t tmp[1];
220     mips_reg addr[1];
221     DECLARE_VAR_ALL64;
222
        /* Four-tap path: both x and y are non-zero. */
223     if (D) {
224         __asm__ volatile (
                /* ftmp0 = 0: zero source for the byte->halfword unpacks and
                 * the pshufh control operand.  ftmp9 = 6: final shift count. */
225             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
226             "dli        %[tmp0],    0x06                                \n\t"
227             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
228             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
229             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
230             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
231             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
232
233             "1:                                                         \n\t"
                /* addr0 = src + stride (the row below).  Load the current and
                 * next row at byte offsets 0 and 1. */
234             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
235             MMI_ULDC1(%[ftmp1], %[src], 0x00)
236             MMI_ULDC1(%[ftmp2], %[src], 0x01)
237             MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
238             MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
239
                /* Current row: ftmp1 (low half) / ftmp2 (high half)
                 * = A * s[0] + B * s[1]. */
240             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
241             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
242             "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
243             "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
244             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
245             "pmullh     %[ftmp7],   %[ftmp7],       %[B]                \n\t"
246             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
247             "pmullh     %[ftmp6],   %[ftmp6],       %[A]                \n\t"
248             "pmullh     %[ftmp8],   %[ftmp8],       %[B]                \n\t"
249             "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
250
                /* Next row: ftmp3 (low half) / ftmp4 (high half)
                 * = C * s[stride] + D * s[stride + 1]. */
251             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
252             "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]            \n\t"
253             "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]            \n\t"
254             "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]            \n\t"
255             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
256             "pmullh     %[ftmp7],   %[ftmp7],       %[D]                \n\t"
257             "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
258             "pmullh     %[ftmp6],   %[ftmp6],       %[C]                \n\t"
259             "pmullh     %[ftmp8],   %[ftmp8],       %[D]                \n\t"
260             "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
261
                /* Sum both rows, add ff_pw_32, shift right by 6 and pack the
                 * two halves back to bytes. */
262             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
263             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
264             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
265             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
266             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
267             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
268             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
                /* Combine with the existing dst row (pavgb), then store. */
269             MMI_LDC1(%[ftmp2], %[dst], 0x00)
270             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
271             "addi       %[h],       %[h],           -0x01               \n\t"
272             MMI_SDC1(%[ftmp1], %[dst], 0x00)
273             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
274             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
275             "bnez       %[h],       1b                                  \n\t"
276             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
277               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
278               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
279               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
280               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
281               [tmp0]"=&r"(tmp[0]),
282               RESTRICT_ASM_ALL64
283               [addr0]"=&r"(addr[0]),
284               [dst]"+&r"(dst),              [src]"+&r"(src),
285               [h]"+&r"(h)
286             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
287               [A]"f"(A),                    [B]"f"(B),
288               [C]"f"(C),                    [D]"f"(D)
289             : "memory"
290         );
        /* Two-tap path: D == 0 means x or y is zero, so one of B / C is zero
         * and E is the single remaining neighbour weight. */
291     } else if (E) {
            /* Neighbour offset: the next row when C is non-zero, otherwise
             * the next byte.
             * NOTE(review): stride is ptrdiff_t but step is int (narrowing). */
292         const int step = C ? stride : 1;
293
294         __asm__ volatile (
                /* ftmp0 = 0, ftmp7 = 6 (shift count). */
295             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
296             "dli        %[tmp0],    0x06                                \n\t"
297             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
298             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
299             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
300
301             "1:                                                         \n\t"
                /* addr0 = src + step; load the sample and its neighbour. */
302             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
303             MMI_ULDC1(%[ftmp1], %[src], 0x00)
304             MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
305
                /* ftmp1 (low half) / ftmp2 (high half) = A * s[0] + E * s[step]. */
306             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
307             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
308             "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t"
309             "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
310             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
311             "pmullh     %[ftmp5],   %[ftmp5],       %[E]                \n\t"
312             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]            \n\t"
313             "pmullh     %[ftmp4],   %[ftmp4],       %[A]                \n\t"
314             "pmullh     %[ftmp6],   %[ftmp6],       %[E]                \n\t"
315             "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
316
                /* Add ff_pw_32, shift right by 6, pack, average with the
                 * existing dst row, store, next row. */
317             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
318             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
319             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
320             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
321             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
322             MMI_LDC1(%[ftmp2], %[dst], 0x00)
323             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
324             "addi       %[h],       %[h],           -0x01               \n\t"
325             MMI_SDC1(%[ftmp1], %[dst], 0x00)
326             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
327             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
328             "bnez       %[h],       1b                                  \n\t"
329             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
330               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
331               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
332               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
333               [tmp0]"=&r"(tmp[0]),
334               RESTRICT_ASM_ALL64
335               [addr0]"=&r"(addr[0]),
336               [dst]"+&r"(dst),              [src]"+&r"(src),
337               [h]"+&r"(h)
338             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
339               [ff_pw_32]"f"(ff_pw_32),
340               [A]"f"(A),                    [E]"f"(E)
341             : "memory"
342         );
        /* One-tap path: D == 0 and E == 0 together imply x == 0 and y == 0,
         * so only A (= 64) contributes.  The loop body is unrolled to handle
         * two rows per iteration and decrements h by 2. */
343     } else {
344         __asm__ volatile (
                /* ftmp0 = 0, ftmp4 = 6 (shift count). */
345             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
346             "dli        %[tmp0],    0x06                                \n\t"
347             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
348             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
349
350             "1:                                                         \n\t"
                /* First row of the pair: (A * s[0] + ff_pw_32) >> 6, then
                 * pavgb with the existing dst row. */
351             MMI_ULDC1(%[ftmp1], %[src], 0x00)
352             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
353             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
354             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
355             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
356             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
357             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
358             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
359             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
360             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
361             MMI_LDC1(%[ftmp2], %[dst], 0x00)
362             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
363             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
364             MMI_SDC1(%[ftmp1], %[dst], 0x00)
365             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
366
                /* Second row of the pair, same computation. */
367             MMI_ULDC1(%[ftmp1], %[src], 0x00)
368             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
369             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
370             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
371             "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
372             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
373             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
374             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
375             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
376             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
377             MMI_LDC1(%[ftmp2], %[dst], 0x00)
378             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
379             "addi       %[h],       %[h],           -0x02               \n\t"
380             MMI_SDC1(%[ftmp1], %[dst], 0x00)
381
382             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
383             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
384             "bnez       %[h],       1b                                  \n\t"
385             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
386               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
387               [ftmp4]"=&f"(ftmp[4]),
388               [tmp0]"=&r"(tmp[0]),
389               RESTRICT_ASM_ALL64
390               [dst]"+&r"(dst),              [src]"+&r"(src),
391               [h]"+&r"(h)
392             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
393               [A]"f"(A)
394             : "memory"
395         );
396     }
397 }
398
/*
 * H.264 chroma motion compensation, "put" variant, narrow version of the mc8
 * routine, implemented with Loongson MMI inline assembly.
 *
 * Rows are loaded with MMI_ULWC1 and stored with MMI_SWC1 (macros from
 * mmiutils.h; by name an unaligned 32-bit load and a 32-bit store, i.e.
 * presumably 4 pixels per row -- confirm there).  Only the low half of each
 * register is unpacked (punpcklbh); the high half is never processed.
 *
 * dst    - destination; advanced by stride after every row
 * src    - source; advanced by stride after every row
 * stride - byte distance between consecutive rows (used for both src and dst)
 * h      - number of rows to produce
 * x, y   - fractional offsets that determine the interpolation weights:
 *            A = (8 - x) * (8 - y)   B = x * (8 - y)
 *            C = (8 - x) * y         D = x * y          E = B + C
 *
 * Three paths are selected from the weights:
 *   D != 0 : four taps  (A*s[0] + B*s[1] + C*s[stride] + D*s[stride + 1]
 *                        + ff_pw_32) >> 6
 *   E != 0 : two taps   (A*s[0] + E*s[step] + ff_pw_32) >> 6
 *   else   : one tap    (A*s[0] + ff_pw_32) >> 6, two rows per loop iteration
 * ff_pw_32 comes from constants.h; by its name it is presumably 32 in every
 * 16-bit lane -- confirm there.
 *
 * The loops only test h for equality with zero after decrementing it, so h
 * must be positive on entry, and even when the one-tap path is taken.
 *
 * NOTE(review): the pshufh instructions below write to %[A], %[B], %[C],
 * %[D] and %[E], which are declared as input-only operands.  GCC's extended
 * asm rules do not allow input-only operands to be modified.
 */
399 void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
400         int h, int x, int y)
401 {
402     const int A = (8 - x) * (8 - y);
403     const int B = x * (8 - y);
404     const int C = (8 - x) *  y;
405     const int D = x *  y;
406     const int E = B + C;
407     double ftmp[8];
408     uint64_t tmp[1];
409     mips_reg addr[1];
410     DECLARE_VAR_LOW32;
411
        /* Four-tap path: both x and y are non-zero. */
412     if (D) {
413         __asm__ volatile (
                /* ftmp0 = 0: zero source for the unpacks, the pshufh control
                 * operand and the second packushb source.  ftmp7 = 6: final
                 * shift count. */
414             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
415             "dli        %[tmp0],    0x06                                \n\t"
416             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
417             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
418             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
419             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
420             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
421
422             "1:                                                         \n\t"
                /* addr0 = src + stride (the row below).  Load the current and
                 * next row at byte offsets 0 and 1. */
423             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
424             MMI_ULWC1(%[ftmp1], %[src], 0x00)
425             MMI_ULWC1(%[ftmp2], %[src], 0x01)
426             MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
427             MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
428
                /* Current row: ftmp1 = A * s[0] + B * s[1]. */
429             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
430             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
431             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
432             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
433             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
434
                /* Next row: ftmp2 = C * s[stride] + D * s[stride + 1]. */
435             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
436             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
437             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
438             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
439             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
440
                /* Sum both rows, add ff_pw_32, shift right by 6, pack against
                 * the zero register and store the row. */
441             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
442             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
443             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
444             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
445             "addi       %[h],       %[h],           -0x01               \n\t"
446             MMI_SWC1(%[ftmp1], %[dst], 0x00)
447             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
448             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
449             "bnez       %[h],       1b                                  \n\t"
450             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
451               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
452               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
453               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
454               [tmp0]"=&r"(tmp[0]),
455               RESTRICT_ASM_LOW32
456               [addr0]"=&r"(addr[0]),
457               [dst]"+&r"(dst),              [src]"+&r"(src),
458               [h]"+&r"(h)
459             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
460               [A]"f"(A),                    [B]"f"(B),
461               [C]"f"(C),                    [D]"f"(D)
462             : "memory"
463         );
        /* Two-tap path: D == 0 means x or y is zero, so one of B / C is zero
         * and E is the single remaining neighbour weight. */
464     } else if (E) {
            /* Neighbour offset: the next row when C is non-zero, otherwise
             * the next byte.
             * NOTE(review): stride is ptrdiff_t but step is int (narrowing). */
465         const int step = C ? stride : 1;
466
467         __asm__ volatile (
                /* ftmp0 = 0, ftmp5 = 6 (shift count). */
468             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
469             "dli        %[tmp0],    0x06                                \n\t"
470             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
471             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
472             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
473
474             "1:                                                         \n\t"
                /* addr0 = src + step; load the sample and its neighbour. */
475             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
476             MMI_ULWC1(%[ftmp1], %[src], 0x00)
477             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
478
                /* ftmp1 = A * s[0] + E * s[step]. */
479             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
480             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
481             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
482             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
483             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
484
                /* Add ff_pw_32, shift right by 6, pack, store, next row. */
485             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
486             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
487             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
488             "addi       %[h],       %[h],           -0x01               \n\t"
489             MMI_SWC1(%[ftmp1], %[dst], 0x00)
490             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
491             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
492             "bnez       %[h],       1b                                  \n\t"
493             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
494               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
495               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
496               [tmp0]"=&r"(tmp[0]),
497               RESTRICT_ASM_LOW32
498               [addr0]"=&r"(addr[0]),
499               [dst]"+&r"(dst),              [src]"+&r"(src),
500               [h]"+&r"(h)
501             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
502               [ff_pw_32]"f"(ff_pw_32),
503               [A]"f"(A),                    [E]"f"(E)
504             : "memory"
505         );
        /* One-tap path: D == 0 and E == 0 together imply x == 0 and y == 0,
         * so only A (= 64) contributes.  The loop body is unrolled to handle
         * two rows per iteration and decrements h by 2. */
506     } else {
507         __asm__ volatile (
                /* ftmp0 = 0, ftmp3 = 6 (shift count). */
508             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
509             "dli        %[tmp0],    0x06                                \n\t"
510             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
511             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
512
513             "1:                                                         \n\t"
                /* First row of the pair: (A * s[0] + ff_pw_32) >> 6. */
514             MMI_ULWC1(%[ftmp1], %[src], 0x00)
515             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
516             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
517             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
518             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
519             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
520             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
521             MMI_SWC1(%[ftmp1], %[dst], 0x00)
522             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
523
                /* Second row of the pair, same computation. */
524             MMI_ULWC1(%[ftmp1], %[src], 0x00)
525             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
526             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
527             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
528             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
529             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
530             "addi       %[h],       %[h],           -0x02               \n\t"
531             MMI_SWC1(%[ftmp1], %[dst], 0x00)
532
533             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
534             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
535             "bnez       %[h],       1b                                  \n\t"
536             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
537               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
538               [tmp0]"=&r"(tmp[0]),
539               RESTRICT_ASM_LOW32
540               [dst]"+&r"(dst),              [src]"+&r"(src),
541               [h]"+&r"(h)
542             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
543               [A]"f"(A)
544             : "memory"
545         );
546     }
547 }
548
/**
 * Bilinear chroma interpolation on rows of four pixels, averaged into dst.
 *
 * For every row the source pixels are filtered with the weights A, B, C, D
 * derived from x and y (algebraically A + B + C + D == 64), ff_pw_32 is added
 * and the sum is shifted right by 6, the halfwords are packed back to bytes,
 * and the result is combined with the bytes already stored in dst via pavgb
 * before being written back.
 *
 * Three code paths are selected from the weights:
 *   - D != 0           : full 2x2 filter (taps at src, src+1, src+stride,
 *                        src+stride+1);
 *   - D == 0, E != 0   : two-tap filter along a single direction;
 *   - D == 0, E == 0   : single tap with weight A only.
 *
 * The MMI_ULWC1 / MMI_LWC1 / MMI_SWC1 macros come from mmiutils.h; they are
 * presumably 32-bit (four pixel) unaligned-load / load / store helpers for
 * FP registers -- confirm against that header.
 *
 * @param dst    destination rows; read (for the average) and written
 * @param src    source rows
 * @param stride byte offset added to both dst and src after each row
 * @param h      row count; every loop runs until h has counted down to 0
 * @param x      horizontal fraction (presumably 0..7 -- confirm with callers)
 * @param y      vertical fraction (presumably 0..7 -- confirm with callers)
 */
549 void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
550         int h, int x, int y)
551 {
        /* Filter weights for the four neighbouring pixels; E is the combined
         * weight used by the two-tap path when D is zero. */
552     const int A = (8 - x) *(8 - y);
553     const int B = x * (8 - y);
554     const int C = (8 - x) * y;
555     const int D = x * y;
556     const int E = B + C;
557     double ftmp[8];
558     uint64_t tmp[1];
559     mips_reg addr[1];
560     DECLARE_VAR_LOW32;
561
562     if (D) {
            /* Full 2x2 filter, one row per loop iteration.
             *
             * NOTE(review): the pshufh instructions below write to %[A]..%[D],
             * which are listed as input-only operands in the constraint list;
             * GCC's extended-asm rules do not allow modifying inputs. */
563         __asm__ volatile (
                /* ftmp0 = 0: used as the zero source for byte unpacking and
                 * packing, and as the pshufh selector. */
564             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
565             "dli        %[tmp0],    0x06                                \n\t"
                /* pshufh with a zero selector: intended to replicate each
                 * weight into every halfword lane. */
566             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
567             "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
                /* ftmp7 = 6, the shift count applied by psrlh below. */
568             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
569             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
570             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
571
572             "1:                                                         \n\t"
                /* addr0 = start of the next source row. */
573             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
574             MMI_ULWC1(%[ftmp1], %[src], 0x00)
575             MMI_ULWC1(%[ftmp2], %[src], 0x01)
576             MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
577             MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
578
                /* Current row: A * src[i] + B * src[i + 1], on halfwords. */
579             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
580             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
581             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
582             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
583             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
584
                /* Next row: C * src[i + stride] + D * src[i + stride + 1]. */
585             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
586             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
587             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
588             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
589             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
590
                /* Sum both rows, add ff_pw_32, shift right by 6, pack to
                 * bytes, then average with the existing dst pixels. */
591             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
592             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
593             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
594             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
595             MMI_LWC1(%[ftmp2], %[dst], 0x00)
596             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
597             "addi       %[h],       %[h],           -0x01               \n\t"
598             MMI_SWC1(%[ftmp1], %[dst], 0x00)
599             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
600             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
601             "bnez       %[h],       1b                                  \n\t"
602             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
603               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
604               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
605               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
606               [tmp0]"=&r"(tmp[0]),
607               RESTRICT_ASM_LOW32
608               [addr0]"=&r"(addr[0]),
609               [dst]"+&r"(dst),              [src]"+&r"(src),
610               [h]"+&r"(h)
611             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
612               [A]"f"(A),                    [B]"f"(B),
613               [C]"f"(C),                    [D]"f"(D)
614             : "memory"
615         );
616     } else if (E) {
            /* D == x * y == 0 here, so x or y is zero and at most one of B and
             * C is non-zero; E is that remaining weight. The second tap is one
             * row down (step = stride) when C is non-zero, otherwise one byte
             * to the right (step = 1). */
617         const int step = C ? stride : 1;
618
619         __asm__ volatile (
620             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
621             "dli        %[tmp0],    0x06                                \n\t"
622             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
623             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
                /* ftmp5 = 6, the shift count applied by psrlh below. */
624             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
625             "1:                                                         \n\t"
626             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
627             MMI_ULWC1(%[ftmp1], %[src], 0x00)
628             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
629
                /* A * src[i] + E * src[i + step], on halfwords. */
630             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
631             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
632             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
633             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
634             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
635
                /* Add ff_pw_32, shift right by 6, pack, average with dst. */
636             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
637             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
638             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
639             MMI_LWC1(%[ftmp2], %[dst], 0x00)
640             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
641             "addi       %[h],       %[h],           -0x01               \n\t"
642             MMI_SWC1(%[ftmp1], %[dst], 0x00)
643             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
644             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
645             "bnez       %[h],       1b                                  \n\t"
646             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
647               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
648               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
649               [tmp0]"=&r"(tmp[0]),
650               RESTRICT_ASM_LOW32
651               [addr0]"=&r"(addr[0]),
652               [dst]"+&r"(dst),              [src]"+&r"(src),
653               [h]"+&r"(h)
654             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
655               [ff_pw_32]"f"(ff_pw_32),
656               [A]"f"(A),                    [E]"f"(E)
657             : "memory"
658         );
659     } else {
            /* D == 0 and E == 0: only weight A is applied to the pixel at src.
             * The loop body is unrolled to handle two rows per iteration and
             * subtracts 2 from h each time.
             *
             * NOTE(review): the exit test is h == 0, so this path presumably
             * relies on h being even and positive -- confirm with callers. */
660         __asm__ volatile (
661             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
662             "dli        %[tmp0],    0x06                                \n\t"
663             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
                /* ftmp3 = 6, the shift count applied by psrlh below. */
664             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
665
666             "1:                                                         \n\t"
                /* First row of the pair. */
667             MMI_ULWC1(%[ftmp1], %[src], 0x00)
668             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
669             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
670             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
671             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
672             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
673             MMI_LWC1(%[ftmp2], %[dst], 0x00)
674             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
675             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
676             MMI_SWC1(%[ftmp1], %[dst], 0x00)
677             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
678
                /* Second row of the pair. */
679             MMI_ULWC1(%[ftmp1], %[src], 0x00)
680             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
681             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
682             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
683             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
684             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
685             MMI_LWC1(%[ftmp2], %[dst], 0x00)
686             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
687             "addi       %[h],       %[h],           -0x02               \n\t"
688             MMI_SWC1(%[ftmp1], %[dst], 0x00)
689
690             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
691             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
692             "bnez       %[h],       1b                                  \n\t"
693             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
694               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
695               [tmp0]"=&r"(tmp[0]),
696               RESTRICT_ASM_LOW32
697               [dst]"+&r"(dst),              [src]"+&r"(src),
698               [h]"+&r"(h)
699             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
700               [A]"f"(A)
701             : "memory"
702         );
703     }
704 }