2 * Loongson SIMD optimized qpeldsp
4 * Copyright (c) 2016 Loongson Technology Corporation Limited
5 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "hpeldsp_mips.h"
25 #include "libavcodec/bit_depth_template.c"
26 #include "libavutil/mips/mmiutils.h"
27 #include "constants.h"
/*
 * Copy a 4-byte-wide block from pixels to block, four rows per pass.
 *
 * block     - destination, advanced by line_size after every stored row
 * pixels    - source, advanced by line_size after every loaded row
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 in each pass
 *
 * Loads use the unaligned word macro (MMI_ULWC1); stores use MMI_SWC1.
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
29 void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
30 ptrdiff_t line_size, int h)
/* Load one word from each of four consecutive source rows. */
37 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
38 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
39 MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
40 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
41 MMI_ULWC1(%[ftmp2], %[pixels], 0x00)
42 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
43 MMI_ULWC1(%[ftmp3], %[pixels], 0x00)
44 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
/* Four rows are consumed per pass. */
46 PTR_ADDI "%[h], %[h], -0x04 \n\t"
/* Store the four words to four consecutive destination rows. */
48 MMI_SWC1(%[ftmp0], %[block], 0x00)
49 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
50 MMI_SWC1(%[ftmp1], %[block], 0x00)
51 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
52 MMI_SWC1(%[ftmp2], %[block], 0x00)
53 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
54 MMI_SWC1(%[ftmp3], %[block], 0x00)
55 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
/* Outputs: FP temporaries are early-clobber; block and pixels are read-write. */
58 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
59 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
61 [block]"+&r"(block), [pixels]"+&r"(pixels),
63 : [line_size]"r"((mips_reg)line_size)
/*
 * Copy an 8-byte-wide block from pixels to block, four rows per pass.
 *
 * block     - destination, advanced by line_size after every stored row
 * pixels    - source, advanced by line_size after every loaded row
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 in each pass
 *
 * Loads use the unaligned doubleword macro (MMI_ULDC1); stores use MMI_SDC1.
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
68 void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
69 ptrdiff_t line_size, int h)
/* Load one doubleword from each of four consecutive source rows. */
76 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
77 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
78 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
79 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
80 MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
81 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
82 MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
83 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
/* Four rows are consumed per pass. */
85 PTR_ADDI "%[h], %[h], -0x04 \n\t"
/* Store the four doublewords to four consecutive destination rows. */
87 MMI_SDC1(%[ftmp0], %[block], 0x00)
88 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
89 MMI_SDC1(%[ftmp1], %[block], 0x00)
90 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
91 MMI_SDC1(%[ftmp2], %[block], 0x00)
92 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
93 MMI_SDC1(%[ftmp3], %[block], 0x00)
94 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
/* Outputs: FP temporaries are early-clobber; block and pixels are read-write. */
97 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
98 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
100 [block]"+&r"(block), [pixels]"+&r"(pixels),
102 : [line_size]"r"((mips_reg)line_size)
/*
 * Copy a 16-byte-wide block from pixels to block, four rows per pass.
 * Every row is moved as two doublewords at byte offsets 0x00 and 0x08.
 *
 * block     - destination, advanced by line_size after every stored row
 * pixels    - source, advanced by line_size after every loaded row
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 in each pass
 *
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
107 void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
108 ptrdiff_t line_size, int h)
/* Load both halves of four consecutive source rows into ftmp0..ftmp7. */
115 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
116 MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
117 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
118 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
119 MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
120 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
121 MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
122 MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
123 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
124 MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
125 MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
126 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
/* Four rows are consumed per pass. */
128 PTR_ADDI "%[h], %[h], -0x04 \n\t"
/* Store the rows in the same register pairing used by the loads. */
130 MMI_SDC1(%[ftmp0], %[block], 0x00)
131 MMI_SDC1(%[ftmp2], %[block], 0x08)
132 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
133 MMI_SDC1(%[ftmp1], %[block], 0x00)
134 MMI_SDC1(%[ftmp3], %[block], 0x08)
135 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
136 MMI_SDC1(%[ftmp4], %[block], 0x00)
137 MMI_SDC1(%[ftmp6], %[block], 0x08)
138 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
139 MMI_SDC1(%[ftmp5], %[block], 0x00)
140 MMI_SDC1(%[ftmp7], %[block], 0x08)
141 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
/* Outputs: FP temporaries are early-clobber; block and pixels are read-write. */
144 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
145 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
146 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
147 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
149 [block]"+&r"(block), [pixels]"+&r"(pixels),
151 : [line_size]"r"((mips_reg)line_size)
/*
 * Average a 4-byte-wide source block into the destination.
 * For every row, the word loaded from pixels is combined with the word
 * already in block using pavgb (packed byte average) and written back to
 * block. Two rows are handled per group and the group is written out twice,
 * so h is decremented by 4 per pass.
 *
 * block     - destination, read and written; advanced by 2 * line_size
 * pixels    - source; advanced by 2 * line_size
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 in each pass
 *
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
156 void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
157 ptrdiff_t line_size, int h)
/* addr2 = 2 * line_size, the step applied after each pair of rows. */
165 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
/* Rows 0 and 1: addr0 / addr1 point at the second row of source / dest. */
167 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
168 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
169 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
170 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
171 MMI_ULWC1(%[ftmp2], %[block], 0x00)
172 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
173 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
174 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
175 MMI_SWC1(%[ftmp0], %[block], 0x00)
176 MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
177 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
178 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
/* Rows 2 and 3: same sequence as above. */
180 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
181 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
182 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
183 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
184 MMI_ULWC1(%[ftmp2], %[block], 0x00)
185 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
186 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
187 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
188 MMI_SWC1(%[ftmp0], %[block], 0x00)
189 MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
190 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
191 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
/* Four rows are consumed per pass. */
193 PTR_ADDI "%[h], %[h], -0x04 \n\t"
195 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
196 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
199 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
200 [addr2]"=&r"(addr[2]),
201 [block]"+&r"(block), [pixels]"+&r"(pixels),
203 : [line_size]"r"((mips_reg)line_size)
/*
 * Average an 8-byte-wide source block into the destination.
 * For every row, the doubleword loaded from pixels is combined with the
 * doubleword already in block using pavgb (packed byte average) and written
 * back to block. Two rows are handled per group and the group is written out
 * twice, so h is decremented by 4 per pass.
 *
 * block     - destination, read and written; advanced by 2 * line_size
 * pixels    - source; advanced by 2 * line_size
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 in each pass
 *
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
208 void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
209 ptrdiff_t line_size, int h)
/* addr2 = 2 * line_size, the step applied after each pair of rows. */
217 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
/* Rows 0 and 1: addr0 / addr1 point at the second row of source / dest. */
219 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
220 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
221 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
222 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
223 MMI_ULDC1(%[ftmp2], %[block], 0x00)
224 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
225 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
226 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
227 MMI_SDC1(%[ftmp0], %[block], 0x00)
228 MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
229 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
230 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
/* Rows 2 and 3: same sequence as above. */
232 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
233 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
234 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
235 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
236 MMI_ULDC1(%[ftmp2], %[block], 0x00)
237 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
238 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
239 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
240 MMI_SDC1(%[ftmp0], %[block], 0x00)
241 MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
242 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
243 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
/* Four rows are consumed per pass. */
245 PTR_ADDI "%[h], %[h], -0x04 \n\t"
247 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
248 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
251 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
252 [addr2]"=&r"(addr[2]),
253 [block]"+&r"(block), [pixels]"+&r"(pixels),
255 : [line_size]"r"((mips_reg)line_size)
/*
 * Average a 16-byte-wide source block into the destination.
 * Every row is handled as two doublewords at byte offsets 0x00 and 0x08.
 * Source rows are combined with the rows already in block using pavgb
 * (packed byte average) and written back to block, two rows per group, with
 * the group written out twice per pass.
 *
 * block     - destination, read and written; ends each group two rows down
 * pixels    - source, advanced by line_size after every loaded row
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 4 at the start of each pass
 *
 * NOTE(review): the local declarations, loop label and loop branch are not
 * visible in this view; presumably h must be a positive multiple of 4 --
 * confirm against the complete file.
 */
260 void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
261 ptrdiff_t line_size, int h)
269 PTR_ADDI "%[h], %[h], -0x04 \n\t"
/* Rows 0 and 1 of the source. */
270 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
271 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
272 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
273 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
274 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
275 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
/* Matching destination rows; addr0 is the second destination row. */
276 MMI_ULDC1(%[ftmp2], %[block], 0x00)
277 MMI_ULDC1(%[ftmp6], %[block], 0x08)
278 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
279 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
280 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
281 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
282 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
283 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
284 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
285 MMI_SDC1(%[ftmp0], %[block], 0x00)
286 MMI_SDC1(%[ftmp4], %[block], 0x08)
287 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
288 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
/* block moves to the row after addr0, i.e. two rows down. */
289 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
/* Rows 2 and 3: same sequence as above. */
291 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
292 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
293 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
294 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
295 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
296 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
297 MMI_ULDC1(%[ftmp2], %[block], 0x00)
298 MMI_ULDC1(%[ftmp6], %[block], 0x08)
299 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
300 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
301 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
302 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
303 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
304 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
305 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
306 MMI_SDC1(%[ftmp0], %[block], 0x00)
307 MMI_SDC1(%[ftmp4], %[block], 0x08)
308 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
309 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
310 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
313 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
314 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
315 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
316 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
318 [addr0]"=&r"(addr[0]),
319 [block]"+&r"(block), [pixels]"+&r"(pixels),
321 : [line_size]"r"((mips_reg)line_size)
/*
 * Write the packed byte average (pavgb) of two 4-byte-wide sources to dst.
 * Each source and the destination has its own stride. Two rows are handled
 * per group and the group is written out twice, so h drops by 4 per pass.
 *
 * dst         - destination; advanced by 2 * dst_stride per group
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
326 inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
327 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
336 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
337 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
338 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1: addr0 / addr1 are the second rows of src1 / src2. */
340 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
341 MMI_ULWC1(%[ftmp0], %[src1], 0x00)
342 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
343 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
344 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
345 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
346 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
347 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
348 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
349 MMI_SWC1(%[ftmp0], %[dst], 0x00)
350 MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
351 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
352 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
354 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
355 MMI_ULWC1(%[ftmp0], %[src1], 0x00)
356 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
357 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
358 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
359 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
360 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
361 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
362 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
363 MMI_SWC1(%[ftmp0], %[dst], 0x00)
364 MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
365 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
366 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
368 PTR_ADDI "%[h], %[h], -0x04 \n\t"
370 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
371 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
374 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
375 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
376 [addr4]"=&r"(addr[4]),
377 [dst]"+&r"(dst), [src1]"+&r"(src1),
378 [src2]"+&r"(src2), [h]"+&r"(h)
379 : [dst_stride]"r"((mips_reg)dst_stride),
380 [src_stride1]"r"((mips_reg)src_stride1),
381 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * Write the packed byte average (pavgb) of two 8-byte-wide sources to dst.
 * Each source and the destination has its own stride. Two rows are handled
 * per group and the group is written out twice, so h drops by 4 per pass.
 *
 * dst         - destination; advanced by 2 * dst_stride per group
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
386 inline void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
387 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
396 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
397 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
398 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1: addr0 / addr1 are the second rows of src1 / src2. */
401 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
402 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
403 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
404 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
405 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
406 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
407 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
408 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
409 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
410 MMI_SDC1(%[ftmp0], %[dst], 0x00)
411 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
412 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
413 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
415 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
416 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
417 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
418 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
419 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
420 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
421 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
422 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
423 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
424 MMI_SDC1(%[ftmp0], %[dst], 0x00)
425 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
426 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
427 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
429 PTR_ADDI "%[h], %[h], -0x04 \n\t"
431 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
432 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
435 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
436 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
437 [addr4]"=&r"(addr[4]),
438 [dst]"+&r"(dst), [src1]"+&r"(src1),
439 [src2]"+&r"(src2), [h]"+&r"(h)
440 : [dst_stride]"r"((mips_reg)dst_stride),
441 [src_stride1]"r"((mips_reg)src_stride1),
442 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * Write the packed byte average (pavgb) of two 16-byte-wide sources to dst.
 * Every row is handled as two doublewords at byte offsets 0x00 and 0x08.
 * Each source and the destination has its own stride. Two rows are handled
 * per group and the group is written out twice, so h drops by 4 per pass.
 *
 * dst         - destination; advanced by 2 * dst_stride per group
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
447 inline void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
448 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
457 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
458 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
459 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1: ftmp0/4 and ftmp1/5 from src1, ftmp2/6 and ftmp3/7 from src2. */
462 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
463 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
464 MMI_ULDC1(%[ftmp4], %[src1], 0x08)
465 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
466 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
467 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
468 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
469 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
470 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
471 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
472 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
473 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
474 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
475 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
476 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
477 MMI_SDC1(%[ftmp0], %[dst], 0x00)
478 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
479 MMI_SDC1(%[ftmp4], %[dst], 0x08)
480 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
481 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
482 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
484 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
485 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
486 MMI_ULDC1(%[ftmp4], %[src1], 0x08)
487 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
488 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
489 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
490 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
491 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
492 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
493 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
494 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
495 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
496 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
497 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
498 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
499 MMI_SDC1(%[ftmp0], %[dst], 0x00)
500 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
501 MMI_SDC1(%[ftmp4], %[dst], 0x08)
502 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
503 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
504 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
506 PTR_ADDI "%[h], %[h], -0x04 \n\t"
508 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
509 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
510 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
511 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
514 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
515 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
516 [addr4]"=&r"(addr[4]),
517 [dst]"+&r"(dst), [src1]"+&r"(src1),
518 [src2]"+&r"(src2), [h]"+&r"(h)
519 : [dst_stride]"r"((mips_reg)dst_stride),
520 [src_stride1]"r"((mips_reg)src_stride1),
521 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * Two-stage average for a 4-byte-wide block: first pavgb of src1 and src2,
 * then pavgb of that result with the bytes already in dst; the final value
 * is stored back to dst. Two rows are handled per group and the group is
 * written out twice, so h drops by 4 per pass.
 *
 * dst         - destination, read and written; advanced by 2 * dst_stride
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
526 inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
527 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
536 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
537 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
538 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1: average the two sources. */
541 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
542 MMI_ULWC1(%[ftmp0], %[src1], 0x00)
543 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
544 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
545 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
546 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
547 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
548 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
549 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
/* Then average with the existing destination rows (addr5 = second row). */
550 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
551 MMI_ULWC1(%[ftmp4], %[dst], 0x00)
552 MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
553 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
554 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
555 MMI_SWC1(%[ftmp0], %[dst], 0x00)
556 MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
557 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
558 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
560 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
561 MMI_ULWC1(%[ftmp0], %[src1], 0x00)
562 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
563 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
564 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
565 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
566 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
567 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
568 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
569 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
570 MMI_ULWC1(%[ftmp4], %[dst], 0x00)
571 MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
572 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
573 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
574 MMI_SWC1(%[ftmp0], %[dst], 0x00)
575 MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
576 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
577 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
579 PTR_ADDI "%[h], %[h], -0x04 \n\t"
581 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
582 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
583 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
586 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
587 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
588 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
589 [dst]"+&r"(dst), [src1]"+&r"(src1),
590 [src2]"+&r"(src2), [h]"+&r"(h)
591 : [dst_stride]"r"((mips_reg)dst_stride),
592 [src_stride1]"r"((mips_reg)src_stride1),
593 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * Two-stage average for an 8-byte-wide block: first pavgb of src1 and src2,
 * then pavgb of that result with the bytes already in dst; the final value
 * is stored back to dst. Two rows are handled per group and the group is
 * written out twice, so h drops by 4 per pass.
 *
 * dst         - destination, read and written; advanced by 2 * dst_stride
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
598 inline void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
599 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
608 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
609 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
610 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1: average the two sources. */
613 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
614 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
615 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
616 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
617 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
618 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
619 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
620 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
621 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
/* Then average with the existing destination rows (addr5 = second row). */
622 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
623 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
624 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
625 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
626 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
627 MMI_SDC1(%[ftmp0], %[dst], 0x00)
628 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
629 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
630 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
632 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
633 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
634 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
635 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
636 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
637 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
638 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
639 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
640 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
641 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
642 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
643 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
644 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
645 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
646 MMI_SDC1(%[ftmp0], %[dst], 0x00)
647 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
648 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
649 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
651 PTR_ADDI "%[h], %[h], -0x04 \n\t"
653 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
654 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
655 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
658 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
659 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
660 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
661 [dst]"+&r"(dst), [src1]"+&r"(src1),
662 [src2]"+&r"(src2), [h]"+&r"(h)
663 : [dst_stride]"r"((mips_reg)dst_stride),
664 [src_stride1]"r"((mips_reg)src_stride1),
665 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * 16-byte-wide two-source average into dst, built from two calls to
 * ff_avg_pixels8_l2_8_mmi: one for the left 8 bytes and one for the right
 * 8 bytes (dst + 8, src1 + 8, src2 + 8) with the same strides and h.
 * NOTE(review): the end of the parameter list and the tail of the first call
 * are not visible in this view.
 */
670 inline void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
671 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
674 ff_avg_pixels8_l2_8_mmi(dst, src1, src2, dst_stride, src_stride1,
676 ff_avg_pixels8_l2_8_mmi(dst + 8, src1 + 8, src2 + 8, dst_stride,
677 src_stride1, src_stride2, h);
/*
 * 4-byte-wide put where each output byte is the average of a pixel and its
 * right-hand neighbour: forwards to ff_put_pixels4_l2_8_mmi with pixels and
 * pixels + 1 as the two sources and line_size as the stride arguments.
 * NOTE(review): the trailing arguments of the call are not visible in this
 * view.
 */
680 void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
681 ptrdiff_t line_size, int h)
683 ff_put_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
/*
 * 8-byte-wide put where each output byte is the average of a pixel and its
 * right-hand neighbour: forwards to ff_put_pixels8_l2_8_mmi with pixels and
 * pixels + 1 as the two sources and line_size as the stride arguments.
 * NOTE(review): the trailing arguments of the call are not visible in this
 * view.
 */
687 void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
688 ptrdiff_t line_size, int h)
690 ff_put_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
/*
 * 16-byte-wide put where each output byte is the average of a pixel and its
 * right-hand neighbour: forwards to ff_put_pixels16_l2_8_mmi with pixels and
 * pixels + 1 as the two sources and line_size as the stride arguments.
 * NOTE(review): the trailing arguments of the call are not visible in this
 * view.
 */
694 void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
695 ptrdiff_t line_size, int h)
697 ff_put_pixels16_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
/*
 * 4-byte-wide variant that averages each pixel with its right-hand neighbour
 * and then with the existing contents of block: forwards to
 * ff_avg_pixels4_l2_8_mmi with pixels and pixels + 1 as the two sources and
 * line_size as the stride arguments.
 * NOTE(review): the trailing arguments of the call are not visible in this
 * view.
 */
701 void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
702 ptrdiff_t line_size, int h)
704 ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
/*
 * 8-byte-wide variant that averages each pixel with its right-hand neighbour
 * and then with the existing contents of block: forwards to
 * ff_avg_pixels8_l2_8_mmi with pixels and pixels + 1 as the two sources and
 * line_size as the stride arguments.
 * NOTE(review): the trailing arguments of the call are not visible in this
 * view.
 */
708 void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
709 ptrdiff_t line_size, int h)
711 ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
/*
 * 16-byte-wide x2 average, built from two calls to ff_avg_pixels8_x2_8_mmi:
 * one on the left 8 bytes and one on the right 8 bytes (block + 8,
 * pixels + 8), both with the same line_size and h.
 */
715 void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
716 ptrdiff_t line_size, int h)
718 ff_avg_pixels8_x2_8_mmi(block, pixels, line_size, h);
719 ff_avg_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
/*
 * 8-byte-wide two-source average written to dst, with the inputs and the
 * result bitwise complemented around pavgb. ftmp4 is set to all ones
 * (pcmpeqb of a register with itself), every loaded row is XORed with it,
 * the complemented rows are averaged, and the result is XORed again before
 * being stored.
 * NOTE(review): presumably this turns the rounding direction of pavgb
 * around to give the no-rounding average the function name refers to --
 * confirm against the pavgb definition.
 *
 * dst         - destination; advanced by 2 * dst_stride per group
 * src1, src2  - the two sources; advanced by 2 * their own stride per group
 * dst_stride, src_stride1, src_stride2 - byte strides
 * h           - row counter (appears as an asm operand), decremented by 4
 *
 * NOTE(review): the end of the parameter list, the local declarations, the
 * loop label and the loop branch are not visible in this view; presumably h
 * must be a positive multiple of 4 -- confirm against the complete file.
 */
722 inline void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
723 const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
/* ftmp4 = all-ones mask used for the complement operations below. */
732 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
/* addr2 / addr3 / addr4 = doubled strides for src1 / src2 / dst. */
733 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
734 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
735 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
/* Rows 0 and 1. */
738 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
739 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
740 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
741 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
742 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
743 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
744 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
/* Complement all four inputs, average, then complement the two results. */
745 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
746 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
747 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
748 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
749 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
750 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
751 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
752 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
753 MMI_SDC1(%[ftmp0], %[dst], 0x00)
754 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
755 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
756 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Rows 2 and 3: same sequence as above. */
758 MMI_ULDC1(%[ftmp0], %[src1], 0x00)
759 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
760 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
761 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
762 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
763 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
764 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
765 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
766 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
767 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
768 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
769 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
770 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
771 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
772 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
773 MMI_SDC1(%[ftmp0], %[dst], 0x00)
774 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
775 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
776 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
/* Four rows are consumed per pass. */
778 PTR_ADDI "%[h], %[h], -0x04 \n\t"
780 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
781 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
782 [ftmp4]"=&f"(ftmp[4]),
785 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
786 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
787 [addr4]"=&r"(addr[4]),
788 [dst]"+&r"(dst), [src1]"+&r"(src1),
789 [src2]"+&r"(src2), [h]"+&r"(h)
790 : [dst_stride]"r"((mips_reg)dst_stride),
791 [src_stride1]"r"((mips_reg)src_stride1),
792 [src_stride2]"r"((mips_reg)src_stride2)
/*
 * 8-byte-wide no_rnd put of each pixel combined with its right-hand
 * neighbour: forwards to ff_put_no_rnd_pixels8_l2_8_mmi with pixels and
 * pixels + 1 as the two sources and line_size for all three strides.
 */
797 void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
798 ptrdiff_t line_size, int h)
800 ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size,
801 line_size, line_size, h);
/*
 * 16-byte-wide no_rnd x2 put, built from two calls to
 * ff_put_no_rnd_pixels8_x2_8_mmi: one on the left 8 bytes and one on the
 * right 8 bytes (block + 8, pixels + 8), both with the same line_size and h.
 */
804 void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
805 ptrdiff_t line_size, int h)
807 ff_put_no_rnd_pixels8_x2_8_mmi(block, pixels, line_size, h);
808 ff_put_no_rnd_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
/*
 * 4-byte-wide put where each output byte is the average of a pixel and the
 * pixel one row below it: forwards to ff_put_pixels4_l2_8_mmi with pixels
 * and pixels + line_size as the two sources and line_size for all strides.
 */
811 void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
812 ptrdiff_t line_size, int h)
814 ff_put_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
815 line_size, line_size, h);
/*
 * 8-byte-wide put where each output byte is the average of a pixel and the
 * pixel one row below it: forwards to ff_put_pixels8_l2_8_mmi with pixels
 * and pixels + line_size as the two sources and line_size for all strides.
 */
818 void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
819 ptrdiff_t line_size, int h)
821 ff_put_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
822 line_size, line_size, h);
/*
 * 16-byte-wide put where each output byte is the average of a pixel and the
 * pixel one row below it: forwards to ff_put_pixels16_l2_8_mmi with pixels
 * and pixels + line_size as the two sources and line_size for all strides.
 */
825 void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
826 ptrdiff_t line_size, int h)
828 ff_put_pixels16_l2_8_mmi(block, pixels, pixels + line_size, line_size,
829 line_size, line_size, h);
/*
 * 4-byte-wide variant that averages each pixel with the pixel one row below
 * it and then with the existing contents of block: forwards to
 * ff_avg_pixels4_l2_8_mmi with pixels and pixels + line_size as the two
 * sources and line_size for all strides.
 */
832 void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
833 ptrdiff_t line_size, int h)
835 ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
836 line_size, line_size, h);
/*
 * 8-byte-wide variant that averages each pixel with the pixel one row below
 * it and then with the existing contents of block: forwards to
 * ff_avg_pixels8_l2_8_mmi with pixels and pixels + line_size as the two
 * sources and line_size for all strides.
 */
839 void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
840 ptrdiff_t line_size, int h)
842 ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
843 line_size, line_size, h);
/*
 * 16-byte-wide y2 average, built from two calls to ff_avg_pixels8_y2_8_mmi:
 * one on the left 8 bytes and one on the right 8 bytes (block + 8,
 * pixels + 8), both with the same line_size and h.
 */
846 void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
847 ptrdiff_t line_size, int h)
849 ff_avg_pixels8_y2_8_mmi(block, pixels, line_size, h);
850 ff_avg_pixels8_y2_8_mmi(block + 8, pixels + 8, line_size, h);
/*
 * 8-byte-wide no_rnd put of each pixel combined with the pixel one row below
 * it: forwards to ff_put_no_rnd_pixels8_l2_8_mmi with pixels and
 * pixels + line_size as the two sources and line_size for all strides.
 */
853 void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
854 ptrdiff_t line_size, int h)
856 ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + line_size,
857 line_size, line_size, line_size, h);
/*
 * 16-byte-wide no_rnd y2 put, built from two calls to
 * ff_put_no_rnd_pixels8_y2_8_mmi: one on the left 8 bytes and one on the
 * right 8 bytes (block + 8, pixels + 8), both with the same line_size and h.
 */
860 void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
861 ptrdiff_t line_size, int h)
863 ff_put_no_rnd_pixels8_y2_8_mmi(block, pixels, line_size, h);
864 ff_put_no_rnd_pixels8_y2_8_mmi(block + 8 , pixels + 8, line_size, h);
/*
 * Scalar 4-byte-wide xy2 put that works on four pixels packed in a 32-bit
 * word. For each row, a is read at pixels and b at pixels + 1. Every byte is
 * split into its low 2 bits (mask 0x03030303) and its high 6 bits (mask
 * 0xFCFCFCFC, shifted right by 2); the parts of a and b are summed
 * separately. An output word combines the sums of two consecutive rows:
 *     h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0F)
 * The loop steps i by 2 and writes two output words per iteration, with the
 * row sums alternating between the (l0, h0) and (l1, h1) pairs.
 *
 * NOTE(review): several lines are not visible in this view (the remaining
 * terms of the low-bit sums, the pointer advances for pixels and block, one
 * reload of a, and the local declarations) -- confirm against the complete
 * file before relying on this description.
 */
867 void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
868 ptrdiff_t line_size, int h)
870 /* FIXME HIGH BIT DEPTH */
872 const uint32_t a = AV_RN32(pixels);
873 const uint32_t b = AV_RN32(pixels + 1);
874 uint32_t l0 = (a & 0x03030303UL) +
877 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
878 ((b & 0xFCFCFCFCUL) >> 2);
/* Two output rows per iteration. */
882 for (i = 0; i < h; i += 2) {
883 uint32_t a = AV_RN32(pixels);
884 uint32_t b = AV_RN32(pixels + 1);
885 l1 = (a & 0x03030303UL) +
887 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
888 ((b & 0xFCFCFCFCUL) >> 2);
889 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
893 b = AV_RN32(pixels + 1);
894 l0 = (a & 0x03030303UL) +
897 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
898 ((b & 0xFCFCFCFCUL) >> 2);
899 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
/*
 * 8-byte-wide xy2 put. The asm widens the bytes of each source row and of
 * the same row shifted by one byte (offset 0x01) to 16-bit halfwords and
 * adds them, giving a per-row horizontal pair sum. Each output row is
 *     (sum of row n + sum of row n + 1 + 2) >> 2
 * packed back to bytes (packushb) and stored at block + addr0, where addr0
 * is a running byte offset that grows by line_size per output row. Two
 * output rows are produced per pass and h is decremented by 2.
 *
 * block     - destination base; only indexed through addr0 (input operand)
 * pixels    - source; advanced once by line_size before the loop body
 * line_size - byte stride used for both source and destination
 * h         - row counter, decremented by 2 in each pass
 *
 * NOTE(review): the local declarations, loop label, loop branch and any
 * preprocessor guards are not visible in this view; presumably h must be a
 * positive even number -- confirm against the complete file.
 */
905 void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
906 ptrdiff_t line_size, int h)
/* ftmp7 = 0, the zero operand used when widening bytes to halfwords. */
915 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
/*
 * ftmp6: all-ones halfwords shifted right by 15 and then left by 1, which
 * leaves the value 2 in every halfword (the term added before the >> 2).
 */
916 "dli %[addr0], 0x0f \n\t"
917 "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
918 "dmtc1 %[addr0], %[ftmp8] \n\t"
919 "dli %[addr0], 0x01 \n\t"
920 "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
921 "dmtc1 %[addr0], %[ftmp8] \n\t"
922 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* ftmp9 = 2, the shift count for the final psrlh. */
924 "dli %[addr0], 0x02 \n\t"
925 "dmtc1 %[addr0], %[ftmp9] \n\t"
/* First row: pair sums go to ftmp4 (low four bytes) and ftmp5 (high four). */
926 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
927 MMI_ULDC1(%[ftmp4], %[pixels], 0x01)
928 "mov.d %[ftmp1], %[ftmp0] \n\t"
929 "mov.d %[ftmp5], %[ftmp4] \n\t"
930 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
931 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
932 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
933 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
934 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
935 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
/* addr0 is cleared and then used as the running row offset. */
936 "xor %[addr0], %[addr0], %[addr0] \n\t"
937 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
/* Output row A: new row sums in ftmp0/ftmp1, combined with ftmp4/ftmp5. */
941 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
942 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
943 MMI_ULDC1(%[ftmp2], %[addr1], 0x01)
944 "mov.d %[ftmp1], %[ftmp0] \n\t"
945 "mov.d %[ftmp3], %[ftmp2] \n\t"
946 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
947 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
948 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
949 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
950 "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
951 "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
952 "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
953 "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
954 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
955 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
956 "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
957 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
958 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
959 MMI_SDXC1(%[ftmp4], %[block], %[addr0], 0x00)
960 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
/*
 * Output row B: new row sums in ftmp4/ftmp5 (kept for the next pass),
 * combined with the previous row sums still held in ftmp0/ftmp1.
 */
961 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
962 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
963 MMI_ULDC1(%[ftmp4], %[addr1], 0x01)
964 "mov.d %[ftmp3], %[ftmp2] \n\t"
965 "mov.d %[ftmp5], %[ftmp4] \n\t"
966 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
967 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
968 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
969 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
970 "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
971 "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
972 "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
973 "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
974 "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
975 "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
976 "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
977 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
978 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
979 MMI_SDXC1(%[ftmp0], %[block], %[addr0], 0x00)
980 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
/*
 * NOTE(review): h is decremented with PTR_ADDU and an immediate operand,
 * whereas the other functions in this file use PTR_ADDI for the same job;
 * this relies on the assembler accepting an immediate here -- confirm.
 */
981 PTR_ADDU "%[h], %[h], -0x02 \n\t"
983 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
984 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
985 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
986 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
987 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
990 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
991 [h]"+&r"(h), [pixels]"+&r"(pixels)
992 : [block]"r"(block), [line_size]"r"((mips_reg)line_size)
/*
 * NOTE(review): the code below looks like an alternative scalar
 * implementation of the same operation; whatever selects between it and the
 * asm above is not visible in this view -- confirm. It handles the 8-byte
 * width as two 4-byte columns (j < 2) using the same low-2-bit / high-6-bit
 * split as the 4-byte-wide xy2 routine, and after each column rewinds pixels
 * by line_size * (h + 1) and block by line_size * h while stepping 4 bytes
 * to the right.
 */
996 /* FIXME HIGH BIT DEPTH */
999 for (j = 0; j < 2; j++) {
1001 const uint32_t a = AV_RN32(pixels);
1002 const uint32_t b = AV_RN32(pixels + 1);
1003 uint32_t l0 = (a & 0x03030303UL) +
1004 (b & 0x03030303UL) +
1006 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1007 ((b & 0xFCFCFCFCUL) >> 2);
1010 pixels += line_size;
1011 for (i = 0; i < h; i += 2) {
1012 uint32_t a = AV_RN32(pixels);
1013 uint32_t b = AV_RN32(pixels + 1);
1014 l1 = (a & 0x03030303UL) +
1016 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1017 ((b & 0xFCFCFCFCUL) >> 2);
1018 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1019 pixels += line_size;
1021 a = AV_RN32(pixels);
1022 b = AV_RN32(pixels + 1);
1023 l0 = (a & 0x03030303UL) +
1024 (b & 0x03030303UL) +
1026 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1027 ((b & 0xFCFCFCFCUL) >> 2);
1028 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1029 pixels += line_size;
1032 pixels += 4 - line_size * (h + 1);
1033 block += 4 - line_size * h;
/**
 * 16-byte-wide counterpart of ff_put_pixels8_xy2_8_mmi().
 *
 * The 8-wide routine is invoked twice, first at byte offset 0 and then at
 * byte offset 8 of both buffers; stride and row count are forwarded as-is.
 *
 * @param block     destination pointer handed to the 8-wide routine
 * @param pixels    source pointer handed to the 8-wide routine
 * @param line_size stride argument forwarded unchanged to both calls
 * @param h         height argument forwarded unchanged to both calls
 */
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h)
{
    int half;

    /* Left half first, then right half: same order as two explicit calls. */
    for (half = 0; half < 2; half++) {
        const int x = 8 * half;

        ff_put_pixels8_xy2_8_mmi(block + x, pixels + x, line_size, h);
    }
}
/*
 * ff_avg_pixels4_xy2_8_mmi: plain-C routine that works on one 32-bit word
 * (four bytes) per row.  For each output row it combines the word at
 * pixels and the word at pixels + 1 from two consecutive source rows, and
 * merges the result into the word already stored at block via rnd_avg32().
 *
 * block     - destination; read and written as uint32_t
 * pixels    - source; read through AV_RN32 at offsets 0 and 1 of each row
 * line_size - added to pixels after every source row
 * h         - loop bound; the loop consumes two rows per iteration
 *
 * AV_RN32 and rnd_avg32 are defined outside this excerpt.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering
 * (e.g. 1049, 1054, 1057-1058, 1064, 1069, 1079-1081).  The declarations
 * of i, l1 and h1, the last addend of l0, the second term of l1 and any
 * statement advancing block are not visible here - confirm against the
 * complete file before relying on the notes below.
 */
1045 void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
1046 ptrdiff_t line_size, int h)
1048 /* FIXME HIGH BIT DEPTH */
/* Row 0: a is the word at pixels, b is the word one byte further on. */
1050 const uint32_t a = AV_RN32(pixels);
1051 const uint32_t b = AV_RN32(pixels + 1);
/*
 * Each byte is split into its low two bits (l*) and its upper six bits
 * shifted down by two (h*), so four bytes can be summed per lane without
 * carrying into the neighbouring byte of the 32-bit word.
 * l0 ends with a further addend that is not shown in this listing
 * (presumably the rounding bias - TODO confirm).
 */
1052 uint32_t l0 = (a & 0x03030303UL) +
1053 (b & 0x03030303UL) +
1055 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1056 ((b & 0xFCFCFCFCUL) >> 2);
1059 pixels += line_size;
/* Two source rows are consumed per iteration. */
1060 for (i = 0; i < h; i += 2) {
/* These inner a/b shadow the function-level ones. */
1061 uint32_t a = AV_RN32(pixels);
1062 uint32_t b = AV_RN32(pixels + 1);
/* NOTE(review): the continuation of this l1 expression is not shown. */
1063 l1 = (a & 0x03030303UL) +
1065 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1066 ((b & 0xFCFCFCFCUL) >> 2);
/*
 * h0 + h1 adds the upper-six-bit parts of the four contributing bytes;
 * (l0 + l1) >> 2 adds the low-bit parts divided by four, and the
 * 0x0F0F0F0F mask drops bits shifted in from the adjacent byte lane.
 * The combined word is then merged with the current destination word.
 */
1067 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1068 pixels += line_size;
/* Next row: refresh l0/h0 so that l1/h1 now act as the previous row. */
1070 a = AV_RN32(pixels);
1071 b = AV_RN32(pixels + 1);
1072 l0 = (a & 0x03030303UL) +
1073 (b & 0x03030303UL) +
1075 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1076 ((b & 0xFCFCFCFCUL) >> 2);
1077 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1078 pixels += line_size;
/*
 * ff_avg_pixels8_xy2_8_mmi: plain-C routine covering eight bytes per row
 * as two passes (j = 0, 1) over 4-byte columns.  Within a pass, each
 * output word combines the words at pixels and pixels + 1 from two
 * consecutive source rows and is merged into the existing destination
 * word via rnd_avg32().
 *
 * block     - destination; read and written as uint32_t
 * pixels    - source; read through AV_RN32 at offsets 0 and 1 of each row
 * line_size - added to pixels after every source row
 * h         - inner loop bound; two rows are consumed per iteration
 *
 * AV_RN32 and rnd_avg32 are defined outside this excerpt.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering
 * (e.g. 1087-1088, 1095, 1098-1099, 1105, 1110, 1120-1121).  The
 * declarations of i, j, l1 and h1, the last addend of l0, the second
 * term of l1 and any statement advancing block inside the inner loop are
 * not visible here - confirm against the complete file.
 */
1083 void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
1084 ptrdiff_t line_size, int h)
1086 /* FIXME HIGH BIT DEPTH */
/* One pass per 4-byte column. */
1089 for (j = 0; j < 2; j++) {
/* Top row of this column: word at pixels and word one byte further on. */
1091 const uint32_t a = AV_RN32(pixels);
1092 const uint32_t b = AV_RN32(pixels + 1);
/*
 * Low two bits (l*) and upper six bits >> 2 (h*) of every byte are summed
 * separately so per-byte sums cannot carry into the neighbouring lane.
 * l0 ends with a further addend that is not shown in this listing
 * (presumably the rounding bias - TODO confirm).
 */
1093 uint32_t l0 = (a & 0x03030303UL) +
1094 (b & 0x03030303UL) +
1096 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1097 ((b & 0xFCFCFCFCUL) >> 2);
1100 pixels += line_size;
/* Two source rows are consumed per iteration. */
1101 for (i = 0; i < h; i += 2) {
/* These inner a/b shadow the per-column ones declared above. */
1102 uint32_t a = AV_RN32(pixels);
1103 uint32_t b = AV_RN32(pixels + 1);
/* NOTE(review): the continuation of this l1 expression is not shown. */
1104 l1 = (a & 0x03030303UL) +
1106 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1107 ((b & 0xFCFCFCFCUL) >> 2);
/*
 * Upper parts add directly; the low parts are summed, divided by four and
 * masked to four bits per lane to discard bits shifted in from the
 * adjacent byte.  The result is merged with the current destination word.
 */
1108 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1109 pixels += line_size;
/* Next row: refresh l0/h0 so that l1/h1 now act as the previous row. */
1111 a = AV_RN32(pixels);
1112 b = AV_RN32(pixels + 1);
1113 l0 = (a & 0x03030303UL) +
1114 (b & 0x03030303UL) +
1116 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1117 ((b & 0xFCFCFCFCUL) >> 2);
1118 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1119 pixels += line_size;
/*
 * Step back to the top row and four bytes to the right for the next
 * column: pixels is moved back by h + 1 rows and block by h rows.
 */
1122 pixels += 4 - line_size * (h + 1);
1123 block += 4 - line_size * h;
/**
 * 16-byte-wide counterpart of ff_avg_pixels8_xy2_8_mmi().
 *
 * The 8-wide routine is invoked twice, first at byte offset 0 and then at
 * byte offset 8 of both buffers; stride and row count are forwarded as-is.
 *
 * @param block     destination pointer handed to the 8-wide routine
 * @param pixels    source pointer handed to the 8-wide routine
 * @param line_size stride argument forwarded unchanged to both calls
 * @param h         height argument forwarded unchanged to both calls
 */
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h)
{
    int half;

    /* Left half first, then right half: same order as two explicit calls. */
    for (half = 0; half < 2; half++) {
        const int x = 8 * half;

        ff_avg_pixels8_xy2_8_mmi(block + x, pixels + x, line_size, h);
    }
}
/*
 * ff_put_no_rnd_pixels8_xy2_8_mmi: plain-C routine covering eight bytes
 * per row as two passes (j = 0, 1) over 4-byte columns.  Within a pass,
 * each output word combines the words at pixels and pixels + 1 from two
 * consecutive source rows and is stored directly to block; the previous
 * destination contents are not read.
 *
 * block     - destination; written as uint32_t
 * pixels    - source; read through AV_RN32 at offsets 0 and 1 of each row
 * line_size - added to pixels after every source row
 * h         - inner loop bound; two rows are consumed per iteration
 *
 * AV_RN32 is defined outside this excerpt.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering
 * (e.g. 1138-1139, 1146, 1149-1150, 1156, 1161, 1171-1172).  The
 * declarations of i, j, l1 and h1, the last addend of l0 (the constant
 * that decides the rounding behaviour), the second term of l1 and any
 * statement advancing block inside the inner loop are not visible here -
 * confirm against the complete file.
 */
1134 void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
1135 ptrdiff_t line_size, int h)
1137 /* FIXME HIGH BIT DEPTH */
/* One pass per 4-byte column. */
1140 for (j = 0; j < 2; j++) {
/* Top row of this column: word at pixels and word one byte further on. */
1142 const uint32_t a = AV_RN32(pixels);
1143 const uint32_t b = AV_RN32(pixels + 1);
/*
 * Low two bits (l*) and upper six bits >> 2 (h*) of every byte are summed
 * separately so per-byte sums cannot carry into the neighbouring lane.
 * l0 ends with a further addend that is not shown in this listing.
 */
1144 uint32_t l0 = (a & 0x03030303UL) +
1145 (b & 0x03030303UL) +
1147 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1148 ((b & 0xFCFCFCFCUL) >> 2);
1151 pixels += line_size;
/* Two source rows are consumed per iteration. */
1152 for (i = 0; i < h; i += 2) {
/* These inner a/b shadow the per-column ones declared above. */
1153 uint32_t a = AV_RN32(pixels);
1154 uint32_t b = AV_RN32(pixels + 1);
/* NOTE(review): the continuation of this l1 expression is not shown. */
1155 l1 = (a & 0x03030303UL) +
1157 h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1158 ((b & 0xFCFCFCFCUL) >> 2);
/*
 * Upper parts add directly; the low parts are summed, divided by four and
 * masked to four bits per lane to discard bits shifted in from the
 * adjacent byte.  The word overwrites the destination.
 */
1159 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1160 pixels += line_size;
/* Next row: refresh l0/h0 so that l1/h1 now act as the previous row. */
1162 a = AV_RN32(pixels);
1163 b = AV_RN32(pixels + 1);
1164 l0 = (a & 0x03030303UL) +
1165 (b & 0x03030303UL) +
1167 h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1168 ((b & 0xFCFCFCFCUL) >> 2);
1169 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1170 pixels += line_size;
/*
 * Step back to the top row and four bytes to the right for the next
 * column: pixels is moved back by h + 1 rows and block by h rows.
 */
1173 pixels += 4 - line_size * (h + 1);
1174 block += 4 - line_size * h;
/**
 * 16-byte-wide counterpart of ff_put_no_rnd_pixels8_xy2_8_mmi().
 *
 * The 8-wide routine is invoked twice, first at byte offset 0 and then at
 * byte offset 8 of both buffers; stride and row count are forwarded as-is.
 *
 * @param block     destination pointer handed to the 8-wide routine
 * @param pixels    source pointer handed to the 8-wide routine
 * @param line_size stride argument forwarded unchanged to both calls
 * @param h         height argument forwarded unchanged to both calls
 */
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h)
{
    int half;

    /* Left half first, then right half: same order as two explicit calls. */
    for (half = 0; half < 2; half++) {
        const int x = 8 * half;

        ff_put_no_rnd_pixels8_xy2_8_mmi(block + x, pixels + x, line_size, h);
    }
}