/*
 * Loongson SIMD optimized h264pred
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25 #include "h264pred_mips.h"
26 #include "constants.h"
/**
 * 16x16 "vertical" intra prediction, 8-bit, Loongson MMI.
 * NOTE(review): the inline-asm statement is truncated in this chunk (no
 * "__asm__ volatile (" opener, no closing ");", no function braces).
 * The surviving setup points $2 at the row above the block (src - stride)
 * and $3 at the destination; presumably the missing body copies those 16
 * bytes into each of the 16 rows — confirm against the complete file.
 */
28 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
/* $2 = src - stride (top neighbour row), $3 = src (dest cursor) */
31 "dsubu $2, %0, %1 \r\n"
32 "daddu $3, %0, $0 \r\n"
/* asm operands: %0 = src, %1 = stride */
46 ::"r"(src),"r"(stride)
47 : "$2","$3","$4","$5","$6","memory"
/**
 * 16x16 "horizontal" intra prediction, 8-bit, Loongson MMI.
 * NOTE(review): asm statement truncated (no wrapper/loop visible).
 * $2 starts at src-1 (left neighbour column); the left byte ($4) is
 * splatted across a 16-byte row by multiplying with ff_pb_1
 * (0x01 repeated in every byte) — the load/store/loop instructions
 * are missing from this chunk; verify against the full file.
 */
51 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
/* $2 = src - 1 (left neighbour), $3 = src (dest cursor) */
54 "daddiu $2, %0, -1 \r\n"
55 "daddu $3, %0, $0 \r\n"
/* $5 = left_pixel * 0x0101...01 -> byte splat */
59 "dmul $5, $4, %2 \r\n"
68 ::"r"(src),"r"(stride),"r"(ff_pb_1)
69 : "$2","$3","$4","$5","$6","memory"
/**
 * 16x16 DC intra prediction, 8-bit, Loongson MMI.
 * NOTE(review): asm statement truncated (loop labels, loads and the
 * final shift are missing). The visible fragment accumulates the left
 * column (walking $2 = src-1 down by stride) and the top row (walking
 * $2 = src-stride rightwards) into $8, adds the 0x10 rounding term,
 * splats the DC byte via multiplication with ff_pb_1, and stores 16
 * bytes per row (only the sdl half of the sdl/sdr pair survives here).
 * Confirm details against the complete file.
 */
73 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
/* --- sum the 16 left-neighbour pixels --- */
76 "daddiu $2, %0, -1 \r\n"
81 "daddu $8, $8, $4 \r\n"
82 "daddu $2, $2, %1 \r\n"
83 "daddiu $6, $6, -1 \r\n"
/* --- sum the 16 top-neighbour pixels ($3 presumably = -stride) --- */
87 "daddu $2, %0, $3 \r\n"
90 "daddu $8, $8, $4 \r\n"
91 "daddiu $2, $2, 1 \r\n"
92 "daddiu $6, $6, -1 \r\n"
/* rounding: (sum + 16) >> 5, shift not visible in this chunk */
94 "daddiu $8, $8, 0x10 \r\n"
/* splat DC into all 8 bytes of $5 via ff_pb_1 */
96 "dmul $5, $8, %2 \r\n"
97 "daddu $2, %0, $0 \r\n"
/* unaligned 8-byte store, upper half of the 16-byte row */
102 "sdl $5, 15($2) \r\n"
104 "daddu $2, $2, %1 \r\n"
105 "daddiu $6, $6, -1 \r\n"
107 ::"r"(src),"r"(stride),"r"(ff_pb_1)
108 : "$2","$3","$4","$5","$6","$8","memory"
/**
 * 8x8 (luma 8x8l) "top DC" intra prediction, 8-bit, Loongson MMI.
 * The asm low-pass filters the 8 top neighbours with the H.264
 * (a + 2b + c + 2) >> 2 kernel (left/centre/right rows loaded from
 * src-stride-1 / src-stride / src-stride+1), sums the filtered bytes
 * (biadd), rounds ((sum + 4) >> 3) and splats the DC into all rows.
 * NOTE(review): asm wrapper, several pinsrh edge fixups and the C
 * declarations (y, dc) are missing from this chunk; the has_topleft /
 * has_topright operands presumably select the edge-replication fixups —
 * confirm against the complete file.
 */
112 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
113 int has_topright, ptrdiff_t stride)
/* unaligned load of the top-right row into $10 -> $f6 */
123 "ldl $10, 7(%3) \r\n"
124 "ldr $10, 0(%3) \r\n"
127 "dmtc1 $10, $f6 \r\n"
/* widen the three byte rows to 16-bit halves ($f0 presumably zero) */
129 "punpcklbh $f8, $f2, $f0 \r\n"
130 "punpckhbh $f10, $f2, $f0 \r\n"
131 "punpcklbh $f12, $f4, $f0 \r\n"
132 "punpckhbh $f14, $f4, $f0 \r\n"
133 "punpcklbh $f16, $f6, $f0 \r\n"
134 "punpckhbh $f18, $f6, $f0 \r\n"
/* edge fixups: replicate border samples into the end taps */
136 "pinsrh_0 $f8, $f8, $f12 \r\n"
139 "pinsrh_3 $f18, $f18, $f14 \r\n"
/* (a + 2*b + c + 2) >> 2 low-pass filter, vectorized */
141 "daddiu $8, $0, 2 \r\n"
142 "dmtc1 $8, $f20 \r\n"
143 "pshufh $f22, $f20, $f0 \r\n"
144 "pmullh $f12, $f12, $f22 \r\n"
145 "pmullh $f14, $f14, $f22 \r\n"
146 "paddh $f8, $f8, $f12 \r\n"
147 "paddh $f10, $f10, $f14 \r\n"
148 "paddh $f8, $f8, $f16 \r\n"
149 "paddh $f10, $f10, $f18 \r\n"
150 "paddh $f8, $f8, $f22 \r\n"
151 "paddh $f10, $f10, $f22 \r\n"
152 "psrah $f8, $f8, $f20 \r\n"
153 "psrah $f10, $f10, $f20 \r\n"
154 "packushb $f4, $f8, $f10 \r\n"
/* horizontal sum of the 8 filtered bytes */
155 "biadd $f2, $f4 \r\n"
/* DC = (sum + 4) >> 3, then splat with 0x01010101 */
157 "addiu $9, $9, 4 \r\n"
158 "dsrl $9, $9, 3 \r\n"
159 "li $8, 0x01010101 \r\n"
160 "mul %0, $9, $8 \r\n"
162 : "r"(src-stride-1),"r"(src-stride),"r"(src-stride+1),
163 "r"(has_topleft),"r"(has_topright)
/* fill all 8 rows with the splatted DC word (8 bytes per row) */
167 for (y=0; y<8; y++) {
168 AV_WN4PA(((uint32_t*)src)+0, dc);
169 AV_WN4PA(((uint32_t*)src)+1, dc);
/**
 * 8x8 (luma 8x8l) full DC intra prediction, 8-bit, Loongson MMI.
 * C code low-pass filters the 8 left neighbours (l0..l7, H.264
 * (a + 2b + c + 2) >> 2 with edge clamping); the asm filters the 8 top
 * neighbours the same way and horizontally sums them (presumably into
 * dc2 — the dmfc1/output lines are missing from this chunk). Final
 * DC = (left_sum + top_sum + 8) >> 4, splatted into all 8 rows.
 * NOTE(review): asm wrapper, some conditional edge fixups and the dc2
 * extraction are not visible here; verify against the complete file.
 */
174 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft,
175 int has_topright, ptrdiff_t stride)
178 uint32_t dc, dc1, dc2;
/* filtered left column; first tap clamps to row 0 when the top-left
 * neighbour is unavailable, last tap repeats the bottom pixel */
180 const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
181 const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
182 const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
183 const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
184 const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
185 const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
186 const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
187 const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
/* unaligned load of src-stride+1 (top-right taps) */
194 "ldl $10, 7(%3) \r\n"
195 "ldr $10, 0(%3) \r\n"
198 "dmtc1 $10, $f6 \r\n"
/* widen the three top byte rows to 16-bit halves */
200 "punpcklbh $f8, $f2, $f0 \r\n"
201 "punpckhbh $f10, $f2, $f0 \r\n"
202 "punpcklbh $f12, $f4, $f0 \r\n"
203 "punpckhbh $f14, $f4, $f0 \r\n"
204 "punpcklbh $f16, $f6, $f0 \r\n"
205 "punpckhbh $f18, $f6, $f0 \r\n"
/* edge fixups for missing top-left / top-right neighbours */
206 "daddiu $8, $0, 3 \r\n"
207 "dmtc1 $8, $f20 \r\n"
208 "pshufh $f28, $f10, $f20 \r\n"
209 "pshufh $f30, $f18, $f20 \r\n"
210 "pinsrh_3 $f10, $f10, $f30 \r\n"
211 "pinsrh_3 $f18, $f18, $f28 \r\n"
213 "pinsrh_0 $f8, $f8, $f12 \r\n"
216 "pshufh $f30, $f14, $f20 \r\n"
217 "pinsrh_3 $f10, $f10, $f30 \r\n"
/* (a + 2*b + c + 2) >> 2 low-pass filter on the top row */
219 "daddiu $8, $0, 2 \r\n"
220 "dmtc1 $8, $f20 \r\n"
221 "pshufh $f22, $f20, $f0 \r\n"
222 "pmullh $f12, $f12, $f22 \r\n"
223 "pmullh $f14, $f14, $f22 \r\n"
224 "paddh $f8, $f8, $f12 \r\n"
225 "paddh $f10, $f10, $f14 \r\n"
226 "paddh $f8, $f8, $f16 \r\n"
227 "paddh $f10, $f10, $f18 \r\n"
228 "paddh $f8, $f8, $f22 \r\n"
229 "paddh $f10, $f10, $f22 \r\n"
230 "psrah $f8, $f8, $f20 \r\n"
231 "psrah $f10, $f10, $f20 \r\n"
232 "packushb $f4, $f8, $f10 \r\n"
/* horizontal sum of the filtered top bytes (-> dc2, extraction not
 * visible in this chunk) */
233 "biadd $f2, $f4 \r\n"
236 : "r"(src-stride-1),"r"(src-stride),"r"(src-stride+1),
237 "r"(has_topleft),"r"(has_topright)
/* combine left sum and top sum, round, splat to a 4-byte word */
241 dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
242 dc = PIXEL_SPLAT_X4((dc1+dc2+8)>>4);
/* fill all 8 rows with the DC word (8 bytes per row) */
244 for (y=0; y<8; y++) {
245 AV_WN4PA(((uint32_t*)src)+0, dc);
246 AV_WN4PA(((uint32_t*)src)+1, dc);
/**
 * 8x8 (luma 8x8l) horizontal intra prediction, 8-bit.
 *
 * Each destination row y is filled with the low-pass-filtered left
 * neighbour: l[y] = (p[y-1] + 2*p[y] + p[y+1] + 2) >> 2, where p[] is
 * the column at src-1. The first tap falls back to p[0] when the
 * top-left neighbour is unavailable, and the tap below the block
 * repeats p[7] (no pixel exists below). has_topright is unused for
 * this mode but kept for the common prediction-function signature.
 *
 * @param src          top-left pixel of the 8x8 destination block
 * @param has_topleft  non-zero when src[-1-stride] is a valid neighbour
 * @param has_topright unused
 * @param stride       line size in bytes
 */
void ff_pred8x8l_horizontal_8_mmi(uint8_t *src, int has_topleft,
                                  int has_topright, ptrdiff_t stride)
{
    int y;
    /* filtered left-column samples, edges clamped as described above */
    const int left[8] = {
        ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride])
                          + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2,
        (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2,
        (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2,
        (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2,
        (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2,
        (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2,
        (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2,
        (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2,
    };

    /* splat each filtered sample across its 8-pixel row */
    for (y = 0; y < 8; y++) {
        const uint32_t a = PIXEL_SPLAT_X4(left[y]);
        AV_WN4PA(src + y*stride,     a);
        AV_WN4PA(src + y*stride + 4, a);
    }
}
/**
 * 8x8 (luma 8x8l) vertical intra prediction, 8-bit, Loongson MMI.
 * The asm low-pass filters the 8 top neighbours with the H.264
 * (a + 2b + c + 2) >> 2 kernel and writes the filtered row back
 * (store not visible in this chunk); the C tail then copies that row
 * into the remaining 7 destination rows.
 * NOTE(review): asm wrapper, the conditional edge fixups and the
 * declarations of y/a/b are missing here — verify against the full file.
 */
281 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
282 int has_topright, ptrdiff_t stride)
/* unaligned load of src-stride+1 (right taps) */
292 "ldl $10, 7(%3) \r\n"
293 "ldr $10, 0(%3) \r\n"
296 "dmtc1 $10, $f6 \r\n"
/* widen the three top byte rows to 16-bit halves */
298 "punpcklbh $f8, $f2, $f0 \r\n"
299 "punpckhbh $f10, $f2, $f0 \r\n"
300 "punpcklbh $f12, $f4, $f0 \r\n"
301 "punpckhbh $f14, $f4, $f0 \r\n"
302 "punpcklbh $f16, $f6, $f0 \r\n"
303 "punpckhbh $f18, $f6, $f0 \r\n"
/* edge fixups: replicate border samples into the end taps */
305 "pinsrh_0 $f8, $f8, $f12 \r\n"
308 "pinsrh_3 $f18, $f18, $f14 \r\n"
/* (a + 2*b + c + 2) >> 2 filter */
310 "daddiu $8, $0, 2 \r\n"
311 "dmtc1 $8, $f20 \r\n"
312 "pshufh $f22, $f20, $f0 \r\n"
313 "pmullh $f12, $f12, $f22 \r\n"
314 "pmullh $f14, $f14, $f22 \r\n"
315 "paddh $f8, $f8, $f12 \r\n"
316 "paddh $f10, $f10, $f14 \r\n"
317 "paddh $f8, $f8, $f16 \r\n"
318 "paddh $f10, $f10, $f18 \r\n"
319 "paddh $f8, $f8, $f22 \r\n"
320 "paddh $f10, $f10, $f22 \r\n"
321 "psrah $f8, $f8, $f20 \r\n"
322 "psrah $f10, $f10, $f20 \r\n"
323 "packushb $f4, $f8, $f10 \r\n"
326 : "r"(src-stride-1),"r"(src-stride),"r"(src-stride+1),
327 "r"(has_topleft),"r"(has_topright)
/* read back the filtered row (written into row 0 by the asm) ... */
331 a = AV_RN4PA(((uint32_t*)src)+0);
332 b = AV_RN4PA(((uint32_t*)src)+1);
/* ... and replicate it into rows 1..7 */
334 for (y=1; y<8; y++) {
335 AV_WN4PA(((uint32_t*)(src+y*stride))+0, a);
336 AV_WN4PA(((uint32_t*)(src+y*stride))+1, b);
/**
 * 4x4 DC intra prediction, 8-bit, Loongson MMI.
 * C code computes DC = (4 top + 4 left neighbours + 4) >> 3; the asm
 * splats it into a 32-bit word with ff_pb_1 and stores it to the four
 * rows with indexed word stores (the $4 += stride increments between
 * the gsswx stores are missing from this chunk).
 * NOTE(review): asm wrapper, the topright parameter is unused here as
 * far as the visible code shows — confirm against the full file.
 */
340 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
/* DC = (sum of 4 top + 4 left neighbours + 4) >> 3 */
343 const int dc = (src[-stride] + src[1-stride] + src[2-stride]
344 + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
345 + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
/* $3 = dc * 0x01010101 (byte splat), $4 = row offset starting at 0 */
348 "daddu $2, %2, $0 \r\n"
349 "dmul $3, $2, %3 \r\n"
350 "xor $4, $4, $4 \r\n"
/* one 4-byte indexed store per row */
351 "gsswx $3, 0(%0,$4) \r\n"
353 "gsswx $3, 0(%0,$4) \r\n"
355 "gsswx $3, 0(%0,$4) \r\n"
357 "gsswx $3, 0(%0,$4) \r\n"
358 ::"r"(src),"r"(stride),"r"(dc),"r"(ff_pb_1)
359 : "$2","$3","$4","memory"
/**
 * 8x8 vertical intra prediction (chroma-style), 8-bit, Loongson MMI.
 * NOTE(review): asm statement truncated; the surviving setup points
 * $2 at the row above the block (src - stride) and $3 at the
 * destination — presumably the missing body copies those 8 bytes into
 * each of the 8 rows. Confirm against the complete file.
 */
363 void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
366 "dsubu $2, %0, %1 \r\n"
367 "daddu $3, %0, $0 \r\n"
377 ::"r"(src),"r"(stride)
378 : "$2","$3","$4","$5","memory"
/**
 * 8x8 horizontal intra prediction (chroma-style), 8-bit, Loongson MMI.
 * NOTE(review): asm statement truncated. $2 walks the left-neighbour
 * column (src - 1); each left byte ($4) is splatted across an 8-byte
 * row via multiplication with ff_pb_1 — the load/store/loop
 * instructions are missing from this chunk.
 */
382 void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
385 "daddiu $2, %0, -1 \r\n"
386 "daddu $3, %0, $0 \r\n"
/* $5 = left_pixel * 0x0101...01 -> byte splat */
390 "dmul $5, $4, %2 \r\n"
397 ::"r"(src),"r"(stride),"r"(ff_pb_1)
398 : "$2","$3","$4","$5","$6","memory"
/**
 * Shared 16x16 plane (gradient) intra prediction kernel, 8-bit,
 * Loongson MMI. The svq3/rv40 flags select the codec-specific rounding
 * of the plane coefficients (chosen by the branchy section that is
 * largely missing from this chunk).
 *
 * Visible structure: the top border pixels are loaded in four 4-pixel
 * groups, weighted with the ff_pw_* ramp constants and summed to get
 * the horizontal gradient H ($f10); a second, symmetric pass (loads not
 * visible, presumably the left border) produces the vertical gradient V
 * ($f12); both are reduced by pshufh/paddsh folds. The tail multiplies
 * the 0..15 position ramps (ff_pw_0to3 ... ff_pw_ctof) by the gradient,
 * adds the per-row base term ($f12, incremented by $f10-derived step
 * each row), shifts (psrah by 5) and packs/stores 16 bytes per row.
 *
 * NOTE(review): a large part of the asm (loads of the left border, the
 * svq3/rv40 coefficient selection around the $2/$3 swap at original
 * lines 533-535, and the row loop label) is missing from this chunk —
 * do not trust register roles beyond what is annotated; verify against
 * the complete file.
 */
402 static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
403 const int svq3, const int rv40)
/* load the 16 top-border pixels in four 4-byte groups around src-stride */
407 "daddu $3, %0, $2 \r\n"
408 "xor $f8, $f8, $f8 \r\n"
409 "gslwlc1 $f0, 2($3) \r\n"
410 "gslwrc1 $f0, -1($3) \r\n"
411 "gslwlc1 $f2, 6($3) \r\n"
412 "gslwrc1 $f2, 3($3) \r\n"
413 "gslwlc1 $f4, 11($3) \r\n"
414 "gslwrc1 $f4, 8($3) \r\n"
415 "gslwlc1 $f6, 15($3) \r\n"
416 "gslwrc1 $f6, 12($3) \r\n"
/* widen to 16-bit and weight with the -8..-5/-4..-1/1..4/5..8 ramps */
417 "punpcklbh $f0, $f0, $f8 \r\n"
418 "punpcklbh $f2, $f2, $f8 \r\n"
419 "punpcklbh $f4, $f4, $f8 \r\n"
420 "punpcklbh $f6, $f6, $f8 \r\n"
421 "dmtc1 %4, $f20 \r\n"
422 "dmtc1 %5, $f22 \r\n"
423 "dmtc1 %6, $f24 \r\n"
424 "dmtc1 %7, $f26 \r\n"
425 "pmullh $f0, $f0, $f20 \r\n"
426 "pmullh $f2, $f2, $f22 \r\n"
427 "pmullh $f4, $f4, $f24 \r\n"
428 "pmullh $f6, $f6, $f26 \r\n"
429 "paddsh $f0, $f0, $f4 \r\n"
430 "paddsh $f2, $f2, $f6 \r\n"
431 "paddsh $f0, $f0, $f2 \r\n"
/* horizontal fold of the 4 partial sums -> H in $f10 */
433 "dmtc1 $4, $f28 \r\n"
434 "pshufh $f2, $f0, $f28 \r\n"
435 "paddsh $f0, $f0, $f2 \r\n"
437 "dmtc1 $4, $f30 \r\n"
438 "pshufh $f2, $f0, $f30 \r\n"
439 "paddsh $f10, $f0, $f2 \r\n"
/* second pass: $3 = src-1, walking the left border (loads missing) */
440 "daddiu $3, %0, -1 \r\n"
443 "lbu $8, 16($3) \r\n"
/* weighted sum + fold, symmetric to the first pass -> V in $f12 */
506 "pmullh $f0, $f0, $f20 \r\n"
507 "pmullh $f2, $f2, $f22 \r\n"
508 "pmullh $f4, $f4, $f24 \r\n"
509 "pmullh $f6, $f6, $f26 \r\n"
510 "paddsh $f0, $f0, $f4 \r\n"
511 "paddsh $f2, $f2, $f6 \r\n"
512 "paddsh $f0, $f0, $f2 \r\n"
513 "pshufh $f2, $f0, $f28 \r\n"
514 "paddsh $f0, $f0, $f2 \r\n"
515 "pshufh $f2, $f0, $f30 \r\n"
516 "paddsh $f12, $f0, $f2 \r\n"
/* move both gradients to integer registers for coefficient maths */
517 "dmfc1 $2, $f10 \r\n"
520 "dmfc1 $3, $f12 \r\n"
/* svq3/rv40-dependent swap/scaling of the gradients (branches missing) */
533 "daddu $4, $2, $0 \r\n"
534 "daddu $2, $3, $0 \r\n"
535 "daddu $3, $4, $0 \r\n"
539 "dsra $4, $2, 2 \r\n"
541 "dsra $4, $3, 2 \r\n"
555 "daddu $5, $2, $3 \r\n"
/* broadcast the final a/b/c coefficients into vector registers */
561 "pshufh $f0, $f0, $f8 \r\n"
562 "dmtc1 $3, $f10 \r\n"
563 "pshufh $f10, $f10, $f8 \r\n"
564 "dmtc1 $8, $f12 \r\n"
565 "pshufh $f12, $f12, $f8 \r\n"
567 "dmtc1 $4, $f14 \r\n"
/* precompute gradient * position ramps for columns 0..15 */
569 "pmullh $f2, $f2, $f0 \r\n"
571 "pmullh $f4, $f4, $f0 \r\n"
572 "dmtc1 %10, $f6 \r\n"
573 "pmullh $f6, $f6, $f0 \r\n"
574 "dmtc1 %11, $f8 \r\n"
575 "pmullh $f8, $f8, $f0 \r\n"
576 "daddu $3, %0, $0 \r\n"
/* per-row: (base + col_term) >> shift, clamp-pack, store 16 bytes */
579 "paddsh $f16, $f2, $f12 \r\n"
580 "psrah $f16, $f16, $f14 \r\n"
581 "paddsh $f18, $f4, $f12 \r\n"
582 "psrah $f18, $f18, $f14 \r\n"
583 "packushb $f20, $f16, $f18 \r\n"
584 "gssdlc1 $f20, 7($3) \r\n"
585 "gssdrc1 $f20, 0($3) \r\n"
586 "paddsh $f16, $f6, $f12 \r\n"
587 "psrah $f16, $f16, $f14 \r\n"
588 "paddsh $f18, $f8, $f12 \r\n"
589 "psrah $f18, $f18, $f14 \r\n"
590 "packushb $f20, $f16, $f18 \r\n"
591 "gssdlc1 $f20, 15($3) \r\n"
592 "gssdrc1 $f20, 8($3) \r\n"
/* advance the row base term by the vertical step */
593 "paddsh $f12, $f12, $f10 \r\n"
597 ::"r"(src),"r"(stride),"r"(svq3),"r"(rv40),
598 "r"(ff_pw_m8tom5),"r"(ff_pw_m4tom1),"r"(ff_pw_1to4),"r"(ff_pw_5to8),
599 "r"(ff_pw_0to3),"r"(ff_pw_4to7),"r"(ff_pw_8tob),"r"(ff_pw_ctof)
600 : "$2","$3","$4","$5","$6","$7","$8","memory"
/**
 * 16x16 plane prediction, SVQ3 variant, 8-bit, Loongson MMI.
 * Thin wrapper selecting the SVQ3 coefficient handling of the shared
 * plane-prediction kernel.
 */
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 1, 0);
}
/**
 * 16x16 plane prediction, RV40 variant, 8-bit, Loongson MMI.
 * Thin wrapper selecting the RV40 coefficient handling of the shared
 * plane-prediction kernel.
 */
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 1);
}
/**
 * 16x16 plane prediction, H.264 variant, 8-bit, Loongson MMI.
 * Thin wrapper selecting the plain H.264 coefficient handling of the
 * shared plane-prediction kernel.
 */
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 0);
}
/**
 * 8x8 "top DC" intra prediction (chroma-style), 8-bit, Loongson MMI.
 * Loads the 8 top-neighbour bytes, sums the left and right 4-byte
 * halves separately (biadd after widening), rounds and shifts each
 * half to its own DC, packs the two DC bytes and stores the resulting
 * 8-byte pattern into every row via a counted loop.
 * NOTE(review): asm wrapper, the rounding constant load (before the
 * paddush) and the shift-count load (before psrlh) are missing from
 * this chunk; the clobber list is also not visible. Verify against the
 * complete file.
 */
619 void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
623 "xor $f0, $f0, $f0 \r\n"
624 "xor $f2, $f2, $f2 \r\n"
625 "xor $f30, $f30, $f30 \r\n"
/* $3 presumably = -stride: load the 8 top neighbours */
627 "daddu $3, $3, %0 \r\n"
628 "gsldlc1 $f4, 7($3) \r\n"
629 "gsldrc1 $f4, 0($3) \r\n"
/* widen and horizontally sum each 4-pixel half */
630 "punpcklbh $f0, $f4, $f30 \r\n"
631 "punpckhbh $f2, $f4, $f30 \r\n"
632 "biadd $f0, $f0 \r\n"
633 "biadd $f2, $f2 \r\n"
634 "pshufh $f0, $f0, $f30 \r\n"
635 "pshufh $f2, $f2, $f30 \r\n"
/* round ($f4 presumably holds the rounding constant here) ... */
637 "pshufh $f4, $f4, $f30 \r\n"
638 "paddush $f0, $f0, $f4 \r\n"
639 "paddush $f2, $f2, $f4 \r\n"
/* ... and shift to get the two per-half DC values */
641 "psrlh $f0, $f0, $f4 \r\n"
642 "psrlh $f2, $f2, $f4 \r\n"
643 "packushb $f4, $f0, $f2 \r\n"
/* store the 8-byte DC pattern into each row, $2 counting rows down */
646 "gssdlc1 $f4, 7(%0) \r\n"
647 "gssdrc1 $f4, 0(%0) \r\n"
648 "daddu %0, %0, %1 \r\n"
649 "daddiu $2, $2, -1 \r\n"
651 ::"r"(src),"r"(stride)
/**
 * 8x8 full DC intra prediction (chroma-style), 8-bit, Loongson MMI.
 * Accumulates four partial neighbour sums — the visible code gathers
 * top-row halves ($3/$4 from $2 = src-stride and $5 = src-stride+4)
 * and left-column halves ($7/$8 walking src-1 down by stride) — then
 * combines them into the four per-quadrant DC values (the actual
 * shift/divide instructions are missing from this chunk), splats each
 * DC into a 4-byte half with pshufh, packs two DCs per 8-byte pattern
 * ($f0 = top half of the block, $f2 = bottom half) and stores each
 * pattern to four consecutive rows with sdc1.
 * NOTE(review): the asm wrapper, the byte loads feeding $6/$5, the
 * conditional branches between original lines 687-710, and the final
 * DC shifts are all missing here — the per-register roles above are
 * inferred and must be confirmed against the complete file.
 */
656 void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
/* --- accumulate top-row halves: $3 = left 4 pixels, $4 = right 4 --- */
660 "daddu $2, $2, %0 \r\n"
661 "daddiu $5, $2, 4 \r\n"
663 "daddu $3, $0, $6 \r\n"
666 "daddu $4, $0, $6 \r\n"
669 "daddu $3, $3, $6 \r\n"
672 "daddu $4, $4, $6 \r\n"
675 "daddu $3, $3, $6 \r\n"
678 "daddu $4, $4, $6 \r\n"
681 "daddu $3, $3, $6 \r\n"
684 "daddu $4, $4, $6 \r\n"
/* --- accumulate left-column halves down the block --- */
687 "daddu $6, $6, %0 \r\n"
689 "daddu $7, $0, $5 \r\n"
690 "daddu $6, $6, %1 \r\n"
692 "daddu $7, $7, $5 \r\n"
693 "daddu $6, $6, %1 \r\n"
695 "daddu $7, $7, $5 \r\n"
696 "daddu $6, $6, %1 \r\n"
698 "daddu $7, $7, $5 \r\n"
699 "daddu $6, $6, %1 \r\n"
701 "daddu $8, $0, $5 \r\n"
702 "daddu $6, $6, %1 \r\n"
704 "daddu $8, $8, $5 \r\n"
705 "daddu $6, $6, %1 \r\n"
707 "daddu $8, $8, $5 \r\n"
708 "daddu $6, $6, %1 \r\n"
710 "daddu $8, $8, $5 \r\n"
/* --- combine partial sums with their rounding terms --- */
711 "daddu $3, $3, $7 \r\n"
712 "daddiu $3, $3, 4 \r\n"
713 "daddiu $4, $4, 2 \r\n"
714 "daddiu $5, $8, 2 \r\n"
715 "daddu $6, $4, $5 \r\n"
/* --- splat the four DC values and pack two per 8-byte pattern --- */
720 "xor $f30, $f30, $f30 \r\n"
722 "pshufh $f0, $f0, $f30 \r\n"
724 "pshufh $f2, $f2, $f30 \r\n"
726 "pshufh $f4, $f4, $f30 \r\n"
728 "pshufh $f6, $f6, $f30 \r\n"
729 "packushb $f0, $f0, $f2 \r\n"
730 "packushb $f2, $f4, $f6 \r\n"
/* --- store: rows 0-3 get $f0, rows 4-7 get $f2 --- */
731 "daddu $2, $0, %0 \r\n"
732 "sdc1 $f0, 0($2) \r\n"
733 "daddu $2, $2, %1 \r\n"
734 "sdc1 $f0, 0($2) \r\n"
735 "daddu $2, $2, %1 \r\n"
736 "sdc1 $f0, 0($2) \r\n"
737 "daddu $2, $2, %1 \r\n"
738 "sdc1 $f0, 0($2) \r\n"
739 "daddu $2, $2, %1 \r\n"
740 "sdc1 $f2, 0($2) \r\n"
741 "daddu $2, $2, %1 \r\n"
742 "sdc1 $f2, 0($2) \r\n"
743 "daddu $2, $2, %1 \r\n"
744 "sdc1 $f2, 0($2) \r\n"
745 "daddu $2, $2, %1 \r\n"
746 "sdc1 $f2, 0($2) \r\n"
747 ::"r"(src),"r"(stride)
748 :"$2","$3","$4","$5","$6","$7","$8","memory"
/**
 * 8x16 vertical intra prediction, 8-bit, Loongson MMI.
 * NOTE(review): asm statement truncated; the surviving setup points
 * $2 at the row above the block (src - stride) and $3 at the
 * destination — presumably the missing body copies those 8 bytes into
 * each of the 16 rows. Confirm against the complete file.
 */
752 void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
755 "dsubu $2, %0, %1 \r\n"
756 "daddu $3, %0, $0 \r\n"
766 ::"r"(src),"r"(stride)
767 : "$2","$3","$4","$5","memory"
771 void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
774 "daddiu $2, %0, -1 \r\n"
775 "daddu $3, %0, $0 \r\n"
779 "dmul $5, $4, %2 \r\n"
786 ::"r"(src),"r"(stride),"r"(ff_pb_1)
787 : "$2","$3","$4","$5","$6","memory"