/*
 * Loongson SIMD optimized blockdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "blockdsp_mips.h"
#include "libavutil/mips/asmdefs.h"
27 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
32 "mtc1 %[value], %[ftmp0] \n\t"
33 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
34 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
35 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
37 "gssdlc1 %[ftmp0], 0x07(%[block]) \n\t"
38 "gssdrc1 %[ftmp0], 0x00(%[block]) \n\t"
39 PTR_ADDI "%[h], %[h], -0x01 \n\t"
40 "gssdlc1 %[ftmp0], 0x0f(%[block]) \n\t"
41 "gssdrc1 %[ftmp0], 0x08(%[block]) \n\t"
42 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
44 : [block]"+&r"(block), [h]"+&r"(h),
46 : [value]"r"(value), [line_size]"r"((mips_reg)line_size)
51 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
56 "mtc1 %[value], %[ftmp0] \n\t"
57 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
58 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
59 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
61 "gssdlc1 %[ftmp0], 0x07(%[block]) \n\t"
62 "gssdrc1 %[ftmp0], 0x00(%[block]) \n\t"
63 PTR_ADDI "%[h], %[h], -0x01 \n\t"
64 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
66 : [block]"+&r"(block), [h]"+&r"(h),
68 : [value]"r"(value), [line_size]"r"((mips_reg)line_size)
73 void ff_clear_block_mmi(int16_t *block)
78 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
79 "xor %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
80 "gssqc1 %[ftmp0], %[ftmp1], 0x00(%[block]) \n\t"
81 "gssqc1 %[ftmp0], %[ftmp1], 0x10(%[block]) \n\t"
82 "gssqc1 %[ftmp0], %[ftmp1], 0x20(%[block]) \n\t"
83 "gssqc1 %[ftmp0], %[ftmp1], 0x30(%[block]) \n\t"
84 "gssqc1 %[ftmp0], %[ftmp1], 0x40(%[block]) \n\t"
85 "gssqc1 %[ftmp0], %[ftmp1], 0x50(%[block]) \n\t"
86 "gssqc1 %[ftmp0], %[ftmp1], 0x60(%[block]) \n\t"
87 "gssqc1 %[ftmp0], %[ftmp1], 0x70(%[block]) \n\t"
88 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1])
94 void ff_clear_blocks_mmi(int16_t *block)
99 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
100 "xor %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
101 "gssqc1 %[ftmp0], %[ftmp1], 0x00(%[block]) \n\t"
102 "gssqc1 %[ftmp0], %[ftmp1], 0x10(%[block]) \n\t"
103 "gssqc1 %[ftmp0], %[ftmp1], 0x20(%[block]) \n\t"
104 "gssqc1 %[ftmp0], %[ftmp1], 0x30(%[block]) \n\t"
105 "gssqc1 %[ftmp0], %[ftmp1], 0x40(%[block]) \n\t"
106 "gssqc1 %[ftmp0], %[ftmp1], 0x50(%[block]) \n\t"
107 "gssqc1 %[ftmp0], %[ftmp1], 0x60(%[block]) \n\t"
108 "gssqc1 %[ftmp0], %[ftmp1], 0x70(%[block]) \n\t"
110 "gssqc1 %[ftmp0], %[ftmp1], 0x80(%[block]) \n\t"
111 "gssqc1 %[ftmp0], %[ftmp1], 0x90(%[block]) \n\t"
112 "gssqc1 %[ftmp0], %[ftmp1], 0xa0(%[block]) \n\t"
113 "gssqc1 %[ftmp0], %[ftmp1], 0xb0(%[block]) \n\t"
114 "gssqc1 %[ftmp0], %[ftmp1], 0xc0(%[block]) \n\t"
115 "gssqc1 %[ftmp0], %[ftmp1], 0xd0(%[block]) \n\t"
116 "gssqc1 %[ftmp0], %[ftmp1], 0xe0(%[block]) \n\t"
117 "gssqc1 %[ftmp0], %[ftmp1], 0xf0(%[block]) \n\t"
119 "gssqc1 %[ftmp0], %[ftmp1], 0x100(%[block]) \n\t"
120 "gssqc1 %[ftmp0], %[ftmp1], 0x110(%[block]) \n\t"
121 "gssqc1 %[ftmp0], %[ftmp1], 0x120(%[block]) \n\t"
122 "gssqc1 %[ftmp0], %[ftmp1], 0x130(%[block]) \n\t"
123 "gssqc1 %[ftmp0], %[ftmp1], 0x140(%[block]) \n\t"
124 "gssqc1 %[ftmp0], %[ftmp1], 0x150(%[block]) \n\t"
125 "gssqc1 %[ftmp0], %[ftmp1], 0x160(%[block]) \n\t"
126 "gssqc1 %[ftmp0], %[ftmp1], 0x170(%[block]) \n\t"
128 "gssqc1 %[ftmp0], %[ftmp1], 0x180(%[block]) \n\t"
129 "gssqc1 %[ftmp0], %[ftmp1], 0x190(%[block]) \n\t"
130 "gssqc1 %[ftmp0], %[ftmp1], 0x1a0(%[block]) \n\t"
131 "gssqc1 %[ftmp0], %[ftmp1], 0x1b0(%[block]) \n\t"
132 "gssqc1 %[ftmp0], %[ftmp1], 0x1c0(%[block]) \n\t"
133 "gssqc1 %[ftmp0], %[ftmp1], 0x1d0(%[block]) \n\t"
134 "gssqc1 %[ftmp0], %[ftmp1], 0x1e0(%[block]) \n\t"
135 "gssqc1 %[ftmp0], %[ftmp1], 0x1f0(%[block]) \n\t"
137 "gssqc1 %[ftmp0], %[ftmp1], 0x200(%[block]) \n\t"
138 "gssqc1 %[ftmp0], %[ftmp1], 0x210(%[block]) \n\t"
139 "gssqc1 %[ftmp0], %[ftmp1], 0x220(%[block]) \n\t"
140 "gssqc1 %[ftmp0], %[ftmp1], 0x230(%[block]) \n\t"
141 "gssqc1 %[ftmp0], %[ftmp1], 0x240(%[block]) \n\t"
142 "gssqc1 %[ftmp0], %[ftmp1], 0x250(%[block]) \n\t"
143 "gssqc1 %[ftmp0], %[ftmp1], 0x260(%[block]) \n\t"
144 "gssqc1 %[ftmp0], %[ftmp1], 0x270(%[block]) \n\t"
146 "gssqc1 %[ftmp0], %[ftmp1], 0x280(%[block]) \n\t"
147 "gssqc1 %[ftmp0], %[ftmp1], 0x290(%[block]) \n\t"
148 "gssqc1 %[ftmp0], %[ftmp1], 0x2a0(%[block]) \n\t"
149 "gssqc1 %[ftmp0], %[ftmp1], 0x2b0(%[block]) \n\t"
150 "gssqc1 %[ftmp0], %[ftmp1], 0x2c0(%[block]) \n\t"
151 "gssqc1 %[ftmp0], %[ftmp1], 0x2d0(%[block]) \n\t"
152 "gssqc1 %[ftmp0], %[ftmp1], 0x2e0(%[block]) \n\t"
153 "gssqc1 %[ftmp0], %[ftmp1], 0x2f0(%[block]) \n\t"
154 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1])
155 : [block]"r"((mips_reg)block)