git.sesse.net Git - ffmpeg/blob - libavcodec/mips/blockdsp_mmi.c
Merge commit '5b1409c75563b4a3aca113c34d09e3b5442de47f'
[ffmpeg] / libavcodec / mips / blockdsp_mmi.c
1 /*
2  * Loongson SIMD optimized blockdsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23
24 #include "blockdsp_mips.h"
25 #include "libavutil/mips/asmdefs.h"
26
/**
 * Fill a 16-byte-wide block with one byte value using Loongson MMI stores.
 *
 * Every loop iteration writes 16 bytes of @p value starting at @p block and
 * then advances @p block by @p line_size bytes.
 *
 * @param block     first byte of the first row to write; rows are written
 *                  with left/right store pairs (gssdlc1/gssdrc1), which
 *                  suggests no particular alignment is assumed
 * @param value     byte stored at every position
 * @param line_size distance in bytes from one row to the next
 * @param h         number of rows. The counter is tested only after a row
 *                  has been stored, so the loop body always runs at least
 *                  once; callers presumably always pass h > 0 (TODO confirm)
 */
27 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
28 {
    /* Scratch slot bound to the FP/SIMD register used by the asm below. */
29     double ftmp[1];
30
31     __asm__ volatile (
        /* Move value into the FP/SIMD register, then interleave the register
         * with itself three times. Each step doubles the run of copies of
         * the low byte (1 -> 2 -> 4 -> 8), so afterwards all eight bytes of
         * ftmp0 hold value. */
32         "mtc1       %[value],   %[ftmp0]                                \n\t"
33         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
34         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
35         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        /* Row loop: one 16-byte row per iteration. */
36         "1:                                                             \n\t"
        /* Bytes 0..7 of the current row. */
37         "gssdlc1    %[ftmp0],   0x07(%[block])                          \n\t"
38         "gssdrc1    %[ftmp0],   0x00(%[block])                          \n\t"
        /* Decrement the row counter between the two stores.
         * NOTE(review): PTR_ADDI / PTR_ADDU are macros from asmdefs.h,
         * presumably pointer-width add mnemonics -- confirm. */
39         PTR_ADDI    "%[h],      %[h],           -0x01                   \n\t"
        /* Bytes 8..15 of the current row. */
40         "gssdlc1    %[ftmp0],   0x0f(%[block])                          \n\t"
41         "gssdrc1    %[ftmp0],   0x08(%[block])                          \n\t"
        /* Step to the next row and repeat while the counter is non-zero. */
42         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
43         "bnez       %[h],       1b                                      \n\t"
        /* block and h are modified by the loop, hence read-write operands. */
44         : [block]"+&r"(block),              [h]"+&r"(h),
45           [ftmp0]"=&f"(ftmp[0])
        /* line_size is cast to mips_reg before being handed to the asm;
         * presumably a register-width integer type -- confirm. */
46         : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
        /* The stores go through block, which is not listed as a memory
         * operand, so memory is declared clobbered. */
47         : "memory"
48     );
49 }
50
/**
 * Fill an 8-byte-wide block with one byte value using Loongson MMI stores.
 *
 * Every loop iteration writes 8 bytes of @p value starting at @p block and
 * then advances @p block by @p line_size bytes.
 *
 * @param block     first byte of the first row to write; rows are written
 *                  with a left/right store pair (gssdlc1/gssdrc1), which
 *                  suggests no particular alignment is assumed
 * @param value     byte stored at every position
 * @param line_size distance in bytes from one row to the next
 * @param h         number of rows. The counter is tested only after a row
 *                  has been stored, so the loop body always runs at least
 *                  once; callers presumably always pass h > 0 (TODO confirm)
 */
51 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
52 {
    /* Scratch variable bound to the FP/SIMD register used by the asm below. */
53     double ftmp0;
54
55     __asm__ volatile (
        /* Move value into the FP/SIMD register, then interleave the register
         * with itself three times so that the low byte is replicated into
         * all eight bytes (1 -> 2 -> 4 -> 8 copies). */
56         "mtc1       %[value],   %[ftmp0]                                \n\t"
57         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
58         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
59         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        /* Row loop: one 8-byte row per iteration. */
60         "1:                                                             \n\t"
        /* Bytes 0..7 of the current row. */
61         "gssdlc1    %[ftmp0],   0x07(%[block])                          \n\t"
62         "gssdrc1    %[ftmp0],   0x00(%[block])                          \n\t"
        /* Decrement the row counter, step to the next row, and repeat while
         * the counter is non-zero.
         * NOTE(review): PTR_ADDI / PTR_ADDU are macros from asmdefs.h,
         * presumably pointer-width add mnemonics -- confirm. */
63         PTR_ADDI   "%[h],       %[h],           -0x01                   \n\t"
64         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
65         "bnez       %[h],       1b                                      \n\t"
        /* block and h are modified by the loop, hence read-write operands. */
66         : [block]"+&r"(block),              [h]"+&r"(h),
67           [ftmp0]"=&f"(ftmp0)
        /* line_size is cast to mips_reg before being handed to the asm;
         * presumably a register-width integer type -- confirm. */
68         : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
        /* The stores go through block, which is not listed as a memory
         * operand, so memory is declared clobbered. */
69         : "memory"
70     );
71 }
72
/**
 * Zero one coefficient block with Loongson MMI stores.
 *
 * Eight stores at offsets 0x00..0x70 in steps of 0x10 clear 128 bytes,
 * i.e. 64 int16_t values, starting at @p block.
 *
 * @param block start of the region to clear.
 *              NOTE(review): each gssqc1 writes a register pair as one
 *              16-byte unit; presumably block must be 16-byte aligned --
 *              confirm against the instruction documentation.
 */
73 void ff_clear_block_mmi(int16_t *block)
74 {
    /* Two scratch slots bound to the register pair stored by gssqc1. */
75     double ftmp[2];
76
77     __asm__ volatile (
        /* Zero both registers by xor-ing each with itself. */
78         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
79         "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
        /* Fully unrolled: 8 stores x 16 bytes = 128 bytes. */
80         "gssqc1     %[ftmp0],   %[ftmp1],       0x00(%[block])          \n\t"
81         "gssqc1     %[ftmp0],   %[ftmp1],       0x10(%[block])          \n\t"
82         "gssqc1     %[ftmp0],   %[ftmp1],       0x20(%[block])          \n\t"
83         "gssqc1     %[ftmp0],   %[ftmp1],       0x30(%[block])          \n\t"
84         "gssqc1     %[ftmp0],   %[ftmp1],       0x40(%[block])          \n\t"
85         "gssqc1     %[ftmp0],   %[ftmp1],       0x50(%[block])          \n\t"
86         "gssqc1     %[ftmp0],   %[ftmp1],       0x60(%[block])          \n\t"
87         "gssqc1     %[ftmp0],   %[ftmp1],       0x70(%[block])          \n\t"
88         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
        /* block is only read as an address, so it is an input operand.
         * NOTE(review): ff_clear_blocks_mmi casts the same operand to
         * mips_reg while this function passes the pointer directly. */
89         : [block]"r"(block)
        /* The stores go through block, which is not listed as a memory
         * operand, so memory is declared clobbered. */
90         : "memory"
91     );
92 }
93
/**
 * Zero six consecutive coefficient blocks with Loongson MMI stores.
 *
 * Forty-eight stores at offsets 0x000..0x2f0 in steps of 0x10 clear
 * 768 bytes, i.e. 384 int16_t values (six groups of 64), starting at
 * @p block. The stores are laid out below in six groups of eight, one
 * group per 128-byte block.
 *
 * @param block start of the region to clear.
 *              NOTE(review): each gssqc1 writes a register pair as one
 *              16-byte unit; presumably block must be 16-byte aligned --
 *              confirm against the instruction documentation.
 */
94 void ff_clear_blocks_mmi(int16_t *block)
95 {
    /* Two scratch slots bound to the register pair stored by gssqc1. */
96     double ftmp[2];
97
98     __asm__ volatile (
        /* Zero both registers by xor-ing each with itself. */
99         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
100         "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
        /* Block 0: bytes 0x000..0x07f. */
101         "gssqc1     %[ftmp0],   %[ftmp1],       0x00(%[block])          \n\t"
102         "gssqc1     %[ftmp0],   %[ftmp1],       0x10(%[block])          \n\t"
103         "gssqc1     %[ftmp0],   %[ftmp1],       0x20(%[block])          \n\t"
104         "gssqc1     %[ftmp0],   %[ftmp1],       0x30(%[block])          \n\t"
105         "gssqc1     %[ftmp0],   %[ftmp1],       0x40(%[block])          \n\t"
106         "gssqc1     %[ftmp0],   %[ftmp1],       0x50(%[block])          \n\t"
107         "gssqc1     %[ftmp0],   %[ftmp1],       0x60(%[block])          \n\t"
108         "gssqc1     %[ftmp0],   %[ftmp1],       0x70(%[block])          \n\t"
109
        /* Block 1: bytes 0x080..0x0ff. */
110         "gssqc1     %[ftmp0],   %[ftmp1],       0x80(%[block])          \n\t"
111         "gssqc1     %[ftmp0],   %[ftmp1],       0x90(%[block])          \n\t"
112         "gssqc1     %[ftmp0],   %[ftmp1],       0xa0(%[block])          \n\t"
113         "gssqc1     %[ftmp0],   %[ftmp1],       0xb0(%[block])          \n\t"
114         "gssqc1     %[ftmp0],   %[ftmp1],       0xc0(%[block])          \n\t"
115         "gssqc1     %[ftmp0],   %[ftmp1],       0xd0(%[block])          \n\t"
116         "gssqc1     %[ftmp0],   %[ftmp1],       0xe0(%[block])          \n\t"
117         "gssqc1     %[ftmp0],   %[ftmp1],       0xf0(%[block])          \n\t"
118
        /* Block 2: bytes 0x100..0x17f. */
119         "gssqc1     %[ftmp0],   %[ftmp1],       0x100(%[block])         \n\t"
120         "gssqc1     %[ftmp0],   %[ftmp1],       0x110(%[block])         \n\t"
121         "gssqc1     %[ftmp0],   %[ftmp1],       0x120(%[block])         \n\t"
122         "gssqc1     %[ftmp0],   %[ftmp1],       0x130(%[block])         \n\t"
123         "gssqc1     %[ftmp0],   %[ftmp1],       0x140(%[block])         \n\t"
124         "gssqc1     %[ftmp0],   %[ftmp1],       0x150(%[block])         \n\t"
125         "gssqc1     %[ftmp0],   %[ftmp1],       0x160(%[block])         \n\t"
126         "gssqc1     %[ftmp0],   %[ftmp1],       0x170(%[block])         \n\t"
127
        /* Block 3: bytes 0x180..0x1ff. */
128         "gssqc1     %[ftmp0],   %[ftmp1],       0x180(%[block])         \n\t"
129         "gssqc1     %[ftmp0],   %[ftmp1],       0x190(%[block])         \n\t"
130         "gssqc1     %[ftmp0],   %[ftmp1],       0x1a0(%[block])         \n\t"
131         "gssqc1     %[ftmp0],   %[ftmp1],       0x1b0(%[block])         \n\t"
132         "gssqc1     %[ftmp0],   %[ftmp1],       0x1c0(%[block])         \n\t"
133         "gssqc1     %[ftmp0],   %[ftmp1],       0x1d0(%[block])         \n\t"
134         "gssqc1     %[ftmp0],   %[ftmp1],       0x1e0(%[block])         \n\t"
135         "gssqc1     %[ftmp0],   %[ftmp1],       0x1f0(%[block])         \n\t"
136
        /* Block 4: bytes 0x200..0x27f. */
137         "gssqc1     %[ftmp0],   %[ftmp1],       0x200(%[block])         \n\t"
138         "gssqc1     %[ftmp0],   %[ftmp1],       0x210(%[block])         \n\t"
139         "gssqc1     %[ftmp0],   %[ftmp1],       0x220(%[block])         \n\t"
140         "gssqc1     %[ftmp0],   %[ftmp1],       0x230(%[block])         \n\t"
141         "gssqc1     %[ftmp0],   %[ftmp1],       0x240(%[block])         \n\t"
142         "gssqc1     %[ftmp0],   %[ftmp1],       0x250(%[block])         \n\t"
143         "gssqc1     %[ftmp0],   %[ftmp1],       0x260(%[block])         \n\t"
144         "gssqc1     %[ftmp0],   %[ftmp1],       0x270(%[block])         \n\t"
145
        /* Block 5: bytes 0x280..0x2ff. */
146         "gssqc1     %[ftmp0],   %[ftmp1],       0x280(%[block])         \n\t"
147         "gssqc1     %[ftmp0],   %[ftmp1],       0x290(%[block])         \n\t"
148         "gssqc1     %[ftmp0],   %[ftmp1],       0x2a0(%[block])         \n\t"
149         "gssqc1     %[ftmp0],   %[ftmp1],       0x2b0(%[block])         \n\t"
150         "gssqc1     %[ftmp0],   %[ftmp1],       0x2c0(%[block])         \n\t"
151         "gssqc1     %[ftmp0],   %[ftmp1],       0x2d0(%[block])         \n\t"
152         "gssqc1     %[ftmp0],   %[ftmp1],       0x2e0(%[block])         \n\t"
153         "gssqc1     %[ftmp0],   %[ftmp1],       0x2f0(%[block])         \n\t"
154         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
        /* block is only read as an address; it is cast to mips_reg before
         * being handed to the asm (presumably a register-width integer
         * type -- confirm). */
155         : [block]"r"((mips_reg)block)
        /* The stores go through block, which is not listed as a memory
         * operand, so memory is declared clobbered. */
156         : "memory"
157     );
158 }