]> git.sesse.net Git - freerainbowtables/blob - BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc
merge all the algorithms for distrrtgen_cuda
[freerainbowtables] / BOINC software / BOINC client apps / distrrtgen_cuda / rcuda_lm.inc
1 //============================================================================\r
2 // Name        : rcuda_lm.inc\r
3 // Author      : Jan Kyska\r
4 // Version     : 1.00\r
5 // Description : LM hash kernel for Generator of FreeRainbowTables\r
6 //============================================================================ \r
7 \r
8 namespace RC_LM {\r
9 \r
10 #define  ITERATIONS    16\r
11 \r
12 \r
13 __device__ __constant__ int shifts2[16] = { 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 };\r
14 \r
15 __device__ __constant__ unsigned int des_skb[8][64] = {\r
16 {\r
17 /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
18 0x00000000L,0x00000010L,0x20000000L,0x20000010L,\r
19 0x00010000L,0x00010010L,0x20010000L,0x20010010L,\r
20 0x00000800L,0x00000810L,0x20000800L,0x20000810L,\r
21 0x00010800L,0x00010810L,0x20010800L,0x20010810L,\r
22 0x00000020L,0x00000030L,0x20000020L,0x20000030L,\r
23 0x00010020L,0x00010030L,0x20010020L,0x20010030L,\r
24 0x00000820L,0x00000830L,0x20000820L,0x20000830L,\r
25 0x00010820L,0x00010830L,0x20010820L,0x20010830L,\r
26 0x00080000L,0x00080010L,0x20080000L,0x20080010L,\r
27 0x00090000L,0x00090010L,0x20090000L,0x20090010L,\r
28 0x00080800L,0x00080810L,0x20080800L,0x20080810L,\r
29 0x00090800L,0x00090810L,0x20090800L,0x20090810L,\r
30 0x00080020L,0x00080030L,0x20080020L,0x20080030L,\r
31 0x00090020L,0x00090030L,0x20090020L,0x20090030L,\r
32 0x00080820L,0x00080830L,0x20080820L,0x20080830L,\r
33 0x00090820L,0x00090830L,0x20090820L,0x20090830L,\r
34 },{\r
35 /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */\r
36 0x00000000L,0x02000000L,0x00002000L,0x02002000L,\r
37 0x00200000L,0x02200000L,0x00202000L,0x02202000L,\r
38 0x00000004L,0x02000004L,0x00002004L,0x02002004L,\r
39 0x00200004L,0x02200004L,0x00202004L,0x02202004L,\r
40 0x00000400L,0x02000400L,0x00002400L,0x02002400L,\r
41 0x00200400L,0x02200400L,0x00202400L,0x02202400L,\r
42 0x00000404L,0x02000404L,0x00002404L,0x02002404L,\r
43 0x00200404L,0x02200404L,0x00202404L,0x02202404L,\r
44 0x10000000L,0x12000000L,0x10002000L,0x12002000L,\r
45 0x10200000L,0x12200000L,0x10202000L,0x12202000L,\r
46 0x10000004L,0x12000004L,0x10002004L,0x12002004L,\r
47 0x10200004L,0x12200004L,0x10202004L,0x12202004L,\r
48 0x10000400L,0x12000400L,0x10002400L,0x12002400L,\r
49 0x10200400L,0x12200400L,0x10202400L,0x12202400L,\r
50 0x10000404L,0x12000404L,0x10002404L,0x12002404L,\r
51 0x10200404L,0x12200404L,0x10202404L,0x12202404L,\r
52 },{\r
53 /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */\r
54 0x00000000L,0x00000001L,0x00040000L,0x00040001L,\r
55 0x01000000L,0x01000001L,0x01040000L,0x01040001L,\r
56 0x00000002L,0x00000003L,0x00040002L,0x00040003L,\r
57 0x01000002L,0x01000003L,0x01040002L,0x01040003L,\r
58 0x00000200L,0x00000201L,0x00040200L,0x00040201L,\r
59 0x01000200L,0x01000201L,0x01040200L,0x01040201L,\r
60 0x00000202L,0x00000203L,0x00040202L,0x00040203L,\r
61 0x01000202L,0x01000203L,0x01040202L,0x01040203L,\r
62 0x08000000L,0x08000001L,0x08040000L,0x08040001L,\r
63 0x09000000L,0x09000001L,0x09040000L,0x09040001L,\r
64 0x08000002L,0x08000003L,0x08040002L,0x08040003L,\r
65 0x09000002L,0x09000003L,0x09040002L,0x09040003L,\r
66 0x08000200L,0x08000201L,0x08040200L,0x08040201L,\r
67 0x09000200L,0x09000201L,0x09040200L,0x09040201L,\r
68 0x08000202L,0x08000203L,0x08040202L,0x08040203L,\r
69 0x09000202L,0x09000203L,0x09040202L,0x09040203L,\r
70 },{\r
71 /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */\r
72 0x00000000L,0x00100000L,0x00000100L,0x00100100L,\r
73 0x00000008L,0x00100008L,0x00000108L,0x00100108L,\r
74 0x00001000L,0x00101000L,0x00001100L,0x00101100L,\r
75 0x00001008L,0x00101008L,0x00001108L,0x00101108L,\r
76 0x04000000L,0x04100000L,0x04000100L,0x04100100L,\r
77 0x04000008L,0x04100008L,0x04000108L,0x04100108L,\r
78 0x04001000L,0x04101000L,0x04001100L,0x04101100L,\r
79 0x04001008L,0x04101008L,0x04001108L,0x04101108L,\r
80 0x00020000L,0x00120000L,0x00020100L,0x00120100L,\r
81 0x00020008L,0x00120008L,0x00020108L,0x00120108L,\r
82 0x00021000L,0x00121000L,0x00021100L,0x00121100L,\r
83 0x00021008L,0x00121008L,0x00021108L,0x00121108L,\r
84 0x04020000L,0x04120000L,0x04020100L,0x04120100L,\r
85 0x04020008L,0x04120008L,0x04020108L,0x04120108L,\r
86 0x04021000L,0x04121000L,0x04021100L,0x04121100L,\r
87 0x04021008L,0x04121008L,0x04021108L,0x04121108L,\r
88 },{\r
89 /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
90 0x00000000L,0x10000000L,0x00010000L,0x10010000L,\r
91 0x00000004L,0x10000004L,0x00010004L,0x10010004L,\r
92 0x20000000L,0x30000000L,0x20010000L,0x30010000L,\r
93 0x20000004L,0x30000004L,0x20010004L,0x30010004L,\r
94 0x00100000L,0x10100000L,0x00110000L,0x10110000L,\r
95 0x00100004L,0x10100004L,0x00110004L,0x10110004L,\r
96 0x20100000L,0x30100000L,0x20110000L,0x30110000L,\r
97 0x20100004L,0x30100004L,0x20110004L,0x30110004L,\r
98 0x00001000L,0x10001000L,0x00011000L,0x10011000L,\r
99 0x00001004L,0x10001004L,0x00011004L,0x10011004L,\r
100 0x20001000L,0x30001000L,0x20011000L,0x30011000L,\r
101 0x20001004L,0x30001004L,0x20011004L,0x30011004L,\r
102 0x00101000L,0x10101000L,0x00111000L,0x10111000L,\r
103 0x00101004L,0x10101004L,0x00111004L,0x10111004L,\r
104 0x20101000L,0x30101000L,0x20111000L,0x30111000L,\r
105 0x20101004L,0x30101004L,0x20111004L,0x30111004L,\r
106 },{\r
107 /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */\r
108 0x00000000L,0x08000000L,0x00000008L,0x08000008L,\r
109 0x00000400L,0x08000400L,0x00000408L,0x08000408L,\r
110 0x00020000L,0x08020000L,0x00020008L,0x08020008L,\r
111 0x00020400L,0x08020400L,0x00020408L,0x08020408L,\r
112 0x00000001L,0x08000001L,0x00000009L,0x08000009L,\r
113 0x00000401L,0x08000401L,0x00000409L,0x08000409L,\r
114 0x00020001L,0x08020001L,0x00020009L,0x08020009L,\r
115 0x00020401L,0x08020401L,0x00020409L,0x08020409L,\r
116 0x02000000L,0x0A000000L,0x02000008L,0x0A000008L,\r
117 0x02000400L,0x0A000400L,0x02000408L,0x0A000408L,\r
118 0x02020000L,0x0A020000L,0x02020008L,0x0A020008L,\r
119 0x02020400L,0x0A020400L,0x02020408L,0x0A020408L,\r
120 0x02000001L,0x0A000001L,0x02000009L,0x0A000009L,\r
121 0x02000401L,0x0A000401L,0x02000409L,0x0A000409L,\r
122 0x02020001L,0x0A020001L,0x02020009L,0x0A020009L,\r
123 0x02020401L,0x0A020401L,0x02020409L,0x0A020409L,\r
124 },{\r
125 /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */\r
126 0x00000000L,0x00000100L,0x00080000L,0x00080100L,\r
127 0x01000000L,0x01000100L,0x01080000L,0x01080100L,\r
128 0x00000010L,0x00000110L,0x00080010L,0x00080110L,\r
129 0x01000010L,0x01000110L,0x01080010L,0x01080110L,\r
130 0x00200000L,0x00200100L,0x00280000L,0x00280100L,\r
131 0x01200000L,0x01200100L,0x01280000L,0x01280100L,\r
132 0x00200010L,0x00200110L,0x00280010L,0x00280110L,\r
133 0x01200010L,0x01200110L,0x01280010L,0x01280110L,\r
134 0x00000200L,0x00000300L,0x00080200L,0x00080300L,\r
135 0x01000200L,0x01000300L,0x01080200L,0x01080300L,\r
136 0x00000210L,0x00000310L,0x00080210L,0x00080310L,\r
137 0x01000210L,0x01000310L,0x01080210L,0x01080310L,\r
138 0x00200200L,0x00200300L,0x00280200L,0x00280300L,\r
139 0x01200200L,0x01200300L,0x01280200L,0x01280300L,\r
140 0x00200210L,0x00200310L,0x00280210L,0x00280310L,\r
141 0x01200210L,0x01200310L,0x01280210L,0x01280310L,\r
142 },{\r
143 /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */\r
144 0x00000000L,0x04000000L,0x00040000L,0x04040000L,\r
145 0x00000002L,0x04000002L,0x00040002L,0x04040002L,\r
146 0x00002000L,0x04002000L,0x00042000L,0x04042000L,\r
147 0x00002002L,0x04002002L,0x00042002L,0x04042002L,\r
148 0x00000020L,0x04000020L,0x00040020L,0x04040020L,\r
149 0x00000022L,0x04000022L,0x00040022L,0x04040022L,\r
150 0x00002020L,0x04002020L,0x00042020L,0x04042020L,\r
151 0x00002022L,0x04002022L,0x00042022L,0x04042022L,\r
152 0x00000800L,0x04000800L,0x00040800L,0x04040800L,\r
153 0x00000802L,0x04000802L,0x00040802L,0x04040802L,\r
154 0x00002800L,0x04002800L,0x00042800L,0x04042800L,\r
155 0x00002802L,0x04002802L,0x00042802L,0x04042802L,\r
156 0x00000820L,0x04000820L,0x00040820L,0x04040820L,\r
157 0x00000822L,0x04000822L,0x00040822L,0x04040822L,\r
158 0x00002820L,0x04002820L,0x00042820L,0x04042820L,\r
159 0x00002822L,0x04002822L,0x00042822L,0x04042822L,\r
160 }};\r
161 \r
162 __device__ __constant__ unsigned int des_SPtrans[8][64] = {\r
163 {\r
164 /* nibble 0 */\r
165 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,\r
166 0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,\r
167 0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,\r
168 0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,\r
169 0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,\r
170 0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,\r
171 0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,\r
172 0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,\r
173 0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,\r
174 0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,\r
175 0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,\r
176 0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,\r
177 0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,\r
178 0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,\r
179 0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,\r
180 0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L,\r
181 },{\r
182 /* nibble 1 */\r
183 0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,\r
184 0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,\r
185 0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,\r
186 0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,\r
187 0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,\r
188 0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,\r
189 0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,\r
190 0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,\r
191 0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,\r
192 0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,\r
193 0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,\r
194 0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,\r
195 0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,\r
196 0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,\r
197 0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,\r
198 0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L,\r
199 },{\r
200 /* nibble 2 */\r
201 0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,\r
202 0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,\r
203 0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,\r
204 0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,\r
205 0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,\r
206 0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,\r
207 0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,\r
208 0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,\r
209 0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,\r
210 0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,\r
211 0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,\r
212 0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,\r
213 0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,\r
214 0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,\r
215 0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,\r
216 0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L,\r
217 },{\r
218 /* nibble 3 */\r
219 0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,\r
220 0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,\r
221 0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,\r
222 0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,\r
223 0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,\r
224 0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,\r
225 0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,\r
226 0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,\r
227 0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,\r
228 0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,\r
229 0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,\r
230 0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,\r
231 0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,\r
232 0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,\r
233 0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,\r
234 0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L,\r
235 },{\r
236 /* nibble 4 */\r
237 0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,\r
238 0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,\r
239 0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,\r
240 0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,\r
241 0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,\r
242 0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,\r
243 0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,\r
244 0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,\r
245 0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,\r
246 0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,\r
247 0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,\r
248 0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,\r
249 0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,\r
250 0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,\r
251 0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,\r
252 0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L,\r
253 },{\r
254 /* nibble 5 */\r
255 0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,\r
256 0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,\r
257 0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,\r
258 0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,\r
259 0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,\r
260 0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,\r
261 0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,\r
262 0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,\r
263 0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,\r
264 0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,\r
265 0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,\r
266 0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,\r
267 0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,\r
268 0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,\r
269 0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,\r
270 0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,\r
271 },{\r
272 /* nibble 6 */\r
273 0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,\r
274 0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,\r
275 0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,\r
276 0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,\r
277 0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,\r
278 0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,\r
279 0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,\r
280 0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,\r
281 0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,\r
282 0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,\r
283 0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,\r
284 0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,\r
285 0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,\r
286 0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,\r
287 0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,\r
288 0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,\r
289 },{\r
290 /* nibble 7 */\r
291 0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,\r
292 0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,\r
293 0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,\r
294 0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,\r
295 0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,\r
296 0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,\r
297 0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,\r
298 0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,\r
299 0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,\r
300 0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,\r
301 0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,\r
302 0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,\r
303 0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,\r
304 0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,\r
305 0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,\r
306 0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,\r
307 }};\r
308 \r
309 \r
310 __device__ void PERM_OP(int ia, int ib, int it, unsigned int n, unsigned int m, unsigned int* data) {\r
311         data[it] =((data[ia] >> n ) ^ data[ib]) & m;\r
312         data[ib] ^= data[it];\r
313         data[ia] ^= data[it] << n;\r
314 }\r
315 \r
316 __device__ void HPERM_OP(int ia, int it, int n, unsigned int m, unsigned int* data) {\r
317         data[it] = ((data[ia] << (16-n)) ^ data[ia]) & m;\r
318         data[ia] = data[ia] ^ data[it] ^ (data[it]>>(16-n));\r
319 }\r
320 \r
321 __device__ void IP(int il, int ir, int it, unsigned int* data) {\r
322         PERM_OP(ir, il, it, 4, 0x0f0f0f0f, data);\r
323         PERM_OP(il, ir, it, 16, 0x0000ffff, data);\r
324         PERM_OP(ir, il, it, 2, 0x33333333, data);\r
325         PERM_OP(il, ir, it, 8, 0x00ff00ff, data);\r
326         PERM_OP(ir, il, it, 1, 0x55555555, data);\r
327 }\r
328 \r
329 __device__ void FP(int il, int ir, int it, unsigned int* data) {\r
330         PERM_OP(il, ir, it, 1, 0x55555555, data);\r
331         PERM_OP(ir, il, it, 8, 0x00ff00ff, data);\r
332         PERM_OP(il, ir, it, 2, 0x33333333, data);\r
333         PERM_OP(ir, il, it, 16, 0x0000ffff, data);\r
334         PERM_OP(il, ir, it, 4, 0x0f0f0f0f, data);\r
335 }\r
336 \r
337 __device__ unsigned int D_ENCRYPT(unsigned int ll, unsigned int uu, unsigned int tt) {\r
338         tt = (tt>>4)|(tt<<28);\r
339         return ll ^ des_SPtrans[0][(uu>>2)&0x3f] ^\r
340                         des_SPtrans[2][(uu>>10)&0x3f] ^\r
341                         des_SPtrans[4][(uu>>18)&0x3f] ^\r
342                         des_SPtrans[6][(uu>>26)&0x3f] ^\r
343                         des_SPtrans[1][(tt>>2)&0x3f] ^\r
344                         des_SPtrans[3][(tt>>10)&0x3f] ^\r
345                         des_SPtrans[5][(tt>>18)&0x3f] ^\r
346                         des_SPtrans[7][(tt>>26)&0x3f];\r
347 }\r
348 \r
349 }\r
350 \r
351 __global__ void RTGenLMKernel(unsigned int chainStart, unsigned int chainStop) {\r
352         uint3 dimItem;\r
353         unsigned int *hData2, *hData3;\r
354         uint64 uiDiv64, uiVal64, uiMul64;\r
355         unsigned int uiVal, uiDiv;\r
356         unsigned int jj, rs, rt;\r
357 \r
358         __shared__ unsigned int shData2[SHIDX(BLOCK_X_SIZE)];\r
359         __shared__ unsigned int shData3[BLOCK_X_SIZE<<2];\r
360         hData2 = shData2 + ((threadIdx.x>>4)<<8)+(threadIdx.x&15);\r
361         hData3 = shData3 + ((threadIdx.x>>4)<<6)+(threadIdx.x&15);\r
362 \r
363         RTGEN_PROLOGUE;\r
364 \r
365         // transform to the plain text\r
366         for(ii = 0; ii < 8; ii++)\r
367                 hData[SHIDX(ii)] = 0;\r
368         \r
369         for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
370                 uiVal64 = idx64 + cplStart[ii];\r
371                 uiVal64--;\r
372                 dimItem = cplDimVec[ii];\r
373 \r
374                 uiMul64 = (uint64)dimItem.y<<32;\r
375                 idx64 = __umul64hi(uiVal64, uiMul64);\r
376                 uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
377                 uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
378                 uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
379                 idx64 += uiVal;\r
380                 if(uiDiv >= dimItem.x) {\r
381                         uiDiv -= dimItem.x;\r
382                         idx64++;\r
383                 }\r
384                 hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
385         }\r
386 \r
387         for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
388                 uiVal = idx + cplStart[ii];\r
389                 uiVal--;\r
390                 dimItem = cplDimVec[ii];\r
391 \r
392                 idx = __umulhi(uiVal, dimItem.y);\r
393                 uiDiv = uiVal - idx*dimItem.x;\r
394                 if(uiDiv >= dimItem.x) {\r
395                         uiDiv -= dimItem.x;\r
396                         idx++;\r
397                 }\r
398                 hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
399         }\r
400 \r
401         for(jj = 8, ii--; jj < 15; jj++, ii--)\r
402                 hData[SHIDX(jj)] = hData[SHIDX(ii&7)];\r
403 \r
404         // set key\r
405         ii = 255;\r
406         uiVal = ((hData[SHIDX(10)] << 5) | (hData[SHIDX(11)] >> 3))&ii;\r
407         uiVal = (uiVal<<8) | (((hData[SHIDX(9)] << 6) | (hData[SHIDX(10)] >> 2))&ii);\r
408         uiVal = (uiVal<<8) | (((hData[SHIDX(8)] << 7) | (hData[SHIDX(9)] >> 1))&ii);\r
409         uiVal = (uiVal<<8) | hData[SHIDX(8)];\r
410         \r
411         uiDiv = (hData[SHIDX(14)] << 1)&ii;\r
412         uiDiv = (uiDiv<<8) | (((hData[SHIDX(13)] << 2) | (hData[SHIDX(14)] >> 6))&ii);\r
413         uiDiv = (uiDiv<<8) | (((hData[SHIDX(12)] << 3) | (hData[SHIDX(13)] >> 5))&ii);\r
414         uiDiv = (uiDiv<<8) | (((hData[SHIDX(11)] << 4) | (hData[SHIDX(12)] >> 4))&ii);\r
415 \r
416         hData[SHIDX(0)] = uiVal;\r
417         hData[SHIDX(1)] = uiDiv;\r
418         RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 4, 0x0f0f0f0f, hData);\r
419         RC_LM::HPERM_OP(SHIDX(0), SHIDX(2), -2, 0xcccc0000, hData);\r
420         RC_LM::HPERM_OP(SHIDX(1), SHIDX(2), -2, 0xcccc0000, hData);\r
421         RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
422         RC_LM::PERM_OP(SHIDX(0), SHIDX(1), SHIDX(2), 8, 0x00ff00ff, hData);\r
423         RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
424         uiVal = hData[SHIDX(0)];\r
425         uiDiv = hData[SHIDX(1)];\r
426         uiDiv = ((uiDiv&0x000000ff)<<16) | (uiDiv&0x0000ff00) | ((uiDiv&0x00ff0000)>>16) | ((uiVal&0xf0000000)>>4);\r
427         uiVal &= 0x0fffffff;\r
428 \r
429         for(ii = 0; ii < ITERATIONS; ii++) {\r
430                 if(RC_LM::shifts2[ii]) { \r
431                         uiVal = ((uiVal>>2)|(uiVal<<26)); \r
432                         uiDiv =((uiDiv>>2)|(uiDiv<<26)); \r
433                 } else { \r
434                         uiVal = ((uiVal>>1)|(uiVal<<27)); \r
435                         uiDiv = ((uiDiv>>1)|(uiDiv<<27)); \r
436                 }\r
437                 uiVal &= 0x0fffffff;\r
438                 uiDiv &= 0x0fffffff;\r
439 \r
440                 rs = RC_LM::des_skb[0][uiVal&0x3f] |\r
441                         RC_LM::des_skb[1][((uiVal>>6)&0x03)|((uiVal>>7)&0x3c)] |\r
442                         RC_LM::des_skb[2][((uiVal>>13)&0x0f)|((uiVal>>14)&0x30)] |\r
443                         RC_LM::des_skb[3][((uiVal>>20)&0x01)|((uiVal>>21)&0x06) |\r
444                         ((uiVal>>22)&0x38)];\r
445                 rt = RC_LM::des_skb[4][uiDiv&0x3f] |\r
446                         RC_LM::des_skb[5][((uiDiv>>7)&0x03)|((uiDiv>>8)&0x3c)] |\r
447                         RC_LM::des_skb[6][(uiDiv>>15)&0x3f] |\r
448                         RC_LM::des_skb[7][((uiDiv>>21)&0x0f)|((uiDiv>>22)&0x30)];\r
449 \r
450                 /* table contained 0213 4657 */\r
451                 idx = (rt<<16)|(rs&0x0000ffff);\r
452                 hData[SHIDX(ii)] = (idx>>30)|(idx<<2);\r
453                 idx = (rs>>16)|(rt&0xffff0000);\r
454                 hData2[SHIDX(ii)] = (idx>>26)|(idx<<6);\r
455         }\r
456 \r
457         // encrypt the "magic" data\r
458         hData3[SHIDX(0)] = 0x2153474B;\r
459         hData3[SHIDX(1)] = 0x25242340;\r
460 \r
461         RC_LM::IP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
462         uiVal = hData3[SHIDX(0)];\r
463         uiVal = ((uiVal>>29)|(uiVal<<3));\r
464         uiDiv = hData3[SHIDX(1)];\r
465         uiDiv = ((uiDiv>>29)|(uiDiv<<3));\r
466 \r
467         for(ii = 0; ii < 16; ii+=2) {\r
468                 uiDiv = RC_LM::D_ENCRYPT(uiDiv, uiVal^hData[SHIDX(ii)], uiVal^hData2[SHIDX(ii)]);\r
469                 uiVal = RC_LM::D_ENCRYPT(uiVal, uiDiv^hData[SHIDX(ii+1)], uiDiv^hData2[SHIDX(ii+1)]);\r
470         }\r
471 \r
472         hData3[SHIDX(0)] = ((uiVal>>3)|(uiVal<<29));\r
473         hData3[SHIDX(1)] = ((uiDiv>>3)|(uiDiv<<29));\r
474         RC_LM::FP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
475         \r
476         hData[SHIDX(0)] = hData3[SHIDX(1)];\r
477         hData[SHIDX(1)] = hData3[SHIDX(0)];\r
478         RTGEN_EPILOGUE\r
479 }\r