]> git.sesse.net Git - freerainbowtables/blobdiff - BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc
merge all the algorithms for distrrtgen_cuda
[freerainbowtables] / BOINC software / BOINC client apps / distrrtgen_cuda / rcuda_lm.inc
diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc
new file mode 100644 (file)
index 0000000..d1fbc2b
--- /dev/null
@@ -0,0 +1,479 @@
+//============================================================================\r
+// Name        : rcuda_lm.inc\r
+// Author      : Jan Kyska\r
+// Version     : 1.00\r
+// Description : LM hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_LM {\r
+\r
+#define  ITERATIONS    16\r
+\r
+\r
+__device__ __constant__ int shifts2[16] = { 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 };\r
+\r
+__device__ __constant__ unsigned int des_skb[8][64] = {\r
+{\r
+/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
+0x00000000L,0x00000010L,0x20000000L,0x20000010L,\r
+0x00010000L,0x00010010L,0x20010000L,0x20010010L,\r
+0x00000800L,0x00000810L,0x20000800L,0x20000810L,\r
+0x00010800L,0x00010810L,0x20010800L,0x20010810L,\r
+0x00000020L,0x00000030L,0x20000020L,0x20000030L,\r
+0x00010020L,0x00010030L,0x20010020L,0x20010030L,\r
+0x00000820L,0x00000830L,0x20000820L,0x20000830L,\r
+0x00010820L,0x00010830L,0x20010820L,0x20010830L,\r
+0x00080000L,0x00080010L,0x20080000L,0x20080010L,\r
+0x00090000L,0x00090010L,0x20090000L,0x20090010L,\r
+0x00080800L,0x00080810L,0x20080800L,0x20080810L,\r
+0x00090800L,0x00090810L,0x20090800L,0x20090810L,\r
+0x00080020L,0x00080030L,0x20080020L,0x20080030L,\r
+0x00090020L,0x00090030L,0x20090020L,0x20090030L,\r
+0x00080820L,0x00080830L,0x20080820L,0x20080830L,\r
+0x00090820L,0x00090830L,0x20090820L,0x20090830L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */\r
+0x00000000L,0x02000000L,0x00002000L,0x02002000L,\r
+0x00200000L,0x02200000L,0x00202000L,0x02202000L,\r
+0x00000004L,0x02000004L,0x00002004L,0x02002004L,\r
+0x00200004L,0x02200004L,0x00202004L,0x02202004L,\r
+0x00000400L,0x02000400L,0x00002400L,0x02002400L,\r
+0x00200400L,0x02200400L,0x00202400L,0x02202400L,\r
+0x00000404L,0x02000404L,0x00002404L,0x02002404L,\r
+0x00200404L,0x02200404L,0x00202404L,0x02202404L,\r
+0x10000000L,0x12000000L,0x10002000L,0x12002000L,\r
+0x10200000L,0x12200000L,0x10202000L,0x12202000L,\r
+0x10000004L,0x12000004L,0x10002004L,0x12002004L,\r
+0x10200004L,0x12200004L,0x10202004L,0x12202004L,\r
+0x10000400L,0x12000400L,0x10002400L,0x12002400L,\r
+0x10200400L,0x12200400L,0x10202400L,0x12202400L,\r
+0x10000404L,0x12000404L,0x10002404L,0x12002404L,\r
+0x10200404L,0x12200404L,0x10202404L,0x12202404L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */\r
+0x00000000L,0x00000001L,0x00040000L,0x00040001L,\r
+0x01000000L,0x01000001L,0x01040000L,0x01040001L,\r
+0x00000002L,0x00000003L,0x00040002L,0x00040003L,\r
+0x01000002L,0x01000003L,0x01040002L,0x01040003L,\r
+0x00000200L,0x00000201L,0x00040200L,0x00040201L,\r
+0x01000200L,0x01000201L,0x01040200L,0x01040201L,\r
+0x00000202L,0x00000203L,0x00040202L,0x00040203L,\r
+0x01000202L,0x01000203L,0x01040202L,0x01040203L,\r
+0x08000000L,0x08000001L,0x08040000L,0x08040001L,\r
+0x09000000L,0x09000001L,0x09040000L,0x09040001L,\r
+0x08000002L,0x08000003L,0x08040002L,0x08040003L,\r
+0x09000002L,0x09000003L,0x09040002L,0x09040003L,\r
+0x08000200L,0x08000201L,0x08040200L,0x08040201L,\r
+0x09000200L,0x09000201L,0x09040200L,0x09040201L,\r
+0x08000202L,0x08000203L,0x08040202L,0x08040203L,\r
+0x09000202L,0x09000203L,0x09040202L,0x09040203L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */\r
+0x00000000L,0x00100000L,0x00000100L,0x00100100L,\r
+0x00000008L,0x00100008L,0x00000108L,0x00100108L,\r
+0x00001000L,0x00101000L,0x00001100L,0x00101100L,\r
+0x00001008L,0x00101008L,0x00001108L,0x00101108L,\r
+0x04000000L,0x04100000L,0x04000100L,0x04100100L,\r
+0x04000008L,0x04100008L,0x04000108L,0x04100108L,\r
+0x04001000L,0x04101000L,0x04001100L,0x04101100L,\r
+0x04001008L,0x04101008L,0x04001108L,0x04101108L,\r
+0x00020000L,0x00120000L,0x00020100L,0x00120100L,\r
+0x00020008L,0x00120008L,0x00020108L,0x00120108L,\r
+0x00021000L,0x00121000L,0x00021100L,0x00121100L,\r
+0x00021008L,0x00121008L,0x00021108L,0x00121108L,\r
+0x04020000L,0x04120000L,0x04020100L,0x04120100L,\r
+0x04020008L,0x04120008L,0x04020108L,0x04120108L,\r
+0x04021000L,0x04121000L,0x04021100L,0x04121100L,\r
+0x04021008L,0x04121008L,0x04021108L,0x04121108L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
+0x00000000L,0x10000000L,0x00010000L,0x10010000L,\r
+0x00000004L,0x10000004L,0x00010004L,0x10010004L,\r
+0x20000000L,0x30000000L,0x20010000L,0x30010000L,\r
+0x20000004L,0x30000004L,0x20010004L,0x30010004L,\r
+0x00100000L,0x10100000L,0x00110000L,0x10110000L,\r
+0x00100004L,0x10100004L,0x00110004L,0x10110004L,\r
+0x20100000L,0x30100000L,0x20110000L,0x30110000L,\r
+0x20100004L,0x30100004L,0x20110004L,0x30110004L,\r
+0x00001000L,0x10001000L,0x00011000L,0x10011000L,\r
+0x00001004L,0x10001004L,0x00011004L,0x10011004L,\r
+0x20001000L,0x30001000L,0x20011000L,0x30011000L,\r
+0x20001004L,0x30001004L,0x20011004L,0x30011004L,\r
+0x00101000L,0x10101000L,0x00111000L,0x10111000L,\r
+0x00101004L,0x10101004L,0x00111004L,0x10111004L,\r
+0x20101000L,0x30101000L,0x20111000L,0x30111000L,\r
+0x20101004L,0x30101004L,0x20111004L,0x30111004L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */\r
+0x00000000L,0x08000000L,0x00000008L,0x08000008L,\r
+0x00000400L,0x08000400L,0x00000408L,0x08000408L,\r
+0x00020000L,0x08020000L,0x00020008L,0x08020008L,\r
+0x00020400L,0x08020400L,0x00020408L,0x08020408L,\r
+0x00000001L,0x08000001L,0x00000009L,0x08000009L,\r
+0x00000401L,0x08000401L,0x00000409L,0x08000409L,\r
+0x00020001L,0x08020001L,0x00020009L,0x08020009L,\r
+0x00020401L,0x08020401L,0x00020409L,0x08020409L,\r
+0x02000000L,0x0A000000L,0x02000008L,0x0A000008L,\r
+0x02000400L,0x0A000400L,0x02000408L,0x0A000408L,\r
+0x02020000L,0x0A020000L,0x02020008L,0x0A020008L,\r
+0x02020400L,0x0A020400L,0x02020408L,0x0A020408L,\r
+0x02000001L,0x0A000001L,0x02000009L,0x0A000009L,\r
+0x02000401L,0x0A000401L,0x02000409L,0x0A000409L,\r
+0x02020001L,0x0A020001L,0x02020009L,0x0A020009L,\r
+0x02020401L,0x0A020401L,0x02020409L,0x0A020409L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */\r
+0x00000000L,0x00000100L,0x00080000L,0x00080100L,\r
+0x01000000L,0x01000100L,0x01080000L,0x01080100L,\r
+0x00000010L,0x00000110L,0x00080010L,0x00080110L,\r
+0x01000010L,0x01000110L,0x01080010L,0x01080110L,\r
+0x00200000L,0x00200100L,0x00280000L,0x00280100L,\r
+0x01200000L,0x01200100L,0x01280000L,0x01280100L,\r
+0x00200010L,0x00200110L,0x00280010L,0x00280110L,\r
+0x01200010L,0x01200110L,0x01280010L,0x01280110L,\r
+0x00000200L,0x00000300L,0x00080200L,0x00080300L,\r
+0x01000200L,0x01000300L,0x01080200L,0x01080300L,\r
+0x00000210L,0x00000310L,0x00080210L,0x00080310L,\r
+0x01000210L,0x01000310L,0x01080210L,0x01080310L,\r
+0x00200200L,0x00200300L,0x00280200L,0x00280300L,\r
+0x01200200L,0x01200300L,0x01280200L,0x01280300L,\r
+0x00200210L,0x00200310L,0x00280210L,0x00280310L,\r
+0x01200210L,0x01200310L,0x01280210L,0x01280310L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */\r
+0x00000000L,0x04000000L,0x00040000L,0x04040000L,\r
+0x00000002L,0x04000002L,0x00040002L,0x04040002L,\r
+0x00002000L,0x04002000L,0x00042000L,0x04042000L,\r
+0x00002002L,0x04002002L,0x00042002L,0x04042002L,\r
+0x00000020L,0x04000020L,0x00040020L,0x04040020L,\r
+0x00000022L,0x04000022L,0x00040022L,0x04040022L,\r
+0x00002020L,0x04002020L,0x00042020L,0x04042020L,\r
+0x00002022L,0x04002022L,0x00042022L,0x04042022L,\r
+0x00000800L,0x04000800L,0x00040800L,0x04040800L,\r
+0x00000802L,0x04000802L,0x00040802L,0x04040802L,\r
+0x00002800L,0x04002800L,0x00042800L,0x04042800L,\r
+0x00002802L,0x04002802L,0x00042802L,0x04042802L,\r
+0x00000820L,0x04000820L,0x00040820L,0x04040820L,\r
+0x00000822L,0x04000822L,0x00040822L,0x04040822L,\r
+0x00002820L,0x04002820L,0x00042820L,0x04042820L,\r
+0x00002822L,0x04002822L,0x00042822L,0x04042822L,\r
+}};\r
+\r
+__device__ __constant__ unsigned int des_SPtrans[8][64] = {\r
+{\r
+/* nibble 0 */\r
+0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,\r
+0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,\r
+0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,\r
+0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,\r
+0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,\r
+0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,\r
+0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,\r
+0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,\r
+0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,\r
+0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,\r
+0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,\r
+0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,\r
+0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,\r
+0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,\r
+0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,\r
+0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L,\r
+},{\r
+/* nibble 1 */\r
+0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,\r
+0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,\r
+0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,\r
+0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,\r
+0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,\r
+0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,\r
+0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,\r
+0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,\r
+0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,\r
+0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,\r
+0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,\r
+0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,\r
+0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,\r
+0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,\r
+0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,\r
+0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L,\r
+},{\r
+/* nibble 2 */\r
+0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,\r
+0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,\r
+0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,\r
+0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,\r
+0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,\r
+0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,\r
+0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,\r
+0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,\r
+0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,\r
+0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,\r
+0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,\r
+0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,\r
+0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,\r
+0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,\r
+0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,\r
+0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L,\r
+},{\r
+/* nibble 3 */\r
+0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,\r
+0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,\r
+0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,\r
+0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,\r
+0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,\r
+0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,\r
+0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,\r
+0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,\r
+0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,\r
+0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,\r
+0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,\r
+0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,\r
+0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,\r
+0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,\r
+0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,\r
+0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L,\r
+},{\r
+/* nibble 4 */\r
+0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,\r
+0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,\r
+0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,\r
+0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,\r
+0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,\r
+0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,\r
+0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,\r
+0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,\r
+0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,\r
+0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,\r
+0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,\r
+0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,\r
+0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,\r
+0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,\r
+0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,\r
+0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L,\r
+},{\r
+/* nibble 5 */\r
+0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,\r
+0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,\r
+0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,\r
+0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,\r
+0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,\r
+0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,\r
+0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,\r
+0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,\r
+0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,\r
+0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,\r
+0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,\r
+0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,\r
+0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,\r
+0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,\r
+0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,\r
+0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,\r
+},{\r
+/* nibble 6 */\r
+0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,\r
+0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,\r
+0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,\r
+0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,\r
+0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,\r
+0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,\r
+0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,\r
+0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,\r
+0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,\r
+0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,\r
+0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,\r
+0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,\r
+0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,\r
+0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,\r
+0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,\r
+0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,\r
+},{\r
+/* nibble 7 */\r
+0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,\r
+0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,\r
+0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,\r
+0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,\r
+0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,\r
+0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,\r
+0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,\r
+0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,\r
+0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,\r
+0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,\r
+0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,\r
+0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,\r
+0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,\r
+0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,\r
+0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,\r
+0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,\r
+}};\r
+\r
+\r
+__device__ void PERM_OP(int ia, int ib, int it, unsigned int n, unsigned int m, unsigned int* data) {\r
+       data[it] =((data[ia] >> n ) ^ data[ib]) & m;\r
+       data[ib] ^= data[it];\r
+       data[ia] ^= data[it] << n;\r
+}\r
+\r
+__device__ void HPERM_OP(int ia, int it, int n, unsigned int m, unsigned int* data) {\r
+       data[it] = ((data[ia] << (16-n)) ^ data[ia]) & m;\r
+       data[ia] = data[ia] ^ data[it] ^ (data[it]>>(16-n));\r
+}\r
+\r
+__device__ void IP(int il, int ir, int it, unsigned int* data) {\r
+       PERM_OP(ir, il, it, 4, 0x0f0f0f0f, data);\r
+       PERM_OP(il, ir, it, 16, 0x0000ffff, data);\r
+       PERM_OP(ir, il, it, 2, 0x33333333, data);\r
+       PERM_OP(il, ir, it, 8, 0x00ff00ff, data);\r
+       PERM_OP(ir, il, it, 1, 0x55555555, data);\r
+}\r
+\r
+__device__ void FP(int il, int ir, int it, unsigned int* data) {\r
+       PERM_OP(il, ir, it, 1, 0x55555555, data);\r
+       PERM_OP(ir, il, it, 8, 0x00ff00ff, data);\r
+       PERM_OP(il, ir, it, 2, 0x33333333, data);\r
+       PERM_OP(ir, il, it, 16, 0x0000ffff, data);\r
+       PERM_OP(il, ir, it, 4, 0x0f0f0f0f, data);\r
+}\r
+\r
+__device__ unsigned int D_ENCRYPT(unsigned int ll, unsigned int uu, unsigned int tt) {\r
+       tt = (tt>>4)|(tt<<28);\r
+       return ll ^ des_SPtrans[0][(uu>>2)&0x3f] ^\r
+                       des_SPtrans[2][(uu>>10)&0x3f] ^\r
+                       des_SPtrans[4][(uu>>18)&0x3f] ^\r
+                       des_SPtrans[6][(uu>>26)&0x3f] ^\r
+                       des_SPtrans[1][(tt>>2)&0x3f] ^\r
+                       des_SPtrans[3][(tt>>10)&0x3f] ^\r
+                       des_SPtrans[5][(tt>>18)&0x3f] ^\r
+                       des_SPtrans[7][(tt>>26)&0x3f];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenLMKernel(unsigned int chainStart, unsigned int chainStop) {\r
+       uint3 dimItem;\r
+       unsigned int *hData2, *hData3;\r
+       uint64 uiDiv64, uiVal64, uiMul64;\r
+       unsigned int uiVal, uiDiv;\r
+       unsigned int jj, rs, rt;\r
+\r
+       __shared__ unsigned int shData2[SHIDX(BLOCK_X_SIZE)];\r
+       __shared__ unsigned int shData3[BLOCK_X_SIZE<<2];\r
+       hData2 = shData2 + ((threadIdx.x>>4)<<8)+(threadIdx.x&15);\r
+       hData3 = shData3 + ((threadIdx.x>>4)<<6)+(threadIdx.x&15);\r
+\r
+       RTGEN_PROLOGUE;\r
+\r
+       // transform to the plain text\r
+       for(ii = 0; ii < 8; ii++)\r
+               hData[SHIDX(ii)] = 0;\r
+       \r
+       for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+               uiVal64 = idx64 + cplStart[ii];\r
+               uiVal64--;\r
+               dimItem = cplDimVec[ii];\r
+\r
+               uiMul64 = (uint64)dimItem.y<<32;\r
+               idx64 = __umul64hi(uiVal64, uiMul64);\r
+               uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+               uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+               uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+               idx64 += uiVal;\r
+               if(uiDiv >= dimItem.x) {\r
+                       uiDiv -= dimItem.x;\r
+                       idx64++;\r
+               }\r
+               hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
+       }\r
+\r
+       for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+               uiVal = idx + cplStart[ii];\r
+               uiVal--;\r
+               dimItem = cplDimVec[ii];\r
+\r
+               idx = __umulhi(uiVal, dimItem.y);\r
+               uiDiv = uiVal - idx*dimItem.x;\r
+               if(uiDiv >= dimItem.x) {\r
+                       uiDiv -= dimItem.x;\r
+                       idx++;\r
+               }\r
+               hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
+       }\r
+\r
+       for(jj = 8, ii--; jj < 15; jj++, ii--)\r
+               hData[SHIDX(jj)] = hData[SHIDX(ii&7)];\r
+\r
+       // set key\r
+       ii = 255;\r
+       uiVal = ((hData[SHIDX(10)] << 5) | (hData[SHIDX(11)] >> 3))&ii;\r
+       uiVal = (uiVal<<8) | (((hData[SHIDX(9)] << 6) | (hData[SHIDX(10)] >> 2))&ii);\r
+       uiVal = (uiVal<<8) | (((hData[SHIDX(8)] << 7) | (hData[SHIDX(9)] >> 1))&ii);\r
+       uiVal = (uiVal<<8) | hData[SHIDX(8)];\r
+       \r
+       uiDiv = (hData[SHIDX(14)] << 1)&ii;\r
+       uiDiv = (uiDiv<<8) | (((hData[SHIDX(13)] << 2) | (hData[SHIDX(14)] >> 6))&ii);\r
+       uiDiv = (uiDiv<<8) | (((hData[SHIDX(12)] << 3) | (hData[SHIDX(13)] >> 5))&ii);\r
+       uiDiv = (uiDiv<<8) | (((hData[SHIDX(11)] << 4) | (hData[SHIDX(12)] >> 4))&ii);\r
+\r
+       hData[SHIDX(0)] = uiVal;\r
+       hData[SHIDX(1)] = uiDiv;\r
+       RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 4, 0x0f0f0f0f, hData);\r
+       RC_LM::HPERM_OP(SHIDX(0), SHIDX(2), -2, 0xcccc0000, hData);\r
+       RC_LM::HPERM_OP(SHIDX(1), SHIDX(2), -2, 0xcccc0000, hData);\r
+       RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
+       RC_LM::PERM_OP(SHIDX(0), SHIDX(1), SHIDX(2), 8, 0x00ff00ff, hData);\r
+       RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
+       uiVal = hData[SHIDX(0)];\r
+       uiDiv = hData[SHIDX(1)];\r
+       uiDiv = ((uiDiv&0x000000ff)<<16) | (uiDiv&0x0000ff00) | ((uiDiv&0x00ff0000)>>16) | ((uiVal&0xf0000000)>>4);\r
+       uiVal &= 0x0fffffff;\r
+\r
+       for(ii = 0; ii < ITERATIONS; ii++) {\r
+               if(RC_LM::shifts2[ii]) { \r
+                       uiVal = ((uiVal>>2)|(uiVal<<26)); \r
+                       uiDiv =((uiDiv>>2)|(uiDiv<<26)); \r
+               } else { \r
+                       uiVal = ((uiVal>>1)|(uiVal<<27)); \r
+                       uiDiv = ((uiDiv>>1)|(uiDiv<<27)); \r
+               }\r
+               uiVal &= 0x0fffffff;\r
+               uiDiv &= 0x0fffffff;\r
+\r
+               rs = RC_LM::des_skb[0][uiVal&0x3f] |\r
+                       RC_LM::des_skb[1][((uiVal>>6)&0x03)|((uiVal>>7)&0x3c)] |\r
+                       RC_LM::des_skb[2][((uiVal>>13)&0x0f)|((uiVal>>14)&0x30)] |\r
+                       RC_LM::des_skb[3][((uiVal>>20)&0x01)|((uiVal>>21)&0x06) |\r
+                       ((uiVal>>22)&0x38)];\r
+               rt = RC_LM::des_skb[4][uiDiv&0x3f] |\r
+                       RC_LM::des_skb[5][((uiDiv>>7)&0x03)|((uiDiv>>8)&0x3c)] |\r
+                       RC_LM::des_skb[6][(uiDiv>>15)&0x3f] |\r
+                       RC_LM::des_skb[7][((uiDiv>>21)&0x0f)|((uiDiv>>22)&0x30)];\r
+\r
+               /* table contained 0213 4657 */\r
+               idx = (rt<<16)|(rs&0x0000ffff);\r
+               hData[SHIDX(ii)] = (idx>>30)|(idx<<2);\r
+               idx = (rs>>16)|(rt&0xffff0000);\r
+               hData2[SHIDX(ii)] = (idx>>26)|(idx<<6);\r
+       }\r
+\r
+       // encrypt the "magic" data\r
+       hData3[SHIDX(0)] = 0x2153474B;\r
+       hData3[SHIDX(1)] = 0x25242340;\r
+\r
+       RC_LM::IP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
+       uiVal = hData3[SHIDX(0)];\r
+       uiVal = ((uiVal>>29)|(uiVal<<3));\r
+       uiDiv = hData3[SHIDX(1)];\r
+       uiDiv = ((uiDiv>>29)|(uiDiv<<3));\r
+\r
+       for(ii = 0; ii < 16; ii+=2) {\r
+               uiDiv = RC_LM::D_ENCRYPT(uiDiv, uiVal^hData[SHIDX(ii)], uiVal^hData2[SHIDX(ii)]);\r
+               uiVal = RC_LM::D_ENCRYPT(uiVal, uiDiv^hData[SHIDX(ii+1)], uiDiv^hData2[SHIDX(ii+1)]);\r
+       }\r
+\r
+       hData3[SHIDX(0)] = ((uiVal>>3)|(uiVal<<29));\r
+       hData3[SHIDX(1)] = ((uiDiv>>3)|(uiDiv<<29));\r
+       RC_LM::FP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
+       \r
+       hData[SHIDX(0)] = hData3[SHIDX(1)];\r
+       hData[SHIDX(1)] = hData3[SHIDX(0)];\r
+       RTGEN_EPILOGUE\r
+}\r