X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=BOINC%20software%2FBOINC%20client%20apps%2Fdistrrtgen_cuda%2Frcuda_lm.inc;fp=BOINC%20software%2FBOINC%20client%20apps%2Fdistrrtgen_cuda%2Frcuda_lm.inc;h=d1fbc2b2dbf50d5493fd478db2ecfc064138a2c9;hb=683eeaa26cde6b9faa7600497da5896319583692;hp=0000000000000000000000000000000000000000;hpb=948b06c2953b5caa5d6af5eb3dacb2200e357844;p=freerainbowtables diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc new file mode 100644 index 0000000..d1fbc2b --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_lm.inc @@ -0,0 +1,479 @@ +//============================================================================ +// Name : rcuda_lm.inc +// Author : Jan Kyska +// Version : 1.00 +// Description : LM hash kernel for Generator of FreeRainbowTables +//============================================================================ + +namespace RC_LM { + +#define ITERATIONS 16 + + +__device__ __constant__ int shifts2[16] = { 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 }; + +__device__ __constant__ unsigned int des_skb[8][64] = { +{ +/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ +0x00000000L,0x00000010L,0x20000000L,0x20000010L, +0x00010000L,0x00010010L,0x20010000L,0x20010010L, +0x00000800L,0x00000810L,0x20000800L,0x20000810L, +0x00010800L,0x00010810L,0x20010800L,0x20010810L, +0x00000020L,0x00000030L,0x20000020L,0x20000030L, +0x00010020L,0x00010030L,0x20010020L,0x20010030L, +0x00000820L,0x00000830L,0x20000820L,0x20000830L, +0x00010820L,0x00010830L,0x20010820L,0x20010830L, +0x00080000L,0x00080010L,0x20080000L,0x20080010L, +0x00090000L,0x00090010L,0x20090000L,0x20090010L, +0x00080800L,0x00080810L,0x20080800L,0x20080810L, +0x00090800L,0x00090810L,0x20090800L,0x20090810L, +0x00080020L,0x00080030L,0x20080020L,0x20080030L, +0x00090020L,0x00090030L,0x20090020L,0x20090030L, +0x00080820L,0x00080830L,0x20080820L,0x20080830L, +0x00090820L,0x00090830L,0x20090820L,0x20090830L, +},{ +/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ +0x00000000L,0x02000000L,0x00002000L,0x02002000L, +0x00200000L,0x02200000L,0x00202000L,0x02202000L, +0x00000004L,0x02000004L,0x00002004L,0x02002004L, +0x00200004L,0x02200004L,0x00202004L,0x02202004L, +0x00000400L,0x02000400L,0x00002400L,0x02002400L, +0x00200400L,0x02200400L,0x00202400L,0x02202400L, +0x00000404L,0x02000404L,0x00002404L,0x02002404L, +0x00200404L,0x02200404L,0x00202404L,0x02202404L, +0x10000000L,0x12000000L,0x10002000L,0x12002000L, +0x10200000L,0x12200000L,0x10202000L,0x12202000L, +0x10000004L,0x12000004L,0x10002004L,0x12002004L, +0x10200004L,0x12200004L,0x10202004L,0x12202004L, +0x10000400L,0x12000400L,0x10002400L,0x12002400L, +0x10200400L,0x12200400L,0x10202400L,0x12202400L, +0x10000404L,0x12000404L,0x10002404L,0x12002404L, +0x10200404L,0x12200404L,0x10202404L,0x12202404L, +},{ +/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ +0x00000000L,0x00000001L,0x00040000L,0x00040001L, +0x01000000L,0x01000001L,0x01040000L,0x01040001L, +0x00000002L,0x00000003L,0x00040002L,0x00040003L, +0x01000002L,0x01000003L,0x01040002L,0x01040003L, +0x00000200L,0x00000201L,0x00040200L,0x00040201L, +0x01000200L,0x01000201L,0x01040200L,0x01040201L, +0x00000202L,0x00000203L,0x00040202L,0x00040203L, +0x01000202L,0x01000203L,0x01040202L,0x01040203L, +0x08000000L,0x08000001L,0x08040000L,0x08040001L, +0x09000000L,0x09000001L,0x09040000L,0x09040001L, +0x08000002L,0x08000003L,0x08040002L,0x08040003L, +0x09000002L,0x09000003L,0x09040002L,0x09040003L, +0x08000200L,0x08000201L,0x08040200L,0x08040201L, +0x09000200L,0x09000201L,0x09040200L,0x09040201L, +0x08000202L,0x08000203L,0x08040202L,0x08040203L, +0x09000202L,0x09000203L,0x09040202L,0x09040203L, +},{ +/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ +0x00000000L,0x00100000L,0x00000100L,0x00100100L, +0x00000008L,0x00100008L,0x00000108L,0x00100108L, +0x00001000L,0x00101000L,0x00001100L,0x00101100L, +0x00001008L,0x00101008L,0x00001108L,0x00101108L, +0x04000000L,0x04100000L,0x04000100L,0x04100100L, +0x04000008L,0x04100008L,0x04000108L,0x04100108L, +0x04001000L,0x04101000L,0x04001100L,0x04101100L, +0x04001008L,0x04101008L,0x04001108L,0x04101108L, +0x00020000L,0x00120000L,0x00020100L,0x00120100L, +0x00020008L,0x00120008L,0x00020108L,0x00120108L, +0x00021000L,0x00121000L,0x00021100L,0x00121100L, +0x00021008L,0x00121008L,0x00021108L,0x00121108L, +0x04020000L,0x04120000L,0x04020100L,0x04120100L, +0x04020008L,0x04120008L,0x04020108L,0x04120108L, +0x04021000L,0x04121000L,0x04021100L,0x04121100L, +0x04021008L,0x04121008L,0x04021108L,0x04121108L, +},{ +/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ +0x00000000L,0x10000000L,0x00010000L,0x10010000L, +0x00000004L,0x10000004L,0x00010004L,0x10010004L, +0x20000000L,0x30000000L,0x20010000L,0x30010000L, +0x20000004L,0x30000004L,0x20010004L,0x30010004L, +0x00100000L,0x10100000L,0x00110000L,0x10110000L, +0x00100004L,0x10100004L,0x00110004L,0x10110004L, +0x20100000L,0x30100000L,0x20110000L,0x30110000L, +0x20100004L,0x30100004L,0x20110004L,0x30110004L, +0x00001000L,0x10001000L,0x00011000L,0x10011000L, +0x00001004L,0x10001004L,0x00011004L,0x10011004L, +0x20001000L,0x30001000L,0x20011000L,0x30011000L, +0x20001004L,0x30001004L,0x20011004L,0x30011004L, +0x00101000L,0x10101000L,0x00111000L,0x10111000L, +0x00101004L,0x10101004L,0x00111004L,0x10111004L, +0x20101000L,0x30101000L,0x20111000L,0x30111000L, +0x20101004L,0x30101004L,0x20111004L,0x30111004L, +},{ +/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ +0x00000000L,0x08000000L,0x00000008L,0x08000008L, +0x00000400L,0x08000400L,0x00000408L,0x08000408L, +0x00020000L,0x08020000L,0x00020008L,0x08020008L, +0x00020400L,0x08020400L,0x00020408L,0x08020408L, +0x00000001L,0x08000001L,0x00000009L,0x08000009L, +0x00000401L,0x08000401L,0x00000409L,0x08000409L, +0x00020001L,0x08020001L,0x00020009L,0x08020009L, +0x00020401L,0x08020401L,0x00020409L,0x08020409L, +0x02000000L,0x0A000000L,0x02000008L,0x0A000008L, +0x02000400L,0x0A000400L,0x02000408L,0x0A000408L, +0x02020000L,0x0A020000L,0x02020008L,0x0A020008L, +0x02020400L,0x0A020400L,0x02020408L,0x0A020408L, +0x02000001L,0x0A000001L,0x02000009L,0x0A000009L, +0x02000401L,0x0A000401L,0x02000409L,0x0A000409L, +0x02020001L,0x0A020001L,0x02020009L,0x0A020009L, +0x02020401L,0x0A020401L,0x02020409L,0x0A020409L, +},{ +/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ +0x00000000L,0x00000100L,0x00080000L,0x00080100L, +0x01000000L,0x01000100L,0x01080000L,0x01080100L, +0x00000010L,0x00000110L,0x00080010L,0x00080110L, +0x01000010L,0x01000110L,0x01080010L,0x01080110L, +0x00200000L,0x00200100L,0x00280000L,0x00280100L, +0x01200000L,0x01200100L,0x01280000L,0x01280100L, +0x00200010L,0x00200110L,0x00280010L,0x00280110L, +0x01200010L,0x01200110L,0x01280010L,0x01280110L, +0x00000200L,0x00000300L,0x00080200L,0x00080300L, +0x01000200L,0x01000300L,0x01080200L,0x01080300L, +0x00000210L,0x00000310L,0x00080210L,0x00080310L, +0x01000210L,0x01000310L,0x01080210L,0x01080310L, +0x00200200L,0x00200300L,0x00280200L,0x00280300L, +0x01200200L,0x01200300L,0x01280200L,0x01280300L, +0x00200210L,0x00200310L,0x00280210L,0x00280310L, +0x01200210L,0x01200310L,0x01280210L,0x01280310L, +},{ +/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ +0x00000000L,0x04000000L,0x00040000L,0x04040000L, +0x00000002L,0x04000002L,0x00040002L,0x04040002L, +0x00002000L,0x04002000L,0x00042000L,0x04042000L, +0x00002002L,0x04002002L,0x00042002L,0x04042002L, +0x00000020L,0x04000020L,0x00040020L,0x04040020L, +0x00000022L,0x04000022L,0x00040022L,0x04040022L, +0x00002020L,0x04002020L,0x00042020L,0x04042020L, +0x00002022L,0x04002022L,0x00042022L,0x04042022L, +0x00000800L,0x04000800L,0x00040800L,0x04040800L, +0x00000802L,0x04000802L,0x00040802L,0x04040802L, +0x00002800L,0x04002800L,0x00042800L,0x04042800L, +0x00002802L,0x04002802L,0x00042802L,0x04042802L, +0x00000820L,0x04000820L,0x00040820L,0x04040820L, +0x00000822L,0x04000822L,0x00040822L,0x04040822L, +0x00002820L,0x04002820L,0x00042820L,0x04042820L, +0x00002822L,0x04002822L,0x00042822L,0x04042822L, +}}; + +__device__ __constant__ unsigned int des_SPtrans[8][64] = { +{ +/* nibble 0 */ +0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, +0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, +0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L, +0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L, +0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, +0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, +0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L, +0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L, +0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L, +0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, +0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, +0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L, +0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L, +0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L, +0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, +0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L, +},{ +/* nibble 1 */ +0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, +0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L, +0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L, +0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L, +0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L, +0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, +0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L, +0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L, +0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L, +0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, +0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, +0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L, +0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L, +0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L, +0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, +0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L, +},{ +/* nibble 2 */ +0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, +0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L, +0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L, +0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, +0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, +0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L, +0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L, +0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L, +0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, +0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, +0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L, +0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L, +0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, +0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L, +},{ +/* nibble 3 */ +0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, +0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L, +0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L, +0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L, +0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, +0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, +0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L, +0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L, +0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L, +0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, +0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, +0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L, +0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L, +0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L, +0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, +0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L, +},{ +/* nibble 4 */ +0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, +0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L, +0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L, +0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, +0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, +0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L, +0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L, +0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, +0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, +0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L, +0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L, +0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L, +0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, +0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L, +},{ +/* nibble 5 */ +0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, +0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L, +0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L, +0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L, +0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, +0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, +0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L, +0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L, +0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L, +0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, +0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, +0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L, +0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L, +0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L, +0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, +0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, +},{ +/* nibble 6 */ +0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, +0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L, +0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L, +0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L, +0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, +0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, +0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L, +0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L, +0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L, +0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, +0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, +0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L, +0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L, +0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L, +0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, +0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, +},{ +/* nibble 7 */ +0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, +0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L, +0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L, +0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L, +0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, +0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L, +0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L, +0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L, +0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, +0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, +0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L, +0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L, +0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, +0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, +}}; + + +__device__ void PERM_OP(int ia, int ib, int it, unsigned int n, unsigned int m, unsigned int* data) { + data[it] =((data[ia] >> n ) ^ data[ib]) & m; + data[ib] ^= data[it]; + data[ia] ^= data[it] << n; +} + +__device__ void HPERM_OP(int ia, int it, int n, unsigned int m, unsigned int* data) { + data[it] = ((data[ia] << (16-n)) ^ data[ia]) & m; + data[ia] = data[ia] ^ data[it] ^ (data[it]>>(16-n)); +} + +__device__ void IP(int il, int ir, int it, unsigned int* data) { + PERM_OP(ir, il, it, 4, 0x0f0f0f0f, data); + PERM_OP(il, ir, it, 16, 0x0000ffff, data); + PERM_OP(ir, il, it, 2, 0x33333333, data); + PERM_OP(il, ir, it, 8, 0x00ff00ff, data); + PERM_OP(ir, il, it, 1, 0x55555555, data); +} + +__device__ void FP(int il, int ir, int it, unsigned int* data) { + PERM_OP(il, ir, it, 1, 0x55555555, data); + PERM_OP(ir, il, it, 8, 0x00ff00ff, data); + PERM_OP(il, ir, it, 2, 0x33333333, data); + PERM_OP(ir, il, it, 16, 0x0000ffff, data); + PERM_OP(il, ir, it, 4, 0x0f0f0f0f, data); +} + +__device__ unsigned int D_ENCRYPT(unsigned int ll, unsigned int uu, unsigned int tt) { + tt = (tt>>4)|(tt<<28); + return ll ^ des_SPtrans[0][(uu>>2)&0x3f] ^ + des_SPtrans[2][(uu>>10)&0x3f] ^ + des_SPtrans[4][(uu>>18)&0x3f] ^ + des_SPtrans[6][(uu>>26)&0x3f] ^ + des_SPtrans[1][(tt>>2)&0x3f] ^ + des_SPtrans[3][(tt>>10)&0x3f] ^ + des_SPtrans[5][(tt>>18)&0x3f] ^ + des_SPtrans[7][(tt>>26)&0x3f]; +} + +} + +__global__ void RTGenLMKernel(unsigned int chainStart, unsigned int chainStop) { + uint3 dimItem; + unsigned int *hData2, *hData3; + uint64 uiDiv64, uiVal64, uiMul64; + unsigned int uiVal, uiDiv; + unsigned int jj, rs, rt; + + __shared__ unsigned int shData2[SHIDX(BLOCK_X_SIZE)]; + __shared__ unsigned int shData3[BLOCK_X_SIZE<<2]; + hData2 = shData2 + ((threadIdx.x>>4)<<8)+(threadIdx.x&15); + hData3 = shData3 + ((threadIdx.x>>4)<<6)+(threadIdx.x&15); + + RTGEN_PROLOGUE; + + // transform to the plain text + for(ii = 0; ii < 8; ii++) + hData[SHIDX(ii)] = 0; + + for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) { + uiVal64 = idx64 + cplStart[ii]; + uiVal64--; + dimItem = cplDimVec[ii]; + + uiMul64 = (uint64)dimItem.y<<32; + idx64 = __umul64hi(uiVal64, uiMul64); + uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x; + uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y); + uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x; + idx64 += uiVal; + if(uiDiv >= dimItem.x) { + uiDiv -= dimItem.x; + idx64++; + } + hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv]; + } + + for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) { + uiVal = idx + cplStart[ii]; + uiVal--; + dimItem = cplDimVec[ii]; + + idx = __umulhi(uiVal, dimItem.y); + uiDiv = uiVal - idx*dimItem.x; + if(uiDiv >= dimItem.x) { + uiDiv -= dimItem.x; + idx++; + } + hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv]; + } + + for(jj = 8, ii--; jj < 15; jj++, ii--) + hData[SHIDX(jj)] = hData[SHIDX(ii&7)]; + + // set key + ii = 255; + uiVal = ((hData[SHIDX(10)] << 5) | (hData[SHIDX(11)] >> 3))ⅈ + uiVal = (uiVal<<8) | (((hData[SHIDX(9)] << 6) | (hData[SHIDX(10)] >> 2))&ii); + uiVal = (uiVal<<8) | (((hData[SHIDX(8)] << 7) | (hData[SHIDX(9)] >> 1))&ii); + uiVal = (uiVal<<8) | hData[SHIDX(8)]; + + uiDiv = (hData[SHIDX(14)] << 1)ⅈ + uiDiv = (uiDiv<<8) | (((hData[SHIDX(13)] << 2) | (hData[SHIDX(14)] >> 6))&ii); + uiDiv = (uiDiv<<8) | (((hData[SHIDX(12)] << 3) | (hData[SHIDX(13)] >> 5))&ii); + uiDiv = (uiDiv<<8) | (((hData[SHIDX(11)] << 4) | (hData[SHIDX(12)] >> 4))&ii); + + hData[SHIDX(0)] = uiVal; + hData[SHIDX(1)] = uiDiv; + RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 4, 0x0f0f0f0f, hData); + RC_LM::HPERM_OP(SHIDX(0), SHIDX(2), -2, 0xcccc0000, hData); + RC_LM::HPERM_OP(SHIDX(1), SHIDX(2), -2, 0xcccc0000, hData); + RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData); + RC_LM::PERM_OP(SHIDX(0), SHIDX(1), SHIDX(2), 8, 0x00ff00ff, hData); + RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData); + uiVal = hData[SHIDX(0)]; + uiDiv = hData[SHIDX(1)]; + uiDiv = ((uiDiv&0x000000ff)<<16) | (uiDiv&0x0000ff00) | ((uiDiv&0x00ff0000)>>16) | ((uiVal&0xf0000000)>>4); + uiVal &= 0x0fffffff; + + for(ii = 0; ii < ITERATIONS; ii++) { + if(RC_LM::shifts2[ii]) { + uiVal = ((uiVal>>2)|(uiVal<<26)); + uiDiv =((uiDiv>>2)|(uiDiv<<26)); + } else { + uiVal = ((uiVal>>1)|(uiVal<<27)); + uiDiv = ((uiDiv>>1)|(uiDiv<<27)); + } + uiVal &= 0x0fffffff; + uiDiv &= 0x0fffffff; + + rs = RC_LM::des_skb[0][uiVal&0x3f] | + RC_LM::des_skb[1][((uiVal>>6)&0x03)|((uiVal>>7)&0x3c)] | + RC_LM::des_skb[2][((uiVal>>13)&0x0f)|((uiVal>>14)&0x30)] | + RC_LM::des_skb[3][((uiVal>>20)&0x01)|((uiVal>>21)&0x06) | + ((uiVal>>22)&0x38)]; + rt = RC_LM::des_skb[4][uiDiv&0x3f] | + RC_LM::des_skb[5][((uiDiv>>7)&0x03)|((uiDiv>>8)&0x3c)] | + RC_LM::des_skb[6][(uiDiv>>15)&0x3f] | + RC_LM::des_skb[7][((uiDiv>>21)&0x0f)|((uiDiv>>22)&0x30)]; + + /* table contained 0213 4657 */ + idx = (rt<<16)|(rs&0x0000ffff); + hData[SHIDX(ii)] = (idx>>30)|(idx<<2); + idx = (rs>>16)|(rt&0xffff0000); + hData2[SHIDX(ii)] = (idx>>26)|(idx<<6); + } + + // encrypt the "magic" data + hData3[SHIDX(0)] = 0x2153474B; + hData3[SHIDX(1)] = 0x25242340; + + RC_LM::IP(SHIDX(0), SHIDX(1), SHIDX(2), hData3); + uiVal = hData3[SHIDX(0)]; + uiVal = ((uiVal>>29)|(uiVal<<3)); + uiDiv = hData3[SHIDX(1)]; + uiDiv = ((uiDiv>>29)|(uiDiv<<3)); + + for(ii = 0; ii < 16; ii+=2) { + uiDiv = RC_LM::D_ENCRYPT(uiDiv, uiVal^hData[SHIDX(ii)], uiVal^hData2[SHIDX(ii)]); + uiVal = RC_LM::D_ENCRYPT(uiVal, uiDiv^hData[SHIDX(ii+1)], uiDiv^hData2[SHIDX(ii+1)]); + } + + hData3[SHIDX(0)] = ((uiVal>>3)|(uiVal<<29)); + hData3[SHIDX(1)] = ((uiDiv>>3)|(uiDiv<<29)); + RC_LM::FP(SHIDX(0), SHIDX(1), SHIDX(2), hData3); + + hData[SHIDX(0)] = hData3[SHIDX(1)]; + hData[SHIDX(1)] = hData3[SHIDX(0)]; + RTGEN_EPILOGUE +}