$(CXX) $(CXXFLAGS) rcuda_ext.cpp
rcuda.o: rcuda.h rcuda.cu $(COMMON_API_PATH)/Public.h $(COMMON_API_PATH)/global.h
- /usr/local/cuda/bin/nvcc $(DEBUG) -I../../../Common/rt\ api --compile rcuda.cu
+ /usr/local/cuda/bin/nvcc $(OPTIMIZATION) $(DEBUG) -I../../../Common/rt\ api --compile rcuda.cu
# /usr/local/cuda/bin/nvcc -G $(DEBUG) --machine 32 -I../../../Common/rt\ api --compile rcuda.cu
bool early_sleep = false;
double cpu_time = 20, comp_result;
*/
+/*
int QuickSortPartition(RainbowChainCP* pChain, int nLow, int nHigh)
{
int nRandomIndex = nLow + ((uint32)rand() * ((uint32)RAND_MAX + 1) + (uint32)rand()) % (nHigh - nLow + 1);
QuickSort(pChain, nPivotLoc + 1, nHigh);
}
}
-
+*/
int main(int argc, char **argv) {
int retval;
double fd;
fprintf(stderr, "boinc_init returned %d\n", retval);
exit(retval);
}
-
-
- // get size of input file (used to compute fraction done)
- //
- //file_size(input_path, fsize);
-
- // See if there's a valid checkpoint file.
- // If so seek input file and truncate output file
- //
+ // extract a --device option
+ std::vector<char*> argVec;
+ int cudaDevice = -1;
+ for(int ii = 0; ii < argc; ii++) {
+ if(cudaDevice < 0 && strcmp(argv[ii], "--device") == 0 && ii + 1 < argc)
+ cudaDevice = atoi(argv[++ii]);
+ else
+ argVec.push_back(argv[ii]);
+ }
+ argc = (int)argVec.size();
+ argv = &argVec[0];
+ if(!(cudaDevice < 0))
+ // set the cuda device
+ if(rcuda::SetCudaDevice(cudaDevice) != 0)
+ {
+ //XXX this call doesn't work on linux
+ // fixed in upstream source 2010-09-16
+ // http://bolt.berkeley.edu/trac/changeset/22382
+ #ifdef _WIN32
+ boinc_temporary_exit(60);
+ #endif
+ }
if(argc < 10)
{
// Open file
-// fclose(fopen(sFilename.c_str(), "a"));
-// FILE* file = fopen(sFilename.c_str(), "r+b");
- boinc_resolve_filename("result", output_path, sizeof(output_path));
+ boinc_resolve_filename("result", output_path, sizeof(output_path));
fclose(boinc_fopen(output_path, "a"));
FILE *outfile = boinc_fopen(output_path, "r+b");
unsigned int nFileLen;
// Round to boundary
- nDataLen = nDataLen / 18 * 18;
- if (nDataLen == nRainbowChainCount * 18)
+ nDataLen = nDataLen / 10 * 10;
+ if (nDataLen == nRainbowChainCount * 10)
{
std::cerr << "precomputation of this rainbow table already finished" << std::endl;
fclose(outfile);
}
fseek(outfile, nDataLen, SEEK_SET);
+ //XXX size_t isn't 32/64 clean
size_t nReturn;
CChainWalkContext cwc;
uint64 nIndex[2];
+ time_t tStart = time(NULL);
// std::cout << "Starting to generate chains" << std::endl;
- int maxCalcBuffSize = rcuda::GetChainsBufferSize(5000);
+ int maxCalcBuffSize = rcuda::GetChainsBufferSize(0x2000);
uint64 *calcBuff = new uint64[2*maxCalcBuffSize];
int ii;
rcuda::RCudaTask cuTask;
std::vector<unsigned char> stPlain;
ex.Init();
-time_t tStart, tStartFinal, tEndFinal;
-time_t tEnd;
- tStartFinal = time(NULL);
- for(uint32 nCurrentCalculatedChains = nDataLen / 18, calcSize; nCurrentCalculatedChains < nRainbowChainCount; )
+
+ for(int nCurrentCalculatedChains = nDataLen / 10, calcSize; nCurrentCalculatedChains < nRainbowChainCount; )
{
fd = (double)nCurrentCalculatedChains / (double)nRainbowChainCount;
boinc_fraction_done(fd);
calcBuff[2*ii] = cuTask.startIdx + ii;
calcBuff[2*ii+1] = 0;
}
-
- tStart = time(NULL);
-
calcSize = rcuda::CalcChainsOnCUDA(&cuTask, calcBuff);
- tEnd = time(NULL);
- std::cerr << "CUDA time taken: " << tEnd - tStart << std::endl;
- tStart = time(NULL);
+
if(calcSize > 0) {
nCurrentCalculatedChains += calcSize;
for(ii = 0; ii < cuTask.idxCount; ii++) {
nIndex[0] = cuTask.startIdx + ii;
- nReturn = fwrite(nIndex, 1, 8, outfile);
- nReturn += fwrite(calcBuff+(2*ii), 1, 8, outfile);
+// nReturn = fwrite(nIndex, 1, 8, outfile);
+ nReturn = fwrite(calcBuff+(2*ii), 1, 8, outfile);
nReturn += fwrite(calcBuff+(2*ii+1), 1, 2, outfile);
- if(nReturn != 18) {
+ if(nReturn != 10) {
std::cerr << "disk write fail" << std::endl;
fclose(outfile);
return 9;
return 0;
}
}
- tEndFinal = time(NULL);
- std::cerr << "Time taken: " << tEndFinal - tStartFinal << " secs" << std::endl;
delete [] calcBuff;
#ifdef _DEBUG
std::cout << "Generation completed" << std::endl;
#endif
+/*
fseek(outfile, 0, SEEK_SET);
nFileLen = GetFileLen(outfile);
nRainbowChainCount = nFileLen / 18;
}
delete[] pChain;
}
-
+*/
fclose(outfile);
// main loop - read characters, convert to UC, write
return main(argc, argv);
}
#endif
-
-const char *BOINC_RCSID_33ac47a071 = "$Id: upper_case.C 12135 2007-02-21 20:04:14Z davea $";
-
+//============================================================================
+// Name : rcuda.cu
+// Author : Jan Kyska
+// Version : 1.00
+// Description : Generator of FreeRainbowTables / MD5, MD4, NTLM, SHA1, LM
+//============================================================================
+
#include <stdio.h>
#include <cuda.h>
#include "rcuda.h"
#define GRID_X_L2 6
#define GRID_Y_L2 6
-#define BLOCK_X_L2 7
+#define BLOCK_X_L2 6
#define GRID_X_SIZE (1<<GRID_X_L2)
#define GRID_Y_SIZE (1<<GRID_Y_L2)
#define BLOCK_X_SIZE (1<<BLOCK_X_L2)
+#define PLAIN_MAX_SIZE 20
#define KERN_CHAIN_SIZE 100
+#define CHAR_SET_MAXLEN 512
#define SHIDX(x) ((x)<<4)
-__device__ __constant__ unsigned int h[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
-__device__ __constant__ unsigned char r[64] = { \
- 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, \
- 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, \
- 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, \
- 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 };
-__device__ __constant__ unsigned char g[64] = { \
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
- 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, \
- 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, \
- 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 };
-__device__ __constant__ unsigned int ac[64] = { \
- 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, \
- 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, \
- 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, \
- 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, \
- \
- 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, \
- 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, \
- 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, \
- 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, \
- \
- 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, \
- 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, \
- 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, \
- 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, \
- \
- 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, \
- 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, \
- 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, \
- 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
-
-__device__ __constant__ unsigned int testData[16] = { \
- 0x79706d63, 0x6d627667, 0x00000080, 0x00000000, \
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x00000040, 0x00000000 };
-
-__device__ unsigned int FF(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {
- unsigned int ret;
- ret = a + ((b&c)|((~b)&d)) + ac[i] + data[SHIDX(g[i])];
- ret = (ret<<r[i])|(ret>>(32-r[i]));
- ret += b;
- return ret;
-}
-
-__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {
- unsigned int ret;
- ret = a + ((b&d)|(c&(~d))) + ac[i] + data[SHIDX(g[i])];
- ret = (ret<<r[i])|(ret>>(32-r[i]));
- ret += b;
- return ret;
-}
-
-__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {
- unsigned int ret;
- ret = a + (b^c^d) + ac[i] + data[SHIDX(g[i])];
- ret = (ret<<r[i])|(ret>>(32-r[i]));
- ret += b;
- return ret;
-}
-
-__device__ unsigned int II(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {
- unsigned int ret;
- ret = a + (c^(b|(~d))) + ac[i] + data[SHIDX(g[i])];
- ret = (ret<<r[i])|(ret>>(32-r[i]));
- ret += b;
- return ret;
-}
-
-__device__ void MD5(unsigned int* dataHash) {
- unsigned int a = h[0], b = h[1], c = h[2], d = h[3], x;
- int ii;
-
- // Round 1
- for(ii = 0; ii < 16; ii++) {
- x = b;
- b = FF(a, b, c, d, ii, dataHash);
- a = d; d = c; c = x;
- }
-
- // Round 2
- for(; ii < 32; ii++) {
- x = b;
- b = GG(a, b, c, d, ii, dataHash);
- a = d; d = c; c = x;
- }
-
- // Round 3
- for(; ii < 48; ii++) {
- x = b;
- b = HH(a, b, c, d, ii, dataHash);
- a = d; d = c; c = x;
- }
-
- // Round 4
- for(; ii < 64; ii++) {
- x = b;
- b = II(a, b, c, d, ii, dataHash);
- a = d; d = c; c = x;
- }
-
- dataHash[SHIDX(0)] = a + h[0];
- dataHash[SHIDX(1)] = b + h[1];
- dataHash[SHIDX(2)] = c + h[2];
- dataHash[SHIDX(3)] = d + h[3];
-}
-
-
-#define PLAIN_MAX_SIZE 20
-
__device__ uint64 *dataHeap;
__device__ unsigned char *plStart;
__device__ uint3 *plDimVec;
__device__ uint64 plainSpaceTotal;
__device__ uint64 rPlainSpaceTotal;
-
-__global__ void RTGenMD5Kernel(unsigned int chainStart, unsigned int chainStop) {
- unsigned int *hData;
- uint3 dimItem;
- unsigned int uiVal, uiMul, uiDiv, idx;
- uint64 uiVal64, uiMul64, uiDiv64, idx64;
- unsigned int nPos, size, ii, jj, kk;
- unsigned int cpcheck, checkpoint;
- unsigned int plain;
-
- __shared__ unsigned int shData[SHIDX(BLOCK_X_SIZE)];
-
- if(threadIdx.x == 0) {
- nPos = ((((blockIdx.y<<GRID_X_L2) + blockIdx.x)<<BLOCK_X_L2) + threadIdx.x) << 1;
- for(ii = 0; ii < BLOCK_X_SIZE; ii++, nPos+=2) {
- hData = shData + ((ii>>4)<<8)+(ii&15);
- hData[SHIDX(0)] = dataHeap[nPos];
- hData[SHIDX(1)] = dataHeap[nPos]>>32;
- hData[SHIDX(2)] = dataHeap[nPos+1];
- }
- }
- __syncthreads();
-
- hData = shData + ((threadIdx.x>>4)<<8)+(threadIdx.x&15);
-
- idx64 = hData[SHIDX(1)];
- idx64 = (idx64<<32) | hData[SHIDX(0)];
- cpcheck = hData[SHIDX(2)];
- checkpoint = cpcheck&0x0000ffff;
- cpcheck = cpcheck>>16;
-
+#define RTGEN_PROLOGUE \
+ unsigned int *hData; \
+ unsigned int idx; \
+ uint64 idx64; \
+ unsigned int nPos, ii; \
+ unsigned int cpcheck, checkpoint; \
+ \
+ __shared__ unsigned int shData[SHIDX(BLOCK_X_SIZE)]; \
+ __shared__ unsigned char cplChrSet[CHAR_SET_MAXLEN]; \
+ __shared__ unsigned char cplStart[PLAIN_MAX_SIZE]; \
+ __shared__ uint3 cplDimVec[PLAIN_MAX_SIZE]; \
+ \
+ if(threadIdx.x == 0) { \
+ nPos = ((((blockIdx.y<<GRID_X_L2) + blockIdx.x)<<BLOCK_X_L2) + threadIdx.x) << 1; \
+ for(ii = 0; ii < BLOCK_X_SIZE; ii++, nPos+=2) { \
+ hData = shData + ((ii>>4)<<8)+(ii&15); \
+ hData[SHIDX(0)] = dataHeap[nPos]; \
+ hData[SHIDX(1)] = dataHeap[nPos]>>32; \
+ hData[SHIDX(2)] = dataHeap[nPos+1]; \
+ } \
+ memcpy(cplChrSet, plChrSet, CHAR_SET_MAXLEN); \
+ memcpy(cplStart, plStart, PLAIN_MAX_SIZE); \
+ memcpy(cplDimVec, plDimVec, PLAIN_MAX_SIZE*sizeof(uint3)); \
+ } \
+ __syncthreads(); \
+ \
+ hData = shData + ((threadIdx.x>>4)<<8)+(threadIdx.x&15); \
+ \
+ idx64 = hData[SHIDX(1)]; \
+ idx64 = (idx64<<32) | hData[SHIDX(0)]; \
+ cpcheck = hData[SHIDX(2)]; \
+ checkpoint = cpcheck&0x0000ffff; \
+ cpcheck = cpcheck>>16; \
+ \
for(nPos = chainStart; nPos < chainStop; nPos++) {
- // transform to the plain text
- plain = 0x80;
- jj = (PLAIN_MAX_SIZE>>2)+1;
- for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {
- uiVal64 = idx64 + plStart[ii];
- uiVal64--;
- dimItem = plDimVec[ii];
- uiMul64 = (uint64)dimItem.y<<32;
- idx64 = __umul64hi(uiVal64, uiMul64);
- uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;
- uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);
- uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;
- idx64 += uiVal;
- if(uiDiv >= dimItem.x) {
- uiDiv -= dimItem.x;
- idx64++;
- }
- plain = (plain<<8) | plChrSet[dimItem.z + uiDiv];
- if((ii&3) == 2) {
- hData[SHIDX(jj--)] = plain;
- plain = 0;
- }
- }
- for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {
- uiVal = idx + plStart[ii];
- uiVal--;
- dimItem = plDimVec[ii];
- idx = __umulhi(uiVal, dimItem.y);
- uiDiv = uiVal - idx*dimItem.x;
- if(uiDiv >= dimItem.x) {
- uiDiv -= dimItem.x;
- idx++;
- }
- plain = (plain<<8) | plChrSet[dimItem.z + uiDiv];
- if((ii&3) == 2) {
- hData[SHIDX(jj--)] = plain;
- plain = 0;
- }
- }
-
- // prepare for MD5
- size = ii;
- ii = ((((3-(ii&3))<<3)-1)&0x1f)+1;
- plain = plain<<ii;
- for(jj++, kk = 0; jj <= (PLAIN_MAX_SIZE>>2)+1; plain = hData[SHIDX(jj++)], kk++)
- hData[SHIDX(kk)] = (plain>>ii)|(hData[SHIDX(jj)]<<(32-ii));
- hData[SHIDX(kk)] = plain>>ii;
- for(kk++; kk < 14; kk++)
- hData[SHIDX(kk)] = 0;
- hData[SHIDX(kk++)] = size<<3;
- hData[SHIDX(kk)] = 0;
- // hash
- MD5(hData);
- idx64 = hData[SHIDX(1)];
- idx64 = (idx64<<32) | hData[SHIDX(0)];
- idx64 += reduceOffset + nPos;
- uiDiv64 = __umul64hi(idx64, rPlainSpaceTotal);
- idx64 -= uiDiv64*plainSpaceTotal;
- if(idx64 >= plainSpaceTotal)
- idx64 -= plainSpaceTotal;
-
- if(cpcheck < plCpPosSize && nPos == plCpPos[cpcheck]) {
- checkpoint |= ((unsigned int)idx64&1) << cpcheck;
- cpcheck++;
- }
+#define RTGEN_EPILOGUE \
+ idx64 = hData[SHIDX(1)]; \
+ idx64 = (idx64<<32) | hData[SHIDX(0)]; \
+ idx64 += reduceOffset + nPos; \
+ uiDiv64 = __umul64hi(idx64, rPlainSpaceTotal); \
+ idx64 -= uiDiv64*plainSpaceTotal; \
+ if(idx64 >= plainSpaceTotal) \
+ idx64 -= plainSpaceTotal; \
+ \
+ if(cpcheck < plCpPosSize && nPos == plCpPos[cpcheck]) { \
+ checkpoint |= ((unsigned int)idx64&1) << cpcheck; \
+ cpcheck++; \
+ } \
+ } \
+ \
+ hData[SHIDX(0)] = idx64; \
+ hData[SHIDX(1)] = idx64>>32; \
+ hData[SHIDX(2)] = (cpcheck<<16)|(checkpoint&0x0000ffff); \
+ __syncthreads(); \
+ \
+ if(threadIdx.x == 0) { \
+ nPos = ((((blockIdx.y<<GRID_X_L2) + blockIdx.x)<<BLOCK_X_L2) + threadIdx.x) << 1; \
+ for(ii = 0; ii < BLOCK_X_SIZE; ii++, nPos+=2) { \
+ hData = shData + ((ii>>4)<<8)+(ii&15); \
+ dataHeap[nPos] = ((uint64)hData[SHIDX(1)]<<32)|(uint64)hData[SHIDX(0)]; \
+ dataHeap[nPos+1] = hData[SHIDX(2)]; \
+ } \
}
- hData[SHIDX(0)] = idx64;
- hData[SHIDX(1)] = idx64>>32;
- hData[SHIDX(2)] = (cpcheck<<16)|(checkpoint&0x0000ffff);
- __syncthreads();
-
- if(threadIdx.x == 0) {
- nPos = ((((blockIdx.y<<GRID_X_L2) + blockIdx.x)<<BLOCK_X_L2) + threadIdx.x) << 1;
- for(ii = 0; ii < BLOCK_X_SIZE; ii++, nPos+=2) {
- hData = shData + ((ii>>4)<<8)+(ii&15);
- dataHeap[nPos] = ((uint64)hData[SHIDX(1)]<<32)|(uint64)hData[SHIDX(0)];
- dataHeap[nPos+1] = hData[SHIDX(2)];
- }
- }
- __syncthreads();
-}
+#include "rcuda_md5.inc"
+#include "rcuda_md4.inc"
+#include "rcuda_ntlm.inc"
+#include "rcuda_sha1.inc"
+#include "rcuda_lm.inc"
extern "C" int CalcChainsOnCUDA(const rcuda::RCudaTask* task, uint64 *resultBuff) {
cudaError_t cuErr;
unsigned char *charSet;
int *cpPos;
uint64 uiVal64;
- time_t tStart, tEnd;
- if(task->hash != rcuda::RHASH_MD5)
- return 0;
+ if(task->charSetSize > CHAR_SET_MAXLEN)
+ return -1;
+
+ switch(task->hash) {
+ case rcuda::RHASH_MD5:
+ case rcuda::RHASH_MD4:
+ case rcuda::RHASH_NTLM:
+ case rcuda::RHASH_SHA1:
+ case rcuda::RHASH_LM:
+ break;
+ default:
+ return 0;
+ }
+
memset(buff, 0, PLAIN_MAX_SIZE);
cudaMalloc((void**)&data, task->idxCount*2*sizeof(uint64));
cudaMalloc((void**)&stPlain, PLAIN_MAX_SIZE);
- cudaMalloc((void**)&dimVec, task->dimVecSize*sizeof(uint3));
- cudaMalloc((void**)&charSet, task->charSetSize);
+ cudaMalloc((void**)&dimVec, max(task->dimVecSize, PLAIN_MAX_SIZE)*sizeof(uint3));
+ cudaMalloc((void**)&charSet, CHAR_SET_MAXLEN);
cudaMalloc((void**)&cpPos, task->cpPosSize*sizeof(int));
cudaMemcpy(data, resultBuff, task->idxCount*2*sizeof(uint64), cudaMemcpyHostToDevice);
cudaMemcpy(stPlain, buff, PLAIN_MAX_SIZE, cudaMemcpyHostToDevice);
cudaMemcpy(stPlain, task->stPlain, min(task->stPlainSize, PLAIN_MAX_SIZE), cudaMemcpyHostToDevice);
- cudaMemcpy(dimVec, task->dimVec, task->dimVecSize*sizeof(uint3), cudaMemcpyHostToDevice);
- cudaMemcpy(charSet, task->charSet, task->charSetSize, cudaMemcpyHostToDevice);
+ cudaMemcpy(dimVec, task->dimVec, min(task->dimVecSize, PLAIN_MAX_SIZE)*sizeof(uint3), cudaMemcpyHostToDevice);
+ cudaMemcpy(charSet, task->charSet, min(task->charSetSize, CHAR_SET_MAXLEN), cudaMemcpyHostToDevice);
cudaMemcpy(cpPos, task->cpPositions, task->cpPosSize*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(dataHeap, &data, sizeof(data));
grSizeX = GRID_X_SIZE;
dim3 numBlocks(grSizeX, grSizeY);
cuErr = cudaSuccess;
- tStart = time(NULL);
+
+ cudaSetDeviceFlags(cudaDeviceBlockingSync);
for(int idx = 0; idx < task->rainbowChainLen-1 && cuErr == cudaSuccess; idx+=KERN_CHAIN_SIZE) {
- RTGenMD5Kernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ switch(task->hash) {
+ case rcuda::RHASH_MD5:
+ RTGenMD5Kernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ break;
+ case rcuda::RHASH_MD4:
+ RTGenMD4Kernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ break;
+ case rcuda::RHASH_NTLM:
+ RTGenNTLMKernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ break;
+ case rcuda::RHASH_SHA1:
+ RTGenSHA1Kernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ break;
+ case rcuda::RHASH_LM:
+ RTGenLMKernel<<<numBlocks, BLOCK_X_SIZE>>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1));
+ break;
+ }
cuErr = cudaGetLastError();
if(cuErr == cudaSuccess)
cuErr = cudaThreadSynchronize();
-
}
- tEnd = time(NULL);
- fprintf(stderr, "Kernel run time: %i\n", (tEnd - tStart));
if(cuErr == cudaSuccess)
cudaMemcpy(resultBuff, data, task->idxCount*2*sizeof(uint64), cudaMemcpyDeviceToHost);
grSizeX = GRID_X_SIZE;
return grSizeX*grSizeY*BLOCK_X_SIZE;
}
+
+extern "C" int SetCudaDevice(int device) {
+ return cudaSetDevice(device);
+}
unsigned int rainbowChainLen;
};
-extern "C" int CalcChainsOnCUDA(const RCudaTask* task, uint64 *resultBuff);
+extern "C" int SetCudaDevice(int device);
extern "C" int GetChainsBufferSize(int minSize);
+extern "C" int CalcChainsOnCUDA(const RCudaTask* task, uint64 *resultBuff);
}
//============================================================================
// Name : rcuda_ext.cpp
// Author : Jan Kyska
-// Version : 0.9
+// Version : 1.00
// Description : A code to access internals of the CChainWalkContext
// for the CUDA generator of FreeRainbowTables
//============================================================================
stCharset &chs = CChainWalkContext::m_vCharset[jj];
nCharsetLen += chs.m_nPlainLenMax;
if(ii < nCharsetLen) { // We found the correct charset
- stPlain.push_back(nIndexOfX % chs.m_nPlainCharsetLen + 1);
+ //XXX from md5 only cuda stPlain.push_back(nIndexOfX % chs.m_nPlainCharsetLen + 1);
+ stPlain.push_back((unsigned char)(nIndexOfX % (uint64)chs.m_nPlainCharsetLen + 1));
nIndexOfX /= chs.m_nPlainCharsetLen;
}
}
--- /dev/null
+//============================================================================\r
+// Name : rcuda_lm.inc\r
+// Author : Jan Kyska\r
+// Version : 1.00\r
+// Description : LM hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_LM {\r
+\r
+#define ITERATIONS 16\r
+\r
+\r
+__device__ __constant__ int shifts2[16] = { 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 };\r
+\r
+__device__ __constant__ unsigned int des_skb[8][64] = {\r
+{\r
+/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
+0x00000000L,0x00000010L,0x20000000L,0x20000010L,\r
+0x00010000L,0x00010010L,0x20010000L,0x20010010L,\r
+0x00000800L,0x00000810L,0x20000800L,0x20000810L,\r
+0x00010800L,0x00010810L,0x20010800L,0x20010810L,\r
+0x00000020L,0x00000030L,0x20000020L,0x20000030L,\r
+0x00010020L,0x00010030L,0x20010020L,0x20010030L,\r
+0x00000820L,0x00000830L,0x20000820L,0x20000830L,\r
+0x00010820L,0x00010830L,0x20010820L,0x20010830L,\r
+0x00080000L,0x00080010L,0x20080000L,0x20080010L,\r
+0x00090000L,0x00090010L,0x20090000L,0x20090010L,\r
+0x00080800L,0x00080810L,0x20080800L,0x20080810L,\r
+0x00090800L,0x00090810L,0x20090800L,0x20090810L,\r
+0x00080020L,0x00080030L,0x20080020L,0x20080030L,\r
+0x00090020L,0x00090030L,0x20090020L,0x20090030L,\r
+0x00080820L,0x00080830L,0x20080820L,0x20080830L,\r
+0x00090820L,0x00090830L,0x20090820L,0x20090830L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */\r
+0x00000000L,0x02000000L,0x00002000L,0x02002000L,\r
+0x00200000L,0x02200000L,0x00202000L,0x02202000L,\r
+0x00000004L,0x02000004L,0x00002004L,0x02002004L,\r
+0x00200004L,0x02200004L,0x00202004L,0x02202004L,\r
+0x00000400L,0x02000400L,0x00002400L,0x02002400L,\r
+0x00200400L,0x02200400L,0x00202400L,0x02202400L,\r
+0x00000404L,0x02000404L,0x00002404L,0x02002404L,\r
+0x00200404L,0x02200404L,0x00202404L,0x02202404L,\r
+0x10000000L,0x12000000L,0x10002000L,0x12002000L,\r
+0x10200000L,0x12200000L,0x10202000L,0x12202000L,\r
+0x10000004L,0x12000004L,0x10002004L,0x12002004L,\r
+0x10200004L,0x12200004L,0x10202004L,0x12202004L,\r
+0x10000400L,0x12000400L,0x10002400L,0x12002400L,\r
+0x10200400L,0x12200400L,0x10202400L,0x12202400L,\r
+0x10000404L,0x12000404L,0x10002404L,0x12002404L,\r
+0x10200404L,0x12200404L,0x10202404L,0x12202404L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */\r
+0x00000000L,0x00000001L,0x00040000L,0x00040001L,\r
+0x01000000L,0x01000001L,0x01040000L,0x01040001L,\r
+0x00000002L,0x00000003L,0x00040002L,0x00040003L,\r
+0x01000002L,0x01000003L,0x01040002L,0x01040003L,\r
+0x00000200L,0x00000201L,0x00040200L,0x00040201L,\r
+0x01000200L,0x01000201L,0x01040200L,0x01040201L,\r
+0x00000202L,0x00000203L,0x00040202L,0x00040203L,\r
+0x01000202L,0x01000203L,0x01040202L,0x01040203L,\r
+0x08000000L,0x08000001L,0x08040000L,0x08040001L,\r
+0x09000000L,0x09000001L,0x09040000L,0x09040001L,\r
+0x08000002L,0x08000003L,0x08040002L,0x08040003L,\r
+0x09000002L,0x09000003L,0x09040002L,0x09040003L,\r
+0x08000200L,0x08000201L,0x08040200L,0x08040201L,\r
+0x09000200L,0x09000201L,0x09040200L,0x09040201L,\r
+0x08000202L,0x08000203L,0x08040202L,0x08040203L,\r
+0x09000202L,0x09000203L,0x09040202L,0x09040203L,\r
+},{\r
+/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */\r
+0x00000000L,0x00100000L,0x00000100L,0x00100100L,\r
+0x00000008L,0x00100008L,0x00000108L,0x00100108L,\r
+0x00001000L,0x00101000L,0x00001100L,0x00101100L,\r
+0x00001008L,0x00101008L,0x00001108L,0x00101108L,\r
+0x04000000L,0x04100000L,0x04000100L,0x04100100L,\r
+0x04000008L,0x04100008L,0x04000108L,0x04100108L,\r
+0x04001000L,0x04101000L,0x04001100L,0x04101100L,\r
+0x04001008L,0x04101008L,0x04001108L,0x04101108L,\r
+0x00020000L,0x00120000L,0x00020100L,0x00120100L,\r
+0x00020008L,0x00120008L,0x00020108L,0x00120108L,\r
+0x00021000L,0x00121000L,0x00021100L,0x00121100L,\r
+0x00021008L,0x00121008L,0x00021108L,0x00121108L,\r
+0x04020000L,0x04120000L,0x04020100L,0x04120100L,\r
+0x04020008L,0x04120008L,0x04020108L,0x04120108L,\r
+0x04021000L,0x04121000L,0x04021100L,0x04121100L,\r
+0x04021008L,0x04121008L,0x04021108L,0x04121108L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */\r
+0x00000000L,0x10000000L,0x00010000L,0x10010000L,\r
+0x00000004L,0x10000004L,0x00010004L,0x10010004L,\r
+0x20000000L,0x30000000L,0x20010000L,0x30010000L,\r
+0x20000004L,0x30000004L,0x20010004L,0x30010004L,\r
+0x00100000L,0x10100000L,0x00110000L,0x10110000L,\r
+0x00100004L,0x10100004L,0x00110004L,0x10110004L,\r
+0x20100000L,0x30100000L,0x20110000L,0x30110000L,\r
+0x20100004L,0x30100004L,0x20110004L,0x30110004L,\r
+0x00001000L,0x10001000L,0x00011000L,0x10011000L,\r
+0x00001004L,0x10001004L,0x00011004L,0x10011004L,\r
+0x20001000L,0x30001000L,0x20011000L,0x30011000L,\r
+0x20001004L,0x30001004L,0x20011004L,0x30011004L,\r
+0x00101000L,0x10101000L,0x00111000L,0x10111000L,\r
+0x00101004L,0x10101004L,0x00111004L,0x10111004L,\r
+0x20101000L,0x30101000L,0x20111000L,0x30111000L,\r
+0x20101004L,0x30101004L,0x20111004L,0x30111004L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */\r
+0x00000000L,0x08000000L,0x00000008L,0x08000008L,\r
+0x00000400L,0x08000400L,0x00000408L,0x08000408L,\r
+0x00020000L,0x08020000L,0x00020008L,0x08020008L,\r
+0x00020400L,0x08020400L,0x00020408L,0x08020408L,\r
+0x00000001L,0x08000001L,0x00000009L,0x08000009L,\r
+0x00000401L,0x08000401L,0x00000409L,0x08000409L,\r
+0x00020001L,0x08020001L,0x00020009L,0x08020009L,\r
+0x00020401L,0x08020401L,0x00020409L,0x08020409L,\r
+0x02000000L,0x0A000000L,0x02000008L,0x0A000008L,\r
+0x02000400L,0x0A000400L,0x02000408L,0x0A000408L,\r
+0x02020000L,0x0A020000L,0x02020008L,0x0A020008L,\r
+0x02020400L,0x0A020400L,0x02020408L,0x0A020408L,\r
+0x02000001L,0x0A000001L,0x02000009L,0x0A000009L,\r
+0x02000401L,0x0A000401L,0x02000409L,0x0A000409L,\r
+0x02020001L,0x0A020001L,0x02020009L,0x0A020009L,\r
+0x02020401L,0x0A020401L,0x02020409L,0x0A020409L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */\r
+0x00000000L,0x00000100L,0x00080000L,0x00080100L,\r
+0x01000000L,0x01000100L,0x01080000L,0x01080100L,\r
+0x00000010L,0x00000110L,0x00080010L,0x00080110L,\r
+0x01000010L,0x01000110L,0x01080010L,0x01080110L,\r
+0x00200000L,0x00200100L,0x00280000L,0x00280100L,\r
+0x01200000L,0x01200100L,0x01280000L,0x01280100L,\r
+0x00200010L,0x00200110L,0x00280010L,0x00280110L,\r
+0x01200010L,0x01200110L,0x01280010L,0x01280110L,\r
+0x00000200L,0x00000300L,0x00080200L,0x00080300L,\r
+0x01000200L,0x01000300L,0x01080200L,0x01080300L,\r
+0x00000210L,0x00000310L,0x00080210L,0x00080310L,\r
+0x01000210L,0x01000310L,0x01080210L,0x01080310L,\r
+0x00200200L,0x00200300L,0x00280200L,0x00280300L,\r
+0x01200200L,0x01200300L,0x01280200L,0x01280300L,\r
+0x00200210L,0x00200310L,0x00280210L,0x00280310L,\r
+0x01200210L,0x01200310L,0x01280210L,0x01280310L,\r
+},{\r
+/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */\r
+0x00000000L,0x04000000L,0x00040000L,0x04040000L,\r
+0x00000002L,0x04000002L,0x00040002L,0x04040002L,\r
+0x00002000L,0x04002000L,0x00042000L,0x04042000L,\r
+0x00002002L,0x04002002L,0x00042002L,0x04042002L,\r
+0x00000020L,0x04000020L,0x00040020L,0x04040020L,\r
+0x00000022L,0x04000022L,0x00040022L,0x04040022L,\r
+0x00002020L,0x04002020L,0x00042020L,0x04042020L,\r
+0x00002022L,0x04002022L,0x00042022L,0x04042022L,\r
+0x00000800L,0x04000800L,0x00040800L,0x04040800L,\r
+0x00000802L,0x04000802L,0x00040802L,0x04040802L,\r
+0x00002800L,0x04002800L,0x00042800L,0x04042800L,\r
+0x00002802L,0x04002802L,0x00042802L,0x04042802L,\r
+0x00000820L,0x04000820L,0x00040820L,0x04040820L,\r
+0x00000822L,0x04000822L,0x00040822L,0x04040822L,\r
+0x00002820L,0x04002820L,0x00042820L,0x04042820L,\r
+0x00002822L,0x04002822L,0x00042822L,0x04042822L,\r
+}};\r
+\r
+__device__ __constant__ unsigned int des_SPtrans[8][64] = {\r
+{\r
+/* nibble 0 */\r
+0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,\r
+0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,\r
+0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,\r
+0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,\r
+0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,\r
+0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,\r
+0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,\r
+0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,\r
+0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,\r
+0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,\r
+0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,\r
+0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,\r
+0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,\r
+0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,\r
+0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,\r
+0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L,\r
+},{\r
+/* nibble 1 */\r
+0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,\r
+0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,\r
+0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,\r
+0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,\r
+0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,\r
+0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,\r
+0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,\r
+0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,\r
+0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,\r
+0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,\r
+0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,\r
+0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,\r
+0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,\r
+0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,\r
+0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,\r
+0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L,\r
+},{\r
+/* nibble 2 */\r
+0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,\r
+0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,\r
+0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,\r
+0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,\r
+0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,\r
+0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,\r
+0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,\r
+0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,\r
+0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,\r
+0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,\r
+0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,\r
+0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,\r
+0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,\r
+0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,\r
+0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,\r
+0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L,\r
+},{\r
+/* nibble 3 */\r
+0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,\r
+0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,\r
+0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,\r
+0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,\r
+0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,\r
+0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,\r
+0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,\r
+0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,\r
+0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,\r
+0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,\r
+0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,\r
+0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,\r
+0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,\r
+0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,\r
+0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,\r
+0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L,\r
+},{\r
+/* nibble 4 */\r
+0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,\r
+0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,\r
+0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,\r
+0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,\r
+0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,\r
+0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,\r
+0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,\r
+0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,\r
+0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,\r
+0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,\r
+0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,\r
+0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,\r
+0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,\r
+0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,\r
+0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,\r
+0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L,\r
+},{\r
+/* nibble 5 */\r
+0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,\r
+0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,\r
+0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,\r
+0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,\r
+0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,\r
+0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,\r
+0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,\r
+0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,\r
+0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,\r
+0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,\r
+0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,\r
+0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,\r
+0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,\r
+0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,\r
+0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,\r
+0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,\r
+},{\r
+/* nibble 6 */\r
+0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,\r
+0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,\r
+0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,\r
+0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,\r
+0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,\r
+0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,\r
+0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,\r
+0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,\r
+0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,\r
+0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,\r
+0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,\r
+0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,\r
+0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,\r
+0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,\r
+0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,\r
+0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,\r
+},{\r
+/* nibble 7 */\r
+0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,\r
+0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,\r
+0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,\r
+0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,\r
+0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,\r
+0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,\r
+0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,\r
+0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,\r
+0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,\r
+0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,\r
+0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,\r
+0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,\r
+0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,\r
+0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,\r
+0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,\r
+0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,\r
+}};\r
+\r
+\r
+__device__ void PERM_OP(int ia, int ib, int it, unsigned int n, unsigned int m, unsigned int* data) {\r
+ data[it] =((data[ia] >> n ) ^ data[ib]) & m;\r
+ data[ib] ^= data[it];\r
+ data[ia] ^= data[it] << n;\r
+}\r
+\r
+__device__ void HPERM_OP(int ia, int it, int n, unsigned int m, unsigned int* data) {\r
+ data[it] = ((data[ia] << (16-n)) ^ data[ia]) & m;\r
+ data[ia] = data[ia] ^ data[it] ^ (data[it]>>(16-n));\r
+}\r
+\r
+__device__ void IP(int il, int ir, int it, unsigned int* data) {\r
+ PERM_OP(ir, il, it, 4, 0x0f0f0f0f, data);\r
+ PERM_OP(il, ir, it, 16, 0x0000ffff, data);\r
+ PERM_OP(ir, il, it, 2, 0x33333333, data);\r
+ PERM_OP(il, ir, it, 8, 0x00ff00ff, data);\r
+ PERM_OP(ir, il, it, 1, 0x55555555, data);\r
+}\r
+\r
+__device__ void FP(int il, int ir, int it, unsigned int* data) {\r
+ PERM_OP(il, ir, it, 1, 0x55555555, data);\r
+ PERM_OP(ir, il, it, 8, 0x00ff00ff, data);\r
+ PERM_OP(il, ir, it, 2, 0x33333333, data);\r
+ PERM_OP(ir, il, it, 16, 0x0000ffff, data);\r
+ PERM_OP(il, ir, it, 4, 0x0f0f0f0f, data);\r
+}\r
+\r
+__device__ unsigned int D_ENCRYPT(unsigned int ll, unsigned int uu, unsigned int tt) {\r
+ tt = (tt>>4)|(tt<<28);\r
+ return ll ^ des_SPtrans[0][(uu>>2)&0x3f] ^\r
+ des_SPtrans[2][(uu>>10)&0x3f] ^\r
+ des_SPtrans[4][(uu>>18)&0x3f] ^\r
+ des_SPtrans[6][(uu>>26)&0x3f] ^\r
+ des_SPtrans[1][(tt>>2)&0x3f] ^\r
+ des_SPtrans[3][(tt>>10)&0x3f] ^\r
+ des_SPtrans[5][(tt>>18)&0x3f] ^\r
+ des_SPtrans[7][(tt>>26)&0x3f];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenLMKernel(unsigned int chainStart, unsigned int chainStop) {\r
+ uint3 dimItem;\r
+ unsigned int *hData2, *hData3;\r
+ uint64 uiDiv64, uiVal64, uiMul64;\r
+ unsigned int uiVal, uiDiv;\r
+ unsigned int jj, rs, rt;\r
+\r
+ __shared__ unsigned int shData2[SHIDX(BLOCK_X_SIZE)];\r
+ __shared__ unsigned int shData3[BLOCK_X_SIZE<<2];\r
+ hData2 = shData2 + ((threadIdx.x>>4)<<8)+(threadIdx.x&15);\r
+ hData3 = shData3 + ((threadIdx.x>>4)<<6)+(threadIdx.x&15);\r
+\r
+ RTGEN_PROLOGUE;\r
+\r
+ // transform to the plain text\r
+ for(ii = 0; ii < 8; ii++)\r
+ hData[SHIDX(ii)] = 0;\r
+ \r
+ for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal64 = idx64 + cplStart[ii];\r
+ uiVal64--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ uiMul64 = (uint64)dimItem.y<<32;\r
+ idx64 = __umul64hi(uiVal64, uiMul64);\r
+ uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+ uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+ uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+ idx64 += uiVal;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx64++;\r
+ }\r
+ hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
+ }\r
+\r
+ for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal = idx + cplStart[ii];\r
+ uiVal--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ idx = __umulhi(uiVal, dimItem.y);\r
+ uiDiv = uiVal - idx*dimItem.x;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx++;\r
+ }\r
+ hData[SHIDX(ii&7)] = cplChrSet[dimItem.z + uiDiv];\r
+ }\r
+\r
+ for(jj = 8, ii--; jj < 15; jj++, ii--)\r
+ hData[SHIDX(jj)] = hData[SHIDX(ii&7)];\r
+\r
+ // set key\r
+ ii = 255;\r
+ uiVal = ((hData[SHIDX(10)] << 5) | (hData[SHIDX(11)] >> 3))ⅈ\r
+ uiVal = (uiVal<<8) | (((hData[SHIDX(9)] << 6) | (hData[SHIDX(10)] >> 2))&ii);\r
+ uiVal = (uiVal<<8) | (((hData[SHIDX(8)] << 7) | (hData[SHIDX(9)] >> 1))&ii);\r
+ uiVal = (uiVal<<8) | hData[SHIDX(8)];\r
+ \r
+ uiDiv = (hData[SHIDX(14)] << 1)ⅈ\r
+ uiDiv = (uiDiv<<8) | (((hData[SHIDX(13)] << 2) | (hData[SHIDX(14)] >> 6))&ii);\r
+ uiDiv = (uiDiv<<8) | (((hData[SHIDX(12)] << 3) | (hData[SHIDX(13)] >> 5))&ii);\r
+ uiDiv = (uiDiv<<8) | (((hData[SHIDX(11)] << 4) | (hData[SHIDX(12)] >> 4))&ii);\r
+\r
+ hData[SHIDX(0)] = uiVal;\r
+ hData[SHIDX(1)] = uiDiv;\r
+ RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 4, 0x0f0f0f0f, hData);\r
+ RC_LM::HPERM_OP(SHIDX(0), SHIDX(2), -2, 0xcccc0000, hData);\r
+ RC_LM::HPERM_OP(SHIDX(1), SHIDX(2), -2, 0xcccc0000, hData);\r
+ RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
+ RC_LM::PERM_OP(SHIDX(0), SHIDX(1), SHIDX(2), 8, 0x00ff00ff, hData);\r
+ RC_LM::PERM_OP(SHIDX(1), SHIDX(0), SHIDX(2), 1, 0x55555555, hData);\r
+ uiVal = hData[SHIDX(0)];\r
+ uiDiv = hData[SHIDX(1)];\r
+ uiDiv = ((uiDiv&0x000000ff)<<16) | (uiDiv&0x0000ff00) | ((uiDiv&0x00ff0000)>>16) | ((uiVal&0xf0000000)>>4);\r
+ uiVal &= 0x0fffffff;\r
+\r
+ for(ii = 0; ii < ITERATIONS; ii++) {\r
+ if(RC_LM::shifts2[ii]) { \r
+ uiVal = ((uiVal>>2)|(uiVal<<26)); \r
+ uiDiv =((uiDiv>>2)|(uiDiv<<26)); \r
+ } else { \r
+ uiVal = ((uiVal>>1)|(uiVal<<27)); \r
+ uiDiv = ((uiDiv>>1)|(uiDiv<<27)); \r
+ }\r
+ uiVal &= 0x0fffffff;\r
+ uiDiv &= 0x0fffffff;\r
+\r
+ rs = RC_LM::des_skb[0][uiVal&0x3f] |\r
+ RC_LM::des_skb[1][((uiVal>>6)&0x03)|((uiVal>>7)&0x3c)] |\r
+ RC_LM::des_skb[2][((uiVal>>13)&0x0f)|((uiVal>>14)&0x30)] |\r
+ RC_LM::des_skb[3][((uiVal>>20)&0x01)|((uiVal>>21)&0x06) |\r
+ ((uiVal>>22)&0x38)];\r
+ rt = RC_LM::des_skb[4][uiDiv&0x3f] |\r
+ RC_LM::des_skb[5][((uiDiv>>7)&0x03)|((uiDiv>>8)&0x3c)] |\r
+ RC_LM::des_skb[6][(uiDiv>>15)&0x3f] |\r
+ RC_LM::des_skb[7][((uiDiv>>21)&0x0f)|((uiDiv>>22)&0x30)];\r
+\r
+ /* table contained 0213 4657 */\r
+ idx = (rt<<16)|(rs&0x0000ffff);\r
+ hData[SHIDX(ii)] = (idx>>30)|(idx<<2);\r
+ idx = (rs>>16)|(rt&0xffff0000);\r
+ hData2[SHIDX(ii)] = (idx>>26)|(idx<<6);\r
+ }\r
+\r
+ // encrypt the "magic" data\r
+ hData3[SHIDX(0)] = 0x2153474B;\r
+ hData3[SHIDX(1)] = 0x25242340;\r
+\r
+ RC_LM::IP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
+ uiVal = hData3[SHIDX(0)];\r
+ uiVal = ((uiVal>>29)|(uiVal<<3));\r
+ uiDiv = hData3[SHIDX(1)];\r
+ uiDiv = ((uiDiv>>29)|(uiDiv<<3));\r
+\r
+ for(ii = 0; ii < 16; ii+=2) {\r
+ uiDiv = RC_LM::D_ENCRYPT(uiDiv, uiVal^hData[SHIDX(ii)], uiVal^hData2[SHIDX(ii)]);\r
+ uiVal = RC_LM::D_ENCRYPT(uiVal, uiDiv^hData[SHIDX(ii+1)], uiDiv^hData2[SHIDX(ii+1)]);\r
+ }\r
+\r
+ hData3[SHIDX(0)] = ((uiVal>>3)|(uiVal<<29));\r
+ hData3[SHIDX(1)] = ((uiDiv>>3)|(uiDiv<<29));\r
+ RC_LM::FP(SHIDX(0), SHIDX(1), SHIDX(2), hData3);\r
+ \r
+ hData[SHIDX(0)] = hData3[SHIDX(1)];\r
+ hData[SHIDX(1)] = hData3[SHIDX(0)];\r
+ RTGEN_EPILOGUE\r
+}\r
--- /dev/null
+//============================================================================\r
+// Name : rcuda_md4.inc\r
+// Author : Jan Kyska\r
+// Version : 1.00\r
+// Description : MD4 hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_MD4 {\r
+\r
+__device__ __constant__ unsigned int h[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };\r
+__device__ __constant__ unsigned char r[48] = { \\r
+ 3, 7, 11, 19, 3, 7, 11, 19, 3, 7, 11, 19, 3, 7, 11, 19, \\r
+ 3, 5, 9, 13, 3, 5, 9, 13, 3, 5, 9, 13, 3, 5, 9, 13, \\r
+ 3, 9, 11, 15, 3, 9, 11, 15, 3, 9, 11, 15, 3, 9, 11, 15 };\r
+__device__ __constant__ unsigned char g[48] = { \\r
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \\r
+ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, \\r
+ 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };\r
+\r
+__device__ unsigned int FF(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&c)|((~b)&d)) + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&c)|(b&d)|(c&d)) + data[SHIDX(g[i])] + 0x5a827999u;\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + (b^c^d) + data[SHIDX(g[i])] + 0x6ed9eba1u;\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ void MD4(unsigned int* dataHash) {\r
+ unsigned int a = h[0], b = h[1], c = h[2], d = h[3], x;\r
+ int ii;\r
+\r
+ // Round 1\r
+ for(ii = 0; ii < 16; ii++) {\r
+ x = b;\r
+ b = FF(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+\r
+ // Round 2\r
+ for(; ii < 32; ii++) {\r
+ x = b;\r
+ b = GG(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ // Round 3\r
+ for(; ii < 48; ii++) {\r
+ x = b;\r
+ b = HH(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ dataHash[SHIDX(0)] = a + h[0];\r
+ dataHash[SHIDX(1)] = b + h[1];\r
+ dataHash[SHIDX(2)] = c + h[2];\r
+ dataHash[SHIDX(3)] = d + h[3];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenMD4Kernel(unsigned int chainStart, unsigned int chainStop) {\r
+ uint3 dimItem;\r
+ uint64 uiDiv64, uiVal64, uiMul64;\r
+ unsigned int uiVal, uiDiv;\r
+ unsigned int size, jj;\r
+ unsigned int plain;\r
+\r
+ RTGEN_PROLOGUE;\r
+ \r
+ // transform to the plain text\r
+ plain = 0x80;\r
+ jj = (PLAIN_MAX_SIZE>>2)+1;\r
+\r
+ for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal64 = idx64 + cplStart[ii];\r
+ uiVal64--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ uiMul64 = (uint64)dimItem.y<<32;\r
+ idx64 = __umul64hi(uiVal64, uiMul64);\r
+ uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+ uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+ uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+ idx64 += uiVal;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx64++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal = idx + cplStart[ii];\r
+ uiVal--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ idx = __umulhi(uiVal, dimItem.y);\r
+ uiDiv = uiVal - idx*dimItem.x;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ // prepare for MD4\r
+ size = ii;\r
+ ii = ((((3-(ii&3))<<3)-1)&0x1f)+1;\r
+ plain = plain<<ii;\r
+ for(jj++, idx = 0; jj <= (PLAIN_MAX_SIZE>>2)+1; plain = hData[SHIDX(jj++)], idx++)\r
+ hData[SHIDX(idx)] = (plain>>ii)|(hData[SHIDX(jj)]<<(32-ii));\r
+ hData[SHIDX(idx)] = plain>>ii;\r
+ for(idx++; idx < 14; idx++)\r
+ hData[SHIDX(idx)] = 0;\r
+ hData[SHIDX(idx++)] = size<<3;\r
+ hData[SHIDX(idx)] = 0;\r
+\r
+ // hash\r
+ RC_MD4::MD4(hData);\r
+\r
+ RTGEN_EPILOGUE\r
+}\r
--- /dev/null
+//============================================================================\r
+// Name : rcuda_md5.inc\r
+// Author : Jan Kyska\r
+// Version : 1.00\r
+// Description : MD5 hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_MD5 {\r
+\r
+__device__ __constant__ unsigned int h[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };\r
+__device__ __constant__ unsigned char r[64] = { \\r
+ 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, \\r
+ 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, \\r
+ 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, \\r
+ 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 };\r
+__device__ __constant__ unsigned char g[64] = { \\r
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \\r
+ 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, \\r
+ 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, \\r
+ 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 };\r
+__device__ __constant__ unsigned int ac[64] = { \\r
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, \\r
+ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, \\r
+ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, \\r
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, \\r
+ \\r
+ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, \\r
+ 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, \\r
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, \\r
+ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, \\r
+ \\r
+ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, \\r
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, \\r
+ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, \\r
+ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, \\r
+ \\r
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, \\r
+ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, \\r
+ 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, \\r
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };\r
+\r
+__device__ unsigned int FF(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&c)|((~b)&d)) + ac[i] + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ ret += b;\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&d)|(c&(~d))) + ac[i] + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ ret += b;\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + (b^c^d) + ac[i] + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ ret += b;\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int II(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + (c^(b|(~d))) + ac[i] + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ ret += b;\r
+ return ret;\r
+}\r
+\r
+__device__ void MD5(unsigned int* dataHash) {\r
+ unsigned int a = h[0], b = h[1], c = h[2], d = h[3], x;\r
+ int ii;\r
+\r
+ // Round 1\r
+ for(ii = 0; ii < 16; ii++) {\r
+ x = b;\r
+ b = FF(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+\r
+ // Round 2\r
+ for(; ii < 32; ii++) {\r
+ x = b;\r
+ b = GG(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ // Round 3\r
+ for(; ii < 48; ii++) {\r
+ x = b;\r
+ b = HH(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ // Round 4\r
+ for(; ii < 64; ii++) {\r
+ x = b;\r
+ b = II(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+\r
+ dataHash[SHIDX(0)] = a + h[0];\r
+ dataHash[SHIDX(1)] = b + h[1];\r
+ dataHash[SHIDX(2)] = c + h[2];\r
+ dataHash[SHIDX(3)] = d + h[3];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenMD5Kernel(unsigned int chainStart, unsigned int chainStop) {\r
+ uint3 dimItem;\r
+ uint64 uiDiv64, uiVal64, uiMul64;\r
+ unsigned int uiVal, uiDiv;\r
+ unsigned int size, jj;\r
+ unsigned int plain;\r
+\r
+ RTGEN_PROLOGUE;\r
+ \r
+ // transform to the plain text\r
+ plain = 0x80;\r
+ jj = (PLAIN_MAX_SIZE>>2)+1;\r
+\r
+ for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal64 = idx64 + cplStart[ii];\r
+ uiVal64--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ uiMul64 = (uint64)dimItem.y<<32;\r
+ idx64 = __umul64hi(uiVal64, uiMul64);\r
+ uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+ uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+ uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+ idx64 += uiVal;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx64++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal = idx + cplStart[ii];\r
+ uiVal--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ idx = __umulhi(uiVal, dimItem.y);\r
+ uiDiv = uiVal - idx*dimItem.x;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ // prepare for MD5\r
+ size = ii;\r
+ ii = ((((3-(ii&3))<<3)-1)&0x1f)+1;\r
+ plain = plain<<ii;\r
+ for(jj++, idx = 0; jj <= (PLAIN_MAX_SIZE>>2)+1; plain = hData[SHIDX(jj++)], idx++)\r
+ hData[SHIDX(idx)] = (plain>>ii)|(hData[SHIDX(jj)]<<(32-ii));\r
+ hData[SHIDX(idx)] = plain>>ii;\r
+ for(idx++; idx < 14; idx++)\r
+ hData[SHIDX(idx)] = 0;\r
+ hData[SHIDX(idx++)] = size<<3;\r
+ hData[SHIDX(idx)] = 0;\r
+\r
+ // hash\r
+ RC_MD5::MD5(hData);\r
+\r
+ RTGEN_EPILOGUE\r
+}\r
--- /dev/null
+//============================================================================\r
+// Name : rcuda_ntlm.inc\r
+// Author : Jan Kyska\r
+// Version : 1.00\r
+// Description : NTLM hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_NTLM {\r
+\r
+__device__ __constant__ unsigned int h[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };\r
+__device__ __constant__ unsigned char r[48] = { \\r
+ 3, 7, 11, 19, 3, 7, 11, 19, 3, 7, 11, 19, 3, 7, 11, 19, \\r
+ 3, 5, 9, 13, 3, 5, 9, 13, 3, 5, 9, 13, 3, 5, 9, 13, \\r
+ 3, 9, 11, 15, 3, 9, 11, 15, 3, 9, 11, 15, 3, 9, 11, 15 };\r
+__device__ __constant__ unsigned char g[48] = { \\r
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \\r
+ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, \\r
+ 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };\r
+\r
+__device__ unsigned int FF(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&c)|((~b)&d)) + data[SHIDX(g[i])];\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + ((b&c)|(b&d)|(c&d)) + data[SHIDX(g[i])] + 0x5a827999u;\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) {\r
+ unsigned int ret;\r
+ ret = a + (b^c^d) + data[SHIDX(g[i])] + 0x6ed9eba1u;\r
+ ret = (ret<<r[i])|(ret>>(32-r[i]));\r
+ return ret;\r
+}\r
+\r
+__device__ void MD4(unsigned int* dataHash) {\r
+ unsigned int a = h[0], b = h[1], c = h[2], d = h[3], x;\r
+ int ii;\r
+\r
+ // Round 1\r
+ for(ii = 0; ii < 16; ii++) {\r
+ x = b;\r
+ b = FF(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+\r
+ // Round 2\r
+ for(; ii < 32; ii++) {\r
+ x = b;\r
+ b = GG(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ // Round 3\r
+ for(; ii < 48; ii++) {\r
+ x = b;\r
+ b = HH(a, b, c, d, ii, dataHash);\r
+ a = d; d = c; c = x;\r
+ }\r
+ \r
+ dataHash[SHIDX(0)] = a + h[0];\r
+ dataHash[SHIDX(1)] = b + h[1];\r
+ dataHash[SHIDX(2)] = c + h[2];\r
+ dataHash[SHIDX(3)] = d + h[3];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenNTLMKernel(unsigned int chainStart, unsigned int chainStop) {\r
+ uint3 dimItem;\r
+ uint64 uiDiv64, uiVal64, uiMul64;\r
+ unsigned int uiVal, uiDiv;\r
+ unsigned int size, jj, kk;\r
+ unsigned int plain;\r
+\r
+ RTGEN_PROLOGUE;\r
+ \r
+ // transform to the plain text\r
+ plain = 0x80;\r
+ jj = (PLAIN_MAX_SIZE>>1)+1;\r
+\r
+ for(ii = kk = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal64 = idx64 + cplStart[ii];\r
+ uiVal64--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ uiMul64 = (uint64)dimItem.y<<32;\r
+ idx64 = __umul64hi(uiVal64, uiMul64);\r
+ uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+ uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+ uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+ idx64 += uiVal;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx64++;\r
+ }\r
+ plain = (plain<<8);\r
+ if((kk++)&1) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ }\r
+\r
+ for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal = idx + cplStart[ii];\r
+ uiVal--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ idx = __umulhi(uiVal, dimItem.y);\r
+ uiDiv = uiVal - idx*dimItem.x;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx++;\r
+ }\r
+ plain = (plain<<8);\r
+ if((kk++)&1) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ }\r
+\r
+ // prepare for MD4\r
+ size = (ii<<1);\r
+ ii = (((kk^1)&1)<<4)+8;\r
+ plain = plain<<ii;\r
+ for(jj++, idx = 0; jj <= (PLAIN_MAX_SIZE>>1)+1; plain = hData[SHIDX(jj++)], idx++)\r
+ hData[SHIDX(idx)] = (plain>>ii)|(hData[SHIDX(jj)]<<(32-ii));\r
+ hData[SHIDX(idx)] = plain>>ii;\r
+ for(idx++; idx < 14; idx++)\r
+ hData[SHIDX(idx)] = 0;\r
+ hData[SHIDX(idx++)] = size<<3;\r
+ hData[SHIDX(idx)] = 0;\r
+\r
+ // hash\r
+ RC_NTLM::MD4(hData);\r
+\r
+ RTGEN_EPILOGUE\r
+}\r
--- /dev/null
+//============================================================================\r
+// Name : rcuda_sha1.inc\r
+// Author : Jan Kyska\r
+// Version : 1.00\r
+// Description : SHA1 hash kernel for Generator of FreeRainbowTables\r
+//============================================================================ \r
+\r
+namespace RC_SHA1 {\r
+\r
+__device__ __constant__ unsigned int h[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };\r
+\r
+__device__ unsigned int SwapEndian(unsigned int n) {\r
+ return (n<<24)|((n&0x0000ff00)<<8)|((n>>8)&0x0000ff00)|(n>>24);\r
+}\r
+\r
+__device__ unsigned int FF(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, int i, unsigned int* data) {\r
+ return ((a<<5)|(a>>27)) + ((b&c)|((~b)&d)) + e + 0x5A827999 + data[SHIDX(i)];\r
+}\r
+\r
+__device__ unsigned int FF2(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, int i, unsigned int* data) {\r
+ unsigned int dt;\r
+ dt = data[SHIDX(i&15)]^data[SHIDX((i-3)&15)]^data[SHIDX((i-8)&15)]^data[SHIDX((i-14)&15)];\r
+ data[SHIDX(i&15)] = dt = ((dt<<1)|(dt>>31));\r
+ return ((a<<5)|(a>>27)) + ((b&c)|((~b)&d)) + e + 0x5A827999 + dt;\r
+}\r
+\r
+__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, int i, unsigned int* data) {\r
+ unsigned int dt;\r
+ dt = data[SHIDX(i&15)]^data[SHIDX((i-3)&15)]^data[SHIDX((i-8)&15)]^data[SHIDX((i-14)&15)];\r
+ data[SHIDX(i&15)] = dt = ((dt<<1)|(dt>>31));\r
+ return ((a<<5)|(a>>27)) + (b^c^d) + e + 0x6ED9EBA1 + dt;\r
+}\r
+\r
+__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, int i, unsigned int* data) {\r
+ unsigned int dt;\r
+ dt = data[SHIDX(i&15)]^data[SHIDX((i-3)&15)]^data[SHIDX((i-8)&15)]^data[SHIDX((i-14)&15)];\r
+ data[SHIDX(i&15)] = dt = ((dt<<1)|(dt>>31));\r
+ return ((a<<5)|(a>>27)) + ((b&c)|(b&d)|(c&d)) + e + 0x8F1BBCDC + dt;\r
+}\r
+\r
+__device__ unsigned int II(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, int i, unsigned int* data) {\r
+ unsigned int dt;\r
+ dt = data[SHIDX(i&15)]^data[SHIDX((i-3)&15)]^data[SHIDX((i-8)&15)]^data[SHIDX((i-14)&15)];\r
+ data[SHIDX(i&15)] = dt = ((dt<<1)|(dt>>31));\r
+ return ((a<<5)|(a>>27)) + (b^c^d) + e + 0xCA62C1D6 + dt;\r
+}\r
+\r
+__device__ void SHA1(unsigned int* dataHash) {\r
+ unsigned int a = h[0], b = h[1], c = h[2], d = h[3], e = h[4], x;\r
+ int ii;\r
+\r
+ // Round 1\r
+ for(ii = 0; ii < 16; ii++) {\r
+ x = FF(a, b, c, d, e, ii, dataHash);\r
+ e = d; d = c; c = ((b<<30)|(b>>2)); b = a; a = x;\r
+ }\r
+ for(; ii < 20; ii++) {\r
+ x = FF2(a, b, c, d, e, ii, dataHash);\r
+ e = d; d = c; c = ((b<<30)|(b>>2)); b = a; a = x;\r
+ }\r
+ \r
+ // Round 2\r
+ for(; ii < 40; ii++) {\r
+ x = GG(a, b, c, d, e, ii, dataHash);\r
+ e = d; d = c; c = ((b<<30)|(b>>2)); b = a; a = x;\r
+ }\r
+\r
+ // Round 3\r
+ for(; ii < 60; ii++) {\r
+ x = HH(a, b, c, d, e, ii, dataHash);\r
+ e = d; d = c; c = ((b<<30)|(b>>2)); b = a; a = x;\r
+ }\r
+ \r
+ // Round 4\r
+ for(; ii < 80; ii++) {\r
+ x = II(a, b, c, d, e, ii, dataHash);\r
+ e = d; d = c; c = ((b<<30)|(b>>2)); b = a; a = x;\r
+ }\r
+\r
+ dataHash[SHIDX(0)] = a + h[0];\r
+ dataHash[SHIDX(1)] = b + h[1];\r
+ dataHash[SHIDX(2)] = c + h[2];\r
+ dataHash[SHIDX(3)] = d + h[3];\r
+ dataHash[SHIDX(4)] = e + h[4];\r
+}\r
+\r
+}\r
+\r
+__global__ void RTGenSHA1Kernel(unsigned int chainStart, unsigned int chainStop) {\r
+ uint3 dimItem;\r
+ uint64 uiDiv64, uiVal64, uiMul64;\r
+ unsigned int uiVal, uiDiv;\r
+ unsigned int size, jj;\r
+ unsigned int plain;\r
+\r
+ RTGEN_PROLOGUE;\r
+ \r
+ // transform to the plain text\r
+ plain = 0x80;\r
+ jj = (PLAIN_MAX_SIZE>>2)+1;\r
+\r
+ for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal64 = idx64 + cplStart[ii];\r
+ uiVal64--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ uiMul64 = (uint64)dimItem.y<<32;\r
+ idx64 = __umul64hi(uiVal64, uiMul64);\r
+ uiDiv64 = uiVal64 - idx64*(uint64)dimItem.x;\r
+ uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y);\r
+ uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x;\r
+ idx64 += uiVal;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx64++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) {\r
+ uiVal = idx + cplStart[ii];\r
+ uiVal--;\r
+ dimItem = cplDimVec[ii];\r
+\r
+ idx = __umulhi(uiVal, dimItem.y);\r
+ uiDiv = uiVal - idx*dimItem.x;\r
+ if(uiDiv >= dimItem.x) {\r
+ uiDiv -= dimItem.x;\r
+ idx++;\r
+ }\r
+ plain = (plain<<8) | cplChrSet[dimItem.z + uiDiv];\r
+ if((ii&3) == 2) {\r
+ hData[SHIDX(jj--)] = plain;\r
+ plain = 0;\r
+ }\r
+ }\r
+\r
+ // prepare for SHA1\r
+ size = ii;\r
+ ii = ((((3-(ii&3))<<3)-1)&0x1f)+1;\r
+ plain = plain<<ii;\r
+ for(jj++, idx = 0; jj <= (PLAIN_MAX_SIZE>>2)+1; plain = hData[SHIDX(jj++)], idx++)\r
+ hData[SHIDX(idx)] = RC_SHA1::SwapEndian((plain>>ii)|(hData[SHIDX(jj)]<<(32-ii)));\r
+ hData[SHIDX(idx)] = RC_SHA1::SwapEndian(plain>>ii);\r
+ for(idx++; idx < 14; idx++)\r
+ hData[SHIDX(idx)] = 0;\r
+ hData[SHIDX(idx++)] = 0;\r
+ hData[SHIDX(idx)] = size<<3;\r
+\r
+ // hash\r
+ RC_SHA1::SHA1(hData);\r
+ \r
+ hData[SHIDX(0)] = RC_SHA1::SwapEndian(hData[SHIDX(0)]);\r
+ hData[SHIDX(1)] = RC_SHA1::SwapEndian(hData[SHIDX(1)]);\r
+ RTGEN_EPILOGUE\r
+}\r