From: Martin Westergaard Date: Tue, 26 Oct 2010 05:31:31 +0000 (+0200) Subject: distrrtgen_cuda code added X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=fcc09bb2d8e4fdcca60f465a04c7babf1f5e573c;p=freerainbowtables distrrtgen_cuda code added --- diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.cpp b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.cpp new file mode 100644 index 0000000..001e54a --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.cpp @@ -0,0 +1,373 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2008 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . + +// This program serves as both +// - An example BOINC application, illustrating the use of the BOINC API +// - A program for testing various features of BOINC +// +// NOTE: this file exists as both +// boinc/apps/upper_case.C +// and +// boinc_samples/example_app/uc2.C +// If you update one, please update the other! + +// The program converts a mixed-case file to upper case: +// read "in", convert to upper case, write to "out" +// +// command line options +// -run_slow: sleep 1 second after each character +// -cpu_time N: use about N CPU seconds after copying files +// -early_exit: exit(10) after 30 chars +// -early_crash: crash after 30 chars +// + +#ifdef _WIN32 +#include "boinc_win.h" +#else +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#endif + +#include +#include +#include +#include "str_util.h" +#include "util.h" +#include "filesys.h" +#include "boinc_api.h" +#include "Public.h" +// Rainbowcrack code +#include "ChainWalkContext.h" +//typedef unsigned int uint32; +//typedef unsigned __int64 uint64; +#include "rcuda.h" +#include "rcuda_ext.h" + + +using std::string; + +/* +bool early_exit = false; +bool early_crash = false; +bool early_sleep = false; +double cpu_time = 20, comp_result; +*/ +int QuickSortPartition(RainbowChainCP* pChain, int nLow, int nHigh) +{ + int nRandomIndex = nLow + ((unsigned int)rand() * (RAND_MAX + 1) + (unsigned int)rand()) % (nHigh - nLow + 1); + RainbowChainCP TempChain; + TempChain = pChain[nLow]; + pChain[nLow] = pChain[nRandomIndex]; + pChain[nRandomIndex] = TempChain; + + TempChain = pChain[nLow]; + uint64 nPivotKey = pChain[nLow].nIndexE; + while (nLow < nHigh) + { + while (nLow < nHigh && pChain[nHigh].nIndexE >= nPivotKey) + nHigh--; + pChain[nLow] = pChain[nHigh]; + while (nLow < nHigh && pChain[nLow].nIndexE <= nPivotKey) + nLow++; + pChain[nHigh] = pChain[nLow]; + } + pChain[nLow] = TempChain; + return nLow; +} + +void QuickSort(RainbowChainCP* pChain, int nLow, int nHigh) +{ + if (nLow < nHigh) + { + int nPivotLoc = QuickSortPartition(pChain, nLow, nHigh); + QuickSort(pChain, nLow, nPivotLoc - 1); + QuickSort(pChain, nPivotLoc + 1, nHigh); + } +} + +int main(int argc, char **argv) { + int retval; + double fd; + char output_path[512], chkpt_path[512]; + FILE* state; + retval = boinc_init(); + if (retval) { + fprintf(stderr, "boinc_init returned %d\n", retval); + exit(retval); + } + + + // get size of input file (used to compute fraction done) + // + //file_size(input_path, fsize); + + // See if there's a valid checkpoint file. + // If so seek input file and truncate output file + // + + + if(argc < 10) + { + fprintf(stderr, "Not enough parameters"); + return -1; + } + string sHashRoutineName, sCharsetName, sSalt, sCheckPoints; + uint32 nRainbowChainCount, nPlainLenMin, nPlainLenMax, nRainbowTableIndex, nRainbowChainLen; + uint64 nChainStart; + sHashRoutineName = argv[1]; + sCharsetName = argv[2]; + nPlainLenMin = atoi(argv[3]); + nPlainLenMax = atoi(argv[4]); + nRainbowTableIndex = atoi(argv[5]); + nRainbowChainLen = atoi(argv[6]); + nRainbowChainCount = atoi(argv[7]); +#ifdef _WIN32 + + nChainStart = _atoi64(argv[8]); + +#else + nChainStart = atoll(argv[8]); +#endif + sCheckPoints = argv[9]; + vector vCPPositions; + char *cp = strtok((char *)sCheckPoints.c_str(), ","); + while(cp != NULL) + { + vCPPositions.push_back(atoi(cp)); + cp = strtok(NULL, ","); + } + if(argc == 11) + { + sSalt = argv[10]; + } + //std::cout << "Starting ChainGenerator" << std::endl; + // Setup CChainWalkContext + //std::cout << "ChainGenerator started." << std::endl; + + if (!CChainWalkContext::SetHashRoutine(sHashRoutineName)) + { + fprintf(stderr, "hash routine %s not supported\n", sHashRoutineName.c_str()); + return 1; + } + //std::cout << "Hash routine validated" << std::endl; + + if (!CChainWalkContext::SetPlainCharset(sCharsetName, nPlainLenMin, nPlainLenMax)) + { + std::cerr << "charset " << sCharsetName << " not supported" << std::endl; + return 2; + } + //std::cout << "Plain charset validated" << std::endl; + + if (!CChainWalkContext::SetRainbowTableIndex(nRainbowTableIndex)) + { + std::cerr << "invalid rainbow table index " << nRainbowTableIndex << std::endl; + return 3; + } + //std::cout << "Rainbowtable index validated" << std::endl; + + if(sHashRoutineName == "mscache")// || sHashRoutineName == "lmchall" || sHashRoutineName == "halflmchall") + { + // Convert username to unicode + const char *szSalt = sSalt.c_str(); + int salt_length = strlen(szSalt); + unsigned char cur_salt[256]; + for (int i=0; i stPlain; + ex.Init(); +time_t tStart, tStartFinal, tEndFinal; +time_t tEnd; + tStartFinal = time(NULL); + for(int nCurrentCalculatedChains = nDataLen / 18, calcSize; nCurrentCalculatedChains < nRainbowChainCount; ) + { + fd = (double)nCurrentCalculatedChains / (double)nRainbowChainCount; + boinc_fraction_done(fd); + + cuTask.hash = ex.GetHash(); + cuTask.startIdx = nChainStart + nCurrentCalculatedChains; + cuTask.idxCount = std::min(nRainbowChainCount - nCurrentCalculatedChains, maxCalcBuffSize); + cuTask.stPlainSize = ex.IndexToStartPlain(0, stPlain); + cuTask.stPlain = &stPlain[0]; + cuTask.dimVec = ex.GetPlainDimVec(); + cuTask.dimVecSize = ex.GetPlainDimVecSize()/3; + cuTask.charSet = ex.GetCharSet(); + cuTask.charSetSize = ex.GetCharSetSize(); + cuTask.cpPositions = &vCPPositions[0]; + cuTask.cpPosSize = vCPPositions.size(); + cuTask.reduceOffset = ex.GetReduceOffset(); + cuTask.plainSpaceTotal = ex.GetPlainSpaceTotal(); + cuTask.rainbowChainLen = nRainbowChainLen; + for(int ii = 0; ii < cuTask.idxCount; ii++) { + calcBuff[2*ii] = cuTask.startIdx + ii; + calcBuff[2*ii+1] = 0; + } + + tStart = time(NULL); + + calcSize = rcuda::CalcChainsOnCUDA(&cuTask, calcBuff); + tEnd = time(NULL); + std::cerr << "CUDA time taken: " << tEnd - tStart << std::endl; + tStart = time(NULL); + if(calcSize > 0) { + nCurrentCalculatedChains += calcSize; + for(ii = 0; ii < cuTask.idxCount; ii++) { + nIndex[0] = cuTask.startIdx + ii; + nReturn = fwrite(nIndex, 1, 8, outfile); + nReturn += fwrite(calcBuff+(2*ii), 1, 8, outfile); + nReturn += fwrite(calcBuff+(2*ii+1), 1, 2, outfile); + if(nReturn != 18) { + std::cerr << "disk write fail" << std::endl; + fclose(outfile); + return 9; + } + } + } else { + std::cerr << "Calculations on CUDA failed!" << std::endl; + fclose(outfile); + return 0; + } + } + tEndFinal = time(NULL); + std::cerr << "Time taken: " << tEndFinal - tStartFinal << " secs" << std::endl; + delete [] calcBuff; +#ifdef _DEBUG + std::cout << "Generation completed" << std::endl; +#endif + fseek(outfile, 0, SEEK_SET); + nFileLen = GetFileLen(outfile); + nRainbowChainCount = nFileLen / 18; + + RainbowChainCP* pChain = (RainbowChainCP*)new unsigned char[sizeof(RainbowChainCP) * nRainbowChainCount]; + + if (pChain != NULL) + { + // Load file +#ifdef _DEBUG + std::cout << "Sorting file" << std::endl; +#endif + fseek(outfile, 0, SEEK_SET); + for(int i = 0; i < nRainbowChainCount; i++) + { + if(fread(&pChain[i], 1, 16, outfile) != 16) + { + printf("disk read fail\n"); + return 9; + } + if(fread(&pChain[i].nCheckPoint, 1, sizeof(pChain[i].nCheckPoint), outfile) != 2) + { + printf("disk read fail\n"); + return 9; + } + } + + // Sort file + QuickSort(pChain, 0, nRainbowChainCount - 1); + + // Write file + fseek(outfile, 0, SEEK_SET); + for(int i = 0; i < nRainbowChainCount; i++) + { + fwrite(&pChain[i], 1, 16, outfile); + fwrite(&pChain[i].nCheckPoint, 2, 1, outfile); + } + delete[] pChain; + } + + fclose(outfile); + + // main loop - read characters, convert to UC, write + // + + boinc_fraction_done(1); + boinc_finish(0); +} + +#ifdef _WIN32 +int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR Args, int WinMode) { + LPSTR command_line; + char* argv[100]; + int argc; + + command_line = GetCommandLine(); + argc = parse_command_line( command_line, argv ); + return main(argc, argv); +} +#endif + +const char *BOINC_RCSID_33ac47a071 = "$Id: upper_case.C 12135 2007-02-21 20:04:14Z davea $"; + diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.sln b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.sln new file mode 100644 index 0000000..1fc1879 --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "distrrtgen", "distrrtgen.vcproj", "{A3BDF5F8-4D0A-4B27-B1D9-7E77CBDA86C7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A3BDF5F8-4D0A-4B27-B1D9-7E77CBDA86C7}.Debug|Win32.ActiveCfg = Debug|Win32 + {A3BDF5F8-4D0A-4B27-B1D9-7E77CBDA86C7}.Debug|Win32.Build.0 = Debug|Win32 + {A3BDF5F8-4D0A-4B27-B1D9-7E77CBDA86C7}.Release|Win32.ActiveCfg = Release|Win32 + {A3BDF5F8-4D0A-4B27-B1D9-7E77CBDA86C7}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.vcproj b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.vcproj new file mode 100644 index 0000000..2975f17 --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/distrrtgen.vcproj @@ -0,0 +1,327 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.cu b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.cu new file mode 100644 index 0000000..e626bd4 --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.cu @@ -0,0 +1,326 @@ +#include +#include +#include "rcuda.h" + +#define GRID_X_L2 6 +#define GRID_Y_L2 6 +#define BLOCK_X_L2 7 +#define GRID_X_SIZE (1<>(32-r[i])); + ret += b; + return ret; +} + +__device__ unsigned int GG(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) { + unsigned int ret; + ret = a + ((b&d)|(c&(~d))) + ac[i] + data[SHIDX(g[i])]; + ret = (ret<>(32-r[i])); + ret += b; + return ret; +} + +__device__ unsigned int HH(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) { + unsigned int ret; + ret = a + (b^c^d) + ac[i] + data[SHIDX(g[i])]; + ret = (ret<>(32-r[i])); + ret += b; + return ret; +} + +__device__ unsigned int II(unsigned int a, unsigned int b, unsigned int c, unsigned int d, int i, const unsigned int* data) { + unsigned int ret; + ret = a + (c^(b|(~d))) + ac[i] + data[SHIDX(g[i])]; + ret = (ret<>(32-r[i])); + ret += b; + return ret; +} + +__device__ void MD5(unsigned int* dataHash) { + unsigned int a = h[0], b = h[1], c = h[2], d = h[3], x; + int ii; + + // Round 1 + for(ii = 0; ii < 16; ii++) { + x = b; + b = FF(a, b, c, d, ii, dataHash); + a = d; d = c; c = x; + } + + // Round 2 + for(; ii < 32; ii++) { + x = b; + b = GG(a, b, c, d, ii, dataHash); + a = d; d = c; c = x; + } + + // Round 3 + for(; ii < 48; ii++) { + x = b; + b = HH(a, b, c, d, ii, dataHash); + a = d; d = c; c = x; + } + + // Round 4 + for(; ii < 64; ii++) { + x = b; + b = II(a, b, c, d, ii, dataHash); + a = d; d = c; c = x; + } + + dataHash[SHIDX(0)] = a + h[0]; + dataHash[SHIDX(1)] = b + h[1]; + dataHash[SHIDX(2)] = c + h[2]; + dataHash[SHIDX(3)] = d + h[3]; +} + + +#define PLAIN_MAX_SIZE 20 + +__device__ unsigned __int64 *dataHeap; +__device__ unsigned char *plStart; +__device__ uint3 *plDimVec; +__device__ unsigned char *plChrSet; +__device__ int *plCpPos; +__device__ int plCpPosSize; +__device__ unsigned __int64 reduceOffset; +__device__ unsigned __int64 plainSpaceTotal; +__device__ unsigned __int64 rPlainSpaceTotal; + + +__global__ void RTGenMD5Kernel(unsigned int chainStart, unsigned int chainStop) { + unsigned int *hData; + uint3 dimItem; + unsigned int uiVal, uiMul, uiDiv, idx; + unsigned __int64 uiVal64, uiMul64, uiDiv64, idx64; + unsigned int nPos, size, ii, jj, kk; + unsigned int cpcheck, checkpoint; + unsigned int plain; + + __shared__ unsigned int shData[SHIDX(BLOCK_X_SIZE)]; + + if(threadIdx.x == 0) { + nPos = ((((blockIdx.y<>4)<<8)+(ii&15); + hData[SHIDX(0)] = dataHeap[nPos]; + hData[SHIDX(1)] = dataHeap[nPos]>>32; + hData[SHIDX(2)] = dataHeap[nPos+1]; + } + } + __syncthreads(); + + hData = shData + ((threadIdx.x>>4)<<8)+(threadIdx.x&15); + + idx64 = hData[SHIDX(1)]; + idx64 = (idx64<<32) | hData[SHIDX(0)]; + cpcheck = hData[SHIDX(2)]; + checkpoint = cpcheck&0x0000ffff; + cpcheck = cpcheck>>16; + + for(nPos = chainStart; nPos < chainStop; nPos++) { + // transform to the plain text + plain = 0x80; + jj = (PLAIN_MAX_SIZE>>2)+1; + for(ii = 0; idx64 > 0xfffffff0ull && ii < PLAIN_MAX_SIZE; ii++) { + uiVal64 = idx64 + plStart[ii]; + uiVal64--; + dimItem = plDimVec[ii]; + uiMul64 = (unsigned __int64)dimItem.y<<32; + idx64 = __umul64hi(uiVal64, uiMul64); + uiDiv64 = uiVal64 - idx64*(unsigned __int64)dimItem.x; + uiVal = __umulhi((unsigned int)uiDiv64, dimItem.y); + uiDiv = (unsigned int)uiDiv64 - uiVal * dimItem.x; + idx64 += uiVal; + if(uiDiv >= dimItem.x) { + uiDiv -= dimItem.x; + idx64++; + } + plain = (plain<<8) | plChrSet[dimItem.z + uiDiv]; + if((ii&3) == 2) { + hData[SHIDX(jj--)] = plain; + plain = 0; + } + } + for(idx = (unsigned int)idx64; idx != 0 && ii < PLAIN_MAX_SIZE; ii++) { + uiVal = idx + plStart[ii]; + uiVal--; + dimItem = plDimVec[ii]; + idx = __umulhi(uiVal, dimItem.y); + uiDiv = uiVal - idx*dimItem.x; + if(uiDiv >= dimItem.x) { + uiDiv -= dimItem.x; + idx++; + } + plain = (plain<<8) | plChrSet[dimItem.z + uiDiv]; + if((ii&3) == 2) { + hData[SHIDX(jj--)] = plain; + plain = 0; + } + } + + // prepare for MD5 + size = ii; + ii = ((((3-(ii&3))<<3)-1)&0x1f)+1; + plain = plain<>2)+1; plain = hData[SHIDX(jj++)], kk++) + hData[SHIDX(kk)] = (plain>>ii)|(hData[SHIDX(jj)]<<(32-ii)); + hData[SHIDX(kk)] = plain>>ii; + for(kk++; kk < 14; kk++) + hData[SHIDX(kk)] = 0; + hData[SHIDX(kk++)] = size<<3; + hData[SHIDX(kk)] = 0; + + // hash + MD5(hData); + + idx64 = hData[SHIDX(1)]; + idx64 = (idx64<<32) | hData[SHIDX(0)]; + idx64 += reduceOffset + nPos; + uiDiv64 = __umul64hi(idx64, rPlainSpaceTotal); + idx64 -= uiDiv64*plainSpaceTotal; + if(idx64 >= plainSpaceTotal) + idx64 -= plainSpaceTotal; + + if(cpcheck < plCpPosSize && nPos == plCpPos[cpcheck]) { + checkpoint |= ((unsigned int)idx64&1) << cpcheck; + cpcheck++; + } + } + + hData[SHIDX(0)] = idx64; + hData[SHIDX(1)] = idx64>>32; + hData[SHIDX(2)] = (cpcheck<<16)|(checkpoint&0x0000ffff); + __syncthreads(); + + if(threadIdx.x == 0) { + nPos = ((((blockIdx.y<>4)<<8)+(ii&15); + dataHeap[nPos] = ((unsigned __int64)hData[SHIDX(1)]<<32)|(unsigned __int64)hData[SHIDX(0)]; + dataHeap[nPos+1] = hData[SHIDX(2)]; + } + } + __syncthreads(); +} + + +extern "C" int CalcChainsOnCUDA(const rcuda::RCudaTask* task, unsigned __int64 *resultBuff) { + cudaError_t cuErr; + char buff[PLAIN_MAX_SIZE]; + unsigned __int64 *data; + unsigned char *stPlain; + uint3 *dimVec; + unsigned char *charSet; + int *cpPos; + unsigned __int64 uiVal64; + time_t tStart, tEnd; + if(task->hash != rcuda::RHASH_MD5) + return 0; + + memset(buff, 0, PLAIN_MAX_SIZE); + cudaMalloc((void**)&data, task->idxCount*2*sizeof(unsigned __int64)); + cudaMalloc((void**)&stPlain, PLAIN_MAX_SIZE); + cudaMalloc((void**)&dimVec, task->dimVecSize*sizeof(uint3)); + cudaMalloc((void**)&charSet, task->charSetSize); + cudaMalloc((void**)&cpPos, task->cpPosSize*sizeof(int)); + + cudaMemcpy(data, resultBuff, task->idxCount*2*sizeof(unsigned __int64), cudaMemcpyHostToDevice); + cudaMemcpy(stPlain, buff, PLAIN_MAX_SIZE, cudaMemcpyHostToDevice); + cudaMemcpy(stPlain, task->stPlain, min(task->stPlainSize, PLAIN_MAX_SIZE), cudaMemcpyHostToDevice); + cudaMemcpy(dimVec, task->dimVec, task->dimVecSize*sizeof(uint3), cudaMemcpyHostToDevice); + cudaMemcpy(charSet, task->charSet, task->charSetSize, cudaMemcpyHostToDevice); + cudaMemcpy(cpPos, task->cpPositions, task->cpPosSize*sizeof(int), cudaMemcpyHostToDevice); + + cudaMemcpyToSymbol(dataHeap, &data, sizeof(data)); + cudaMemcpyToSymbol(plStart, &stPlain, sizeof(stPlain)); + cudaMemcpyToSymbol(plDimVec, &dimVec, sizeof(dimVec)); + cudaMemcpyToSymbol(plChrSet, &charSet, sizeof(charSet)); + cudaMemcpyToSymbol(plCpPos, &cpPos, sizeof(cpPos)); + cudaMemcpyToSymbol(plCpPosSize, &task->cpPosSize, sizeof(task->cpPosSize)); + cudaMemcpyToSymbol(reduceOffset, &task->reduceOffset, sizeof(task->reduceOffset)); + cudaMemcpyToSymbol(plainSpaceTotal, &task->plainSpaceTotal, sizeof(task->plainSpaceTotal)); + uiVal64 = (unsigned __int64)-1/task->plainSpaceTotal; + cudaMemcpyToSymbol(rPlainSpaceTotal, &uiVal64, sizeof(uiVal64)); + + int grSizeX = (task->idxCount-1)/BLOCK_X_SIZE + 1; + int grSizeY = (grSizeX-1)/GRID_X_SIZE + 1; + grSizeX = GRID_X_SIZE; + dim3 numBlocks(grSizeX, grSizeY); + cuErr = cudaSuccess; + tStart = time(NULL); + for(int idx = 0; idx < task->rainbowChainLen-1 && cuErr == cudaSuccess; idx+=KERN_CHAIN_SIZE) { + RTGenMD5Kernel<<>>(idx, min(idx+KERN_CHAIN_SIZE, task->rainbowChainLen-1)); + cuErr = cudaGetLastError(); + if(cuErr == cudaSuccess) + cuErr = cudaThreadSynchronize(); + + } + tEnd = time(NULL); + fprintf(stderr, "Kernel run time: %i\n", (tEnd - tStart)); + + if(cuErr == cudaSuccess) + cudaMemcpy(resultBuff, data, task->idxCount*2*sizeof(unsigned __int64), cudaMemcpyDeviceToHost); + else + fprintf(stderr, "Error happened: %d (%s)\n", cuErr, cudaGetErrorString(cuErr)); + + cudaFree(cpPos); + cudaFree(charSet); + cudaFree(dimVec); + cudaFree(stPlain); + cudaFree(data); + return cuErr==cudaSuccess? task->idxCount : -1; +} + +extern "C" int GetChainsBufferSize(int minSize) { + int grSizeX = (minSize-1)/BLOCK_X_SIZE + 1; + int grSizeY = (grSizeX-1)/GRID_X_SIZE + 1; + grSizeX = GRID_X_SIZE; + return grSizeX*grSizeY*BLOCK_X_SIZE; +} diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.h b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.h new file mode 100644 index 0000000..eeeac69 --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda.h @@ -0,0 +1,30 @@ +#ifndef RCUDA_H +#define RCUDA_H + +namespace rcuda { + +enum RHash { RHASH_UNDEF = -1, RHASH_LM, RHASH_MD4, RHASH_MD5, RHASH_SHA1, RHASH_NTLM }; + +struct RCudaTask { + RHash hash; + unsigned __int64 startIdx; + int idxCount; + unsigned char* stPlain; + int stPlainSize; + unsigned int* dimVec; + int dimVecSize; + unsigned char* charSet; + int charSetSize; + int *cpPositions; + int cpPosSize; + unsigned __int64 reduceOffset; + unsigned __int64 plainSpaceTotal; + unsigned int rainbowChainLen; +}; + +extern "C" int CalcChainsOnCUDA(const RCudaTask* task, unsigned __int64 *resultBuff); +extern "C" int GetChainsBufferSize(int minSize); + +} + +#endif //RCUDA_H diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.cpp b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.cpp new file mode 100644 index 0000000..86f31ec --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.cpp @@ -0,0 +1,85 @@ +//============================================================================ +// Name : rcuda_ext.cpp +// Author : Jan Kyska +// Version : 0.9 +// Description : A code to access internals of the CChainWalkContext +// for the CUDA generator of FreeRainbowTables +//============================================================================ + +#include "rcuda_ext.h" +#include +#include +#include +#include +#include + +CudaCWCExtender::CudaCWCExtender(CChainWalkContext *cwc) { + this->cwc = cwc; + hash = rcuda::RHASH_UNDEF; +} + +void CudaCWCExtender::Init(void) { + std::string hashName; + int ii, jj; + + plainDimVec.clear(); + plainCharSet.clear(); + + hashName = CChainWalkContext::m_sHashRoutineName; + std::transform(hashName.begin(), hashName.end(), hashName.begin(), tolower); + if(hashName.compare("lm") == 0) + hash = rcuda::RHASH_LM; + else if(hashName.compare("md4") == 0) + hash = rcuda::RHASH_MD4; + else if(hashName.compare("md5") == 0) + hash = rcuda::RHASH_MD5; + else if(hashName.compare("sha1") == 0) + hash = rcuda::RHASH_SHA1; + else if(hashName.compare("ntlm") == 0) + hash = rcuda::RHASH_NTLM; + else + hash = rcuda::RHASH_UNDEF; + + for(ii = 0; ii < (int)CChainWalkContext::m_vCharset.size(); ii++) { + stCharset &chs = CChainWalkContext::m_vCharset[ii]; + int chSetOffset = plainCharSet.size(); + plainCharSet.append((char*)chs.m_PlainCharset, chs.m_nPlainCharsetLen); + for(jj = 0; jj < chs.m_nPlainLenMax; jj++) { + plainDimVec.push_back((unsigned int)chs.m_nPlainCharsetLen); + plainDimVec.push_back((unsigned int)-1/(unsigned int)chs.m_nPlainCharsetLen); + plainDimVec.push_back((unsigned int)chSetOffset); + } + } +} + +int CudaCWCExtender::IndexToStartPlain(const uint64 nIndex, std::vector& stPlain) { + int nPlainLen, nCharsetLen; + int ii, jj; + + stPlain.clear(); + stPlain.reserve(0x20); + nPlainLen = 0; + for(ii = CChainWalkContext::m_nPlainLenMaxTotal - 1; ii >= CChainWalkContext::m_nPlainLenMinTotal - 1; ii--) { + if(nIndex >= CChainWalkContext::m_nPlainSpaceUpToX[ii]) { + nPlainLen = ii + 1; + break; + } + } + if(nPlainLen == 0) + nPlainLen = CChainWalkContext::m_nPlainLenMinTotal; + uint64 nIndexOfX = nIndex - CChainWalkContext::m_nPlainSpaceUpToX[nPlainLen - 1]; + + // Slow version, but never mind + for(ii = nPlainLen - 1; ii >= 0; ii--) { + nCharsetLen = 0; + for(jj = 0; jj < (int)CChainWalkContext::m_vCharset.size(); jj++) { + stCharset &chs = CChainWalkContext::m_vCharset[jj]; + nCharsetLen += chs.m_nPlainLenMax; + if(ii < nCharsetLen) { // We found the correct charset + stPlain.push_back(nIndexOfX % chs.m_nPlainCharsetLen + 1); + nIndexOfX /= chs.m_nPlainCharsetLen; + } + } + } + return stPlain.size(); +} diff --git a/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.h b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.h new file mode 100644 index 0000000..93c2d59 --- /dev/null +++ b/BOINC software/BOINC client apps/distrrtgen_cuda/rcuda_ext.h @@ -0,0 +1,29 @@ +#ifndef RCUDA_EXT_H +#define RCUDA_EXT_H + +#include "rcuda.h" +#include "ChainWalkContext.h" + +class CudaCWCExtender { +public: + CudaCWCExtender(CChainWalkContext *cwc); + void Init(void); + int IndexToStartPlain(const uint64 nIndex, std::vector& stPlain); + + inline rcuda::RHash GetHash(void) { return hash; } + inline unsigned int* GetPlainDimVec(void) { return &plainDimVec[0]; } + inline int GetPlainDimVecSize(void) { return plainDimVec.size(); } + inline unsigned char* GetCharSet(void) { return (unsigned char*)plainCharSet.c_str(); } + inline int GetCharSetSize(void) { return plainCharSet.size(); } + inline uint64 GetPlainSpaceTotal(void) { return CChainWalkContext::m_nPlainSpaceTotal; } + inline uint64 GetRainbowTableIndex(void) { return CChainWalkContext::m_nRainbowTableIndex; } + inline uint64 GetReduceOffset(void) { return CChainWalkContext::m_nReduceOffset; } + +protected: + CChainWalkContext *cwc; + rcuda::RHash hash; + std::vector plainDimVec; + std::string plainCharSet; +}; + +#endif //RCUDA_EXT_H