Me gustaría compartir con todos vosotros mis primeros pasos en CUDA.
Usé PyCUDA y algo de código en C. Se trata de un crackeador de MD5. No es muy rápido, pero puede servir como ejemplo ilustrativo para los que deis vuestros primeros pasos con CUDA y PyCUDA.
Es muy mejorable, pero ahí está. El algoritmo MD5 del subprograma en C es una variante más lenta que la implementación de referencia de RSA (RFC 1321), pero más compacta. El programa es muy sencillo; consultad la documentación de CUDA y la de PyCUDA para más información.
Para cualquier consulta, no dudéis en comentar.
El código, a continuación
from pycuda import gpuarray
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import numpy, re, sys, time
from pycuda.autoinit import context
mod = SourceModule("""
/*
* md5.c
*
* Created on: 22/11/2014
* Author: javier garcia glez.
*
* MD5 message-digest implementation
*
* Version 0.1
*
*/
/* Standard headers requested by the original listing. The second directive
 * carried stray closing-tag text after the header name; it is removed so
 * each directive names exactly one header. */
#include <stdio.h>
#include <stdlib.h>
/* Integer aliases used by the MD5 code below. */
typedef unsigned char  byte;    /* one octet of message data */
typedef unsigned short nibble;  /* not referenced by the code in this listing */
typedef unsigned int   word;    /* digest state / message word; rotated as a 32-bit value in md5 */
typedef unsigned long  dword;   /* not referenced by the code in this listing */
/* Additive constant for each of the 64 steps; md5 adds K[i] in step i. */
__device__ const word K[64] = {
    /* steps 0-15 */
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    /* steps 16-31 */
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    /* steps 32-47 */
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    /* steps 48-63 */
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
/* Left-rotation amount for each of the 64 steps; md5 rotates by s[i] in step i. */
__device__ const byte s[64] = {
    7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,   /* steps 0-15  */
    5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,   /* steps 16-31 */
    4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,   /* steps 32-47 */
    6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21    /* steps 48-63 */
};
/* Sixteen consecutive words, the amount md5 consumes per outer iteration. */
struct dchunks {
    word data[16];
};
typedef struct dchunks wchunk;

/* A word that can also be addressed one byte at a time. */
union slength {
    word wdata;
    byte bdata[4];
};
typedef union slength ulength;
/*
 * Rebuild each word in place so that the byte stored at its lowest address
 * becomes the most significant byte of the word value.
 *
 * bf : first word to convert
 * l  : number of consecutive words to convert; 0 is treated as 1
 */
__device__ void byteReverse(word *bf, word l) {
    word remaining = (l > 0) ? l : 1;
    byte *cursor = (byte *) bf;

    while (remaining-- > 0) {
        word swapped = ((word) cursor[0] << 24) | ((word) cursor[1] << 16)
                     | ((word) cursor[2] << 8)  |  (word) cursor[3];
        *(word *) cursor = swapped;
        cursor += 4;
    }
}
/*
 * md5: each thread hashes one fixed-length candidate and stores its digest.
 *
 * Launch layout: only threadIdx.x is used for indexing, so the kernel
 * expects a single one-dimensional block; thread t handles candidate t.
 * No shared memory is used.
 *
 * object : candidates packed back to back, one slot of (pwlen + 1) bytes
 *          per thread; only the first pwlen bytes of a slot are hashed
 * pa     : output, four words per thread (A, B, C, D), each passed through
 *          byteReverse before being stored
 * pwlen  : length in bytes of every candidate
 *
 * If pwlen is negative, or the padded message would not fit in the
 * per-thread work buffer, the thread returns without writing to pa.
 *
 * Fixes relative to the previous version:
 *  - a message length of 56 (mod 64) produced a pad offset of 0, so the
 *    length word overwrote the 0x80 marker; such lengths now spill into an
 *    extra 64-byte block as RFC 1321 requires;
 *  - the work buffer is declared as an array of words, so the word-sized
 *    reads and the length store are naturally aligned;
 *  - only the bytes that will actually be hashed are cleared;
 *  - unused locals and a barrier that protected no shared data were dropped.
 */
__global__ void md5(byte *object, word *pa, int pwlen) {
    enum { WORK_BYTES = 2048 };

    const int idx = threadIdx.x;
    word work[WORK_BYTES / sizeof(word)];
    byte *pnobj = (byte *) work;
    const word *pw;
    word vA = 0x67452301;
    word vB = 0xefcdab89;
    word vC = 0x98badcfe;
    word vD = 0x10325476;
    word a, b, c, d, f, g, i, temp, temp2;
    word len, mod, tmppad, chunks, chunk;

    if (pwlen < 0)
        return;
    len = (word) pwlen;

    /* tmppad is the distance from the end of the message to the length
     * field, which always sits at offset 56 of the final 64-byte block.
     * At least one pad byte (0x80) must precede it. */
    mod = len % 64;
    tmppad = (mod >= 56) ? (120 - mod) : (56 - mod);
    if (len + tmppad + 8 > (word) WORK_BYTES)
        return;
    chunks = (len + tmppad + 8) / 64;

    /* Build the padded message: data, 0x80, zeros, bit length. */
    memset(pnobj, 0, chunks * 64);
    memcpy(pnobj, object + idx * (pwlen + 1), len);
    pnobj[len] = 0x80;
    /* len + tmppad is a multiple of 4, so this is a whole-word slot; the
     * upper half of the 64-bit length stays zero from the memset. */
    work[(len + tmppad) / sizeof(word)] = len << 3;

    for (chunk = 0; chunk < chunks; chunk++) {
        pw = work + chunk * 16;
        a = vA;
        b = vB;
        c = vC;
        d = vD;
        for (i = 0; i < 64; i++) {
            if (i < 16) {
                f = (b & c) | ((~b) & d);
                g = i;
            } else if (i < 32) {
                f = (d & b) | ((~d) & c);
                g = (5 * i + 1) % 16;
            } else if (i < 48) {
                f = b ^ c ^ d;
                g = (3 * i + 5) % 16;
            } else {
                f = c ^ (b | (~d));
                g = (7 * i) % 16;
            }
            temp = d;
            d = c;
            c = b;
            temp2 = a + f + K[i] + pw[g];
            /* s[i] is never 0, so the right shift count stays below 32 */
            b += (temp2 << s[i]) | (temp2 >> (32 - s[i]));
            a = temp;
        }
        vA += a;
        vB += b;
        vC += c;
        vD += d;
    }

    byteReverse(&vA, 1);
    byteReverse(&vB, 1);
    byteReverse(&vC, 1);
    byteReverse(&vD, 1);

    pa[idx * 4]     = vA;
    pa[idx * 4 + 1] = vB;
    pa[idx * 4 + 2] = vC;
    pa[idx * 4 + 3] = vD;
}
""", 'nvcc', [], keep=True, no_extern_c=False, arch=None, code=None, cache_dir=None)
# The compiler name was written with doubled quotes, which is not valid
# Python; it is now an ordinary string literal.
# --- Search configuration and shared state ---

# Digest being searched for, as 32 lowercase hex digits. The commented-out
# values are alternative targets kept for quick switching.
#HASH="900150983cd24fb0d6963f7d28e17f72"
#HASH="0cc175b9c0f1b6a831c399e269772661"
#HASH="e1671797c52e15f763380b45e841ec32"
#HASH="0bf375b600c339bedc9b6f104b64ea66"
#HASH = "32e6eb142601d344c188b94a8fe5f2cd"
HASH = "5d41402abc4b2a76b9719d911017c592"

# Largest number of candidates sent to the GPU in one launch (used as the
# block size by the driver loop).
MT = 512
FILLPASSLIMIT = 4 * MT

# Candidates waiting to be hashed; filled by the generator functions and
# drained by the driver loop.
PWLIST = []

# npasswords counts candidates already hashed; npass is the generator's
# own counter.
npasswords = npass = 0
def getHash(a):
    """Render the first four integers of ``a`` as one hex string.

    Each value is formatted as at least eight lowercase hex digits,
    zero-padded, and the four pieces are concatenated in order. Elements
    past the fourth are ignored; fewer than four raises IndexError.
    """
    pieces = ["%08x" % a[k] for k in (0, 1, 2, 3)]
    return "".join(pieces)
def checkHash(a, passw):
    """Look for the target digest among a batch of results; exit on a hit.

    a     -- flat list of digest words, four consecutive entries per candidate
    passw -- candidates in the same order; passw[i] is a sequence of
             one-character strings

    When the hex form of a digest matches the pattern HASH, the candidate
    and run statistics are printed and the process ends via sys.exit(0).
    Returns None when nothing matches.

    The elapsed time is reported as measured. It was previously divided by
    100 while the rate on the same line used the undivided value, so the two
    figures disagreed. A zero elapsed time no longer raises
    ZeroDivisionError.
    """
    rhash = re.compile(HASH)
    for i in range(len(a) // 4):
        if rhash.match(getHash(a[i * 4:i * 4 + 4])):
            print("found pass: %s" % "".join(passw[i]))
            # Same clock as the module-level tstart, so the difference is meaningful.
            elapsed = time.clock() - tstart
            rate = npasswords / elapsed if elapsed > 0 else 0
            print("%d passwords checked in %03.2f, %03d c/s" % (npasswords, elapsed, rate))
            sys.exit(0)
def getChars(file):
    """Read the candidate alphabet from ``file``, one element per character.

    The file is read in binary mode and closed before returning. A single
    trailing line terminator (LF or CRLF) is removed if present. Previously
    the last byte was dropped unconditionally, which lost a real character
    when the file had no final newline and left a stray CR for CRLF files.
    """
    with open(file, "rb") as handle:
        raw = handle.read()
    if raw.endswith(b"\r\n"):
        raw = raw[:-2]
    elif raw.endswith(b"\n"):
        raw = raw[:-1]
    return list(raw)
def getIndex(data):
    """Return where the last element of ``data`` sits in the global ``chars``.

    Raises ValueError (from list.index) when that element is not in ``chars``.
    """
    tail = data[-1]
    position = chars.index(tail)
    return position
# incData: try to advance the character on top of `data` to the next entry
# of the global alphabet `chars`, appending candidates to PWLIST as it goes.
#
#   data -- list used as a stack; its last element is the character to advance
#   PILA -- list of the characters that precede it in the candidate
#
# Every call increments the global counter npass. The visible return values
# are -1, 0, or the new alphabet position of the advanced character.
#
# NOTE(review): this listing lost its indentation, so the exact nesting of
# `return 0` and of the final `else:` cannot be read off the text -- confirm
# against the original araMD5.py before changing any logic here.
# NOTE(review): ''\\0'' looks like an escaping artifact of the one-character
# terminator literal -- confirm.
def incData(data, PILA):
global PWLIST, npass
# position that follows the current top character in the alphabet
index = getIndex(data[-1])+1
npass+=1
# the top character was already the last alphabet entry
if index>(len(chars)-1):
# nothing is left on PILA to carry into
if len(PILA)==0:
PWLIST.append(list("".join(PILA)+"".join(reversed(data))+''\\0''))
return -1
else:
PWLIST.append(list("".join(PILA)+"".join(reversed(data))+''\\0''))
# restart this position at the first alphabet entry
if PILA[-1]!=chars[-1]:
data[-1]=chars[0]
return 0
else:
# ordinary step: replace the top character with its successor and record
# the resulting candidate (PILA followed by data in reverse order)
data[-1]=chars[index]
PWLIST.append(list("".join(PILA)+"".join(reversed(data))+''\\0''))
return index
# getPasswordsRow3: queue candidates derived from the stack PILA into PWLIST.
#
# Resets the global counter npass to 1 and records PILA itself (with a
# terminator appended) as the first candidate. It then moves characters
# between PILA and a scratch stack `data` while calling incData.
# Both visible return statements yield the pair (len(PILA), PILA).
#
# NOTE(review): indentation was lost in this listing, and the loop header
# `while npass0:` is not valid Python -- text between a `<` and a `>` appears
# to have been dropped. FILLPASSLIMIT is defined but never referenced in the
# visible code, so the bound was presumably `npass<FILLPASSLIMIT`; confirm
# against the original araMD5.py before relying on this function.
# NOTE(review): ''\\0'' looks like an escaping artifact of the one-character
# terminator literal -- confirm.
def getPasswordsRow3(PILA):
global PWLIST, npass
data=[]
npass=1
PWLIST.append(list("".join(PILA)+''\\0''))
while npass0:
# take the last character off PILA and try to advance it
data.append(PILA.pop())
r=incData(data, PILA)
else:
# push everything held in data back onto PILA
while len(data)>0:
PILA.append(data.pop())
else:
# both branches return the same value
if r==-1:
return len(PILA), PILA
else:
return len(PILA), PILA
def fillList2(chars, PILA):
    """Refill PWLIST starting from candidate ``PILA``.

    chars -- alphabet as a sequence of one-character strings
    PILA  -- current candidate as a list of characters (may be empty)

    When PILA consists only of the last alphabet character, which includes
    the empty list, the search moves on to the next length: PILA is replaced
    by one more character than before, all set to the first alphabet entry.
    The actual generation is delegated to getPasswordsRow3, whose result is
    returned unchanged.

    The unused ``global`` declaration and the dead local ``n`` were removed.
    """
    if PILA == list(chars[-1] * len(PILA)):
        PILA = list(chars[0] * (len(PILA) + 1))
    return getPasswordsRow3(PILA)
# Driver: generate candidates on the host, hash them on the GPU in batches
# of at most MT, and let checkHash end the process when the target is found.
cuda.init()
chars = getChars("chars.txt")
length = 0
current_pass = 0
func = mod.get_function("md5")
tstart = time.clock()
nfillpasswords = 0
passwz = list()
while True:
    # Refill the work list only once it has been completely drained.
    if len(PWLIST) == 0:
        nfillpasswords = 0
        length, passwz = fillList2(chars, passwz)

    # One launch handles M candidates; a short list is sent as a single batch.
    M = min(len(PWLIST), MT)
    batches = len(PWLIST) // M
    for i in range(batches):
        batch = PWLIST[i * M:i * M + M]
        # One row per candidate, one byte per character (terminator included).
        bpassw = numpy.array(batch, 'S1')
        pbpassw = cuda.mem_alloc(bpassw.nbytes)
        cuda.memcpy_htod(pbpassw, bpassw)
        a = gpuarray.zeros((M * 4), numpy.uint32)
        # Progress line rewritten in place via carriage returns.
        sys.stdout.write("\rchecking %s to %s %d threads\r" % (
            "".join(batch[0])[:-1], "".join(batch[-1])[:-1], M))
        sys.stdout.flush()
        # The kernel indexes by threadIdx.x only, hence a single block of M threads.
        func(pbpassw, a, numpy.int32(len(passwz)), block=(M, 1, 1))
        context.synchronize()
        adata = a.get()
        npasswords += M
        checkHash(adata.tolist(), bpassw.tolist())
    # Drop what was hashed; a remainder smaller than M stays for the next pass.
    del PWLIST[:batches * M]
# Not reached: the loop above only ends through sys.exit() inside checkHash.
context.pop()
El código:
araMD5.py
blog comments powered by